| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Display health information on commit queue. | |
| 3 | |
| 4 import Queue | |
| 5 import multiprocessing | |
| 6 import os | |
| 7 import re | |
| 8 import subprocess | |
| 9 import threading | |
| 10 import time | |
| 11 | |
# Commit queue log files; each one gets a "last modified" freshness check.
CQ_LOGS = [
    '/b/commit-queue/logs-chromium/commit_queue.log',
    '/b/commit-queue/logs-chromium_deps/commit_queue.log',
    '/b/commit-queue/logs-nacl/commit_queue.log',
    '/b/commit-queue/logs-skia/commit_queue.log',
    '/b/commit-queue/logs-tools/commit_queue.log',
]
| 17 | |
| 18 | |
def call(args, timeout=None, shell=False):
    """Runs a command and returns (code, stdout, stderr).

    The command runs in a daemon worker thread so the caller can stop waiting
    after |timeout| seconds.

    Args:
      args: Command to run, passed straight to subprocess.Popen.
      timeout: Seconds to wait before giving up. A falsy value (None or 0)
          means wait for as long as the command takes.
      shell: Passed straight to subprocess.Popen.

    Returns:
      A (code, stdout, stderr) triple. If the timeout expires first, the
      triple is (618, '', 'Process timed out.') and the child process is
      terminated (then killed if it is still alive half a second later).

    Raises:
      Whatever subprocess.Popen raised if the process could not be started
      (for example OSError for a missing executable).
    """
    def _run_proc(args, output):
        try:
            proc = subprocess.Popen(
                args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE, shell=shell)
        except Exception as e:
            # Forward the failure; otherwise the caller would block forever
            # waiting for a Popen object that never arrives.
            output.put(e)
            return
        output.put(proc)
        # communicate() drains both pipes and closes stdin while waiting.
        # Calling wait() first deadlocks as soon as the child fills a pipe
        # buffer or blocks reading stdin.
        out, err = proc.communicate()
        output.put((proc.returncode, out, err))

    def _timer(output, timeout):
        time.sleep(timeout)
        output.put([618, '', 'Process timed out.'])

    output = Queue.Queue()
    thr = threading.Thread(target=_run_proc, args=[args, output])
    thr.daemon = True
    thr.start()
    # First item passed through output is the Popen object, or the exception
    # raised while trying to create it.
    proc = output.get()
    if isinstance(proc, Exception):
        raise proc

    # Wait for process to finish, or timeout.
    if timeout:
        timer_thread = threading.Thread(target=_timer, args=[output, timeout])
        timer_thread.daemon = True
        timer_thread.start()

    # Get the first output that comes out, which is either an error from
    # _timer() or the desired output from the process.
    code, out, err = output.get()
    if code == 618:
        # Kill the child process if it timed out.
        try:
            proc.terminate()
            time.sleep(0.5)
            if proc.poll() is None:
                proc.kill()
        except OSError:
            # The process exited on its own between the timeout and the kill.
            pass

    return code, out, err
| 59 | |
def test_num_proc_factory(proc_name):
    """Builds a health check that counts processes matching |proc_name|.

    Args:
      proc_name: Pattern handed to `pgrep` (interpolated into a shell command,
          so it must come from trusted configuration only).

    Returns:
      A zero-argument function returning (code, message): code 0 with an
      'OK' message when fewer than 300 matching processes are found, code 1
      with a 'FAIL' message otherwise or when pgrep could not be run.
    """
    def test_num_proc():
        cmd = 'pgrep %s' % proc_name
        code, out, _ = call(cmd, 15, True)
        if code == 618:
            # Without this, a timeout yields empty output and would be
            # reported as a healthy "OK - 0".
            return (1, 'FAIL - Process timed out.')
        if code not in (0, 1):
            # pgrep exits 0 on a match and 1 on no match; anything else means
            # the count below would be meaningless.
            return (1, 'FAIL - pgrep exited with code %d.' % code)
        numproc = len(out.splitlines())
        if numproc < 300:
            return (0, 'OK - %d' % numproc)
        return (1, 'FAIL - %d. This CQ is probably overloaded.' % numproc)
    return test_num_proc
| 70 | |
def test_load():
    """Checks the machine load reported by `uptime` against the CPU count.

    The load value is the last decimal number on the final line of `uptime`
    output (presumably the 15-minute load average -- confirm against the
    host's uptime format).

    Returns:
      A (code, message) pair: code 0 with an 'OK' message when the load is
      below multiprocessing.cpu_count(), code 1 with a 'FAIL' message when it
      is not, when `uptime` timed out, or when no load value could be parsed.
    """
    code, out, _ = call('uptime', 15, True)
    if code == 618:
        return (1, 'FAIL - Process timed out.')

    cpuload_m = re.search(r'(\d+\.\d+)\s*$', out)
    if not cpuload_m:
        return (1, 'FAIL - Can\'t find cpu load: %s' % out)

    cpuload = float(cpuload_m.group(1))
    # '%.2f' prints two decimals; the previous '%2f' was only a minimum field
    # width and printed the default six decimals.
    if cpuload < multiprocessing.cpu_count():
        return (0, 'OK - %.2f' % cpuload)
    return (1, 'FAIL - %.2f. This CQ is probably overloaded.' % cpuload)
| 85 | |
| 86 def test_log_mod_time_factory(name): | |
| 87 def test_log_mod_time(): | |
| 88 if not os.path.exists(name): | |
| 89 return (1, 'FAIL - %s does not exist' % name) | |
| 90 time_since_modified = time.time() - os.path.getmtime(name) | |
| 91 if time_since_modified > 120.0: | |
| 92 return (1, 'FAIL - %d seconds ago.' % time_since_modified) | |
| 93 return (0, 'OK - %d seconds ago.' % time_since_modified) | |
| 94 return test_log_mod_time | |
| 95 | |
# Registry of health checks as (description, zero-argument callable) pairs.
# Each callable returns a (code, message) tuple.
tests = []
tests.append(('number of python_runtime procs',
              test_num_proc_factory('_python_runtime')))
tests.append(('cpu load', test_load))
# One freshness check per commit queue log.
for log_name in CQ_LOGS:
    tests += [('%s last modified' % log_name,
               test_log_mod_time_factory(log_name))]
| 103 | |
def main():
    """Runs every registered check and prints the results.

    Prints one '<name>: <message>' line per entry in |tests|, followed by a
    'status: <n>' line where <n> is the sum of the codes the checks returned.
    """
    total = 0
    for label, check in tests:
        status, message = check()
        total += status
        # A single parenthesised argument prints the same text whether print
        # is a statement or a function.
        print('%s: %s' % (label, message))
    print('status: %d' % total)


if __name__ == '__main__':
    main()
| OLD | NEW |