| OLD | NEW | 
|---|
|  | (Empty) | 
| 1 #!/usr/bin/env python |  | 
| 2 # Display health information on commit queue. |  | 
| 3 |  | 
| 4 import Queue |  | 
| 5 import multiprocessing |  | 
| 6 import os |  | 
| 7 import re |  | 
| 8 import subprocess |  | 
| 9 import threading |  | 
| 10 import time |  | 
| 11 |  | 
# Commit queue log files; each one gets a "last modified" health check.
CQ_LOGS = [
    '/b/commit-queue/logs-chromium/commit_queue.log',
    '/b/commit-queue/logs-chromium_deps/commit_queue.log',
    '/b/commit-queue/logs-nacl/commit_queue.log',
    '/b/commit-queue/logs-skia/commit_queue.log',
    '/b/commit-queue/logs-tools/commit_queue.log',
]
| 17 |  | 
| 18 |  | 
def call(args, timeout=None, shell=False):
  """Runs a command and returns (code, stdout, stderr).

  Args:
    args: Command to run; passed unchanged to subprocess.Popen.
    timeout: Seconds to wait for the command. None, 0 or a negative value
        waits indefinitely.
    shell: Passed unchanged to subprocess.Popen.

  Returns:
    A (code, stdout, stderr) tuple. If the timeout expires first the child is
    terminated (then killed if still alive half a second later) and the tuple
    is (618, '', 'Process timed out.').

  Raises:
    Whatever subprocess.Popen raises when the command cannot be started.
  """
  # Start the child in the calling thread so that a launch failure propagates
  # to the caller instead of leaving it blocked on the queue forever.
  proc = subprocess.Popen(
      args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
      stderr=subprocess.PIPE, shell=shell)
  output = Queue.Queue()

  def _collect():
    # communicate() closes stdin and drains stdout/stderr while waiting.
    # Calling wait() first can deadlock once the child fills a pipe buffer.
    out, err = proc.communicate()
    output.put((proc.returncode, out, err))

  collector = threading.Thread(target=_collect)
  collector.daemon = True
  collector.start()

  # Callers recognize a timeout by the 618 code; locally it is recognized by
  # identity so that a real exit status of 618 is never mistaken for one.
  timed_out = (618, '', 'Process timed out.')
  timer = None
  if timeout and timeout > 0:
    timer = threading.Timer(timeout, output.put, [timed_out])
    timer.daemon = True
    timer.start()

  # Whichever arrives first wins: the process result or the timeout marker.
  result = output.get()
  if timer is not None:
    timer.cancel()

  if result is timed_out:
    # Kill the child process if it timed out.
    try:
      proc.terminate()
      time.sleep(0.5)
      if proc.poll() is None:
        proc.kill()
    except OSError:
      # The child exited on its own between the timeout and the signal.
      pass

  return result
| 59 |  | 
def test_num_proc_factory(proc_name):
  """Builds a check that counts the processes pgrep finds for |proc_name|.

  Args:
    proc_name: Pattern interpolated into the `pgrep` command line.

  Returns:
    A zero-argument function returning (code, message). code is 0 when fewer
    than 300 processes matched and 1 when 300 or more matched, when pgrep timed
    out, or when pgrep itself failed.
  """
  def test_num_proc():
    cmd = 'pgrep %s' % proc_name
    code, out, _ = call(cmd, 15, True)
    if code == 618:
      # call() reports a timeout with code 618; without this check the empty
      # output would be counted as zero processes and reported as OK.
      return (1, 'FAIL - Process timed out.')
    if code not in (0, 1):
      # pgrep exits 0 when something matched and 1 when nothing matched; any
      # other status means the count below would be meaningless.
      return (1, 'FAIL - pgrep exited with code %d.' % code)

    numproc = len(out.splitlines())
    if numproc < 300:
      return (0, 'OK - %d' % numproc)
    return (1, 'FAIL - %d.  This CQ is probably overloaded.' % numproc)
  return test_num_proc
| 70 |  | 
def test_load():
  """Checks that the load reported by `uptime` is below the CPU count.

  Returns:
    (code, message). code is 0 when the parsed load is lower than
    multiprocessing.cpu_count(), and 1 when it is not, when uptime timed out,
    or when no load figure could be parsed from its output.
  """
  code, out, _ = call('uptime', 15, True)
  if code == 618:
    return (1, 'FAIL - Process timed out.')

  # The pattern is anchored at the end of the output, so it captures the last
  # decimal number uptime prints (presumably the 15-minute average -- confirm
  # against the uptime implementation on the CQ hosts).
  cpuload_m = re.search(r'(\d+\.\d+)\s*$', out)
  if not cpuload_m:
    return (1, 'FAIL - Can\'t find cpu load: %s' % out)

  cpuload = float(cpuload_m.group(1))
  # '%.2f' gives two decimals; the former '%2f' was only a minimum width and
  # printed the default six decimals.
  if cpuload < multiprocessing.cpu_count():
    return (0, 'OK - %.2f' % cpuload)
  return (1, 'FAIL - %.2f.  This CQ is probably overloaded.' % cpuload)
| 85 |  | 
| 86 def test_log_mod_time_factory(name): |  | 
| 87   def test_log_mod_time(): |  | 
| 88     if not os.path.exists(name): |  | 
| 89       return (1, 'FAIL - %s does not exist' % name) |  | 
| 90     time_since_modified = time.time() - os.path.getmtime(name) |  | 
| 91     if time_since_modified > 120.0: |  | 
| 92       return (1, 'FAIL - %d seconds ago.' % time_since_modified) |  | 
| 93     return (0, 'OK - %d seconds ago.' % time_since_modified) |  | 
| 94   return test_log_mod_time |  | 
| 95 |  | 
# (label, check) pairs; each check takes no arguments and returns
# (code, message).
tests = [
    ('number of python_runtime procs',
     test_num_proc_factory('_python_runtime')),
    ('cpu load', test_load),
]
# One freshness check per commit queue log.
for log_name in CQ_LOGS:
  tests += [
      ('%s last modified' % log_name, test_log_mod_time_factory(log_name)),
  ]
| 103 |  | 
def main():
  """Runs every health check and prints one line per check plus a total.

  Returns:
    The sum of the codes returned by the checks (0 when every check passed).
  """
  return_code = 0
  for test_name, test in tests:
    code, msg = test()
    return_code += code
    # The parenthesized single-argument form prints the same text whether
    # print is a statement or a function.
    print('%s: %s' % (test_name, msg))
  print('status: %d' % return_code)
  # Hand the total back so callers can act on it, not only read it on stdout.
  return return_code

if __name__ == '__main__':
  # The process exit status is deliberately left unchanged; the total is
  # reported on the 'status:' line.
  main()
| OLD | NEW | 
|---|