OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # Display health information on commit queue. | |
3 | |
4 import Queue | |
5 import multiprocessing | |
6 import os | |
7 import re | |
8 import subprocess | |
9 import threading | |
10 import time | |
11 | |
# Commit queue log files, one per project. Each path listed here gets its own
# "last modified" health check.
CQ_LOGS = [
    '/b/commit-queue/logs-chromium/commit_queue.log',
    '/b/commit-queue/logs-chromium_deps/commit_queue.log',
    '/b/commit-queue/logs-nacl/commit_queue.log',
    '/b/commit-queue/logs-skia/commit_queue.log',
    '/b/commit-queue/logs-tools/commit_queue.log',
]
17 | |
18 | |
def call(args, timeout=None, shell=False):
    """Run a command and return (code, stdout, stderr).

    Args:
        args: The command, handed unchanged to subprocess.Popen (a string
            when shell=True, otherwise an argument list).
        timeout: Seconds to wait for the command. None or 0 waits forever.
        shell: Passed through to subprocess.Popen.

    Returns:
        A (code, stdout, stderr) tuple. When the command does not finish
        within `timeout`, the child is terminated and the result is
        (618, '', 'Process timed out.').

    Raises:
        OSError: If the process cannot be started. It is started in the
            calling thread so that this failure propagates instead of
            leaving the caller waiting on a result that never arrives.
    """
    proc = subprocess.Popen(
        args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
        stderr=subprocess.PIPE, shell=shell)

    outcome = {}

    def _drain():
        # communicate() closes stdin, drains stdout and stderr together and
        # then reaps the child. Calling wait() before reading the pipes can
        # deadlock once the child fills a pipe buffer.
        try:
            outcome['output'] = proc.communicate()
        except Exception as exc:  # Re-raised in the calling thread below.
            outcome['error'] = exc

    worker = threading.Thread(target=_drain)
    worker.daemon = True
    worker.start()
    # join(None) blocks until the worker is done; a falsy timeout means
    # "no timeout".
    worker.join(timeout or None)

    if worker.is_alive():
        # Timed out: ask the child to exit, give it up to half a second,
        # then kill it if it is still running.
        try:
            proc.terminate()
            worker.join(0.5)
            if proc.poll() is None:
                proc.kill()
        except OSError:
            # The process exited on its own in the meantime.
            pass
        return 618, '', 'Process timed out.'

    if 'error' in outcome:
        raise outcome['error']
    out, err = outcome['output']
    return proc.returncode, out, err
59 | |
def test_num_proc_factory(proc_name):
    """Build a health check that counts processes matching `proc_name`.

    Args:
        proc_name: Pattern given to `pgrep`.

    Returns:
        A zero-argument function returning (code, message): code is 0 when
        fewer than 300 matching processes are found, and 1 when there are
        300 or more or when `pgrep` timed out.
    """
    def test_num_proc():
        cmd = 'pgrep %s' % proc_name
        code, out, _ = call(cmd, 15, True)
        # 618 is the code call() reports on timeout. Without this check the
        # empty output of a timed-out pgrep would be counted as zero
        # processes and reported as healthy.
        if code == 618:
            return (1, 'FAIL - Process timed out.')
        # pgrep prints one pid per line.
        numproc = len(out.splitlines())
        if numproc < 300:
            return (0, 'OK - %d' % numproc)
        return (1, 'FAIL - %d. This CQ is probably overloaded.' % numproc)
    return test_num_proc
70 | |
def test_load():
    """Check the load reported by `uptime` against the number of CPUs.

    Returns:
        (code, message): code is 0 when the parsed load is below
        multiprocessing.cpu_count(), and 1 when it is not, when `uptime`
        timed out, or when no load figure could be parsed from its output.
    """
    code, out, _ = call('uptime', 15, True)
    if code == 618:
        return (1, 'FAIL - Process timed out.')

    # Takes the last decimal number on the output.
    # NOTE(review): on typical `uptime` output this is presumably the
    # 15-minute load average -- confirm that is the intended figure.
    cpuload_m = re.search(r'(\d+\.\d+)\s*$', out)
    if not cpuload_m:
        return (1, 'FAIL - Can\'t find cpu load: %s' % out)

    cpuload = float(cpuload_m.group(1))
    # '%.2f' prints two decimals; the former '%2f' was a field width of 2
    # with the default six decimals.
    if cpuload < multiprocessing.cpu_count():
        return (0, 'OK - %.2f' % cpuload)
    return (1, 'FAIL - %.2f. This CQ is probably overloaded.' % cpuload)
85 | |
86 def test_log_mod_time_factory(name): | |
87 def test_log_mod_time(): | |
88 if not os.path.exists(name): | |
89 return (1, 'FAIL - %s does not exist' % name) | |
90 time_since_modified = time.time() - os.path.getmtime(name) | |
91 if time_since_modified > 120.0: | |
92 return (1, 'FAIL - %d seconds ago.' % time_since_modified) | |
93 return (0, 'OK - %d seconds ago.' % time_since_modified) | |
94 return test_log_mod_time | |
95 | |
# Health checks to run, as (description, check) pairs. Each check is a
# zero-argument callable returning (code, message).
tests = [
    (
        'number of python_runtime procs',
        test_num_proc_factory('_python_runtime'),
    ),
    ('cpu load', test_load),
]
# One freshness check per commit queue log file.
for log_name in CQ_LOGS:
    tests += [
        ('%s last modified' % log_name, test_log_mod_time_factory(log_name)),
    ]
103 | |
def main():
    """Run every health check and print the results.

    Prints one '<name>: <message>' line per entry in `tests`, followed by a
    'status: <n>' line where n is the sum of the codes the checks returned.

    Returns:
        The summed status code (0 when every check returned 0).
    """
    return_code = 0
    for test_name, test in tests:
        code, msg = test()
        return_code += code
        # A single parenthesised argument prints identically whether print
        # is a statement or a function.
        print('%s: %s' % (test_name, msg))
    print('status: %d' % return_code)
    return return_code


if __name__ == '__main__':
    # The overall result is reported on the 'status:' line; main()'s return
    # value is not used as the process exit status here.
    main()
OLD | NEW |