Index: commit-queue/heartbeats/cq-heartbeat.py |
=================================================================== |
--- commit-queue/heartbeats/cq-heartbeat.py (revision 249146) |
+++ commit-queue/heartbeats/cq-heartbeat.py (working copy) |
@@ -1,113 +0,0 @@ |
-#!/usr/bin/env python |
-# Display health information on commit queue. |
- |
-import Queue |
-import multiprocessing |
-import os |
-import re |
-import subprocess |
-import threading |
-import time |
- |
-CQ_LOGS = ['/b/commit-queue/logs-chromium/commit_queue.log', |
- '/b/commit-queue/logs-chromium_deps/commit_queue.log', |
- '/b/commit-queue/logs-nacl/commit_queue.log', |
- '/b/commit-queue/logs-skia/commit_queue.log', |
- '/b/commit-queue/logs-tools/commit_queue.log',] |
- |
- |
-def call(args, timeout=None, shell=False): |
- """Returns (code, stdout, stderr)""" |
- def _run_proc(args, output): |
- proc = subprocess.Popen( |
- args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, |
- stderr=subprocess.PIPE, shell=shell) |
- output.put(proc) |
- output.put((proc.wait(),) + proc.communicate()) |
- |
- def _timer(output, timeout): |
- time.sleep(timeout) |
- output.put([618, '', 'Process timed out.']) |
- |
- output = Queue.Queue() |
- thr = threading.Thread(target=_run_proc, args=[args, output]) |
- thr.daemon = True |
- thr.start() |
- # First item passed through output is always the Popen object. |
- proc = output.get() |
- |
- # Wait for process to finish, or timeout. |
- if timeout: |
- timer_thread = threading.Thread(target=_timer, args=[output, timeout]) |
- timer_thread.daemon = True |
- timer_thread.start() |
- |
- # Get the first output that comes out, which is either an error from _timer() |
- # or the desired output from the process. |
- code, out, err = output.get() |
- if code == 618: |
- # Kill the child process if it timed out. |
- try: |
- proc.terminate() |
- time.sleep(0.5) |
- if proc.poll() is None: |
- proc.kill() |
- except OSError: |
- pass |
- |
- return code, out, err |
- |
-def test_num_proc_factory(proc_name): |
- def test_num_proc(): |
- cmd = 'pgrep %s' % proc_name |
- _, out, _ = call(cmd, 15, True) |
- numproc = len(out.splitlines()) |
- if numproc < 300: |
- return (0, 'OK - %d' % numproc) |
- else: |
- return (1, 'FAIL - %d. This CQ is probably overloaded.' % numproc) |
- return test_num_proc |
- |
-def test_load(): |
- code , out, _ = call('uptime', 15, True) |
- if code == 618: |
- return (1, 'FAIL - Process timed out.') |
- |
- cpuload_m = re.search(r'(\d+\.\d+)\s*$', out) |
- if cpuload_m: |
- cpuload = float(cpuload_m.group(1)) |
- if cpuload < multiprocessing.cpu_count(): |
- return (0, 'OK - %2f' % cpuload) |
- else: |
- return (1, 'FAIL - %2f. This CQ is probably overloaded.' % cpuload) |
- else: |
- return (1, 'FAIL - Can\'t find cpu load: %s' % out) |
- |
-def test_log_mod_time_factory(name): |
- def test_log_mod_time(): |
- if not os.path.exists(name): |
- return (1, 'FAIL - %s does not exist' % name) |
- time_since_modified = time.time() - os.path.getmtime(name) |
- if time_since_modified > 120.0: |
- return (1, 'FAIL - %d seconds ago.' % time_since_modified) |
- return (0, 'OK - %d seconds ago.' % time_since_modified) |
- return test_log_mod_time |
- |
-tests = [ |
- ('number of python_runtime procs', |
- test_num_proc_factory('_python_runtime')), |
- ('cpu load', test_load)] |
-for log_name in CQ_LOGS: |
- tests.append(('%s last modified' % log_name, |
- test_log_mod_time_factory(log_name))) |
- |
-def main(): |
- return_code = 0 |
- for test_name, test in tests: |
- code, msg = test() |
- return_code += code |
- print '%s: %s' % (test_name, msg) |
- print 'status: %d' % return_code |
- |
-if __name__ == '__main__': |
- main() |