| Index: commit-queue/heartbeats/cq-heartbeat.py
|
| ===================================================================
|
| --- commit-queue/heartbeats/cq-heartbeat.py (revision 249146)
|
| +++ commit-queue/heartbeats/cq-heartbeat.py (working copy)
|
| @@ -1,113 +0,0 @@
|
| -#!/usr/bin/env python
|
| -# Display health information on commit queue.
|
| -
|
| -import Queue
|
| -import multiprocessing
|
| -import os
|
| -import re
|
| -import subprocess
|
| -import threading
|
| -import time
|
| -
|
| -CQ_LOGS = ['/b/commit-queue/logs-chromium/commit_queue.log',
|
| - '/b/commit-queue/logs-chromium_deps/commit_queue.log',
|
| - '/b/commit-queue/logs-nacl/commit_queue.log',
|
| - '/b/commit-queue/logs-skia/commit_queue.log',
|
| - '/b/commit-queue/logs-tools/commit_queue.log',]
|
| -
|
| -
|
| -def call(args, timeout=None, shell=False):
|
| - """Returns (code, stdout, stderr)"""
|
| - def _run_proc(args, output):
|
| - proc = subprocess.Popen(
|
| - args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
| - stderr=subprocess.PIPE, shell=shell)
|
| - output.put(proc)
|
| - output.put((proc.wait(),) + proc.communicate())
|
| -
|
| - def _timer(output, timeout):
|
| - time.sleep(timeout)
|
| - output.put([618, '', 'Process timed out.'])
|
| -
|
| - output = Queue.Queue()
|
| - thr = threading.Thread(target=_run_proc, args=[args, output])
|
| - thr.daemon = True
|
| - thr.start()
|
| - # First item passed through output is always the Popen object.
|
| - proc = output.get()
|
| -
|
| - # Wait for process to finish, or timeout.
|
| - if timeout:
|
| - timer_thread = threading.Thread(target=_timer, args=[output, timeout])
|
| - timer_thread.daemon = True
|
| - timer_thread.start()
|
| -
|
| - # Get the first output that comes out, which is either an error from _timer()
|
| - # or the desired output from the process.
|
| - code, out, err = output.get()
|
| - if code == 618:
|
| - # Kill the child process if it timed out.
|
| - try:
|
| - proc.terminate()
|
| - time.sleep(0.5)
|
| - if proc.poll() is None:
|
| - proc.kill()
|
| - except OSError:
|
| - pass
|
| -
|
| - return code, out, err
|
| -
|
| -def test_num_proc_factory(proc_name):
|
| - def test_num_proc():
|
| - cmd = 'pgrep %s' % proc_name
|
| - _, out, _ = call(cmd, 15, True)
|
| - numproc = len(out.splitlines())
|
| - if numproc < 300:
|
| - return (0, 'OK - %d' % numproc)
|
| - else:
|
| - return (1, 'FAIL - %d. This CQ is probably overloaded.' % numproc)
|
| - return test_num_proc
|
| -
|
| -def test_load():
|
| - code , out, _ = call('uptime', 15, True)
|
| - if code == 618:
|
| - return (1, 'FAIL - Process timed out.')
|
| -
|
| - cpuload_m = re.search(r'(\d+\.\d+)\s*$', out)
|
| - if cpuload_m:
|
| - cpuload = float(cpuload_m.group(1))
|
| - if cpuload < multiprocessing.cpu_count():
|
| - return (0, 'OK - %2f' % cpuload)
|
| - else:
|
| - return (1, 'FAIL - %2f. This CQ is probably overloaded.' % cpuload)
|
| - else:
|
| - return (1, 'FAIL - Can\'t find cpu load: %s' % out)
|
| -
|
| -def test_log_mod_time_factory(name):
|
| - def test_log_mod_time():
|
| - if not os.path.exists(name):
|
| - return (1, 'FAIL - %s does not exist' % name)
|
| - time_since_modified = time.time() - os.path.getmtime(name)
|
| - if time_since_modified > 120.0:
|
| - return (1, 'FAIL - %d seconds ago.' % time_since_modified)
|
| - return (0, 'OK - %d seconds ago.' % time_since_modified)
|
| - return test_log_mod_time
|
| -
|
| -tests = [
|
| - ('number of python_runtime procs',
|
| - test_num_proc_factory('_python_runtime')),
|
| - ('cpu load', test_load)]
|
| -for log_name in CQ_LOGS:
|
| - tests.append(('%s last modified' % log_name,
|
| - test_log_mod_time_factory(log_name)))
|
| -
|
| -def main():
|
| - return_code = 0
|
| - for test_name, test in tests:
|
| - code, msg = test()
|
| - return_code += code
|
| - print '%s: %s' % (test_name, msg)
|
| - print 'status: %d' % return_code
|
| -
|
| -if __name__ == '__main__':
|
| - main()
|
|
|