| Index: commit-queue/heartbeats/cq-heartbeat.py | 
| =================================================================== | 
| --- commit-queue/heartbeats/cq-heartbeat.py	(revision 249146) | 
| +++ commit-queue/heartbeats/cq-heartbeat.py	(working copy) | 
| @@ -1,113 +0,0 @@ | 
| -#!/usr/bin/env python | 
| -# Display health information on commit queue. | 
| - | 
| -import Queue | 
| -import multiprocessing | 
| -import os | 
| -import re | 
| -import subprocess | 
| -import threading | 
| -import time | 
| - | 
| -CQ_LOGS = ['/b/commit-queue/logs-chromium/commit_queue.log', | 
| -           '/b/commit-queue/logs-chromium_deps/commit_queue.log', | 
| -           '/b/commit-queue/logs-nacl/commit_queue.log', | 
| -           '/b/commit-queue/logs-skia/commit_queue.log', | 
| -           '/b/commit-queue/logs-tools/commit_queue.log',] | 
| - | 
| - | 
| -def call(args, timeout=None, shell=False): | 
| -  """Returns (code, stdout, stderr)""" | 
| -  def _run_proc(args, output): | 
| -    proc = subprocess.Popen( | 
| -        args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, | 
| -        stderr=subprocess.PIPE, shell=shell) | 
| -    output.put(proc) | 
| -    output.put((proc.wait(),) + proc.communicate()) | 
| - | 
| -  def _timer(output, timeout): | 
| -    time.sleep(timeout) | 
| -    output.put([618, '', 'Process timed out.']) | 
| - | 
| -  output = Queue.Queue() | 
| -  thr = threading.Thread(target=_run_proc, args=[args, output]) | 
| -  thr.daemon = True | 
| -  thr.start() | 
| -  # First item passed through output is always the Popen object. | 
| -  proc = output.get() | 
| - | 
| -  # Wait for process to finish, or timeout. | 
| -  if timeout: | 
| -    timer_thread = threading.Thread(target=_timer, args=[output, timeout]) | 
| -    timer_thread.daemon = True | 
| -    timer_thread.start() | 
| - | 
| -  # Get the first output that comes out, which is either an error from _timer() | 
| -  # or the desired output from the process. | 
| -  code, out, err = output.get() | 
| -  if code == 618: | 
| -    # Kill the child process if it timed out. | 
| -    try: | 
| -      proc.terminate() | 
| -      time.sleep(0.5) | 
| -      if proc.poll() is None: | 
| -        proc.kill() | 
| -    except OSError: | 
| -      pass | 
| - | 
| -  return code, out, err | 
| - | 
| -def test_num_proc_factory(proc_name): | 
| -  def test_num_proc(): | 
| -    cmd = 'pgrep %s' % proc_name | 
| -    _, out, _ = call(cmd, 15, True) | 
| -    numproc = len(out.splitlines()) | 
| -    if numproc < 300: | 
| -      return (0, 'OK - %d'  % numproc) | 
| -    else: | 
| -      return (1, 'FAIL - %d.  This CQ is probably overloaded.' % numproc) | 
| -  return test_num_proc | 
| - | 
| -def test_load(): | 
| -  code , out, _ = call('uptime', 15, True) | 
| -  if code == 618: | 
| -    return (1, 'FAIL - Process timed out.') | 
| - | 
| -  cpuload_m = re.search(r'(\d+\.\d+)\s*$', out) | 
| -  if cpuload_m: | 
| -    cpuload = float(cpuload_m.group(1)) | 
| -    if cpuload < multiprocessing.cpu_count(): | 
| -      return (0, 'OK - %2f' % cpuload) | 
| -    else: | 
| -      return (1, 'FAIL - %2f.  This CQ is probably overloaded.' % cpuload) | 
| -  else: | 
| -    return (1, 'FAIL - Can\'t find cpu load: %s' % out) | 
| - | 
| -def test_log_mod_time_factory(name): | 
| -  def test_log_mod_time(): | 
| -    if not os.path.exists(name): | 
| -      return (1, 'FAIL - %s does not exist' % name) | 
| -    time_since_modified = time.time() - os.path.getmtime(name) | 
| -    if time_since_modified > 120.0: | 
| -      return (1, 'FAIL - %d seconds ago.' % time_since_modified) | 
| -    return (0, 'OK - %d seconds ago.' % time_since_modified) | 
| -  return test_log_mod_time | 
| - | 
| -tests = [ | 
| -    ('number of python_runtime procs', | 
| -     test_num_proc_factory('_python_runtime')), | 
| -    ('cpu load', test_load)] | 
| -for log_name in CQ_LOGS: | 
| -  tests.append(('%s last modified' % log_name, | 
| -                test_log_mod_time_factory(log_name))) | 
| - | 
| -def main(): | 
| -  return_code = 0 | 
| -  for test_name, test in tests: | 
| -    code, msg = test() | 
| -    return_code += code | 
| -    print '%s: %s' % (test_name, msg) | 
| -  print 'status: %d' % return_code | 
| - | 
| -if __name__ == '__main__': | 
| -  main() | 
|  |