OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 |
| 7 """Restart the build masters.""" |
| 8 |
| 9 |
| 10 import os |
| 11 import posixpath |
| 12 import sys |
| 13 import threading |
| 14 import time |
| 15 import urllib2 |
| 16 |
# BUILDBOT_PATH is the parent of the directory containing this script. It and
# two site_config directories beneath it are appended to sys.path; presumably
# the project-local imports which follow rely on these entries, so this must
# run before them.
BUILDBOT_PATH = os.path.realpath(os.path.join(os.path.dirname(__file__),
                                              os.pardir))
sys.path.append(BUILDBOT_PATH)
sys.path.append(os.path.join(BUILDBOT_PATH, 'site_config'))
sys.path.append(os.path.join(BUILDBOT_PATH, 'third_party', 'chromium_buildbot',
                             'site_config'))
| 23 |
| 24 from common.py.utils import shell_utils |
| 25 import config_private |
| 26 import slave_hosts_cfg |
| 27 |
| 28 |
# Path of the file which holds the PID of the running master. The path is
# relative; it is read with "cat" after logging in to the master's host.
# TODO(borenet): Store the master information in slave_hosts_cfg.py (rename it).
PID_FILE = posixpath.join('skia-repo', 'buildbot', 'master', 'twistd.pid')

# Seconds to sleep between successive checks for a restarted master.
RESTART_POLL_INTERVAL = 5

# Upper bound, in seconds, on how long a master may take to restart.
RESTART_TIMEOUT = 3 * 60
| 38 |
| 39 |
class NoRedirectHandler(urllib2.HTTPErrorProcessor):
  """Handler which does not follow redirects.

  urllib2.HTTPErrorProcessor normally hands non-2xx responses (which includes
  3xx redirects) to the opener's error handlers. Returning the response
  unchanged skips that step, so the caller receives the raw response and no
  redirect is followed.
  """

  def http_response(self, req, resp):
    """Return the response untouched instead of dispatching on its status."""
    return resp

  # The base class binds https_response to its own http_response, so it has to
  # be rebound here for HTTPS requests to get the same pass-through behavior.
  https_response = http_response
| 44 |
| 45 |
def get_running_pid(master):
  """Read the PID of the given master from the PID file on its host.

  Builds a login command for the master's host (the first component of its
  fully-qualified domain name), appends "cat PID_FILE" to it and runs it.

  Args:
    master: config_private.Master.*; the master whose PID should be read.

  Returns:
    The last non-blank line of the command output, stripped of surrounding
    whitespace, as a string. None if the PID file does not exist or the
    command printed nothing but whitespace.

  Raises:
    shell_utils.CommandFailedException: if the command failed for any reason
        other than the PID file being absent.
  """
  master_hostname = master.master_fqdn.split('.')[0]
  # NOTE(review): the meaning of the second argument (None) is not visible
  # here; confirm against slave_hosts_cfg.compute_engine_login.
  pid_cmd = slave_hosts_cfg.compute_engine_login(master_hostname, None)
  pid_cmd.extend(['cat', PID_FILE])
  try:
    output = shell_utils.run(pid_cmd, echo=False)
  except shell_utils.CommandFailedException as e:
    # A missing PID file means that no master is currently running.
    if 'No such file or directory' in e.output:
      return None
    raise

  # The PID file may be empty, e.g. when it is read while being rewritten.
  # Report that as "no PID" rather than failing with an IndexError.
  pid_lines = [line.strip() for line in output.splitlines() if line.strip()]
  if not pid_lines:
    return None
  return pid_lines[-1]
| 56 |
| 57 |
def restart_master(master):
  """Restart the given master and block until it is running again.

  Reads the current PID from the master host, sends a "clean restart" request
  to the master's /shutdown URL, then polls the PID file until a different PID
  shows up.

  Args:
    master: config_private.Master.*; the master to restart.

  Raises:
    Exception: if no new PID has appeared once RESTART_TIMEOUT seconds have
        elapsed.
  """
  # Remember which process is running before asking it to shut down.
  pid_before = get_running_pid(master)

  # Ask the master for a "clean restart".
  shutdown_url = 'http://%s:%s/shutdown' % (master.master_host,
                                            master.master_port)
  name = master.project_name
  print('%s: Sending shutdown request to %s' % (name, shutdown_url))
  # Redirects are deliberately not followed: the master may already be gone by
  # the time the redirect target would be requested.
  opener = urllib2.build_opener(NoRedirectHandler)
  opener.open(shutdown_url)

  # Poll until a new PID appears or the time budget runs out.
  began = time.time()
  while True:
    pid_now = get_running_pid(master)
    if pid_now and pid_now != pid_before:
      print('%s finished restarting.' % name)
      return

    if pid_now:
      print('%s is still running.' % name)
    else:
      print('%s has shut down but has not yet restarted.' % name)

    if time.time() - began > RESTART_TIMEOUT:
      if pid_now:
        template = '%s failed to shut down within %d seconds.'
      else:
        template = '%s shut down but failed to restart within %d seconds'
      raise Exception(template % (name, RESTART_TIMEOUT))

    time.sleep(RESTART_POLL_INTERVAL)
| 99 |
| 100 |
def main():
  """Restart the build masters.

  Restarts every master in config_private.Master.valid_masters, each on its
  own thread, and waits for all of the threads to finish.

  Raises:
    Exception: if restarting one or more of the masters failed.
  """
  # Names of the masters whose restart raised an exception. An exception
  # raised inside a thread never reaches the main thread, so failures are
  # recorded here; otherwise the script would exit successfully even though a
  # restart failed.
  failed = []

  def _restart_and_record(master):
    """Restart one master, noting its name if the restart fails."""
    try:
      restart_master(master)
    except Exception:
      failed.append(master.project_name)
      # Re-raise so that the thread still reports the traceback.
      raise

  threads = []
  for master in config_private.Master.valid_masters:
    thread = threading.Thread(target=_restart_and_record, args=(master,))
    thread.daemon = True
    threads.append(thread)
    thread.start()

  for thread in threads:
    thread.join()

  if failed:
    raise Exception('Failed to restart: %s' % ', '.join(
        '%s' % name for name in failed))
| 113 |
| 114 |
# Only restart the masters when run as a script, not when imported.
if __name__ == '__main__':
  main()
OLD | NEW |