Index: infra/tools/master_cleaner/__main__.py |
diff --git a/infra/tools/master_cleaner/__main__.py b/infra/tools/master_cleaner/__main__.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..87db7c6c0e8b43385557ddd8f3699d07e554abb3 |
--- /dev/null |
+++ b/infra/tools/master_cleaner/__main__.py |
@@ -0,0 +1,239 @@ |
+#!/usr/bin/python |
+# Copyright 2016 Google Inc. All Rights Reserved. |
+# pylint: disable=F0401 |
+ |
+"""Cleanup directories on BuildBot master systems.""" |
+ |
+import argparse |
+import bisect |
+import datetime |
+import json |
+import logging |
+import os |
+import shutil |
+import subprocess |
+import sys |
+import time |
+ |
+from infra_libs import logs |
+from infra_libs.time_functions.parser import argparse_timedelta_type |
+ |
+ |
+LOGGER = logging.getLogger(__name__) |
+ |
+ |
+def _check_run(cmd, dry_run=True, cwd=None): |
+ if cwd is None: |
+ cwd = os.getcwd() |
+ |
+ if dry_run: |
+ LOGGER.info('(Dry run) Running command %s (cwd=%s)', cmd, cwd) |
+ return '', '' |
+ |
+ LOGGER.debug('Running command %s (cwd=%s)', cmd, cwd) |
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, |
+ cwd=cwd) |
+ stdout, stderr = proc.communicate() |
+ |
+ rc = proc.returncode |
+ if rc != 0: |
+ LOGGER.error('Output for process %s (rc=%d, cwd=%s):\n' |
+ 'STDOUT:\n%s\nSTDERR:\n%s', |
+ cmd, rc, cwd, stdout, stderr) |
+ raise subprocess.CalledProcessError(rc, cmd, None) |
+ return stdout, stderr |
+ |
+ |
+def parse_args(argv): |
+ parser = argparse.ArgumentParser(description=__doc__) |
+ parser.add_argument('master', nargs='+', |
+ help='Name of masters (*, master.*) to clean.') |
+ parser.add_argument('--max-twistd-log-age', metavar='AGE-TOKENS', |
+ default=None, type=argparse_timedelta_type, |
+ help='If set, "twistd.log" files older than this will be purged.') |
+ parser.add_argument('--production', action='store_true', |
+ help='If set, actually delete the files instead of listing them.') |
+ parser.add_argument('--gclient-root', |
+ help='The path to the directory containing the master checkout ' |
+ '".gclient" file. If omitted, an attempt will be made to probe ' |
+ 'one.') |
+ |
+ logs.add_argparse_options(parser) |
+ |
+ opts = parser.parse_args(argv) |
+ logs.process_argparse_options(opts) |
+ return opts |
+ |
+ |
+def _process_master(opts, master_cfg): |
+ LOGGER.info('Cleaning up master: %s', master_cfg['mastername']) |
+ |
+ # Get a list of all files within the master directory. |
+ master_dir = master_cfg['master_dir'] |
+ files, dirs = _list_untracked_files(master_dir) |
+ |
+ # Run a filter to identify all "builder" directories that are not currently |
+ # configured to the master. |
+ dirs = [x for x in dirs if ( |
+ x not in master_cfg['builddirs'] and |
+ _is_builder_dir(os.path.join(master_dir, x)))] |
+ LOGGER.info('Identified %d superfluous build directories.', len(dirs)) |
+ |
+ # Find old "twistd.log" files. |
+ old_twistd_logs = _find_old_twistd_logs(master_dir, files, |
+ opts.max_twistd_log_age) |
+ if len(old_twistd_logs) > 0: |
+ LOGGER.info('Identified %d old twistd.log files, starting with %s.', |
+ len(old_twistd_logs), old_twistd_logs[-1]) |
+ |
+ for d in dirs: |
+ d = os.path.join(master_dir, d) |
+ LOGGER.info('Deleting superfluous directory: [%s]', d) |
+ if not opts.production: |
+ LOGGER.info('(Dry Run) Not deleting.') |
+ continue |
+ shutil.rmtree(d) |
+ |
+ for f in old_twistd_logs: |
+ f = os.path.join(master_dir, f) |
+ LOGGER.info('Removing old "twistd.log" file: [%s]', f) |
+ if not opts.production: |
+ LOGGER.info('(Dry Run) Not deleting.') |
+ continue |
+ os.remove(f) |
+ |
+ |
+def _find_old_twistd_logs(base, files, max_age): |
+ twistd_log_files = [] |
+ if max_age is None: |
+ return twistd_log_files |
+ |
+ # Identify all "twistd.log" files to delete. We will do this by binary |
+ # searching the "twistd.log" space under the assumption that any log files |
+ # with higher generation than the specified file are older than files with |
+ # lower index. |
+ for f in files: |
+ gen = _parse_twistd_log_generation(f) |
+ if gen is not None: |
+ twistd_log_files.append((f, gen)) |
+ twistd_log_files.sort(key=lambda x: x[1], reverse=True) |
+ |
+ threshold = datetime.datetime.now() - max_age |
+ lo, hi = 0, len(twistd_log_files) |
+ while lo < hi: |
+ mid = (lo+hi)//2 |
+ path = os.path.join(base, twistd_log_files[mid][0]) |
+ create_time = datetime.datetime.fromtimestamp(os.path.getctime(path)) |
+ if create_time < threshold: |
+ hi = mid |
+ else: |
+ lo = mid+1 |
+ return [x[0] for x in twistd_log_files[:lo]] |
+ |
+ |
+def _parse_twistd_log_generation(v): |
+ # Format is: "twistd.log[.###]" |
+ pieces = v.split('.') |
+ if len(pieces) != 3 or not (pieces[0] == 'twistd' and pieces[1] == 'log'): |
+ return None |
+ |
+ try: |
+ return int(pieces[2]) |
+ except ValueError: |
+ return None |
+ |
+ |
+def _list_untracked_files(path): |
+ cmd = ['git', '-C', path, 'ls-files', '.', '--others', '--directory', '-z'] |
+ stdout, _ = _check_run(cmd, dry_run=False) |
+ files, dirs = [], [] |
+ |
+ def iter_null_terminated(data): |
+ while True: |
+ idx = data.find('\0') |
+ if idx < 0: |
+ yield data |
+ return |
+ v, data = data[:idx], data[idx+1:] |
+ yield v |
+ |
+ for name in iter_null_terminated(stdout): |
+ if name.endswith('/'): |
+ dirs.append(name.rstrip('/')) |
+ else: |
+ files.append(name) |
+ return files, dirs |
+ |
+ |
+def _is_builder_dir(dirname): |
+ return os.path.isfile(os.path.join(dirname, 'builder')) |
+ |
+ |
+def _load_master_cfg(gclient_root, master_dir): |
+ dump_master_cfg = os.path.join(gclient_root, 'build', 'scripts', 'tools', |
+ 'dump_master_cfg.py') |
+ |
+ cmd = [sys.executable, dump_master_cfg, master_dir, '-'] |
+ config, _ = _check_run(cmd, dry_run=False) |
+ config = json.loads(config) |
+ |
+ result = { |
+ 'mastername': os.path.split(master_dir)[-1], |
+ 'master_dir': master_dir, |
+ 'builddirs': set(), |
+ } |
+ for bcfg in config.get('builders', ()): |
+ result['builddirs'].add(bcfg.get('builddir') or bcfg['name']) |
+ return result |
+ |
+ |
+def _find_master(gclient_root, mastername): |
+ if not mastername.startswith('master.'): |
+ mastername = 'master.' + mastername |
+ |
+ for candidate in ( |
+ os.path.join(gclient_root, 'build', 'masters'), |
+ os.path.join(gclient_root, 'build_internal', 'masters'), |
+ ): |
+ candidate = os.path.join(candidate, mastername) |
+ if os.path.isdir(candidate): |
+ return candidate |
+ raise ValueError('Unable to locate master %s' % (mastername,)) |
+ |
+ |
+def _find_gclient_root(opts): |
+ for candidate in ( |
+ opts.gclient_root, |
+ os.path.join(os.path.expanduser('~'), 'buildbot'), |
+ ): |
+ if not candidate: |
+ continue |
+ candidate = os.path.abspath(candidate) |
+ if os.path.isfile(os.path.join(candidate, '.gclient')): |
+ return candidate |
+ raise Exception('Unable to find ".gclient" root.') |
+ |
+ |
+def _trim_prefix(v, prefix): |
+ if v.startswith(prefix): |
+ v = v[len(prefix)] |
+ return v |
+ |
+ |
+def _main(argv): |
+ opts = parse_args(argv) |
+ |
+ # Locate our gclient file root. |
+ gclient_root = _find_gclient_root(opts) |
+ |
+ # Dump the builders configured for each master. |
+ for master in sorted(set(opts.master)): |
+ LOGGER.info('Loading configuration for master "%s"...', master) |
+ master_dir = _find_master(gclient_root, master) |
+ master_cfg = _load_master_cfg(gclient_root, master_dir) |
+ _process_master(opts, master_cfg) |
+ |
+ return 0 |
+ |
+if __name__ == '__main__': |
+ sys.exit(_main(sys.argv[1:])) |