Chromium Code Reviews| Index: infra/tools/master_cleaner/__main__.py |
| diff --git a/infra/tools/master_cleaner/__main__.py b/infra/tools/master_cleaner/__main__.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..8650b070ba20d4c10c49c138ab025e67da41dfce |
| --- /dev/null |
| +++ b/infra/tools/master_cleaner/__main__.py |
| @@ -0,0 +1,237 @@ |
| +#!/usr/bin/python |
| +# Copyright 2015 Google Inc. All Rights Reserved. |
|
nodir
2016/06/10 17:50:36
2016
|
| +# pylint: disable=F0401 |
| + |
| +"""Cleanup directories on BuildBot master systems.""" |
| + |
| +import argparse |
| +import bisect |
| +import datetime |
| +import json |
| +import logging |
| +import os |
| +import shutil |
| +import subprocess |
| +import sys |
| +import time |
| + |
| +from infra_libs import logs |
| +from infra_libs.time_functions.parser import timedelta_type |
| + |
| + |
| +LOGGER = logging.getLogger(__name__) |
| + |
| + |
def _check_run(cmd, dry_run=True, cwd=None):
  """Runs a command to completion and returns its captured output.

  Args:
    cmd (list): The command and its arguments, handed to subprocess.Popen.
    dry_run (bool): If True, only log the command; nothing is executed.
    cwd (str or None): Working directory for the command. Defaults to the
        current working directory of this process.

  Returns:
    A (stdout, stderr) tuple with the captured output of the process, or
    ('', '') for a dry run.

  Raises:
    subprocess.CalledProcessError: If the process exits with a non-zero
        return code. The captured STDOUT is attached as the error's output.
  """
  if cwd is None:
    cwd = os.getcwd()

  if dry_run:
    LOGGER.info('(Dry run) Running command %s (cwd=%s)', cmd, cwd)
    return '', ''

  LOGGER.debug('Running command %s (cwd=%s)', cmd, cwd)
  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                          cwd=cwd)
  stdout, stderr = proc.communicate()

  rc = proc.returncode
  if rc != 0:
    # Include the return code: it is often the quickest hint as to why the
    # command failed (e.g. killed by a signal vs. a usage error).
    LOGGER.error(
        'Process %s (cwd=%s) exited with return code %d:\n'
        'STDOUT:\n%s\nSTDERR:\n%s', cmd, cwd, rc, stdout, stderr)
    # Attach STDOUT so that callers catching the error can still inspect it.
    raise subprocess.CalledProcessError(rc, cmd, stdout)
  return stdout, stderr
| + |
| + |
def parse_args(argv):
  """Parses the command line and applies the logging options it carries.

  Args:
    argv (list of str): Command-line arguments, without the program name.

  Returns:
    The argparse namespace holding `master`, `max_twistd_log_age`,
    `production` and `gclient_root`, plus whatever options
    `logs.add_argparse_options` registers.
  """
  # Reuse the module docstring so the tool description lives in one place.
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('master', nargs='+',
      help='Name of masters (*, master.*) to clean.')
  parser.add_argument('--max-twistd-log-age', default=None, type=timedelta_type,
      help='If set, "twistd.log" files older than this will be purged.')
  parser.add_argument('--production', action='store_true',
      help='If set, actually delete the files instead of listing them.')
  parser.add_argument('--gclient-root',
      help='The path to the directory containing the master checkout '
           '".gclient" file. If omitted, an attempt will be made to probe '
           'one.')

  logs.add_argparse_options(parser)

  opts = parser.parse_args(argv)
  logs.process_argparse_options(opts)
  return opts
| + |
| + |
def _process_master(opts, master_cfg):
  """Cleans stale builder directories and old twistd logs of one master.

  Nothing is deleted unless `opts.production` is set; otherwise every
  candidate is only logged.

  Args:
    opts: Parsed command-line options. Reads `production` and
        `max_twistd_log_age`.
    master_cfg (dict): Master description with the keys 'mastername',
        'master_dir' and 'builddirs' (see _load_master_cfg).
  """
  LOGGER.info('Cleaning up master: %s', master_cfg['mastername'])

  # Get a list of all untracked files and directories within the master
  # directory.
  master_dir = master_cfg['master_dir']
  files, dirs = _list_untracked_files(master_dir)

  # Keep only the directories that look like builder directories but are no
  # longer referenced by the master's configuration. These are the
  # directories to delete.
  dirs = [x for x in dirs if (
      x not in master_cfg['builddirs'] and
      _is_builder_dir(os.path.join(master_dir, x)))]
  LOGGER.info('Identified %d superfluous build directories.', len(dirs))

  # Find old "twistd.log" files.
  old_twistd_logs = _find_old_twistd_logs(master_dir, files,
                                          opts.max_twistd_log_age)
  if old_twistd_logs:
    # The list is ordered by increasing generation, so its first entry is
    # the one the purge "starts with".
    LOGGER.info('Identified %d old twistd.log files, starting with %s.',
                len(old_twistd_logs), old_twistd_logs[0])

  for d in dirs:
    d = os.path.join(master_dir, d)
    LOGGER.info('Deleting superfluous directory: [%s]', d)
    if not opts.production:
      LOGGER.info('(Dry Run) Not deleting.')
      continue
    shutil.rmtree(d)

  for f in old_twistd_logs:
    f = os.path.join(master_dir, f)
    LOGGER.info('Removing old "twistd.log" file: [%s]', f)
    if not opts.production:
      LOGGER.info('(Dry Run) Not deleting.')
      continue
    os.remove(f)
| + |
| + |
| +def _find_old_twistd_logs(base, files, max_age): |
| + twistd_log_files = [] |
| + if max_age is None: |
| + return twistd_log_files |
| + |
| + # Identify all "twistd.log" files to delete. We will do this by binary |
| + # searching the "twistd.log" space under the assumption that any log files |
| + # with higher suffix than the specified file are older than it. |
| + for f in files: |
| + gen = _parse_twistd_log_generation(f) |
| + if gen is not None: |
| + twistd_log_files.append((f, gen)) |
| + twistd_log_files.sort(key=lambda x: x[1]) |
| + |
| + threshold = datetime.datetime.now() - max_age |
| + lo, hi = 0, len(twistd_log_files) |
| + while lo < hi: |
| + mid = (lo+hi)//2 |
| + path = os.path.join(base, twistd_log_files[mid][0]) |
| + age = datetime.datetime.fromtimestamp(os.path.getctime(path)) |
|
nodir
2016/06/10 17:50:36
it is not age (the older the file, the bigger the
nodir
2016/06/10 17:50:36
I think this should use twistd_log_files[mid][1]
o
dnj (Google)
2016/06/10 18:00:52
I think it's correct now.
|
| + if age < threshold: |
| + hi = mid |
| + else: |
| + lo = mid+1 |
| + return [x[0] for x in twistd_log_files[:lo]] |
| + |
| + |
| +def _parse_twistd_log_generation(v): |
| + # Format is: "twistd.log[.###]" |
| + pieces = v.split('.') |
| + if len(pieces) != 3 or not (pieces[0] == 'twistd' and pieces[1] == 'log'): |
| + return None |
| + |
| + try: |
| + return int(pieces[2]) |
| + except ValueError: |
| + return None |
| + |
| + |
def _list_untracked_files(path):
  """Lists the untracked files and directories that Git reports for a path.

  Runs "git ls-files . --others --directory -z" against `path` and splits
  its NUL-delimited output.

  Args:
    path (str): Directory inside a Git checkout to inspect.

  Returns:
    A (files, dirs) tuple of lists of names as reported by Git. Entries that
    Git printed with a trailing '/' are returned in `dirs` with the slash
    removed; everything else is in `files`.

  Raises:
    subprocess.CalledProcessError: If the Git command fails.
  """
  cmd = ['git', '-C', path, 'ls-files', '.', '--others', '--directory', '-z']
  stdout, _ = _check_run(cmd, dry_run=False)
  if not isinstance(stdout, str):
    # Python 3 hands back bytes from the subprocess pipe.
    stdout = stdout.decode('utf-8')

  files, dirs = [], []
  for name in stdout.split('\0'):
    if not name:
      # "-z" terminates (rather than separates) entries, so splitting leaves
      # an empty trailing piece; it is not a real path.
      continue
    if name.endswith('/'):
      dirs.append(name.rstrip('/'))
    else:
      files.append(name)
  return files, dirs
| + |
| + |
| +def _is_builder_dir(dirname): |
| + return os.path.isfile(os.path.join(dirname, 'builder')) |
| + |
| + |
def _load_master_cfg(gclient_root, master_dir):
  """Loads the configured build directories of a master.

  Runs "build/scripts/tools/dump_master_cfg.py" (relative to `gclient_root`)
  on `master_dir` and parses the JSON document it prints.

  Args:
    gclient_root (str): Root of the checkout containing the "build" directory.
    master_dir (str): Path to the master's directory.

  Returns:
    A dict with the keys:
      'mastername': The final path component of `master_dir`.
      'master_dir': `master_dir`, unchanged.
      'builddirs': A set of build directory names in use by the master.

  Raises:
    subprocess.CalledProcessError: If the dump script fails.
    ValueError: If the script's output is not valid JSON.
  """
  dump_master_cfg = os.path.join(gclient_root, 'build', 'scripts', 'tools',
                                 'dump_master_cfg.py')

  cmd = [sys.executable, dump_master_cfg, master_dir, '-']
  config, _ = _check_run(cmd, dry_run=False)
  config = json.loads(config)

  builddirs = set()
  for bcfg in config.get('builders', ()):
    # "builddir" is optional in a builder configuration. Fall back to the
    # builder name so that such a builder's directory is never mistaken for a
    # superfluous one.
    # NOTE(review): Buildbot may escape some characters of the name when it
    # derives the default build directory -- confirm against its behaviour.
    builddir = bcfg.get('builddir') or bcfg.get('name')
    if builddir:
      builddirs.add(builddir)

  return {
      # normpath() drops any trailing separator, which would otherwise make
      # the final path component empty.
      'mastername': os.path.basename(os.path.normpath(master_dir)),
      'master_dir': master_dir,
      'builddirs': builddirs,
  }
| + |
| + |
| +def _find_master(gclient_root, mastername): |
| + if not mastername.startswith('master.'): |
| + mastername = 'master.' + mastername |
| + |
| + for candidate in ( |
| + os.path.join(gclient_root, 'build', 'masters'), |
| + os.path.join(gclient_root, 'build_internal', 'masters'), |
| + ): |
| + candidate = os.path.join(candidate, mastername) |
| + if os.path.isdir(candidate): |
| + return candidate |
| + raise ValueError('Unable to locate master %s' % (mastername,)) |
| + |
| + |
| +def _find_gclient_root(opts): |
| + for candidate in ( |
| + opts.gclient_root, |
| + os.path.join(os.path.expanduser('~'), 'buildbot'), |
| + ): |
| + if not candidate: |
| + continue |
| + candidate = os.path.abspath(candidate) |
| + if os.path.isfile(os.path.join(candidate, '.gclient')): |
| + return candidate |
| + raise Exception('Unable to find ".gclient" root.') |
| + |
| + |
| +def _trim_prefix(v, prefix): |
| + if v.startswith(prefix): |
| + v = v[len(prefix)] |
| + return v |
| + |
| + |
def _main(argv):
  """Entry point: cleans every master named on the command line.

  Args:
    argv (list of str): Command-line arguments, without the program name.

  Returns:
    The process exit code, 0 on success.
  """
  opts = parse_args(argv)

  # Everything below is resolved relative to the gclient checkout root.
  gclient_root = _find_gclient_root(opts)

  # A master may have been named more than once; process each one a single
  # time, in a stable (sorted) order.
  unique_masters = set(opts.master)
  for master in sorted(unique_masters):
    LOGGER.info('Loading configuration for master "%s"...', master)
    master_dir = _find_master(gclient_root, master)
    _process_master(opts, _load_master_cfg(gclient_root, master_dir))

  return 0


if __name__ == '__main__':
  sys.exit(_main(sys.argv[1:]))