Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(34)

Unified Diff: infra/tools/master_cleaner/__main__.py

Issue 2059833002: Add master_cleaner tool. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: infra/tools/master_cleaner/__main__.py
diff --git a/infra/tools/master_cleaner/__main__.py b/infra/tools/master_cleaner/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8650b070ba20d4c10c49c138ab025e67da41dfce
--- /dev/null
+++ b/infra/tools/master_cleaner/__main__.py
@@ -0,0 +1,237 @@
+#!/usr/bin/python
+# Copyright 2015 Google Inc. All Rights Reserved.
nodir 2016/06/10 17:50:36 2016
+# pylint: disable=F0401
+
+"""Cleanup directories on BuildBot master systems."""
+
+import argparse
+import bisect
+import datetime
+import json
+import logging
+import os
+import shutil
+import subprocess
+import sys
+import time
+
+from infra_libs import logs
+from infra_libs.time_functions.parser import timedelta_type
+
+
+LOGGER = logging.getLogger(__name__)
+
+
def _check_run(cmd, dry_run=True, cwd=None):
  """Runs a command to completion and returns its captured output.

  Args:
    cmd: (list) Argument vector handed to subprocess.Popen.
    dry_run: (bool) If True, the command is only logged, never executed.
    cwd: (str) Working directory for the command. If None, the current
        working directory is used.

  Returns: (stdout, stderr) as captured from the process, or ('', '') when
      `dry_run` is set.

  Raises:
    subprocess.CalledProcessError: if the command exits with a non-zero code.
  """
  if cwd is None:
    cwd = os.getcwd()

  if dry_run:
    LOGGER.info('(Dry run) Running command %s (cwd=%s)', cmd, cwd)
    return '', ''

  LOGGER.debug('Running command %s (cwd=%s)', cmd, cwd)
  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                          cwd=cwd)
  stdout, stderr = proc.communicate()

  rc = proc.returncode
  if rc != 0:
    # Log the exit code alongside the output so a failure can be diagnosed
    # from the log alone.
    LOGGER.error(
        'Process %s (cwd=%s) exited with code %d:\nSTDOUT:\n%s\nSTDERR:\n%s',
        cmd, cwd, rc, stdout, stderr)
    raise subprocess.CalledProcessError(rc, cmd, None)
  return stdout, stderr
+
+
def parse_args(argv):
  """Parses the command line and applies the logging options it contains.

  Args:
    argv: (list of str) Command-line arguments, without the program name.

  Returns: The argparse namespace holding the parsed options.
  """
  # Reuse the module docstring as the help description so the two cannot
  # drift apart.
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('master', nargs='+',
      help='Name of masters (*, master.*) to clean.')
  parser.add_argument('--max-twistd-log-age', default=None, type=timedelta_type,
      help='If set, "twistd.log" files older than this will be purged.')
  parser.add_argument('--production', action='store_true',
      help='If set, actually delete the files instead of listing them.')
  parser.add_argument('--gclient-root',
      help='The path to the directory containing the master checkout '
           '".gclient" file. If omitted, an attempt will be made to probe '
           'one.')

  logs.add_argparse_options(parser)

  opts = parser.parse_args(argv)
  logs.process_argparse_options(opts)
  return opts
+
+
def _process_master(opts, master_cfg):
  """Removes leftover builder directories and old twistd logs for one master.

  Nothing is deleted unless `opts.production` is set; otherwise each
  candidate is only logged.

  Args:
    opts: Parsed command-line options (reads `max_twistd_log_age` and
        `production`).
    master_cfg: (dict) Master description with the keys 'mastername',
        'master_dir' and 'builddirs'.
  """
  LOGGER.info('Cleaning up master: %s', master_cfg['mastername'])

  # Enumerate everything in the master directory that Git does not track.
  master_dir = master_cfg['master_dir']
  files, untracked_dirs = _list_untracked_files(master_dir)

  # Keep only builder directories that no configured builder still uses.
  dirs = []
  for name in untracked_dirs:
    if name in master_cfg['builddirs']:
      continue
    if _is_builder_dir(os.path.join(master_dir, name)):
      dirs.append(name)
  LOGGER.info('Identified %d superfluous build directories.', len(dirs))

  # Find old "twistd.log" files.
  old_twistd_logs = _find_old_twistd_logs(
      master_dir, files, opts.max_twistd_log_age)
  if old_twistd_logs:
    LOGGER.info('Identified %d old twistd.log files, starting with %s.',
                len(old_twistd_logs), old_twistd_logs[-1])

  for name in dirs:
    target = os.path.join(master_dir, name)
    LOGGER.info('Deleting superfluous directory: [%s]', target)
    if opts.production:
      shutil.rmtree(target)
    else:
      LOGGER.info('(Dry Run) Not deleting.')

  for name in old_twistd_logs:
    target = os.path.join(master_dir, name)
    LOGGER.info('Removing old "twistd.log" file: [%s]', target)
    if opts.production:
      os.remove(target)
    else:
      LOGGER.info('(Dry Run) Not deleting.')
+
+
def _find_old_twistd_logs(base, files, max_age):
  """Selects the rotated "twistd.log.N" files that are older than `max_age`.

  Args:
    base: (str) Directory that the names in `files` are relative to.
    files: (iterable of str) Candidate file names. Names that are not rotated
        twistd logs are ignored.
    max_age: (datetime.timedelta or None) How long a log is kept. If None,
        nothing is selected.

  Returns: (list of str) The names from `files` to purge, ordered by
      ascending generation number.
  """
  if max_age is None:
    return []

  # Collect (name, generation) pairs, lowest generation first.
  rotated = []
  for f in files:
    gen = _parse_twistd_log_generation(f)
    if gen is not None:
      rotated.append((f, gen))
  rotated.sort(key=lambda x: x[1])

  # Binary search for the first log that is too old, under the assumption
  # that any log with a higher suffix than a given file is older than it.
  threshold = datetime.datetime.now() - max_age
  lo, hi = 0, len(rotated)
  while lo < hi:
    mid = (lo + hi) // 2
    path = os.path.join(base, rotated[mid][0])
    # Use the last-write time: on Unix, ctime is the last metadata change
    # (which the rename done by log rotation can reset), so it is not a
    # reliable measure of how old a log's contents are.
    modified = datetime.datetime.fromtimestamp(os.path.getmtime(path))
    if modified < threshold:
      hi = mid
    else:
      lo = mid + 1

  # Everything from `lo` onward was last written before the threshold. The
  # entries before `lo` are the recent logs and must be kept.
  return [name for name, _ in rotated[lo:]]
+
+
+def _parse_twistd_log_generation(v):
+ # Format is: "twistd.log[.###]"
+ pieces = v.split('.')
+ if len(pieces) != 3 or not (pieces[0] == 'twistd' and pieces[1] == 'log'):
+ return None
+
+ try:
+ return int(pieces[2])
+ except ValueError:
+ return None
+
+
def _list_untracked_files(path):
  """Lists the files and directories under `path` that Git does not track.

  Args:
    path: (str) Directory inside a Git checkout to inspect.

  Returns: (files, dirs) Two lists of names as reported by "git ls-files".
      Directory names have their trailing "/" removed.

  Raises:
    subprocess.CalledProcessError: if the "git" invocation fails.
  """
  cmd = ['git', '-C', path, 'ls-files', '.', '--others', '--directory', '-z']
  stdout, _ = _check_run(cmd, dry_run=False)

  files, dirs = [], []
  # With "-z" every entry is terminated (not separated) by NUL, so splitting
  # leaves an empty trailing piece. Skip empty names so no bogus '' entry
  # is reported as a file.
  for name in stdout.split('\0'):
    if not name:
      continue
    if name.endswith('/'):
      dirs.append(name.rstrip('/'))
    else:
      files.append(name)
  return files, dirs
+
+
+def _is_builder_dir(dirname):
+ return os.path.isfile(os.path.join(dirname, 'builder'))
+
+
def _load_master_cfg(gclient_root, master_dir):
  """Dumps a master's configuration and extracts what the cleaner needs.

  Runs "build/scripts/tools/dump_master_cfg.py" from the checkout with the
  current interpreter and parses its standard output as JSON.

  Args:
    gclient_root: (str) Directory containing the "build" checkout.
    master_dir: (str) Path to the master's directory.

  Returns: (dict) With the keys:
      'mastername': last path component of `master_dir`.
      'master_dir': `master_dir`, unchanged.
      'builddirs': set of build directory names used by configured builders.

  Raises:
    subprocess.CalledProcessError: if the dump script fails.
    ValueError: if the script output is not valid JSON.
  """
  dump_master_cfg = os.path.join(gclient_root, 'build', 'scripts', 'tools',
                                 'dump_master_cfg.py')

  cmd = [sys.executable, dump_master_cfg, master_dir, '-']
  config, _ = _check_run(cmd, dry_run=False)
  config = json.loads(config)

  result = {
    'mastername': os.path.split(master_dir)[-1],
    'master_dir': master_dir,
    'builddirs': set(),
  }
  for bcfg in config.get('builders', ()):
    # "builddir" is not always present in a builder's configuration. Fall
    # back to the builder name so that a live builder's directory is not
    # mistaken for a leftover.
    # NOTE(review): BuildBot documents builddir as defaulting to the builder
    # name with some characters escaped; that escaping is not replicated
    # here -- confirm against the dump_master_cfg.py output.
    builddir = bcfg.get('builddir') or bcfg.get('name')
    if builddir:
      result['builddirs'].add(builddir)
  return result
+
+
+def _find_master(gclient_root, mastername):
+ if not mastername.startswith('master.'):
+ mastername = 'master.' + mastername
+
+ for candidate in (
+ os.path.join(gclient_root, 'build', 'masters'),
+ os.path.join(gclient_root, 'build_internal', 'masters'),
+ ):
+ candidate = os.path.join(candidate, mastername)
+ if os.path.isdir(candidate):
+ return candidate
+ raise ValueError('Unable to locate master %s' % (mastername,))
+
+
+def _find_gclient_root(opts):
+ for candidate in (
+ opts.gclient_root,
+ os.path.join(os.path.expanduser('~'), 'buildbot'),
+ ):
+ if not candidate:
+ continue
+ candidate = os.path.abspath(candidate)
+ if os.path.isfile(os.path.join(candidate, '.gclient')):
+ return candidate
+ raise Exception('Unable to find ".gclient" root.')
+
+
+def _trim_prefix(v, prefix):
+ if v.startswith(prefix):
+ v = v[len(prefix)]
+ return v
+
+
def _main(argv):
  """Cleans every master named on the command line.

  Args:
    argv: (list of str) Command-line arguments, without the program name.

  Returns: (int) Process exit code; 0 once all masters have been processed.
  """
  opts = parse_args(argv)

  # Locate our gclient file root.
  gclient_root = _find_gclient_root(opts)

  # De-duplicate the requested masters (the same one may be named twice) and
  # handle them in a stable order.
  masters = sorted(set(opts.master))
  for master in masters:
    LOGGER.info('Loading configuration for master "%s"...', master)
    master_dir = _find_master(gclient_root, master)
    _process_master(opts, _load_master_cfg(gclient_root, master_dir))

  return 0
+
if __name__ == '__main__':
  # Use the tool's return value as the process exit status.
  exit_code = _main(sys.argv[1:])
  sys.exit(exit_code)

Powered by Google App Engine
This is Rietveld 408576698