Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(477)

Side by Side Diff: infra/tools/master_cleaner/__main__.py

Issue 2059833002: Add master_cleaner tool. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright 2016 Google Inc. All Rights Reserved.
nodir 2016/06/10 17:50:36 2016
3 # pylint: disable=F0401
4
5 """Cleanup directories on BuildBot master systems."""
6
7 import argparse
8 import bisect
9 import datetime
10 import json
11 import logging
12 import os
13 import shutil
14 import subprocess
15 import sys
16 import time
17
18 from infra_libs import logs
19 from infra_libs.time_functions.parser import timedelta_type
20
21
# Module-level logger, named after this module and used by the functions in
# this file.
LOGGER = logging.getLogger(name=__name__)
23
24
def _check_run(cmd, dry_run=True, cwd=None):
  """Runs a command to completion and returns its captured output.

  Args:
    cmd: The command and its arguments, handed to subprocess.Popen.
    dry_run (bool): If True (the default), the command is only logged and
        empty output is returned without executing anything.
    cwd (str): Directory to run the command in. Defaults to the current
        working directory.

  Returns:
    (stdout, stderr): The captured output streams. Both are '' in dry run
    mode.

  Raises:
    subprocess.CalledProcessError: If the command exits with a non-zero
        code. The captured stdout is attached as the error's output.
  """
  if cwd is None:
    cwd = os.getcwd()

  if dry_run:
    LOGGER.info('(Dry run) Running command %s (cwd=%s)', cmd, cwd)
    return '', ''

  LOGGER.debug('Running command %s (cwd=%s)', cmd, cwd)
  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                          cwd=cwd)
  stdout, stderr = proc.communicate()

  rc = proc.returncode
  if rc != 0:
    # Log the exit code together with the output so that a failure can be
    # diagnosed from the log alone.
    LOGGER.error(
        'Process %s (cwd=%s) exited with code %d:\nSTDOUT:\n%s\nSTDERR:\n%s',
        cmd, cwd, rc, stdout, stderr)
    raise subprocess.CalledProcessError(rc, cmd, stdout)
  return stdout, stderr
44
45
def parse_args(argv):
  """Parses the command line and applies the logging options.

  Args:
    argv (list): Command-line arguments to parse.

  Returns:
    The argparse namespace holding the parsed options.
  """
  parser = argparse.ArgumentParser(
      description='Cleanup directories on BuildBot master systems.')

  parser.add_argument(
      'master',
      nargs='+',
      help='Name of masters (*, master.*) to clean.')
  parser.add_argument(
      '--max-twistd-log-age',
      type=timedelta_type,
      default=None,
      help='If set, "twistd.log" files older than this will be purged.')
  parser.add_argument(
      '--production',
      action='store_true',
      help='If set, actually delete the files instead of listing them.')
  parser.add_argument(
      '--gclient-root',
      help=('The path to the directory containing the master checkout '
            '".gclient" file. If omitted, an attempt will be made to probe '
            'one.'))

  # Let the shared logging helper register its own flags, then configure
  # logging from whatever was parsed.
  logs.add_argparse_options(parser)
  parsed = parser.parse_args(argv)
  logs.process_argparse_options(parsed)
  return parsed
65
66
def _process_master(opts, master_cfg):
  """Cleans up the directory of a single master.

  Deletes untracked builder directories that no configured builder uses, and
  the "twistd.log" files selected by _find_old_twistd_logs. Unless
  `opts.production` is set, nothing is deleted and the candidates are only
  logged.

  Args:
    opts: Parsed options; `max_twistd_log_age` and `production` are read.
    master_cfg (dict): Master description with the keys 'mastername',
        'master_dir' and 'builddirs'.
  """
  LOGGER.info('Cleaning up master: %s', master_cfg['mastername'])

  # Everything inside the master directory that git does not track.
  root = master_cfg['master_dir']
  untracked_files, untracked_dirs = _list_untracked_files(root)

  # Select the directories to delete: builder directories that are not the
  # builddir of any currently configured builder.
  stale_dirs = []
  for name in untracked_dirs:
    if name in master_cfg['builddirs']:
      continue
    if _is_builder_dir(os.path.join(root, name)):
      stale_dirs.append(name)
  LOGGER.info('Identified %d superfluous build directories.', len(stale_dirs))

  # Select the "twistd.log" files to delete.
  stale_logs = _find_old_twistd_logs(root, untracked_files,
                                     opts.max_twistd_log_age)
  if stale_logs:
    LOGGER.info('Identified %d old twistd.log files, starting with %s.',
                len(stale_logs), stale_logs[-1])

  for name in stale_dirs:
    target = os.path.join(root, name)
    LOGGER.info('Deleting superfluous directory: [%s]', target)
    if opts.production:
      shutil.rmtree(target)
    else:
      LOGGER.info('(Dry Run) Not deleting.')

  for name in stale_logs:
    target = os.path.join(root, name)
    LOGGER.info('Removing old "twistd.log" file: [%s]', target)
    if opts.production:
      os.remove(target)
    else:
      LOGGER.info('(Dry Run) Not deleting.')
103
104
105 def _find_old_twistd_logs(base, files, max_age):
106 twistd_log_files = []
107 if max_age is None:
108 return twistd_log_files
109
110 # Identify all "twistd.log" files to delete. We will do this by binary
111 # searching the "twistd.log" space under the assumption that any log files
112 # with higher suffix than the specified file are older than it.
113 for f in files:
114 gen = _parse_twistd_log_generation(f)
115 if gen is not None:
116 twistd_log_files.append((f, gen))
117 twistd_log_files.sort(key=lambda x: x[1])
118
119 threshold = datetime.datetime.now() - max_age
120 lo, hi = 0, len(twistd_log_files)
121 while lo < hi:
122 mid = (lo+hi)//2
123 path = os.path.join(base, twistd_log_files[mid][0])
124 age = datetime.datetime.fromtimestamp(os.path.getctime(path))
nodir 2016/06/10 17:50:36 it is not age (the older the file, the bigger the
nodir 2016/06/10 17:50:36 I think this should use twistd_log_files[mid][1] o
dnj (Google) 2016/06/10 18:00:52 I think it's correct now.
125 if age < threshold:
126 hi = mid
127 else:
128 lo = mid+1
129 return [x[0] for x in twistd_log_files[:lo]]
130
131
132 def _parse_twistd_log_generation(v):
133 # Format is: "twistd.log[.###]"
134 pieces = v.split('.')
135 if len(pieces) != 3 or not (pieces[0] == 'twistd' and pieces[1] == 'log'):
136 return None
137
138 try:
139 return int(pieces[2])
140 except ValueError:
141 return None
142
143
def _list_untracked_files(path):
  """Lists the entries under `path` that git does not track.

  Runs "git ls-files . --others --directory -z" against `path` and sorts the
  reported names into files and directories; git marks a directory with a
  trailing '/'.

  Args:
    path (str): Directory inside a git checkout to inspect.

  Returns:
    (files, dirs): Two lists of names as reported by git. Directory names
    have their trailing '/' removed.
  """
  cmd = ['git', '-C', path, 'ls-files', '.', '--others', '--directory', '-z']
  stdout, _ = _check_run(cmd, dry_run=False)

  files, dirs = [], []
  # With "-z" every entry is NUL-terminated, so splitting leaves an empty
  # string after the last terminator (and for empty output). Skip it rather
  # than report a file with an empty name.
  for name in stdout.split('\0'):
    if not name:
      continue
    if name.endswith('/'):
      dirs.append(name.rstrip('/'))
    else:
      files.append(name)
  return files, dirs
164
165
166 def _is_builder_dir(dirname):
167 return os.path.isfile(os.path.join(dirname, 'builder'))
168
169
def _load_master_cfg(gclient_root, master_dir):
  """Loads the parts of a master's configuration needed for cleanup.

  Runs "build/scripts/tools/dump_master_cfg.py" from the checkout on
  `master_dir` and parses its output as JSON.

  Args:
    gclient_root (str): Root of the checkout that contains "build".
    master_dir (str): Path to the master's directory.

  Returns:
    dict: With the keys 'mastername' (the last path component of
    `master_dir`), 'master_dir', and 'builddirs' (the set of build directory
    names claimed by the configured builders).
  """
  dump_master_cfg = os.path.join(gclient_root, 'build', 'scripts', 'tools',
                                 'dump_master_cfg.py')

  cmd = [sys.executable, dump_master_cfg, master_dir, '-']
  config, _ = _check_run(cmd, dry_run=False)
  config = json.loads(config)

  builddirs = set()
  for bcfg in config.get('builders', ()):
    # "builddir" is not present in every builder config, so it must not be
    # indexed unconditionally.
    # NOTE(review): Buildbot is understood to derive the default builddir
    # from the builder name (possibly with characters escaped), so the name
    # is used as a fallback to keep that directory protected -- confirm.
    builddir = bcfg.get('builddir') or bcfg.get('name')
    if builddir:
      builddirs.add(builddir)

  return {
      'mastername': os.path.basename(master_dir),
      'master_dir': master_dir,
      'builddirs': builddirs,
  }
186
187
188 def _find_master(gclient_root, mastername):
189 if not mastername.startswith('master.'):
190 mastername = 'master.' + mastername
191
192 for candidate in (
193 os.path.join(gclient_root, 'build', 'masters'),
194 os.path.join(gclient_root, 'build_internal', 'masters'),
195 ):
196 candidate = os.path.join(candidate, mastername)
197 if os.path.isdir(candidate):
198 return candidate
199 raise ValueError('Unable to locate master %s' % (mastername,))
200
201
202 def _find_gclient_root(opts):
203 for candidate in (
204 opts.gclient_root,
205 os.path.join(os.path.expanduser('~'), 'buildbot'),
206 ):
207 if not candidate:
208 continue
209 candidate = os.path.abspath(candidate)
210 if os.path.isfile(os.path.join(candidate, '.gclient')):
211 return candidate
212 raise Exception('Unable to find ".gclient" root.')
213
214
215 def _trim_prefix(v, prefix):
216 if v.startswith(prefix):
217 v = v[len(prefix)]
218 return v
219
220
def _main(argv):
  """Cleans up every master named on the command line.

  Args:
    argv (list): Command-line arguments to parse.

  Returns:
    int: Always 0; failures surface as exceptions.
  """
  opts = parse_args(argv)

  # Everything else is located relative to the gclient checkout root.
  gclient_root = _find_gclient_root(opts)

  # set() drops a master that was named more than once; sorting then gives a
  # stable processing order.
  unique_masters = sorted(set(opts.master))
  for master in unique_masters:
    LOGGER.info('Loading configuration for master "%s"...', master)
    master_dir = _find_master(gclient_root, master)
    _process_master(opts, _load_master_cfg(gclient_root, master_dir))

  return 0
235
if __name__ == '__main__':
  # Hand _main's return value to the interpreter as the exit status.
  exit_code = _main(sys.argv[1:])
  sys.exit(exit_code)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698