OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
2 # Copyright 2015 Google Inc. All Rights Reserved. | |
nodir
2016/06/10 17:50:36
2016
| |
3 # pylint: disable=F0401 | |
4 | |
5 """Cleanup directories on BuildBot master systems.""" | |
6 | |
7 import argparse | |
8 import bisect | |
9 import datetime | |
10 import json | |
11 import logging | |
12 import os | |
13 import shutil | |
14 import subprocess | |
15 import sys | |
16 import time | |
17 | |
18 from infra_libs import logs | |
19 from infra_libs.time_functions.parser import timedelta_type | |
20 | |
21 | |
# Module-level logger, named after this module, used by the helpers below.
LOGGER = logging.getLogger(__name__)
23 | |
24 | |
def _check_run(cmd, dry_run=True, cwd=None):
  """Runs a command and returns its captured output.

  Args:
    cmd (list): The command argument vector handed to subprocess.Popen.
    dry_run (bool): If True, the command is only logged, never executed.
    cwd (str): Working directory for the command. Defaults to os.getcwd().

  Returns:
    A (stdout, stderr) tuple with the captured output, or ('', '') on a dry
    run.

  Raises:
    subprocess.CalledProcessError: If the command exits with a non-zero code.
        The captured STDOUT is attached as the exception's "output".
  """
  if cwd is None:
    cwd = os.getcwd()

  if dry_run:
    LOGGER.info('(Dry run) Running command %s (cwd=%s)', cmd, cwd)
    return '', ''

  LOGGER.debug('Running command %s (cwd=%s)', cmd, cwd)
  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                          cwd=cwd)
  stdout, stderr = proc.communicate()

  rc = proc.returncode
  if rc != 0:
    # Log the exit code together with both streams so that a failure can be
    # diagnosed from the log alone.
    LOGGER.error(
        'Process %s (cwd=%s) exited with code %d:\nSTDOUT:\n%s\nSTDERR:\n%s',
        cmd, cwd, rc, stdout, stderr)
    # Attach the captured output instead of discarding it.
    raise subprocess.CalledProcessError(rc, cmd, stdout)
  return stdout, stderr
44 | |
45 | |
def parse_args(argv):
  """Parses the command-line arguments.

  The parsed options are also handed to logs.process_argparse_options()
  before being returned.

  Args:
    argv (list): Command-line arguments, without the program name.

  Returns:
    The argparse.Namespace produced by the parser.
  """
  # Reuse the module docstring so that the help text cannot drift from it.
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('master', nargs='+',
      help='Name of masters (*, master.*) to clean.')
  parser.add_argument('--max-twistd-log-age', default=None, type=timedelta_type,
      help='If set, "twistd.log" files older than this will be purged.')
  parser.add_argument('--production', action='store_true',
      help='If set, actually delete the files instead of listing them.')
  parser.add_argument('--gclient-root',
      help='The path to the directory containing the master checkout '
           '".gclient" file. If omitted, an attempt will be made to probe '
           'one.')

  logs.add_argparse_options(parser)

  opts = parser.parse_args(argv)
  logs.process_argparse_options(opts)
  return opts
65 | |
66 | |
def _process_master(opts, master_cfg):
  """Removes superfluous builder directories and old twistd logs of a master.

  Every candidate is logged. Nothing is deleted unless "opts.production" is
  set.

  Args:
    opts: Parsed options; "max_twistd_log_age" and "production" are read.
    master_cfg (dict): Master description with the "mastername", "master_dir"
        and "builddirs" keys.
  """
  LOGGER.info('Cleaning up master: %s', master_cfg['mastername'])

  # Everything Git does not track within the master directory.
  master_dir = master_cfg['master_dir']
  files, dirs = _list_untracked_files(master_dir)

  # Keep only the directories that look like builder directories but are not
  # the build directory of any currently-configured builder.
  superfluous = []
  for name in dirs:
    if name in master_cfg['builddirs']:
      continue
    if _is_builder_dir(os.path.join(master_dir, name)):
      superfluous.append(name)
  LOGGER.info('Identified %d superfluous build directories.', len(superfluous))

  # Find old "twistd.log" files.
  old_twistd_logs = _find_old_twistd_logs(
      master_dir, files, opts.max_twistd_log_age)
  if old_twistd_logs:
    LOGGER.info('Identified %d old twistd.log files, starting with %s.',
                len(old_twistd_logs), old_twistd_logs[-1])

  for name in superfluous:
    path = os.path.join(master_dir, name)
    LOGGER.info('Deleting superfluous directory: [%s]', path)
    if opts.production:
      shutil.rmtree(path)
    else:
      LOGGER.info('(Dry Run) Not deleting.')

  for name in old_twistd_logs:
    path = os.path.join(master_dir, name)
    LOGGER.info('Removing old "twistd.log" file: [%s]', path)
    if opts.production:
      os.remove(path)
    else:
      LOGGER.info('(Dry Run) Not deleting.')
103 | |
104 | |
def _find_old_twistd_logs(base, files, max_age):
  """Returns the rotated "twistd.log.N" files that are older than "max_age".

  Args:
    base (str): Directory that the names in "files" are relative to.
    files (iterable of str): Candidate file names.
    max_age (datetime.timedelta or None): Age cutoff. If None, no file is
        selected.

  Returns:
    A list of names taken from "files", sorted by ascending generation number,
    starting at the first generation found to be older than "max_age".
  """
  if max_age is None:
    return []

  # Identify all "twistd.log" files to delete. We will do this by binary
  # searching the "twistd.log" space under the assumption that any log files
  # with higher suffix than the specified file are older than it.
  twistd_log_files = []
  for f in files:
    gen = _parse_twistd_log_generation(f)
    if gen is not None:
      twistd_log_files.append((f, gen))
  twistd_log_files.sort(key=lambda x: x[1])

  threshold = datetime.datetime.now() - max_age
  lo, hi = 0, len(twistd_log_files)
  while lo < hi:
    mid = (lo + hi) // 2
    path = os.path.join(base, twistd_log_files[mid][0])
    # NOTE(review): getctime() is the metadata-change time on Unix, not the
    # creation time; confirm that log rotation leaves it meaningful.
    changed = datetime.datetime.fromtimestamp(os.path.getctime(path))
    if changed < threshold:
      # Too old: the first too-old entry is at "mid" or before it.
      hi = mid
    else:
      lo = mid + 1

  # "lo" is the index of the first too-old log; it and every higher
  # generation are the ones to purge.
  return [x[0] for x in twistd_log_files[lo:]]


def _parse_twistd_log_generation(v):
  """Returns the generation number of a rotated twistd log file name.

  Only names of the exact form "twistd.log.<integer>" are recognized. Any
  other name, including a bare "twistd.log", yields None.

  Args:
    v (str): File name to inspect.

  Returns:
    The integer suffix, or None if "v" is not a rotated twistd log name.
  """
  pieces = v.split('.')
  if len(pieces) != 3 or not (pieces[0] == 'twistd' and pieces[1] == 'log'):
    return None

  try:
    return int(pieces[2])
  except ValueError:
    return None
142 | |
143 | |
def _list_untracked_files(path):
  """Lists the entries under "path" that Git reports as untracked.

  Runs "git ls-files . --others --directory -z" against "path".

  Args:
    path (str): Directory passed to "git -C".

  Returns:
    A (files, dirs) tuple of lists. Names that Git printed with a trailing "/"
    go to "dirs" with the slash stripped; all other names go to "files".

  Raises:
    subprocess.CalledProcessError: If the Git command fails.
  """
  cmd = ['git', '-C', path, 'ls-files', '.', '--others', '--directory', '-z']
  # The listing must really run, so never treat it as a dry run.
  stdout, _ = _check_run(cmd, dry_run=False)

  # NOTE(review): assumes "stdout" is a text string (Python 2 "str"); confirm
  # before running this under Python 3, where Popen returns bytes.
  files, dirs = [], []
  for name in stdout.split('\0'):
    # "-z" terminates every entry with NUL, so the split always leaves an
    # empty string after the last one. It is not a file name; skip it.
    if not name:
      continue
    if name.endswith('/'):
      dirs.append(name.rstrip('/'))
    else:
      files.append(name)
  return files, dirs
164 | |
165 | |
def _is_builder_dir(dirname):
  """Returns True if "dirname" contains a regular file named "builder"."""
  marker = os.path.join(dirname, 'builder')
  return os.path.isfile(marker)
168 | |
169 | |
def _load_master_cfg(gclient_root, master_dir):
  """Loads the configuration of a master through "dump_master_cfg.py".

  Args:
    gclient_root (str): Checkout root that contains
        "build/scripts/tools/dump_master_cfg.py".
    master_dir (str): Path to the master's directory.

  Returns:
    A dict with the keys:
      "mastername": the last path component of "master_dir".
      "master_dir": "master_dir" itself.
      "builddirs": the set of build directory names of the configured
          builders.

  Raises:
    subprocess.CalledProcessError: If the dump script fails.
    KeyError: If a builder has neither a "builddir" nor a "name".
  """
  dump_master_cfg = os.path.join(gclient_root, 'build', 'scripts', 'tools',
                                 'dump_master_cfg.py')

  # "-" is passed as the output argument and the JSON is read from STDOUT.
  cmd = [sys.executable, dump_master_cfg, master_dir, '-']
  config, _ = _check_run(cmd, dry_run=False)
  config = json.loads(config)

  builddirs = set()
  for bcfg in config.get('builders', ()):
    # "builddir" is optional in a builder configuration, so fall back to the
    # builder name. A builder with neither still raises KeyError, so a
    # directory in use is never mistaken for a superfluous one.
    # NOTE(review): BuildBot presumably escapes some characters of the name
    # when deriving the default builddir; confirm for unusual builder names.
    builddirs.add(bcfg.get('builddir') or bcfg['name'])

  return {
      'mastername': os.path.basename(master_dir),
      'master_dir': master_dir,
      'builddirs': builddirs,
  }
186 | |
187 | |
def _find_master(gclient_root, mastername):
  """Locates the directory of a master within the checkout.

  Args:
    gclient_root (str): Checkout root directory.
    mastername (str): Master name, with or without the "master." prefix.

  Returns:
    The first existing directory among "build/masters/<name>" and
    "build_internal/masters/<name>" under "gclient_root".

  Raises:
    ValueError: If neither directory exists.
  """
  name = mastername
  if not name.startswith('master.'):
    name = 'master.' + name

  for repo in ('build', 'build_internal'):
    path = os.path.join(gclient_root, repo, 'masters', name)
    if os.path.isdir(path):
      return path
  raise ValueError('Unable to locate master %s' % (name,))
200 | |
201 | |
def _find_gclient_root(opts):
  """Finds the directory that holds the ".gclient" file.

  The explicit "opts.gclient_root" is tried first (when set), then
  "~/buildbot".

  Args:
    opts: Parsed options; only "gclient_root" is read.

  Returns:
    The absolute path of the first candidate that contains a ".gclient" file.

  Raises:
    Exception: If no candidate contains a ".gclient" file.
  """
  candidates = [
      opts.gclient_root,
      os.path.join(os.path.expanduser('~'), 'buildbot'),
  ]
  for path in candidates:
    if path:
      path = os.path.abspath(path)
      if os.path.isfile(os.path.join(path, '.gclient')):
        return path
  raise Exception('Unable to find ".gclient" root.')
213 | |
214 | |
def _trim_prefix(v, prefix):
  """Returns "v" without its leading "prefix".

  Args:
    v (str): The string to trim.
    prefix (str): The prefix to remove.

  Returns:
    The remainder of "v" after "prefix", or "v" unchanged if it does not start
    with "prefix".
  """
  if v.startswith(prefix):
    # Slice off the prefix; plain indexing would return a single character.
    return v[len(prefix):]
  return v
219 | |
220 | |
def _main(argv):
  """Cleans up every master named on the command line.

  Args:
    argv (list): Command-line arguments, without the program name.

  Returns:
    The process exit code, which is always 0. Failures surface as exceptions.
  """
  opts = parse_args(argv)

  # All masters are looked up relative to the gclient checkout root.
  gclient_root = _find_gclient_root(opts)

  # A master may be named more than once on the command line; process each
  # one a single time, in a stable order.
  masters = sorted(set(opts.master))
  for master in masters:
    LOGGER.info('Loading configuration for master "%s"...', master)
    master_dir = _find_master(gclient_root, master)
    _process_master(opts, _load_master_cfg(gclient_root, master_dir))

  return 0


if __name__ == '__main__':
  exit_code = _main(sys.argv[1:])
  sys.exit(exit_code)
OLD | NEW |