1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 import argparse
7 import datetime
8 import json
9 import logging
10 import operator
11 import os.path
12 import sys
13
14 import requests
15 import requests_cache
16
17 import analysis
18 import buildbot
19 import gatekeeper_extras
20 import reasons
21 import string_helpers
22
23 # This is relative to build/scripts:
24 # https://chromium.googlesource.com/chromium/tools/build/+/master/scripts
25 BUILD_SCRIPTS_PATH = "/src/build/scripts"
26 sys.path.append(BUILD_SCRIPTS_PATH)
27 from slave import gatekeeper_ng_config
28
29
30 CACHE_PATH = '/src/build_cache'
31
32
33 # Python logging is stupidly verbose to configure.
34 def setup_logging():
ojan 2014/07/22 02:01:24: Move this to a shared file instead of copy-pasting.
35   logger = logging.getLogger(__name__)
36   logger.setLevel(logging.DEBUG)
37   handler = logging.StreamHandler()
38   handler.setLevel(logging.DEBUG)
39   formatter = logging.Formatter('%(levelname)s: %(message)s')
40   handler.setFormatter(formatter)
41   logger.addHandler(handler)
42   return logger, handler
43
44
45 log, logging_handler = setup_logging()
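Per ojan's comment above, this setup could live in a shared module instead of being copy-pasted into each script. A minimal sketch, assuming a hypothetical common_logging.py placed next to this script:

# common_logging.py (hypothetical shared module)
import logging

def setup_logging(name):
  # One DEBUG-level stream handler with a terse format, shared by all tools.
  logger = logging.getLogger(name)
  logger.setLevel(logging.DEBUG)
  handler = logging.StreamHandler()
  handler.setLevel(logging.DEBUG)
  handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
  logger.addHandler(handler)
  return logger, handler

Each script would then call common_logging.setup_logging(__name__) instead of defining its own copy.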
46
47 # FIXME: Pull from:
48 # https://chromium.googlesource.com/chromium/tools/build/+/master/scripts/slave/gatekeeper.json?format=TEXT
49 CONFIG_PATH = os.path.join(BUILD_SCRIPTS_PATH, 'slave', 'gatekeeper.json')
50
51 # Success or Warnings or None (didn't run) don't count as 'failing'.
52 NON_FAILING_RESULTS = (0, 1, None)
53
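For context on the magic numbers: these are Buildbot step result codes. A sketch from recollection of how the Buildbot of this era defined them (in buildbot.status.results; not pulled from this codebase, so verify against the buildbot in use):

# Buildbot's step result codes, as commonly defined (recollection, not
# sourced from this CL): 0 and 1 are the non-failing outcomes kept above.
SUCCESS, WARNINGS, FAILURE, SKIPPED, EXCEPTION, RETRY = range(6)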
54
55 def compute_transition_and_failure_count(failure, build, recent_builds):
56   '''Returns last_pass_build, first_fail_build, fail_count'''
ojan 2014/07/22 02:01:25: Meh. This comment doesn't tell me anything more than the code does.
57
58   step_name = failure['step_name']
59   reason = failure['reason']
60
61   first_fail = recent_builds[0]
62   last_pass = None
63   fail_count = 1
64   builds_missing_steps = []
65   for build in recent_builds[1:]:
66     matching_steps = [s for s in build['steps'] if s['name'] == step_name]
67     if len(matching_steps) != 1:
68       if not matching_steps:
69         # This case is pretty common, so just warn all at once at the end.
70         builds_missing_steps.append(build['number'])
71       else:
72         log.error("%s has unexpected number of %s steps: %s" % (build['number'], step_name, matching_steps))
73       continue
74
75     step = matching_steps[0]
76     step_result = step['results'][0]
77     if step_result not in NON_FAILING_RESULTS:
78       if reason:
79         reasons = reasons_for_failure(step, build,
80             failure['builder_name'], failure['master_url'])
81         # This build doesn't seem to have this step reason, ignore it.
82         if not reasons:
83           continue
84         # Failed, but our failure reason wasn't present!
85         # FIXME: This is wrong for compile failures, and possibly
86         # for test failures as well if not all tests are run...
87         if reason not in reasons:
88           break
89
90       first_fail = build
91       fail_count += 1
92       continue
93
94     # None is 'didn't run', not a passing result.
95     if step_result is None:
96       continue
97
98     last_pass = build
99     break
100
101   if builds_missing_steps:
102     log.warn("builds %s missing %s" % (string_helpers.re_range(builds_missing_steps), step_name))
103
104   return last_pass, first_fail, fail_count
105
106
107 def failing_steps_for_build(build):
108   if build.get('results') is None:
109     log.error('Bad build: %s %s %s' % (build.get('number'), build.get('eta'), build.get('currentStep', {}).get('name')))
110   # This check is probably not necessary.
111   if build.get('results', 0) == 0:
112     return []
113
114   failing_steps = [step for step in build['steps'] if step['results'][0] not in NON_FAILING_RESULTS]
115
116   # Some builders use a sub-step pattern which just generates noise.
117   # FIXME: This code shouldn't contain constants like these.
118   IGNORED_STEPS = ['steps', 'trigger', 'slave_steps']
119   return [step for step in failing_steps if step['name'] not in IGNORED_STEPS]
120
121
122 def reasons_for_failure(step, build, builder_name, master_url):
123   splitter = next((splitter for splitter in reasons.STEP_SPLITTERS if splitter.handles_step(step)), None)
124   if not splitter:
125     return None
126   return splitter.split_step(step, build, builder_name, master_url)
127
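From the two calls above, entries in reasons.STEP_SPLITTERS evidently expose a handles_step/split_step pair. A sketch of that implied interface, inferred from this file rather than from reasons.py itself (the class and example behavior are hypothetical):

class CompileSplitter(object):
  # Inferred interface: handles_step says whether this splitter claims
  # the step; split_step returns a list of reason strings, or None.
  def handles_step(self, step):
    return step['name'] == 'compile'

  def split_step(self, step, build, builder_name, master_url):
    # A real splitter would parse the step's logs here.
    return ['compile failure']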
128
129 def failures_for_build(build, master_url, builder_name):
130   failures = []
131   for step in failing_steps_for_build(build):
132     step_template = {
133       'master_url': master_url,
134       'last_result_time': step['times'][1],
135       'builder_name': builder_name,
136       'step_name': step['name'],
137       'latest_revisions': buildbot.revisions_from_build(build),
138     }
139     reasons = reasons_for_failure(step, build, builder_name, master_url)
140     if not reasons:
141       failure = dict(step_template)
142       failure['reason'] = None
143       failures.append(failure)
144     else:
145       for reason in reasons:
146         failure = dict(step_template)
147         failure['reason'] = reason
148         failures.append(failure)
149
150   return failures
151
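For orientation, one failure record produced above would look roughly like this (the keys come from the code; all values are invented for illustration):

{
  'master_url': 'https://build.chromium.org/p/chromium.win',  # invented
  'last_result_time': 1405987200.0,   # step['times'][1], an epoch timestamp
  'builder_name': 'Win Builder',      # invented
  'step_name': 'compile',
  'latest_revisions': {'chromium': 123456},  # shape depends on buildbot.py
  'reason': None,  # or one reason string per split-out failure
}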
152
153 # FIXME: This should merge with compute_transition_and_failure_count.
154 def fill_in_transition(failure, build, recent_builds):
155   last_pass_build, first_fail_build, fail_count = \
ojan 2014/07/22 02:01:25: Nit: tc tells me that idiomatic python is to use parens instead of backslash continuations.
156       compute_transition_and_failure_count(failure, build, recent_builds)
157
158   failing = buildbot.revisions_from_build(first_fail_build)
159   passing = buildbot.revisions_from_build(last_pass_build) if last_pass_build else None
160
161   failure.update({
162     'failing_build_count': fail_count,
163     'passing_build': last_pass_build['number'] if last_pass_build else None,
164     'failing_build': first_fail_build['number'],
165     'failing_revisions': failing,
166     'passing_revisions': passing,
167   })
168   return failure
169
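For the parens nit above (and the ditto later in apply_gatekeeper_rules), the parenthesized form of that continuation would read:

  last_pass_build, first_fail_build, fail_count = (
      compute_transition_and_failure_count(failure, build, recent_builds))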
170
171 def alerts_for_builder(cache, master_url, builder_name, recent_build_ids, active_builds):
172   recent_build_ids = sorted(recent_build_ids, reverse=True)
173
174   active_build_ids = [b['number'] for b in active_builds]
175   # recent_build_ids includes active ones.
176   recent_build_ids = [b for b in recent_build_ids if b not in active_build_ids]
177
178   cache_key = buildbot.cache_key_for_build(master_url, builder_name, recent_build_ids[0])
179   if not cache.get(cache_key):
180     buildbot.prefill_builds_cache(cache, master_url, builder_name)
181
182   # Limit to 100 for now to match the prefill.
183   recent_build_ids = recent_build_ids[:100]
ojan 2014/07/22 02:01:24: Move this into a constant at the top?
184
185   recent_builds = [buildbot.fetch_build_json(cache, master_url, builder_name, num) for num in recent_build_ids]
186   # Some fetches may fail.
187   recent_builds = filter(None, recent_builds)
188   if not recent_builds:
189     log.warn("No recent builds for %s, skipping." % builder_name)
190     return []
191
192   build = recent_builds[0]
193   failures = failures_for_build(build, master_url, builder_name)
194   return [fill_in_transition(failure, build, recent_builds) for failure in failures]
195
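Addressing ojan's comment: the 100 literal, which has to stay in sync with the prefill, could become a module-level constant (the name below is a suggestion):

# Near the top of the file, beside CACHE_PATH:
MAX_RECENT_BUILDS = 100  # Keep in sync with buildbot.prefill_builds_cache.

# Then, in alerts_for_builder:
recent_build_ids = recent_build_ids[:MAX_RECENT_BUILDS]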
196
197 def alerts_for_master(cache, master_url, master_json):
198   active_builds = []
199   for slave in master_json['slaves'].values():
200     for build in slave['runningBuilds']:
201       active_builds.append(build)
202
203   alerts = []
204   for builder_name, builder_json in master_json['builders'].items():
205     actives = filter(lambda build: build['builderName'] == builder_name, active_builds)
206     # cachedBuilds will include runningBuilds.
207     recent_build_ids = builder_json['cachedBuilds']
208     master_name = buildbot.master_name_from_url(master_url)
209     log.debug("%s %s" % (master_name, builder_name))
210     alerts.extend(alerts_for_builder(cache, master_url, builder_name, recent_build_ids, actives))
211
212   return alerts
213
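The structure this function assumes of the master JSON, reconstructed from the keys used above (abridged; slave and builder names invented):

master_json = {
  'slaves': {
    'vm1-m1': {'runningBuilds': [{'builderName': 'Linux Tests', 'number': 4242}]},
  },
  'builders': {
    'Linux Tests': {'cachedBuilds': [4239, 4240, 4241, 4242]},
  },
}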
214
215 # Want to get all failures for all builds in the universe.
216 # Sort into most recent failures and then walk backwards to understand.
217
218 # cron job loads gatekeeper.json and starts MR with master_urls
219 # Map master_url to master_blob
220 # Map master_blob to (master:builder, build_blobs) and (master:builder, builder_url)
221 # Map builder_url to build_blobs
222 # Map build_blob to failures
223 # Shuffle failures into (master:builder, [failure, failure])
224 # Reduce
ojan 2014/07/22 02:01:24: Did you mean to leave these comments in? It's not clear what they're for.
225
226
227 def apply_gatekeeper_rules(alerts, gatekeeper):
228   filtered_alerts = []
229   for alert in alerts:
230     master_url = alert['master_url']
231     master_name = buildbot.master_name_from_url(master_url)
232     config = gatekeeper.get(master_url)
233     if not config:
234       # Unclear if this should be set or not?
ojan 2014/07/22 02:01:24: FIXME?
235       # alert['would_close_tree'] = False
236       filtered_alerts.append(alert)
237       continue
238     excluded_builders = gatekeeper_extras.excluded_builders(config)
239     if alert['builder_name'] in excluded_builders:
240       continue
241     alert['would_close_tree'] = \
ojan 2014/07/22 02:01:24: Ditto re: parens
242         gatekeeper_extras.would_close_tree(config, alert['builder_name'], alert['step_name'])
243     filtered_alerts.append(alert)
244     alert['tree_name'] = gatekeeper_extras.tree_for_master(master_name)
245   return filtered_alerts
246
247
248 def fetch_master_urls(gatekeeper, args):
249   # Currently using gatekeeper.json, but could use:
250   # https://apis-explorer.appspot.com/apis-explorer/?base=https://chrome-infra-stats.appspot.com/_ah/api#p/stats/v1/stats.masters.list?_h=1&
251   master_urls = gatekeeper.keys()
252   if args.master_filter:
253     master_urls = [url for url in master_urls if args.master_filter not in url]
254   return master_urls
255
256
257 def main(args):
258   parser = argparse.ArgumentParser()
259   parser.add_argument('data_url', action='store', nargs='*')
260   parser.add_argument('--use-cache', action='store_true')
261   parser.add_argument('--master-filter', action='store')
262   args = parser.parse_args(args)
263
264   if not args.data_url:
265     log.warn("No /data url passed, won't do anything")
266
267   if args.use_cache:
268     requests_cache.install_cache('failure_stats')
269   else:
270     requests_cache.install_cache(backend='memory')
271
272   gatekeeper = gatekeeper_ng_config.load_gatekeeper_config(CONFIG_PATH)
273   master_urls = fetch_master_urls(gatekeeper, args)
274   start_time = datetime.datetime.now()
275
276   latest_revisions = {}
277
278   cache = buildbot.BuildCache(CACHE_PATH)
279
280   alerts = []
281   for master_url in master_urls:
282     master_json = buildbot.fetch_master_json(master_url)
283     master_alerts = alerts_for_master(cache, master_url, master_json)
284     alerts.extend(master_alerts)
285
286     # FIXME: This doesn't really belong here. garden-o-matic wants
287     # this data and we happen to have the builder json cached at
288     # this point so it's cheap to compute.
289     revisions = buildbot.latest_revisions_for_master(cache, master_url, master_json)
290     latest_revisions.update(revisions)
291
292
293   print "Fetch took: %s" % (datetime.datetime.now() - start_time)
294
295   alerts = apply_gatekeeper_rules(alerts, gatekeeper)
296
297   alerts = analysis.assign_keys(alerts)
298   reason_groups = analysis.group_by_reason(alerts)
299   range_groups = analysis.merge_by_range(reason_groups)
300   data = { 'content': json.dumps({
ojan 2014/07/22 02:01:24: Nit: I'd make this more verbose just to make it easier to read.
301     'alerts': alerts,
302     'reason_groups': reason_groups,
303     'range_groups': range_groups,
304     'latest_revisions': latest_revisions,
305   })}
306   for url in args.data_url:
307     log.info('POST %s alerts to %s' % (len(alerts), url))
308     requests.post(url, data=data)
309
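One way to satisfy the readability nit above is to name the payload before wrapping it, roughly:

  alerts_json = json.dumps({
      'alerts': alerts,
      'reason_groups': reason_groups,
      'range_groups': range_groups,
      'latest_revisions': latest_revisions,
  })
  data = {'content': alerts_json}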
310
311 if __name__ == '__main__':
312   sys.exit(main(sys.argv[1:]))