Chromium Code Reviews

Side by Side Diff: Tools/AutoSheriff/feeder.py

Issue 398823008: WIP: Add auto-sheriff.appspot.com code to Blink
Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 6 years, 5 months ago
#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import argparse
import datetime
import json
import logging
import operator
import os.path
import sys

import requests
import requests_cache

import analysis
import buildbot
import gatekeeper_extras
import reasons
import string_helpers

# This is relative to build/scripts:
# https://chromium.googlesource.com/chromium/tools/build/+/master/scripts
BUILD_SCRIPTS_PATH = "/src/build/scripts"
sys.path.append(BUILD_SCRIPTS_PATH)
from slave import gatekeeper_ng_config


CACHE_PATH = '/src/build_cache'


# Python logging is stupidly verbose to configure.
def setup_logging():
ojan 2014/07/22 02:01:24 Move this to a shared file instead of copy-pasting
  logger = logging.getLogger(__name__)
  logger.setLevel(logging.DEBUG)
  handler = logging.StreamHandler()
  handler.setLevel(logging.DEBUG)
  formatter = logging.Formatter('%(levelname)s: %(message)s')
  handler.setFormatter(formatter)
  logger.addHandler(handler)
  return logger, handler

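ojan's comment above suggests extracting this into a shared helper rather than copy-pasting it per tool. A minimal sketch of what that could look like, assuming a new sibling module (the name log_config.py is hypothetical, not part of this patch):

  # log_config.py (hypothetical shared module, not in this patch)
  import logging

  def setup_logging(name):
    # Same configuration as above: DEBUG-level stream handler with
    # 'LEVEL: message' formatting.
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
    logger.addHandler(handler)
    return logger, handler

Each tool would then just do: from log_config import setup_logging; log, logging_handler = setup_logging(__name__).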
log, logging_handler = setup_logging()

# FIXME: Pull from:
# https://chromium.googlesource.com/chromium/tools/build/+/master/scripts/slave/gatekeeper.json?format=TEXT
CONFIG_PATH = os.path.join(BUILD_SCRIPTS_PATH, 'slave', 'gatekeeper.json')

# Success or Warnings or None (didn't run) don't count as 'failing'.
NON_FAILING_RESULTS = (0, 1, None)


def compute_transition_and_failure_count(failure, build, recent_builds):
  '''Returns last_pass_build, first_fail_build, fail_count'''
ojan 2014/07/22 02:01:25 Meh. This comment doesn't tell me anything more than the code does.

  step_name = failure['step_name']
  reason = failure['reason']

  first_fail = recent_builds[0]
  last_pass = None
  fail_count = 1
  builds_missing_steps = []
  for build in recent_builds[1:]:
    matching_steps = [s for s in build['steps'] if s['name'] == step_name]
    if len(matching_steps) != 1:
      if not matching_steps:
        # This case is pretty common, so just warn all at once at the end.
        builds_missing_steps.append(build['number'])
      else:
        log.error("%s has unexpected number of %s steps: %s" % (build['number'], step_name, matching_steps))
      continue

    step = matching_steps[0]
    step_result = step['results'][0]
    if step_result not in NON_FAILING_RESULTS:
      if reason:
        reasons = reasons_for_failure(step, build,
            failure['builder_name'], failure['master_url'])
        # This build doesn't seem to have this step reason, ignore it.
        if not reasons:
          continue
        # Failed, but our failure reason wasn't present!
        # FIXME: This is wrong for compile failures, and possibly
        # for test failures as well if not all tests are run...
        if reason not in reasons:
          break

      first_fail = build
      fail_count += 1
      continue

    # None is 'didn't run', not a passing result.
    if step_result is None:
      continue

    last_pass = build
    break

  if builds_missing_steps:
    log.warn("builds %s missing %s" % (string_helpers.re_range(builds_missing_steps), step_name))

  return last_pass, first_fail, fail_count


def failing_steps_for_build(build):
  if build.get('results') is None:
    log.error('Bad build: %s %s %s' % (build.get('number'), build.get('eta'), build.get('currentStep', {}).get('name')))
  # This check is probably not necessary.
  if build.get('results', 0) == 0:
    return []

  failing_steps = [step for step in build['steps'] if step['results'][0] not in NON_FAILING_RESULTS]

  # Some builders use a sub-step pattern which just generates noise.
  # FIXME: This code shouldn't contain constants like these.
  IGNORED_STEPS = ['steps', 'trigger', 'slave_steps']
  return [step for step in failing_steps if step['name'] not in IGNORED_STEPS]


def reasons_for_failure(step, build, builder_name, master_url):
  splitter = next((splitter for splitter in reasons.STEP_SPLITTERS if splitter.handles_step(step)), None)
  if not splitter:
    return None
  return splitter.split_step(step, build, builder_name, master_url)

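reasons.STEP_SPLITTERS is used here as a small protocol: each splitter answers handles_step(step), and the first one that matches gets split_step(step, build, builder_name, master_url), which returns a list of reason strings. A minimal conforming splitter might look like this (hypothetical sketch; the real splitters live in reasons.py, which isn't shown in this patch):

  class StepNameSplitter(object):
    # Hypothetical example splitter, not part of this patch.
    def handles_step(self, step):
      return step['name'].endswith('tests')

    def split_step(self, step, build, builder_name, master_url):
      # A real splitter would parse the step's output into per-test
      # reasons; this placeholder returns one catch-all reason.
      return [step['name']]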
def failures_for_build(build, master_url, builder_name):
  failures = []
  for step in failing_steps_for_build(build):
    step_template = {
      'master_url': master_url,
      'last_result_time': step['times'][1],
      'builder_name': builder_name,
      'step_name': step['name'],
      'latest_revisions': buildbot.revisions_from_build(build),
    }
    reasons = reasons_for_failure(step, build, builder_name, master_url)
    if not reasons:
      failure = dict(step_template)
      failure['reason'] = None
      failures.append(failure)
    else:
      for reason in reasons:
        failure = dict(step_template)
        failure['reason'] = reason
        failures.append(failure)

  return failures


# FIXME: This should merge with compute_transition_and_failure_count.
def fill_in_transition(failure, build, recent_builds):
  last_pass_build, first_fail_build, fail_count = \
ojan 2014/07/22 02:01:25 Nit: tc tells me that idiomatic python is to use parens rather than backslash line continuations.
      compute_transition_and_failure_count(failure, build, recent_builds)
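The parenthesized form ojan is suggesting would read like this (sketch only; behavior is identical):

  last_pass_build, first_fail_build, fail_count = (
      compute_transition_and_failure_count(failure, build, recent_builds))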

  failing = buildbot.revisions_from_build(first_fail_build)
  passing = buildbot.revisions_from_build(last_pass_build) if last_pass_build else None

  failure.update({
    'failing_build_count': fail_count,
    'passing_build': last_pass_build['number'] if last_pass_build else None,
    'failing_build': first_fail_build['number'],
    'failing_revisions': failing,
    'passing_revisions': passing,
  })
  return failure


def alerts_for_builder(cache, master_url, builder_name, recent_build_ids, active_builds):
  recent_build_ids = sorted(recent_build_ids, reverse=True)

  active_build_ids = [b['number'] for b in active_builds]
  # recent_build_ids includes active ones.
  recent_build_ids = [b for b in recent_build_ids if b not in active_build_ids]

  cache_key = buildbot.cache_key_for_build(master_url, builder_name, recent_build_ids[0])
  if not cache.get(cache_key):
    buildbot.prefill_builds_cache(cache, master_url, builder_name)

  # Limit to 100 for now to match the prefill.
  recent_build_ids = recent_build_ids[:100]
ojan 2014/07/22 02:01:24 Move this into a constant at the top?
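The constant ojan asks for might look like this (sketch; the name MAX_RECENT_BUILDS is hypothetical):

  # Near the top of the file, next to CACHE_PATH:
  MAX_RECENT_BUILDS = 100  # Keep in sync with buildbot.prefill_builds_cache.

  # Then here:
  recent_build_ids = recent_build_ids[:MAX_RECENT_BUILDS]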

  recent_builds = [buildbot.fetch_build_json(cache, master_url, builder_name, num) for num in recent_build_ids]
  # Some fetches may fail.
  recent_builds = filter(None, recent_builds)
  if not recent_builds:
    log.warn("No recent builds for %s, skipping." % builder_name)
    return []

  build = recent_builds[0]
  failures = failures_for_build(build, master_url, builder_name)
  return [fill_in_transition(failure, build, recent_builds) for failure in failures]


def alerts_for_master(cache, master_url, master_json):
  active_builds = []
  for slave in master_json['slaves'].values():
    for build in slave['runningBuilds']:
      active_builds.append(build)

  alerts = []
  for builder_name, builder_json in master_json['builders'].items():
    actives = filter(lambda build: build['builderName'] == builder_name, active_builds)
    # cachedBuilds will include runningBuilds.
    recent_build_ids = builder_json['cachedBuilds']
    master_name = buildbot.master_name_from_url(master_url)
    log.debug("%s %s" % (master_name, builder_name))
    alerts.extend(alerts_for_builder(cache, master_url, builder_name, recent_build_ids, actives))

  return alerts


# Want to get all failures for all builds in the universe.
# Sort into most recent failures and then walk backwards to understand.

# cron job loads gatekeeper.json and starts MR with master_urls
# Map master_url to master_blob
# Map master_blob to (master:builder, build_blobs) and (master:builder, builder_url)
# Map builder_url to build_blobs
# Map build_blob to failures
# Shuffle failures into (master:builder, [failure, failure])
# Reduce
ojan 2014/07/22 02:01:24 Did you mean to leave these comments in? It's not clear they belong here.


def apply_gatekeeper_rules(alerts, gatekeeper):
  filtered_alerts = []
  for alert in alerts:
    master_url = alert['master_url']
    master_name = buildbot.master_name_from_url(master_url)
    config = gatekeeper.get(master_url)
    if not config:
      # Unclear if this should be set or not?
ojan 2014/07/22 02:01:24 FIXME?
      # alert['would_close_tree'] = False
      filtered_alerts.append(alert)
      continue
    excluded_builders = gatekeeper_extras.excluded_builders(config)
    if alert['builder_name'] in excluded_builders:
      continue
    alert['would_close_tree'] = \
ojan 2014/07/22 02:01:24 Ditto re: parens
        gatekeeper_extras.would_close_tree(config, alert['builder_name'], alert['step_name'])
    filtered_alerts.append(alert)
    alert['tree_name'] = gatekeeper_extras.tree_for_master(master_name)
  return filtered_alerts


def fetch_master_urls(gatekeeper, args):
  # Currently using gatekeeper.json, but could use:
  # https://apis-explorer.appspot.com/apis-explorer/?base=https://chrome-infra-stats.appspot.com/_ah/api#p/stats/v1/stats.masters.list?_h=1&
  master_urls = gatekeeper.keys()
  if args.master_filter:
    master_urls = [url for url in master_urls if args.master_filter not in url]
  return master_urls


def main(args):
  parser = argparse.ArgumentParser()
  parser.add_argument('data_url', action='store', nargs='*')
  parser.add_argument('--use-cache', action='store_true')
  parser.add_argument('--master-filter', action='store')
  args = parser.parse_args(args)

  if not args.data_url:
    log.warn("No /data url passed, won't do anything")

  if args.use_cache:
    requests_cache.install_cache('failure_stats')
  else:
    requests_cache.install_cache(backend='memory')

  gatekeeper = gatekeeper_ng_config.load_gatekeeper_config(CONFIG_PATH)
  master_urls = fetch_master_urls(gatekeeper, args)
  start_time = datetime.datetime.now()

  latest_revisions = {}

  cache = buildbot.BuildCache(CACHE_PATH)

  alerts = []
  for master_url in master_urls:
    master_json = buildbot.fetch_master_json(master_url)
    master_alerts = alerts_for_master(cache, master_url, master_json)
    alerts.extend(master_alerts)

    # FIXME: This doesn't really belong here. garden-o-matic wants
    # this data and we happen to have the builder json cached at
    # this point so it's cheap to compute.
    revisions = buildbot.latest_revisions_for_master(cache, master_url, master_json)
    latest_revisions.update(revisions)

  print "Fetch took: %s" % (datetime.datetime.now() - start_time)

  alerts = apply_gatekeeper_rules(alerts, gatekeeper)

  alerts = analysis.assign_keys(alerts)
  reason_groups = analysis.group_by_reason(alerts)
  range_groups = analysis.merge_by_range(reason_groups)
  data = { 'content': json.dumps({
ojan 2014/07/22 02:01:24 Nit: I'd make this more verbose just to make it easier to read.
    'alerts': alerts,
    'reason_groups': reason_groups,
    'range_groups': range_groups,
    'latest_revisions': latest_revisions,
  })}
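One reading of ojan's nit above is to name the blob before serializing it (sketch; alerts_blob is a hypothetical name, behavior unchanged):

  alerts_blob = {
    'alerts': alerts,
    'reason_groups': reason_groups,
    'range_groups': range_groups,
    'latest_revisions': latest_revisions,
  }
  data = {'content': json.dumps(alerts_blob)}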
  for url in args.data_url:
    log.info('POST %s alerts to %s' % (len(alerts), url))
    requests.post(url, data=data)


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))
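A typical invocation, given the arguments main() defines (the data URL below is a placeholder, not from this patch):

  python feeder.py https://auto-sheriff.appspot.com/data --use-cache

Note that --master-filter excludes rather than selects: masters whose URL contains the given substring are skipped.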
