Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 import argparse | |
| 7 import datetime | |
| 8 import json | |
| 9 import logging | |
| 10 import operator | |
| 11 import os.path | |
| 12 import sys | |
| 13 | |
| 14 import requests | |
| 15 import requests_cache | |
| 16 | |
| 17 import analysis | |
| 18 import buildbot | |
| 19 import gatekeeper_extras | |
| 20 import reasons | |
| 21 import string_helpers | |
| 22 | |
# This is relative to build/scripts:
# https://chromium.googlesource.com/chromium/tools/build/+/master/scripts
BUILD_SCRIPTS_PATH = "/src/build/scripts"
sys.path.append(BUILD_SCRIPTS_PATH)  # Makes the 'slave' package importable.
from slave import gatekeeper_ng_config


# On-disk cache directory for fetched build JSON (see buildbot.BuildCache).
CACHE_PATH = '/src/build_cache'
| 31 | |
| 32 | |
# Python logging is stupidly verbose to configure.
# TODO(review): move this to a shared file instead of copy-pasting per script.
def setup_logging():
  """Return (logger, handler) emitting DEBUG-level '%(levelname)s: ...' logs."""
  new_logger = logging.getLogger(__name__)
  new_logger.setLevel(logging.DEBUG)
  stream_handler = logging.StreamHandler()
  stream_handler.setLevel(logging.DEBUG)
  stream_handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
  new_logger.addHandler(stream_handler)
  return new_logger, stream_handler
| 43 | |
| 44 | |
# Module-level logger shared by every function below.
log, logging_handler = setup_logging()

# FIXME: Pull from:
# https://chromium.googlesource.com/chromium/tools/build/+/master/scripts/slave/gatekeeper.json?format=TEXT
# Path to the gatekeeper config inside the checked-out build/scripts tree.
CONFIG_PATH = os.path.join(BUILD_SCRIPTS_PATH, 'slave', 'gatekeeper.json')

# Success or Warnings or None (didn't run) don't count as 'failing'.
# (Buildbot step result codes: 0 == SUCCESS, 1 == WARNINGS.)
NON_FAILING_RESULTS = (0, 1, None)
| 53 | |
| 54 | |
def compute_transition_and_failure_count(failure, build, recent_builds):
  """Walk backwards through recent builds to find where |failure| started.

  Args:
    failure: dict with at least 'step_name', 'reason', 'builder_name' and
        'master_url' keys (as produced by failures_for_build).
    build: the newest build; kept for interface compatibility.
        NOTE(review): recent_builds[0] is used instead — confirm callers
        always pass build == recent_builds[0].
    recent_builds: builds ordered newest-first.

  Returns:
    (last_pass_build, first_fail_build, fail_count). last_pass_build is None
    when no passing run of the step was found inside the window.
  """
  step_name = failure['step_name']
  reason = failure['reason']

  first_fail = recent_builds[0]
  last_pass = None
  fail_count = 1
  builds_missing_steps = []
  # Renamed the loop variable from 'build': the original shadowed the
  # parameter of the same name.
  for older_build in recent_builds[1:]:
    matching_steps = [s for s in older_build['steps']
                      if s['name'] == step_name]
    if len(matching_steps) != 1:
      if not matching_steps:
        # This case is pretty common, so just warn all at once at the end.
        builds_missing_steps.append(older_build['number'])
      else:
        log.error("%s has unexpected number of %s steps: %s" % (
            older_build['number'], step_name, matching_steps))
      continue

    step = matching_steps[0]
    step_result = step['results'][0]
    if step_result not in NON_FAILING_RESULTS:
      if reason:
        # Renamed from 'reasons': the original shadowed the imported
        # 'reasons' module.
        step_reasons = reasons_for_failure(step, older_build,
            failure['builder_name'], failure['master_url'])
        # This build doesn't seem to have this step reason, ignore it.
        if not step_reasons:
          continue
        # Failed, but our failure reason wasn't present!
        # FIXME: This is wrong for compile failures, and possibly
        # for test failures as well if not all tests are run...
        if reason not in step_reasons:
          break

      first_fail = older_build
      fail_count += 1
      continue

    # None is 'didn't run', not a passing result.
    if step_result is None:
      continue

    last_pass = older_build
    break

  if builds_missing_steps:
    log.warn("builds %s missing %s" % (
        string_helpers.re_range(builds_missing_steps), step_name))

  return last_pass, first_fail, fail_count
| 105 | |
| 106 | |
def failing_steps_for_build(build):
  """Return the failing steps of |build|, minus known-noisy wrapper steps."""
  if build.get('results') is None:
    # NOTE(review): we log but still fall through and read build['steps'];
    # presumably callers only pass builds that at least have steps.
    log.error('Bad build: %s %s %s' % (build.get('number'), build.get('eta'),
        build.get('currentStep', {}).get('name')))
  # This check is probably not necessary.
  if build.get('results', 0) == 0:
    return []

  # Some builders use a sub-step pattern which just generates noise.
  # FIXME: This code shouldn't contain constants like these.
  IGNORED_STEPS = ['steps', 'trigger', 'slave_steps']
  return [step for step in build['steps']
          if step['results'][0] not in NON_FAILING_RESULTS
          and step['name'] not in IGNORED_STEPS]
| 120 | |
| 121 | |
def reasons_for_failure(step, build, builder_name, master_url):
  """Split a failing step into reason strings via the first matching splitter.

  Returns None when no splitter handles this kind of step.
  """
  for candidate in reasons.STEP_SPLITTERS:
    if candidate.handles_step(step):
      return candidate.split_step(step, build, builder_name, master_url)
  return None
| 127 | |
| 128 | |
def failures_for_build(build, master_url, builder_name):
  """Expand |build|'s failing steps into one failure dict per (step, reason).

  Each failure carries the master/builder/step identity, the step's end
  time and the build's latest revisions. 'reason' is None when the step
  could not be split into finer-grained reasons.
  """
  failures = []
  for step in failing_steps_for_build(build):
    step_template = {
      'master_url': master_url,
      'last_result_time': step['times'][1],
      'builder_name': builder_name,
      'step_name': step['name'],
      'latest_revisions': buildbot.revisions_from_build(build),
    }
    # Renamed from 'reasons': the original shadowed the imported
    # 'reasons' module.
    step_reasons = reasons_for_failure(step, build, builder_name, master_url)
    if not step_reasons:
      failure = dict(step_template)
      failure['reason'] = None
      failures.append(failure)
    else:
      for reason in step_reasons:
        failure = dict(step_template)
        failure['reason'] = reason
        failures.append(failure)

  return failures
| 151 | |
| 152 | |
# FIXME: This should merge with compute_transition_and_failure_count.
def fill_in_transition(failure, build, recent_builds):
  """Annotate |failure| in place with pass/fail transition info; returns it."""
  # Parenthesized continuation instead of a backslash (review feedback).
  last_pass_build, first_fail_build, fail_count = (
      compute_transition_and_failure_count(failure, build, recent_builds))

  if last_pass_build:
    passing_revisions = buildbot.revisions_from_build(last_pass_build)
    passing_number = last_pass_build['number']
  else:
    passing_revisions = None
    passing_number = None

  failure.update({
    'failing_build_count': fail_count,
    'passing_build': passing_number,
    'failing_build': first_fail_build['number'],
    'failing_revisions': buildbot.revisions_from_build(first_fail_build),
    'passing_revisions': passing_revisions,
  })
  return failure
| 169 | |
| 170 | |
def alerts_for_builder(cache, master_url, builder_name, recent_build_ids,
    active_builds):
  """Return alert dicts for the latest completed build of one builder.

  Args:
    cache: a buildbot.BuildCache.
    master_url: URL of the buildbot master.
    builder_name: name of the builder on that master.
    recent_build_ids: cached build numbers (includes still-running builds).
    active_builds: build dicts for currently running builds.
  """
  # Limit matches buildbot.prefill_builds_cache's window.
  # TODO(review): hoist into a module-level constant.
  max_builds = 100

  recent_build_ids = sorted(recent_build_ids, reverse=True)

  active_build_ids = [b['number'] for b in active_builds]
  # recent_build_ids includes active ones.
  recent_build_ids = [b for b in recent_build_ids if b not in active_build_ids]
  if not recent_build_ids:
    # Every cached build is still running; the original code would have
    # raised IndexError on recent_build_ids[0] below.
    log.warn("No completed builds for %s, skipping." % builder_name)
    return []

  cache_key = buildbot.cache_key_for_build(master_url, builder_name,
      recent_build_ids[0])
  if not cache.get(cache_key):
    buildbot.prefill_builds_cache(cache, master_url, builder_name)

  recent_build_ids = recent_build_ids[:max_builds]

  recent_builds = [buildbot.fetch_build_json(cache, master_url, builder_name,
      build_id) for build_id in recent_build_ids]
  # Some fetches may fail; drop the Nones (comprehension keeps this working
  # under both Python 2 and 3, unlike indexing the result of filter()).
  recent_builds = [b for b in recent_builds if b]
  if not recent_builds:
    log.warn("No recent builds for %s, skipping." % builder_name)
    return []

  build = recent_builds[0]
  failures = failures_for_build(build, master_url, builder_name)
  return [fill_in_transition(failure, build, recent_builds)
          for failure in failures]
| 195 | |
| 196 | |
def alerts_for_master(cache, master_url, master_json):
  """Return alerts for every builder on a master, given its main JSON blob."""
  active_builds = []
  for slave in master_json['slaves'].values():
    for build in slave['runningBuilds']:
      active_builds.append(build)

  # Loop-invariant: the master name depends only on the URL, so compute it
  # once instead of once per builder.
  master_name = buildbot.master_name_from_url(master_url)

  alerts = []
  for builder_name, builder_json in master_json['builders'].items():
    actives = [b for b in active_builds if b['builderName'] == builder_name]
    # cachedBuilds will include runningBuilds.
    recent_build_ids = builder_json['cachedBuilds']
    log.debug("%s %s" % (master_name, builder_name))
    alerts.extend(alerts_for_builder(cache, master_url, builder_name,
        recent_build_ids, actives))

  return alerts
| 213 | |
| 214 | |
| 215 # Want to get all failures for all builds in the universe. | |
| 216 # Sort into most recent failures and then walk backwards to understand. | |
| 217 | |
| 218 # cron job loads gatekeeper.json and starts MR with master_urls | |
| 219 # Map master_url to master_blob | |
| 220 # Map master_blob to (master:builder, build_blobs) and (master:builder, builder_ url) | |
| 221 # Map builder_url to build_blobs | |
| 222 # Map build_blob to failures | |
| 223 # Shuffle failures into (master:builder, [failure, failure]) | |
| 224 # Reduce | |
|
ojan
2014/07/22 02:01:24
Did you mean to leave these comments in? It's not
| |
| 225 | |
| 226 | |
def apply_gatekeeper_rules(alerts, gatekeeper):
  """Drop alerts for gatekeeper-excluded builders; tag the rest with tree info.

  Alerts whose master has no gatekeeper config pass through untouched
  (no 'would_close_tree' or 'tree_name' keys are added to them).
  """
  filtered_alerts = []
  for alert in alerts:
    master_url = alert['master_url']
    master_name = buildbot.master_name_from_url(master_url)
    config = gatekeeper.get(master_url)
    if not config:
      # Unclear if this should be set or not?
      # FIXME(review): decide whether 'would_close_tree' belongs here.
      # alert['would_close_tree'] = False
      filtered_alerts.append(alert)
      continue
    if alert['builder_name'] in gatekeeper_extras.excluded_builders(config):
      continue
    # Set both annotations before appending; the original set 'tree_name'
    # after append, which works on the same dict object but reads oddly.
    alert['would_close_tree'] = gatekeeper_extras.would_close_tree(
        config, alert['builder_name'], alert['step_name'])
    alert['tree_name'] = gatekeeper_extras.tree_for_master(master_name)
    filtered_alerts.append(alert)
  return filtered_alerts
| 246 | |
| 247 | |
def fetch_master_urls(gatekeeper, args):
  """Return the master URLs from the gatekeeper config, minus filtered ones.

  NOTE(review): --master-filter EXCLUDES masters whose URL contains the
  filter string (it does not select them) — confirm this is intended.
  """
  # Currently using gatekeeper.json, but could use:
  # https://apis-explorer.appspot.com/apis-explorer/?base=https://chrome-infra-stats.appspot.com/_ah/api#p/stats/v1/stats.masters.list?_h=1&
  urls = gatekeeper.keys()
  if args.master_filter:
    urls = [url for url in urls if args.master_filter not in url]
  return urls
| 255 | |
| 256 | |
def main(args):
  """Fetch builder state, compute failure alerts, and POST them as JSON.

  Args:
    args: argv-style list; positional entries are /data URLs to POST to.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('data_url', action='store', nargs='*')
  parser.add_argument('--use-cache', action='store_true')
  parser.add_argument('--master-filter', action='store')
  args = parser.parse_args(args)

  if not args.data_url:
    log.warn("No /data url passed, won't do anything")

  # Persist HTTP responses on disk when requested; otherwise keep them
  # in-memory so repeated requests within one run are still deduplicated.
  if args.use_cache:
    requests_cache.install_cache('failure_stats')
  else:
    requests_cache.install_cache(backend='memory')

  gatekeeper = gatekeeper_ng_config.load_gatekeeper_config(CONFIG_PATH)
  master_urls = fetch_master_urls(gatekeeper, args)
  start_time = datetime.datetime.now()

  latest_revisions = {}

  cache = buildbot.BuildCache(CACHE_PATH)

  alerts = []
  for master_url in master_urls:
    master_json = buildbot.fetch_master_json(master_url)
    master_alerts = alerts_for_master(cache, master_url, master_json)
    alerts.extend(master_alerts)

    # FIXME: This doesn't really belong here. garden-o-matic wants
    # this data and we happen to have the builder json cached at
    # this point so it's cheap to compute.
    revisions = buildbot.latest_revisions_for_master(cache, master_url,
        master_json)
    latest_revisions.update(revisions)

  # Parenthesized so this line is valid under both Python 2 and 3
  # (a single parenthesized argument prints identically either way).
  print("Fetch took: %s" % (datetime.datetime.now() - start_time))

  alerts = apply_gatekeeper_rules(alerts, gatekeeper)

  alerts = analysis.assign_keys(alerts)
  reason_groups = analysis.group_by_reason(alerts)
  range_groups = analysis.merge_by_range(reason_groups)
  # One key per line for readability (review feedback).
  payload = {
    'alerts': alerts,
    'reason_groups': reason_groups,
    'range_groups': range_groups,
    'latest_revisions': latest_revisions,
  }
  data = {'content': json.dumps(payload)}
  for url in args.data_url:
    log.info('POST %s alerts to %s' % (len(alerts), url))
    requests.post(url, data=data)
| 309 | |
| 310 | |
# Script entry point: forward CLI args (minus the program name) and
# propagate main()'s return value as the process exit code.
if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))
| OLD | NEW |