Index: Tools/AutoSheriff/feeder.py |
diff --git a/Tools/AutoSheriff/feeder.py b/Tools/AutoSheriff/feeder.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..6d60504538891f932d52e1e3594ab9ffde122390 |
--- /dev/null |
+++ b/Tools/AutoSheriff/feeder.py |
@@ -0,0 +1,312 @@ |
+#!/usr/bin/env python |
+# Copyright 2014 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+import argparse |
+import datetime |
+import json |
+import logging |
+import operator |
+import os.path |
+import sys |
+ |
+import requests |
+import requests_cache |
+ |
+import analysis |
+import buildbot |
+import gatekeeper_extras |
+import reasons |
+import string_helpers |
+ |
+# This is relative to build/scripts: |
+# https://chromium.googlesource.com/chromium/tools/build/+/master/scripts |
+BUILD_SCRIPTS_PATH = "/src/build/scripts" |
+sys.path.append(BUILD_SCRIPTS_PATH) |
+from slave import gatekeeper_ng_config |
+ |
+ |
+CACHE_PATH = '/src/build_cache' |
+ |
+ |
+# Python logging is stupidly verbose to configure. |
+def setup_logging(): |
ojan
2014/07/22 02:01:24
Move this to a shared file instead of copy-pasting
|
+ logger = logging.getLogger(__name__) |
+ logger.setLevel(logging.DEBUG) |
+ handler = logging.StreamHandler() |
+ handler.setLevel(logging.DEBUG) |
+ formatter = logging.Formatter('%(levelname)s: %(message)s') |
+ handler.setFormatter(formatter) |
+ logger.addHandler(handler) |
+ return logger, handler |
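Regarding ojan's comment above about sharing this setup code: a minimal sketch of what a shared helper module could look like (the module name logging_utils.py is an assumption, not part of this patch):
# logging_utils.py (hypothetical shared module)
import logging

def setup_logging(name):
  # Same DEBUG-level stream handler and format as the copy above, so
  # feeder.py and other tools can import this instead of duplicating it.
  logger = logging.getLogger(name)
  logger.setLevel(logging.DEBUG)
  handler = logging.StreamHandler()
  handler.setLevel(logging.DEBUG)
  handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
  logger.addHandler(handler)
  return logger, handler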
+ |
+ |
+log, logging_handler = setup_logging() |
+ |
+# FIXME: Pull from: |
+# https://chromium.googlesource.com/chromium/tools/build/+/master/scripts/slave/gatekeeper.json?format=TEXT |
+CONFIG_PATH = os.path.join(BUILD_SCRIPTS_PATH, 'slave', 'gatekeeper.json') |
+ |
+# Success or Warnings or None (didn't run) don't count as 'failing'. |
+NON_FAILING_RESULTS = (0, 1, None) |
+ |
+ |
+def compute_transition_and_failure_count(failure, build, recent_builds): |
+ '''Returns last_pass_build, first_fail_build, fail_count''' |
ojan
2014/07/22 02:01:25
Meh. This comment doesn't tell me anything more than the function signature already does.
|
+ |
+ step_name = failure['step_name'] |
+ reason = failure['reason'] |
+ |
+ first_fail = recent_builds[0] |
+ last_pass = None |
+ fail_count = 1 |
+ builds_missing_steps = [] |
+ for build in recent_builds[1:]: |
+ matching_steps = [s for s in build['steps'] if s['name'] == step_name] |
+ if len(matching_steps) != 1: |
+ if not matching_steps: |
+ # This case is pretty common, so just warn all at once at the end. |
+ builds_missing_steps.append(build['number']) |
+ else: |
+ log.error("%s has unexpected number of %s steps: %s" % (build['number'], step_name, matching_steps)) |
+ continue |
+ |
+ step = matching_steps[0] |
+ step_result = step['results'][0] |
+ if step_result not in NON_FAILING_RESULTS: |
+ if reason: |
+ reasons = reasons_for_failure(step, build, |
+ failure['builder_name'], failure['master_url']) |
+ # This build doesn't seem to have this step reason, ignore it. |
+ if not reasons: |
+ continue |
+ # Failed, but our failure reason wasn't present! |
+ # FIXME: This is wrong for compile failures, and possibly |
+ # for test failures as well if not all tests are run... |
+ if reason not in reasons: |
+ break |
+ |
+ first_fail = build |
+ fail_count += 1 |
+ continue |
+ |
+ # None is 'didn't run', not a passing result. |
+ if step_result is None: |
+ continue |
+ |
+ last_pass = build |
+ break |
+ |
+ if builds_missing_steps: |
+ log.warn("builds %s missing %s" % (string_helpers.re_range(builds_missing_steps), step_name)) |
+ |
+ return last_pass, first_fail, fail_count |
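Regarding the docstring comment near the top of this function: if the docstring stays, it could describe the semantics instead of restating the return tuple. A sketch (the wording is mine, not part of the patch):
def compute_transition_and_failure_count(failure, build, recent_builds):
  '''Walks recent_builds (newest first) to find where this failure started.

  Returns (last_pass_build, first_fail_build, fail_count): the most recent
  build in which the step passed, the earliest build in the current failing
  streak, and the number of consecutive failing builds.
  '''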
+ |
+ |
+def failing_steps_for_build(build): |
+ if build.get('results') is None: |
+ log.error('Bad build: %s %s %s' % (build.get('number'), build.get('eta'), build.get('currentStep', {}).get('name'))) |
+ # This check is probably not necessary. |
+ if build.get('results', 0) == 0: |
+ return [] |
+ |
+ failing_steps = [step for step in build['steps'] if step['results'][0] not in NON_FAILING_RESULTS] |
+ |
+ # Some builders use a sub-step pattern which just generates noise. |
+ # FIXME: This code shouldn't contain constants like these. |
+ IGNORED_STEPS = ['steps', 'trigger', 'slave_steps'] |
+ return [step for step in failing_steps if step['name'] not in IGNORED_STEPS] |
+ |
+ |
+def reasons_for_failure(step, build, builder_name, master_url): |
+ splitter = next((splitter for splitter in reasons.STEP_SPLITTERS if splitter.handles_step(step)), None) |
+ if not splitter: |
+ return None |
+ return splitter.split_step(step, build, builder_name, master_url) |
+ |
+ |
+def failures_for_build(build, master_url, builder_name): |
+ failures = [] |
+ for step in failing_steps_for_build(build): |
+ step_template = { |
+ 'master_url': master_url, |
+ 'last_result_time': step['times'][1], |
+ 'builder_name': builder_name, |
+ 'step_name': step['name'], |
+ 'latest_revisions': buildbot.revisions_from_build(build), |
+ } |
+ reasons = reasons_for_failure(step, build, builder_name, master_url) |
+ if not reasons: |
+ failure = dict(step_template) |
+ failure['reason'] = None |
+ failures.append(failure) |
+ else: |
+ for reason in reasons: |
+ failure = dict(step_template) |
+ failure['reason'] = reason |
+ failures.append(failure) |
+ |
+ return failures |
+ |
+ |
+# FIXME: This should merge with compute_transition_and_failure_count. |
+def fill_in_transition(failure, build, recent_builds): |
+ last_pass_build, first_fail_build, fail_count = \ |
ojan
2014/07/22 02:01:25
Nit: tc tells me that idiomatic python is to use parens for line continuations instead of backslashes.
|
+ compute_transition_and_failure_count(failure, build, recent_builds) |
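For reference, the paren-wrapped continuation the nit above refers to would look like this (a sketch):
  last_pass_build, first_fail_build, fail_count = (
      compute_transition_and_failure_count(failure, build, recent_builds))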
+ |
+ failing = buildbot.revisions_from_build(first_fail_build) |
+ passing = buildbot.revisions_from_build(last_pass_build) if last_pass_build else None |
+ |
+ failure.update({ |
+ 'failing_build_count': fail_count, |
+ 'passing_build': last_pass_build['number'] if last_pass_build else None, |
+ 'failing_build': first_fail_build['number'], |
+ 'failing_revisions': failing, |
+ 'passing_revisions': passing, |
+ }) |
+ return failure |
+ |
+ |
+def alerts_for_builder(cache, master_url, builder_name, recent_build_ids, active_builds): |
+ recent_build_ids = sorted(recent_build_ids, reverse=True) |
+ |
+ active_build_ids = [b['number'] for b in active_builds] |
+ # recent_build_ids includes active ones. |
+ recent_build_ids = [b for b in recent_build_ids if b not in active_build_ids] |
+ |
+ cache_key = buildbot.cache_key_for_build(master_url, builder_name, recent_build_ids[0]) |
+ if not cache.get(cache_key): |
+ buildbot.prefill_builds_cache(cache, master_url, builder_name) |
+ |
+ # Limit to 100 for now to match the prefill. |
+ recent_build_ids = recent_build_ids[:100] |
ojan
2014/07/22 02:01:24
Move this into a constant at the top?
|
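A sketch of the constant the reviewer suggests (the name MAX_RECENT_BUILDS is an assumption). At module level, next to CACHE_PATH:
MAX_RECENT_BUILDS = 100  # Keep in sync with the prefill in buildbot.prefill_builds_cache.
and the slice above becomes recent_build_ids[:MAX_RECENT_BUILDS].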
+ |
+ recent_builds = [buildbot.fetch_build_json(cache, master_url, builder_name, num) for num in recent_build_ids] |
+ # Some fetches may fail. |
+ recent_builds = filter(None, recent_builds) |
+ if not recent_builds: |
+ log.warn("No recent builds for %s, skipping." % builder_name) |
+ return [] |
+ |
+ build = recent_builds[0] |
+ failures = failures_for_build(build, master_url, builder_name) |
+ return [fill_in_transition(failure, build, recent_builds) for failure in failures] |
+ |
+ |
+def alerts_for_master(cache, master_url, master_json): |
+ active_builds = [] |
+ for slave in master_json['slaves'].values(): |
+ for build in slave['runningBuilds']: |
+ active_builds.append(build) |
+ |
+ alerts = [] |
+ for builder_name, builder_json in master_json['builders'].items(): |
+ actives = filter(lambda build: build['builderName'] == builder_name, active_builds) |
+ # cachedBuilds will include runningBuilds. |
+ recent_build_ids = builder_json['cachedBuilds'] |
+ master_name = buildbot.master_name_from_url(master_url) |
+ log.debug("%s %s" % (master_name, builder_name)) |
+ alerts.extend(alerts_for_builder(cache, master_url, builder_name, recent_build_ids, actives)) |
+ |
+ return alerts |
+ |
+ |
+# Want to get all failures for all builds in the universe. |
+# Sort into most recent failures and then walk backwards to understand. |
+ |
+# cron job loads gatekeeper.json and starts MR with master_urls |
+# Map master_url to master_blob |
+# Map master_blob to (master:builder, build_blobs) and (master:builder, builder_url) |
+# Map builder_url to build_blobs |
+# Map build_blob to failures |
+# Shuffle failures into (master:builder, [failure, failure]) |
+# Reduce |
ojan
2014/07/22 02:01:24
Did you mean to leave these comments in? It's not clear whether they're still needed.
|
+ |
+ |
+def apply_gatekeeper_rules(alerts, gatekeeper): |
+ filtered_alerts = [] |
+ for alert in alerts: |
+ master_url = alert['master_url'] |
+ master_name = buildbot.master_name_from_url(master_url) |
+ config = gatekeeper.get(master_url) |
+ if not config: |
+ # Unclear if this should be set or not? |
ojan
2014/07/22 02:01:24
FIXME?
|
+ # alert['would_close_tree'] = False |
+ filtered_alerts.append(alert) |
+ continue |
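Regarding the FIXME above: one option is to set the field explicitly when a master has no gatekeeper config (a sketch; defaulting to False is an assumption about the desired behavior):
if not config:
  # No gatekeeper config for this master; assume it cannot close the tree.
  alert['would_close_tree'] = False
  filtered_alerts.append(alert)
  continue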
+ excluded_builders = gatekeeper_extras.excluded_builders(config) |
+ if alert['builder_name'] in excluded_builders: |
+ continue |
+ alert['would_close_tree'] = \ |
ojan
2014/07/22 02:01:24
Ditto re: parens
|
+ gatekeeper_extras.would_close_tree(config, alert['builder_name'], alert['step_name']) |
+ filtered_alerts.append(alert) |
+ alert['tree_name'] = gatekeeper_extras.tree_for_master(master_name) |
+ return filtered_alerts |
+ |
+ |
+def fetch_master_urls(gatekeeper, args): |
+ # Currently using gatekeeper.json, but could use: |
+ # https://apis-explorer.appspot.com/apis-explorer/?base=https://chrome-infra-stats.appspot.com/_ah/api#p/stats/v1/stats.masters.list?_h=1& |
+ master_urls = gatekeeper.keys() |
+ if args.master_filter: |
+ master_urls = [url for url in master_urls if args.master_filter not in url] |
+ return master_urls |
+ |
+ |
+def main(args): |
+ parser = argparse.ArgumentParser() |
+ parser.add_argument('data_url', action='store', nargs='*') |
+ parser.add_argument('--use-cache', action='store_true') |
+ parser.add_argument('--master-filter', action='store') |
+ args = parser.parse_args(args) |
+ |
+ if not args.data_url: |
+ log.warn("No /data url passed, won't do anything") |
+ |
+ if args.use_cache: |
+ requests_cache.install_cache('failure_stats') |
+ else: |
+ requests_cache.install_cache(backend='memory') |
+ |
+ gatekeeper = gatekeeper_ng_config.load_gatekeeper_config(CONFIG_PATH) |
+ master_urls = fetch_master_urls(gatekeeper, args) |
+ start_time = datetime.datetime.now() |
+ |
+ latest_revisions = {} |
+ |
+ cache = buildbot.BuildCache(CACHE_PATH) |
+ |
+ alerts = [] |
+ for master_url in master_urls: |
+ master_json = buildbot.fetch_master_json(master_url) |
+ master_alerts = alerts_for_master(cache, master_url, master_json) |
+ alerts.extend(master_alerts) |
+ |
+ # FIXME: This doesn't really belong here. garden-o-matic wants |
+ # this data and we happen to have the builder json cached at |
+ # this point so it's cheap to compute. |
+ revisions = buildbot.latest_revisions_for_master(cache, master_url, master_json) |
+ latest_revisions.update(revisions) |
+ |
+ |
+ print "Fetch took: %s" % (datetime.datetime.now() - start_time) |
+ |
+ alerts = apply_gatekeeper_rules(alerts, gatekeeper) |
+ |
+ alerts = analysis.assign_keys(alerts) |
+ reason_groups = analysis.group_by_reason(alerts) |
+ range_groups = analysis.merge_by_range(reason_groups) |
+ data = { 'content': json.dumps({ |
ojan
2014/07/22 02:01:24
Nit: I'd make this more verbose just to make it easier to read.
|
+ 'alerts': alerts, |
+ 'reason_groups': reason_groups, |
+ 'range_groups': range_groups, |
+ 'latest_revisions': latest_revisions, |
+ })} |
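A sketch of the more explicit construction the nit above suggests (the intermediate name summary is an assumption):
  summary = {
      'alerts': alerts,
      'reason_groups': reason_groups,
      'range_groups': range_groups,
      'latest_revisions': latest_revisions,
  }
  data = {'content': json.dumps(summary)}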
+ for url in args.data_url: |
+ log.info('POST %s alerts to %s' % (len(alerts), url)) |
+ requests.post(url, data=data) |
+ |
+ |
+if __name__ == '__main__': |
+ sys.exit(main(sys.argv[1:])) |