Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 import argparse | |
| 7 import datetime | |
| 8 import json | |
| 9 import logging | |
| 10 import operator | |
| 11 import os.path | |
| 12 import sys | |
| 13 | |
| 14 import requests | |
| 15 import requests_cache | |
| 16 | |
| 17 import analysis | |
| 18 import buildbot | |
| 19 import gatekeeper_extras | |
| 20 import reasons | |
| 21 import string_helpers | |
| 22 | |
# This is relative to build/scripts:
# https://chromium.googlesource.com/chromium/tools/build/+/master/scripts
BUILD_SCRIPTS_PATH = "/src/build/scripts"
sys.path.append(BUILD_SCRIPTS_PATH)  # Makes the 'slave' package importable.
from slave import gatekeeper_ng_config


# On-disk cache directory for fetched build JSON (see buildbot.BuildCache).
CACHE_PATH = '/src/build_cache'
| 31 | |
| 32 | |
# Python logging is stupidly verbose to configure.
# TODO(review): move this to a shared file instead of copy-pasting per script.
def setup_logging():
  """Return (logger, handler) emitting DEBUG-level '%(levelname)s: ...' logs."""
  new_logger = logging.getLogger(__name__)
  new_logger.setLevel(logging.DEBUG)
  stream_handler = logging.StreamHandler()
  stream_handler.setLevel(logging.DEBUG)
  stream_handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
  new_logger.addHandler(stream_handler)
  return new_logger, stream_handler
| 43 | |
| 44 | |
# Module-level logger shared by every function below.
log, logging_handler = setup_logging()

# FIXME: Pull from:
# https://chromium.googlesource.com/chromium/tools/build/+/master/scripts/slave/gatekeeper.json?format=TEXT
# Path to the gatekeeper config inside the checked-out build/scripts tree.
CONFIG_PATH = os.path.join(BUILD_SCRIPTS_PATH, 'slave', 'gatekeeper.json')

# Success or Warnings or None (didn't run) don't count as 'failing'.
# (Buildbot step result codes: 0 == SUCCESS, 1 == WARNINGS.)
NON_FAILING_RESULTS = (0, 1, None)
| 53 | |
| 54 | |
def compute_transition_and_failure_count(failure, build, recent_builds):
  """Walk backwards through recent builds to find where |failure| started.

  Args:
    failure: dict with at least 'step_name', 'reason', 'builder_name' and
        'master_url' keys (as produced by failures_for_build).
    build: the newest build; kept for interface compatibility.
        NOTE(review): recent_builds[0] is used instead — confirm callers
        always pass build == recent_builds[0].
    recent_builds: builds ordered newest-first.

  Returns:
    (last_pass_build, first_fail_build, fail_count). last_pass_build is None
    when no passing run of the step was found inside the window.
  """
  step_name = failure['step_name']
  reason = failure['reason']

  first_fail = recent_builds[0]
  last_pass = None
  fail_count = 1
  builds_missing_steps = []
  # Renamed the loop variable from 'build': the original shadowed the
  # parameter of the same name.
  for older_build in recent_builds[1:]:
    matching_steps = [s for s in older_build['steps']
                      if s['name'] == step_name]
    if len(matching_steps) != 1:
      if not matching_steps:
        # This case is pretty common, so just warn all at once at the end.
        builds_missing_steps.append(older_build['number'])
      else:
        log.error("%s has unexpected number of %s steps: %s" % (
            older_build['number'], step_name, matching_steps))
      continue

    step = matching_steps[0]
    step_result = step['results'][0]
    if step_result not in NON_FAILING_RESULTS:
      if reason:
        # Renamed from 'reasons': the original shadowed the imported
        # 'reasons' module.
        step_reasons = reasons_for_failure(step, older_build,
            failure['builder_name'], failure['master_url'])
        # This build doesn't seem to have this step reason, ignore it.
        if not step_reasons:
          continue
        # Failed, but our failure reason wasn't present!
        # FIXME: This is wrong for compile failures, and possibly
        # for test failures as well if not all tests are run...
        if reason not in step_reasons:
          break

      first_fail = older_build
      fail_count += 1
      continue

    # None is 'didn't run', not a passing result.
    if step_result is None:
      continue

    last_pass = older_build
    break

  if builds_missing_steps:
    log.warn("builds %s missing %s" % (
        string_helpers.re_range(builds_missing_steps), step_name))

  return last_pass, first_fail, fail_count
| 105 | |
| 106 | |
def failing_steps_for_build(build):
  """Return the failing steps of |build|, minus known-noisy wrapper steps."""
  if build.get('results') is None:
    # NOTE(review): we log but still fall through and read build['steps'];
    # presumably callers only pass builds that at least have steps.
    log.error('Bad build: %s %s %s' % (build.get('number'), build.get('eta'),
        build.get('currentStep', {}).get('name')))
  # This check is probably not necessary.
  if build.get('results', 0) == 0:
    return []

  # Some builders use a sub-step pattern which just generates noise.
  # FIXME: This code shouldn't contain constants like these.
  IGNORED_STEPS = ['steps', 'trigger', 'slave_steps']
  return [step for step in build['steps']
          if step['results'][0] not in NON_FAILING_RESULTS
          and step['name'] not in IGNORED_STEPS]
| 120 | |
| 121 | |
def reasons_for_failure(step, build, builder_name, master_url):
  """Split a failing step into reason strings via the first matching splitter.

  Returns None when no splitter handles this kind of step.
  """
  for candidate in reasons.STEP_SPLITTERS:
    if candidate.handles_step(step):
      return candidate.split_step(step, build, builder_name, master_url)
  return None
| 127 | |
| 128 | |
def failures_for_build(build, master_url, builder_name):
  """Expand |build|'s failing steps into one failure dict per (step, reason).

  Each failure carries the master/builder/step identity, the step's end
  time and the build's latest revisions. 'reason' is None when the step
  could not be split into finer-grained reasons.
  """
  failures = []
  for step in failing_steps_for_build(build):
    step_template = {
      'master_url': master_url,
      'last_result_time': step['times'][1],
      'builder_name': builder_name,
      'step_name': step['name'],
      'latest_revisions': buildbot.revisions_from_build(build),
    }
    # Renamed from 'reasons': the original shadowed the imported
    # 'reasons' module.
    step_reasons = reasons_for_failure(step, build, builder_name, master_url)
    if not step_reasons:
      failure = dict(step_template)
      failure['reason'] = None
      failures.append(failure)
    else:
      for reason in step_reasons:
        failure = dict(step_template)
        failure['reason'] = reason
        failures.append(failure)

  return failures
| 151 | |
| 152 | |
# FIXME: This should merge with compute_transition_and_failure_count.
def fill_in_transition(failure, build, recent_builds):
  """Annotate |failure| in place with pass/fail transition info; returns it."""
  # Parenthesized continuation instead of a backslash (review feedback).
  last_pass_build, first_fail_build, fail_count = (
      compute_transition_and_failure_count(failure, build, recent_builds))

  if last_pass_build:
    passing_revisions = buildbot.revisions_from_build(last_pass_build)
    passing_number = last_pass_build['number']
  else:
    passing_revisions = None
    passing_number = None

  failure.update({
    'failing_build_count': fail_count,
    'passing_build': passing_number,
    'failing_build': first_fail_build['number'],
    'failing_revisions': buildbot.revisions_from_build(first_fail_build),
    'passing_revisions': passing_revisions,
  })
  return failure
| 169 | |
| 170 | |
def alerts_for_builder(cache, master_url, builder_name, recent_build_ids,
    active_builds):
  """Return alert dicts for the latest completed build of one builder.

  Args:
    cache: a buildbot.BuildCache.
    master_url: URL of the buildbot master.
    builder_name: name of the builder on that master.
    recent_build_ids: cached build numbers (includes still-running builds).
    active_builds: build dicts for currently running builds.
  """
  # Limit matches buildbot.prefill_builds_cache's window.
  # TODO(review): hoist into a module-level constant.
  max_builds = 100

  recent_build_ids = sorted(recent_build_ids, reverse=True)

  active_build_ids = [b['number'] for b in active_builds]
  # recent_build_ids includes active ones.
  recent_build_ids = [b for b in recent_build_ids if b not in active_build_ids]
  if not recent_build_ids:
    # Every cached build is still running; the original code would have
    # raised IndexError on recent_build_ids[0] below.
    log.warn("No completed builds for %s, skipping." % builder_name)
    return []

  cache_key = buildbot.cache_key_for_build(master_url, builder_name,
      recent_build_ids[0])
  if not cache.get(cache_key):
    buildbot.prefill_builds_cache(cache, master_url, builder_name)

  recent_build_ids = recent_build_ids[:max_builds]

  recent_builds = [buildbot.fetch_build_json(cache, master_url, builder_name,
      build_id) for build_id in recent_build_ids]
  # Some fetches may fail; drop the Nones (comprehension keeps this working
  # under both Python 2 and 3, unlike indexing the result of filter()).
  recent_builds = [b for b in recent_builds if b]
  if not recent_builds:
    log.warn("No recent builds for %s, skipping." % builder_name)
    return []

  build = recent_builds[0]
  failures = failures_for_build(build, master_url, builder_name)
  return [fill_in_transition(failure, build, recent_builds)
          for failure in failures]
| 195 | |
| 196 | |
def alerts_for_master(cache, master_url, master_json):
  """Return alerts for every builder on a master, given its main JSON blob."""
  active_builds = []
  for slave in master_json['slaves'].values():
    for build in slave['runningBuilds']:
      active_builds.append(build)

  # Loop-invariant: the master name depends only on the URL, so compute it
  # once instead of once per builder.
  master_name = buildbot.master_name_from_url(master_url)

  alerts = []
  for builder_name, builder_json in master_json['builders'].items():
    actives = [b for b in active_builds if b['builderName'] == builder_name]
    # cachedBuilds will include runningBuilds.
    recent_build_ids = builder_json['cachedBuilds']
    log.debug("%s %s" % (master_name, builder_name))
    alerts.extend(alerts_for_builder(cache, master_url, builder_name,
        recent_build_ids, actives))

  return alerts
| 213 | |
| 214 | |
| 215 # Want to get all failures for all builds in the universe. | |
| 216 # Sort into most recent failures and then walk backwards to understand. | |
| 217 | |
| 218 # cron job loads gatekeeper.json and starts MR with master_urls | |
| 219 # Map master_url to master_blob | |
| 220 # Map master_blob to (master:builder, build_blobs) and (master:builder, builder_ url) | |
| 221 # Map builder_url to build_blobs | |
| 222 # Map build_blob to failures | |
| 223 # Shuffle failures into (master:builder, [failure, failure]) | |
| 224 # Reduce | |
|
ojan
2014/07/22 02:01:24
Did you mean to leave these comments in? It's not
| |
| 225 | |
| 226 | |
def apply_gatekeeper_rules(alerts, gatekeeper):
  """Drop alerts for gatekeeper-excluded builders; tag the rest with tree info.

  Alerts whose master has no gatekeeper config pass through untouched
  (no 'would_close_tree' or 'tree_name' keys are added to them).
  """
  filtered_alerts = []
  for alert in alerts:
    master_url = alert['master_url']
    master_name = buildbot.master_name_from_url(master_url)
    config = gatekeeper.get(master_url)
    if not config:
      # Unclear if this should be set or not?
      # FIXME(review): decide whether 'would_close_tree' belongs here.
      # alert['would_close_tree'] = False
      filtered_alerts.append(alert)
      continue
    if alert['builder_name'] in gatekeeper_extras.excluded_builders(config):
      continue
    # Set both annotations before appending; the original set 'tree_name'
    # after append, which works on the same dict object but reads oddly.
    alert['would_close_tree'] = gatekeeper_extras.would_close_tree(
        config, alert['builder_name'], alert['step_name'])
    alert['tree_name'] = gatekeeper_extras.tree_for_master(master_name)
    filtered_alerts.append(alert)
  return filtered_alerts
| 246 | |
| 247 | |
def fetch_master_urls(gatekeeper, args):
  """Return the master URLs from the gatekeeper config, minus filtered ones.

  NOTE(review): --master-filter EXCLUDES masters whose URL contains the
  filter string (it does not select them) — confirm this is intended.
  """
  # Currently using gatekeeper.json, but could use:
  # https://apis-explorer.appspot.com/apis-explorer/?base=https://chrome-infra-stats.appspot.com/_ah/api#p/stats/v1/stats.masters.list?_h=1&
  urls = gatekeeper.keys()
  if args.master_filter:
    urls = [url for url in urls if args.master_filter not in url]
  return urls
| 255 | |
| 256 | |
def main(args):
  """Fetch builder state, compute failure alerts, and POST them as JSON.

  Args:
    args: argv-style list; positional entries are /data URLs to POST to.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('data_url', action='store', nargs='*')
  parser.add_argument('--use-cache', action='store_true')
  parser.add_argument('--master-filter', action='store')
  args = parser.parse_args(args)

  if not args.data_url:
    log.warn("No /data url passed, won't do anything")

  # Persist HTTP responses on disk when requested; otherwise keep them
  # in-memory so repeated requests within one run are still deduplicated.
  if args.use_cache:
    requests_cache.install_cache('failure_stats')
  else:
    requests_cache.install_cache(backend='memory')

  gatekeeper = gatekeeper_ng_config.load_gatekeeper_config(CONFIG_PATH)
  master_urls = fetch_master_urls(gatekeeper, args)
  start_time = datetime.datetime.now()

  latest_revisions = {}

  cache = buildbot.BuildCache(CACHE_PATH)

  alerts = []
  for master_url in master_urls:
    master_json = buildbot.fetch_master_json(master_url)
    master_alerts = alerts_for_master(cache, master_url, master_json)
    alerts.extend(master_alerts)

    # FIXME: This doesn't really belong here. garden-o-matic wants
    # this data and we happen to have the builder json cached at
    # this point so it's cheap to compute.
    revisions = buildbot.latest_revisions_for_master(cache, master_url,
        master_json)
    latest_revisions.update(revisions)

  # Parenthesized so this line is valid under both Python 2 and 3
  # (a single parenthesized argument prints identically either way).
  print("Fetch took: %s" % (datetime.datetime.now() - start_time))

  alerts = apply_gatekeeper_rules(alerts, gatekeeper)

  alerts = analysis.assign_keys(alerts)
  reason_groups = analysis.group_by_reason(alerts)
  range_groups = analysis.merge_by_range(reason_groups)
  # One key per line for readability (review feedback).
  payload = {
    'alerts': alerts,
    'reason_groups': reason_groups,
    'range_groups': range_groups,
    'latest_revisions': latest_revisions,
  }
  data = {'content': json.dumps(payload)}
  for url in args.data_url:
    log.info('POST %s alerts to %s' % (len(alerts), url))
    requests.post(url, data=data)
| 309 | |
| 310 | |
# Script entry point: forward CLI args (minus the program name) and
# propagate main()'s return value as the process exit code.
if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))
| OLD | NEW |