Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(513)

Unified Diff: Tools/AutoSheriff/analysis.py

Issue 398823008: WIP: Add auto-sheriff.appspot.com code to Blink Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: Tools/AutoSheriff/analysis.py
diff --git a/Tools/AutoSheriff/analysis.py b/Tools/AutoSheriff/analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d8fa930895c7b44417c9142f22aba5d0e036877
--- /dev/null
+++ b/Tools/AutoSheriff/analysis.py
@@ -0,0 +1,175 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import json
+import collections
+import operator
+
+
+# Git ready, but only implemented for SVN atm.
ojan 2014/07/22 02:01:23 I think we should only operate on monotonically in
+def ids_after_first_including_second(first, second):
+ if not first or not second:
+ return []
+ try:
+ return range(int(first) + 1, int(second) + 1)
+ except ValueError, e:
+ # likely passed a git hash
+ return []
+
+
+# Git ready, but only implemented for SVN atm.
+def is_ancestor_of(older, younger):
+ return int(older) < int(younger)
+
+
+def is_decendant_of(younger, older):
+ return is_ancestor_of(older, younger)
+
+
+def commit_compare(one, two):
+ if is_decendant_of(one, two):
+ return -1
+ if is_ancestor_of(one, two):
+ return 1
+ return 0 # This is technically not right, since commits can be non-comparable.
+
+
+def flatten_to_commit_list(passing, failing):
+ # Flatten two commit dicts to a list of 'name:commit'
+ if not passing or not failing:
+ return []
+ all_commits = []
+ for name in passing.keys():
+ commits = ids_after_first_including_second(passing[name], failing[name])
+ all_commits.extend(['%s:%s' % (name, commit) for commit in commits])
+ return all_commits
+
+
+# FIXME: Perhaps this should be done by the feeder?
+def assign_keys(alerts):
+ for key, alert in enumerate(alerts):
+ # We could come up with something more sophisticated if necessary.
+ alert['key'] = 'f%s' % key # Just something so it doesn't look like a number.
+ return alerts
+
+
+def _make_merge_dicts(reducer):
+ def merge_dicts(one, two):
+ if not one or not two:
+ return None
+ reduction = {}
+ for key in set(one.keys() + two.keys()):
+ one_value = one.get(key)
+ two_value = two.get(key)
+ reduction[key] = reducer(one_value, two_value)
+ return reduction
+ return merge_dicts
+
+
+def merge_regression_ranges(alerts):
+ def make_compare(compare_func):
+ def compare(one, two):
+ if one and two and compare_func(one, two):
+ return one
+ # Treat None is 'infinite' so always return the more limiting.
+ # FIXME: 'passing' may wish to prefer None over a value?
+ return two or one
+ return compare
+
+ # These don't handle the case where commits can't be compared (git branches)
+ younger_commit = make_compare(is_decendant_of)
+ passing_dicts = map(operator.itemgetter('passing_revisions'), alerts)
+ last_passing = reduce(_make_merge_dicts(younger_commit), passing_dicts)
+
+ older_commit = make_compare(is_ancestor_of)
+ failing_dicts = map(operator.itemgetter('failing_revisions'), alerts)
+ first_failing = reduce(_make_merge_dicts(older_commit), failing_dicts)
+
+ return last_passing, first_failing
+
+
+def reason_key_for_alert(alert):
+ # FIXME: May need something smarter for reason_key.
+ reason_key = alert['step_name']
+ if alert['reason']:
+ reason_key += ':%s' % alert['reason']
+ else:
+ # If we don't understand the alert, just make it builder-unique.
+ reason_key += ':%s' % alert['builder_name']
+ return reason_key
+
+
+def group_by_reason(alerts):
+ by_reason = collections.defaultdict(list)
+ for alert in alerts:
+ by_reason[reason_key_for_alert(alert)].append(alert)
+
+ reason_groups = []
+ for reason_key, alerts in by_reason.items():
+ last_passing, first_failing = merge_regression_ranges(alerts)
+ blame_list = flatten_to_commit_list(last_passing, first_failing)
+ # FIXME: blame_list isn't filtered yet, but should be.
+ reason_groups.append({
+ 'sort_key': reason_key,
+ 'merged_last_passing': last_passing,
+ 'merged_first_failing': first_failing,
+ 'likely_revisions': blame_list,
+ 'failure_keys': map(operator.itemgetter('key'), alerts),
+ })
+ return reason_groups
+
+# http://stackoverflow.com/questions/18715688/find-common-substring-between-two-strings
+def longestSubstringFinder(string1, string2):
ojan 2014/07/22 02:01:23 s/camelCase/camel_case
+ answer = ""
+ len1, len2 = len(string1), len(string2)
+ for i in range(len1):
+ match = ""
+ for j in range(len2):
+ if (i + j < len1 and string1[i + j] == string2[j]):
+ match += string2[j]
+ else:
+ if (len(match) > len(answer)): answer = match
+ match = ""
+ return answer
+
+
+def range_key_for_group(group):
+ last_passing = group['merged_last_passing']
+ first_failing = group['merged_first_failing']
+ if last_passing:
+ range_key = ' '.join(flatten_to_commit_list(last_passing, first_failing))
+ else:
+ # Even regressions where we don't know when they started can be
+ # merged by our earliest known failure.
+ parts = ['<=%s:%s' % (name, commit) for name, commit in first_failing.items()]
+ range_key = ' '.join(parts)
+ # sort_key is a heuristic to avoid merging failiures like
+ # gclient revert + webkit_tests which just happened to pull
+ # exact matching revisions when failing.
ojan 2014/07/22 02:01:23 I'm not sure it's wrong to group these together in
+ return group['sort_key'][:3] + range_key
+
+
+def merge_by_range(reason_groups):
+ if not reason_groups:
+ return []
+ expected_keys = sorted(reason_groups[0].keys())
+ by_range = {}
+ for group in reason_groups:
+ range_key = range_key_for_group(group)
+ existing = by_range.get(range_key)
+ if not existing:
+ # Shallow copy of group.
+ by_range[range_key] = dict(group)
+ continue
+
+ # FIXME: It's possible we don't want to merge two keys with nothing in common.
+ # e.g. bot_update and
+ # We only care about these two keys, the rest should be the same between all groups.
+ # I guess we could assert that...
+ by_range[range_key].update({
+ 'sort_key': longestSubstringFinder(existing['sort_key'], group['sort_key']),
+ 'failure_keys': sorted(set(existing['failure_keys'] + group['failure_keys'])),
+ })
+
+ return sorted(by_range.values(), key=operator.itemgetter('sort_key'))

Powered by Google App Engine
This is Rietveld 408576698