Index: Tools/AutoSheriff/analysis.py |
diff --git a/Tools/AutoSheriff/analysis.py b/Tools/AutoSheriff/analysis.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..6d8fa930895c7b44417c9142f22aba5d0e036877 |
--- /dev/null |
+++ b/Tools/AutoSheriff/analysis.py |
@@ -0,0 +1,175 @@ |
+# Copyright 2014 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+import json |
+import collections |
+import operator |
+ |
+ |
+# Git ready, but only implemented for SVN atm. |
ojan
2014/07/22 02:01:23
I think we should only operate on monotonically increasing revision ids.
[comment truncated in extraction]
def ids_after_first_including_second(first, second):
  """Return the revision ids in the range (first, second], as a list.

  Returns [] when either endpoint is missing or when the ids are not
  numeric (e.g. git hashes) -- only SVN-style revisions are supported.
  """
  if not first or not second:
    return []
  try:
    # list() keeps both return paths the same type (and works identically
    # on Python 2 and 3, where range() is lazy).
    return list(range(int(first) + 1, int(second) + 1))
  except ValueError:
    # int() failed; likely passed a git hash.
    return []
+ |
+ |
+# Git ready, but only implemented for SVN atm. |
def is_ancestor_of(older, younger):
  """True when |older| precedes |younger| (numeric SVN revisions only)."""
  return int(younger) > int(older)
+ |
+ |
def is_decendant_of(younger, older):
  """True when |younger| comes after |older| (numeric SVN revisions only).

  Note: 'decendant' is a historical misspelling kept for callers.
  """
  # Equivalent to is_ancestor_of(older, younger), inlined here.
  return int(older) < int(younger)
+ |
+ |
def commit_compare(one, two):
  """cmp-style comparator ordering newer revisions first."""
  first, second = int(one), int(two)
  if first > second:
    return -1
  if first < second:
    return 1
  # This is technically not right, since commits can be non-comparable.
  return 0
+ |
+ |
def flatten_to_commit_list(passing, failing):
  """Flatten two name->revision dicts into a list of 'name:commit' strings."""
  if not passing or not failing:
    return []
  flattened = []
  for name, last_pass in passing.items():
    for commit in ids_after_first_including_second(last_pass, failing[name]):
      flattened.append('%s:%s' % (name, commit))
  return flattened
+ |
+ |
+# FIXME: Perhaps this should be done by the feeder? |
def assign_keys(alerts):
  """Tag each alert dict in place with a unique 'key' and return the list.

  We could come up with something more sophisticated if necessary.
  """
  for index, alert in enumerate(alerts):
    # 'f' prefix just so the key doesn't look like a number.
    alert['key'] = 'f%s' % index
  return alerts
+ |
+ |
+def _make_merge_dicts(reducer): |
+ def merge_dicts(one, two): |
+ if not one or not two: |
+ return None |
+ reduction = {} |
+ for key in set(one.keys() + two.keys()): |
+ one_value = one.get(key) |
+ two_value = two.get(key) |
+ reduction[key] = reducer(one_value, two_value) |
+ return reduction |
+ return merge_dicts |
+ |
+ |
def merge_regression_ranges(alerts):
  """Merge the regression ranges of |alerts| (must be non-empty).

  Returns (last_passing, first_failing): the newest merged passing
  revisions and the oldest merged failing revisions across all alerts.
  """
  def make_compare(compare_func):
    def compare(one, two):
      if one and two and compare_func(one, two):
        return one
      # Treat None as 'infinite' so always return the more limiting.
      # FIXME: 'passing' may wish to prefer None over a value?
      return two or one
    return compare

  def merge_all(dicts, reducer):
    # Explicit left fold over list comprehensions; unlike bare map/reduce
    # this behaves identically on Python 2 and 3.
    merge = _make_merge_dicts(reducer)
    merged = dicts[0]
    for entry in dicts[1:]:
      merged = merge(merged, entry)
    return merged

  # These don't handle the case where commits can't be compared (git branches).
  younger_commit = make_compare(is_decendant_of)
  last_passing = merge_all(
      [alert['passing_revisions'] for alert in alerts], younger_commit)

  older_commit = make_compare(is_ancestor_of)
  first_failing = merge_all(
      [alert['failing_revisions'] for alert in alerts], older_commit)

  return last_passing, first_failing
+ |
+ |
def reason_key_for_alert(alert):
  """Build a key used to group alerts that likely share a root cause."""
  # FIXME: May need something smarter for reason_key.
  reason = alert['reason']
  # If we don't understand the alert, just make it builder-unique.
  suffix = reason if reason else alert['builder_name']
  return '%s:%s' % (alert['step_name'], suffix)
+ |
+ |
def group_by_reason(alerts):
  """Group |alerts| by reason key, merging each group's regression range."""
  by_reason = collections.defaultdict(list)
  for alert in alerts:
    by_reason[reason_key_for_alert(alert)].append(alert)

  reason_groups = []
  # Use a distinct loop variable; the original shadowed the |alerts| param.
  for reason_key, group_alerts in by_reason.items():
    last_passing, first_failing = merge_regression_ranges(group_alerts)
    blame_list = flatten_to_commit_list(last_passing, first_failing)
    # FIXME: blame_list isn't filtered yet, but should be.
    reason_groups.append({
        'sort_key': reason_key,
        'merged_last_passing': last_passing,
        'merged_first_failing': first_failing,
        'likely_revisions': blame_list,
        # A real list (not a lazy map object) so this stays
        # JSON-serializable on Python 3 too.
        'failure_keys': [a['key'] for a in group_alerts],
    })
  return reason_groups
+ |
# http://stackoverflow.com/questions/18715688/find-common-substring-between-two-strings
# TODO: rename to snake_case (per review) once callers are updated.
def longestSubstringFinder(string1, string2):
  """Return the longest common substring of |string1| and |string2|.

  Naive O(len1 * len2) scan; fine for the short sort_key strings it's
  used on.
  """
  answer = ""
  len1, len2 = len(string1), len(string2)
  for i in range(len1):
    match = ""
    for j in range(len2):
      if i + j < len1 and string1[i + j] == string2[j]:
        match += string2[j]
      else:
        if len(match) > len(answer):
          answer = match
        match = ""
    # Bug fix: a match running to the end of string2 was previously
    # discarded without ever being compared against the best answer
    # (e.g. identical inputs returned "").
    if len(match) > len(answer):
      answer = match
  return answer
+ |
+ |
def range_key_for_group(group):
  """Build a key so groups sharing the same regression range can merge."""
  last_passing = group['merged_last_passing']
  first_failing = group['merged_first_failing']
  if not last_passing:
    # Even regressions where we don't know when they started can be
    # merged by our earliest known failure.
    parts = ['<=%s:%s' % item for item in first_failing.items()]
    range_key = ' '.join(parts)
  else:
    range_key = ' '.join(flatten_to_commit_list(last_passing, first_failing))
  # The sort_key prefix is a heuristic to avoid merging failures like
  # gclient revert + webkit_tests which just happened to pull exact
  # matching revisions when failing.
  # Review note (ojan, 2014/07/22): unsure whether grouping those
  # together would actually be wrong.
  return group['sort_key'][:3] + range_key
+ |
+ |
def merge_by_range(reason_groups):
  """Collapse reason groups that share an identical regression range.

  Merged entries get the longest common substring of the sort_keys and
  the sorted union of the failure_keys; results are ordered by sort_key.
  """
  if not reason_groups:
    return []
  # Removed: unused 'expected_keys' local (dead code).
  by_range = {}
  for group in reason_groups:
    range_key = range_key_for_group(group)
    existing = by_range.get(range_key)
    if not existing:
      # Shallow copy of group so merging doesn't mutate the caller's dict.
      by_range[range_key] = dict(group)
      continue

    # FIXME: It's possible we don't want to merge two keys with nothing
    # in common, e.g. bot_update and an unrelated step.
    # We only care about these two keys; the rest should be the same
    # between all groups. I guess we could assert that...
    by_range[range_key].update({
        'sort_key': longestSubstringFinder(existing['sort_key'],
                                           group['sort_key']),
        'failure_keys': sorted(set(existing['failure_keys'] +
                                   group['failure_keys'])),
    })

  return sorted(by_range.values(), key=operator.itemgetter('sort_key'))