Chromium Code Reviews| Index: Tools/AutoSheriff/analysis.py |
| diff --git a/Tools/AutoSheriff/analysis.py b/Tools/AutoSheriff/analysis.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..6d8fa930895c7b44417c9142f22aba5d0e036877 |
| --- /dev/null |
| +++ b/Tools/AutoSheriff/analysis.py |
| @@ -0,0 +1,175 @@ |
| +# Copyright 2014 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import json |
| +import collections |
| +import operator |
| + |
| + |
| +# Git ready, but only implemented for SVN atm. |
|
ojan
2014/07/22 02:01:23
I think we should only operate on monotonically in
|
| +def ids_after_first_including_second(first, second): |
| + if not first or not second: |
| + return [] |
| + try: |
| + return range(int(first) + 1, int(second) + 1) |
| + except ValueError, e: |
| + # likely passed a git hash |
| + return [] |
| + |
| + |
| +# Git ready, but only implemented for SVN atm. |
| +def is_ancestor_of(older, younger): |
| + return int(older) < int(younger) |
| + |
| + |
| +def is_decendant_of(younger, older): |
| + return is_ancestor_of(older, younger) |
| + |
| + |
| +def commit_compare(one, two): |
| + if is_decendant_of(one, two): |
| + return -1 |
| + if is_ancestor_of(one, two): |
| + return 1 |
| + return 0 # This is technically not right, since commits can be non-comparable. |
| + |
| + |
| +def flatten_to_commit_list(passing, failing): |
| + # Flatten two commit dicts to a list of 'name:commit' |
| + if not passing or not failing: |
| + return [] |
| + all_commits = [] |
| + for name in passing.keys(): |
| + commits = ids_after_first_including_second(passing[name], failing[name]) |
| + all_commits.extend(['%s:%s' % (name, commit) for commit in commits]) |
| + return all_commits |
| + |
| + |
| +# FIXME: Perhaps this should be done by the feeder? |
| +def assign_keys(alerts): |
| + for key, alert in enumerate(alerts): |
| + # We could come up with something more sophisticated if necessary. |
| + alert['key'] = 'f%s' % key # Just something so it doesn't look like a number. |
| + return alerts |
| + |
| + |
| +def _make_merge_dicts(reducer): |
| + def merge_dicts(one, two): |
| + if not one or not two: |
| + return None |
| + reduction = {} |
| + for key in set(one.keys() + two.keys()): |
| + one_value = one.get(key) |
| + two_value = two.get(key) |
| + reduction[key] = reducer(one_value, two_value) |
| + return reduction |
| + return merge_dicts |
| + |
| + |
| +def merge_regression_ranges(alerts): |
| + def make_compare(compare_func): |
| + def compare(one, two): |
| + if one and two and compare_func(one, two): |
| + return one |
| + # Treat None is 'infinite' so always return the more limiting. |
| + # FIXME: 'passing' may wish to prefer None over a value? |
| + return two or one |
| + return compare |
| + |
| + # These don't handle the case where commits can't be compared (git branches) |
| + younger_commit = make_compare(is_decendant_of) |
| + passing_dicts = map(operator.itemgetter('passing_revisions'), alerts) |
| + last_passing = reduce(_make_merge_dicts(younger_commit), passing_dicts) |
| + |
| + older_commit = make_compare(is_ancestor_of) |
| + failing_dicts = map(operator.itemgetter('failing_revisions'), alerts) |
| + first_failing = reduce(_make_merge_dicts(older_commit), failing_dicts) |
| + |
| + return last_passing, first_failing |
| + |
| + |
| +def reason_key_for_alert(alert): |
| + # FIXME: May need something smarter for reason_key. |
| + reason_key = alert['step_name'] |
| + if alert['reason']: |
| + reason_key += ':%s' % alert['reason'] |
| + else: |
| + # If we don't understand the alert, just make it builder-unique. |
| + reason_key += ':%s' % alert['builder_name'] |
| + return reason_key |
| + |
| + |
| +def group_by_reason(alerts): |
| + by_reason = collections.defaultdict(list) |
| + for alert in alerts: |
| + by_reason[reason_key_for_alert(alert)].append(alert) |
| + |
| + reason_groups = [] |
| + for reason_key, alerts in by_reason.items(): |
| + last_passing, first_failing = merge_regression_ranges(alerts) |
| + blame_list = flatten_to_commit_list(last_passing, first_failing) |
| + # FIXME: blame_list isn't filtered yet, but should be. |
| + reason_groups.append({ |
| + 'sort_key': reason_key, |
| + 'merged_last_passing': last_passing, |
| + 'merged_first_failing': first_failing, |
| + 'likely_revisions': blame_list, |
| + 'failure_keys': map(operator.itemgetter('key'), alerts), |
| + }) |
| + return reason_groups |
| + |
| +# http://stackoverflow.com/questions/18715688/find-common-substring-between-two-strings |
| +def longestSubstringFinder(string1, string2): |
|
ojan
2014/07/22 02:01:23
s/camelCase/camel_case
|
| + answer = "" |
| + len1, len2 = len(string1), len(string2) |
| + for i in range(len1): |
| + match = "" |
| + for j in range(len2): |
| + if (i + j < len1 and string1[i + j] == string2[j]): |
| + match += string2[j] |
| + else: |
| + if (len(match) > len(answer)): answer = match |
| + match = "" |
| + return answer |
| + |
| + |
| +def range_key_for_group(group): |
| + last_passing = group['merged_last_passing'] |
| + first_failing = group['merged_first_failing'] |
| + if last_passing: |
| + range_key = ' '.join(flatten_to_commit_list(last_passing, first_failing)) |
| + else: |
| + # Even regressions where we don't know when they started can be |
| + # merged by our earliest known failure. |
| + parts = ['<=%s:%s' % (name, commit) for name, commit in first_failing.items()] |
| + range_key = ' '.join(parts) |
| + # sort_key is a heuristic to avoid merging failiures like |
| + # gclient revert + webkit_tests which just happened to pull |
| + # exact matching revisions when failing. |
|
ojan
2014/07/22 02:01:23
I'm not sure it's wrong to group these together in
|
| + return group['sort_key'][:3] + range_key |
| + |
| + |
| +def merge_by_range(reason_groups): |
| + if not reason_groups: |
| + return [] |
| + expected_keys = sorted(reason_groups[0].keys()) |
| + by_range = {} |
| + for group in reason_groups: |
| + range_key = range_key_for_group(group) |
| + existing = by_range.get(range_key) |
| + if not existing: |
| + # Shallow copy of group. |
| + by_range[range_key] = dict(group) |
| + continue |
| + |
| + # FIXME: It's possible we don't want to merge two keys with nothing in common. |
| + # e.g. bot_update and |
| + # We only care about these two keys, the rest should be the same between all groups. |
| + # I guess we could assert that... |
| + by_range[range_key].update({ |
| + 'sort_key': longestSubstringFinder(existing['sort_key'], group['sort_key']), |
| + 'failure_keys': sorted(set(existing['failure_keys'] + group['failure_keys'])), |
| + }) |
| + |
| + return sorted(by_range.values(), key=operator.itemgetter('sort_key')) |