Chromium Code Reviews

Side by Side Diff: Tools/AutoSheriff/analysis.py

Issue 398823008: WIP: Add auto-sheriff.appspot.com code to Blink
Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 6 years, 5 months ago
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import json
6 import collections
7 import operator
8
9
10 # Git ready, but only implemented for SVN atm.
ojan 2014/07/22 02:01:23 I think we should only operate on monotonically in
11 def ids_after_first_including_second(first, second):
12 if not first or not second:
13 return []
14 try:
15 return range(int(first) + 1, int(second) + 1)
16 except ValueError, e:
17 # likely passed a git hash
18 return []
19
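A small sketch of the intended semantics, using made-up SVN-style ids (under Python 2, which this file targets, range() returns a list):

  assert ids_after_first_including_second('100', '103') == [101, 102, 103]
  assert ids_after_first_including_second('deadbeef', '103') == []  # git hash: not handled yet
  assert ids_after_first_including_second(None, '103') == []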
20
21 # Git ready, but only implemented for SVN atm.
22 def is_ancestor_of(older, younger):
23 return int(older) < int(younger)
24
25
26 def is_decendant_of(younger, older):
27 return is_ancestor_of(older, younger)
28
29
30 def commit_compare(one, two):
31 if is_decendant_of(one, two):
32 return -1
33 if is_ancestor_of(one, two):
34 return 1
35   return 0  # This is technically not right, since commits can be non-comparable.
36
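commit_compare is a cmp-style comparator that sorts newer revisions first; an illustrative use with made-up ids (the cmp= keyword is Python 2 only):

  assert commit_compare('105', '101') == -1  # '105' sorts before '101'
  assert sorted(['101', '105', '103'], cmp=commit_compare) == ['105', '103', '101']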
37
38 def flatten_to_commit_list(passing, failing):
39 # Flatten two commit dicts to a list of 'name:commit'
40 if not passing or not failing:
41 return []
42 all_commits = []
43 for name in passing.keys():
44 commits = ids_after_first_including_second(passing[name], failing[name])
45 all_commits.extend(['%s:%s' % (name, commit) for commit in commits])
46 return all_commits
47
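An illustrative flattening with made-up repo names and ids; names whose passing and failing revisions match contribute nothing:

  passing = {'blink': '100', 'chromium': '200'}
  failing = {'blink': '102', 'chromium': '200'}
  assert sorted(flatten_to_commit_list(passing, failing)) == ['blink:101', 'blink:102']
  assert flatten_to_commit_list(None, failing) == []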
48
49 # FIXME: Perhaps this should be done by the feeder?
50 def assign_keys(alerts):
51 for key, alert in enumerate(alerts):
52 # We could come up with something more sophisticated if necessary.
53     alert['key'] = 'f%s' % key  # Just something so it doesn't look like a number.
54 return alerts
55
56
57 def _make_merge_dicts(reducer):
58 def merge_dicts(one, two):
59 if not one or not two:
60 return None
61 reduction = {}
62 for key in set(one.keys() + two.keys()):
63 one_value = one.get(key)
64 two_value = two.get(key)
65 reduction[key] = reducer(one_value, two_value)
66 return reduction
67 return merge_dicts
68
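A quick sketch of the merge helper with max as a stand-in reducer (illustration only; the real reducers are built in merge_regression_ranges below):

  merge_newest = _make_merge_dicts(max)
  assert merge_newest({'blink': 1, 'chromium': 2}, {'blink': 3, 'chromium': 1}) == {'blink': 3, 'chromium': 2}
  assert merge_newest({'blink': 1}, None) is None  # a missing dict poisons the merge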
69
70 def merge_regression_ranges(alerts):
71 def make_compare(compare_func):
72 def compare(one, two):
73 if one and two and compare_func(one, two):
74 return one
75       # Treat None as 'infinite', so we always return the more limiting value.
76 # FIXME: 'passing' may wish to prefer None over a value?
77 return two or one
78 return compare
79
80 # These don't handle the case where commits can't be compared (git branches)
81 younger_commit = make_compare(is_decendant_of)
82 passing_dicts = map(operator.itemgetter('passing_revisions'), alerts)
83 last_passing = reduce(_make_merge_dicts(younger_commit), passing_dicts)
84
85 older_commit = make_compare(is_ancestor_of)
86 failing_dicts = map(operator.itemgetter('failing_revisions'), alerts)
87 first_failing = reduce(_make_merge_dicts(older_commit), failing_dicts)
88
89 return last_passing, first_failing
90
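A sketch with two made-up alerts over two repos: the merged range keeps the youngest known-passing and the oldest known-failing revision per repo:

  alerts = [
      {'passing_revisions': {'blink': '100', 'chromium': '200'},
       'failing_revisions': {'blink': '103', 'chromium': '201'}},
      {'passing_revisions': {'blink': '101', 'chromium': '199'},
       'failing_revisions': {'blink': '105', 'chromium': '202'}},
  ]
  last_passing, first_failing = merge_regression_ranges(alerts)
  assert last_passing == {'blink': '101', 'chromium': '200'}
  assert first_failing == {'blink': '103', 'chromium': '201'}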
91
92 def reason_key_for_alert(alert):
93 # FIXME: May need something smarter for reason_key.
94 reason_key = alert['step_name']
95 if alert['reason']:
96 reason_key += ':%s' % alert['reason']
97 else:
98 # If we don't understand the alert, just make it builder-unique.
99 reason_key += ':%s' % alert['builder_name']
100 return reason_key
101
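Illustrative keys (the alert fields come from the feeder; these particular values are made up):

  alert = {'step_name': 'compile', 'reason': None, 'builder_name': 'Linux Builder'}
  assert reason_key_for_alert(alert) == 'compile:Linux Builder'
  alert = {'step_name': 'webkit_tests', 'reason': 'fast/dom/foo.html', 'builder_name': 'Linux Tests'}
  assert reason_key_for_alert(alert) == 'webkit_tests:fast/dom/foo.html'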
102
103 def group_by_reason(alerts):
104 by_reason = collections.defaultdict(list)
105 for alert in alerts:
106 by_reason[reason_key_for_alert(alert)].append(alert)
107
108 reason_groups = []
109 for reason_key, alerts in by_reason.items():
110 last_passing, first_failing = merge_regression_ranges(alerts)
111 blame_list = flatten_to_commit_list(last_passing, first_failing)
112 # FIXME: blame_list isn't filtered yet, but should be.
113 reason_groups.append({
114 'sort_key': reason_key,
115 'merged_last_passing': last_passing,
116 'merged_first_failing': first_failing,
117 'likely_revisions': blame_list,
118 'failure_keys': map(operator.itemgetter('key'), alerts),
119 })
120 return reason_groups
121
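End-to-end sketch with two made-up alerts that share a reason: they collapse into one group with a merged regression range:

  alerts = assign_keys([
      {'step_name': 'webkit_tests', 'reason': 'fast/dom/a.html', 'builder_name': 'Linux Tests',
       'passing_revisions': {'blink': '100'}, 'failing_revisions': {'blink': '102'}},
      {'step_name': 'webkit_tests', 'reason': 'fast/dom/a.html', 'builder_name': 'Mac Tests',
       'passing_revisions': {'blink': '101'}, 'failing_revisions': {'blink': '103'}},
  ])
  groups = group_by_reason(alerts)
  assert len(groups) == 1
  assert groups[0]['merged_last_passing'] == {'blink': '101'}
  assert groups[0]['merged_first_failing'] == {'blink': '102'}
  assert groups[0]['likely_revisions'] == ['blink:102']
  assert groups[0]['failure_keys'] == ['f0', 'f1']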
122 # http://stackoverflow.com/questions/18715688/find-common-substring-between-two-strings
123 def longestSubstringFinder(string1, string2):
ojan 2014/07/22 02:01:23 s/camelCase/camel_case
124 answer = ""
125 len1, len2 = len(string1), len(string2)
126 for i in range(len1):
127 match = ""
128 for j in range(len2):
129 if (i + j < len1 and string1[i + j] == string2[j]):
130 match += string2[j]
131 else:
132 if (len(match) > len(answer)): answer = match
133 match = ""
134 return answer
135
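A quick illustration of the shared-prefix behaviour the range merging relies on (made-up sort keys):

  assert longestSubstringFinder('webkit_tests:crash', 'webkit_tests:timeout') == 'webkit_tests:'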
136
137 def range_key_for_group(group):
138 last_passing = group['merged_last_passing']
139 first_failing = group['merged_first_failing']
140 if last_passing:
141     range_key = ' '.join(flatten_to_commit_list(last_passing, first_failing))
142 else:
143 # Even regressions where we don't know when they started can be
144 # merged by our earliest known failure.
145     parts = ['<=%s:%s' % (name, commit) for name, commit in first_failing.items()]
146 range_key = ' '.join(parts)
147   # sort_key is a heuristic to avoid merging failures like
148 # gclient revert + webkit_tests which just happened to pull
149 # exact matching revisions when failing.
ojan 2014/07/22 02:01:23 I'm not sure it's wrong to group these together in
150 return group['sort_key'][:3] + range_key
151
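Illustrative keys for both branches, with made-up values; note the three-character sort_key prefix folded in:

  group = {'sort_key': 'webkit_tests:fast/dom/foo.html',
           'merged_last_passing': {'blink': '100'},
           'merged_first_failing': {'blink': '102'}}
  assert range_key_for_group(group) == 'webblink:101 blink:102'
  group['merged_last_passing'] = None
  assert range_key_for_group(group) == 'web<=blink:102'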
152
153 def merge_by_range(reason_groups):
154 if not reason_groups:
155 return []
156 expected_keys = sorted(reason_groups[0].keys())
157 by_range = {}
158 for group in reason_groups:
159 range_key = range_key_for_group(group)
160 existing = by_range.get(range_key)
161 if not existing:
162 # Shallow copy of group.
163 by_range[range_key] = dict(group)
164 continue
165
166     # FIXME: It's possible we don't want to merge two keys with nothing in common.
167 # e.g. bot_update and
168     # We only care about these two keys; the rest should be the same between all groups.
169 # I guess we could assert that...
170 by_range[range_key].update({
171         'sort_key': longestSubstringFinder(existing['sort_key'], group['sort_key']),
172         'failure_keys': sorted(set(existing['failure_keys'] + group['failure_keys'])),
173 })
174
175 return sorted(by_range.values(), key=operator.itemgetter('sort_key'))
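A final sketch with two made-up reason groups that share a regression range: they merge into one entry whose sort_key is their common substring and whose failure_keys are unioned:

  group_a = {'sort_key': 'webkit_tests:fast/a.html',
             'merged_last_passing': {'blink': '100'}, 'merged_first_failing': {'blink': '102'},
             'likely_revisions': ['blink:101', 'blink:102'], 'failure_keys': ['f0']}
  group_b = dict(group_a, sort_key='webkit_tests:fast/b.html', failure_keys=['f1'])
  merged = merge_by_range([group_a, group_b])
  assert len(merged) == 1
  assert merged[0]['sort_key'] == 'webkit_tests:fast/'
  assert merged[0]['failure_keys'] == ['f0', 'f1']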