Chromium Code Reviews
| Index: scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py |
| diff --git a/scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py b/scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py |
| index 41529e8b93566add7fc08abbeb90f8094497ff8c..62cc076ce9594976482d0c8bed4ca1003aa2c4af 100644 |
| --- a/scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py |
| +++ b/scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py |
| @@ -3,6 +3,7 @@ |
| # found in the LICENSE file. |
| import json |
| +import math |
| import tempfile |
| import os |
| import uuid |
| @@ -22,9 +23,10 @@ class PerfRevisionState(revision_state.RevisionState): |
| self.values = [] |
| self.mean_value = None |
| self.std_dev = None |
| + self.custom_reps = None |
| self._test_config = None |
| - def _read_test_results(self): |
| + def _read_test_results(self, check_revision_goodness=True): |
| """Gets the test results from GS and checks if the rev is good or bad.""" |
| test_results = self._get_test_results() |
| # Results will contain the keys 'results' and 'output' where output is the |
| @@ -37,7 +39,7 @@ class PerfRevisionState(revision_state.RevisionState): |
| if 'MISSING_METRIC' in results.get('errors'): # pragma: no cover |
| self.bisector.surface_result('MISSING_METRIC') |
| return |
| - self.values = results['values'] |
| + self.values = self.values or [] + results['values'] |
|
qyearsley
2016/01/26 19:26:00
`or` is less tightly binding than `+`, so this is parsed as `self.values or ([] + results['values'])`, not `(self.values or []) + results['values']`; the new results are silently dropped whenever self.values is non-empty.
RobertoCN
2016/02/01 17:29:50
Done.
|
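To make qyearsley's point concrete: in Python, `+` binds tighter than `or`, so the patched line keeps the old values and loses the new ones whenever `self.values` is already non-empty. A minimal sketch with stand-in lists (not the real module):

    values = [1, 2]        # stand-in for a non-empty self.values
    new_values = [3, 4]    # stand-in for results['values']

    # As written: parses as `values or ([] + new_values)`.
    wrong = values or [] + new_values      # -> [1, 2]; new data is lost
    # As intended: append the new results to the existing ones.
    right = (values or []) + new_values    # -> [1, 2, 3, 4]

    assert wrong == [1, 2]
    assert right == [1, 2, 3, 4]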
| if self.bisector.is_return_code_mode(): |
| retcodes = test_results['retcodes'] |
| overall_return_code = 0 if all(v == 0 for v in retcodes) else 1 |
| @@ -52,7 +54,8 @@ class PerfRevisionState(revision_state.RevisionState): |
| self.bisector.surface_result('MISSING_METRIC') |
| return |
| # We cannot test the goodness of the initial rev range. |
| - if self.bisector.good_rev != self and self.bisector.bad_rev != self: |
| + if (self.bisector.good_rev != self and self.bisector.bad_rev != self and |
| + check_revision_goodness): |
| if self._check_revision_good(): |
| self.good = True |
| else: |
| @@ -122,8 +125,16 @@ class PerfRevisionState(revision_state.RevisionState): |
| if k in required_test_properties: |
| result[k] = v |
| self._test_config = result |
| + if self.custom_reps: # pragma: no cover |
|
qyearsley
2016/01/26 19:26:00
Two spaces before #.
RobertoCN
2016/02/01 17:29:50
Done.
|
| + result['repeat_count'] = self.custom_reps |
| + else: |
| + # We need at the very least 3 data points. (e.g. that's the minimum for |
| + # shapiro's test for normality [even though we don't really use it]) |
|
qyearsley
2016/01/26 19:26:00
The parenthetical part of this comment could probably be removed.
RobertoCN
2016/02/01 17:29:50
Done.
|
| + result['repeat_count'] = max(int(result['repeat_count']), 3) |
| return result |
| + # It seems it should be okay to call this again to test the revision some |
| + # more. |
|
qyearsley
2016/01/26 19:26:00
I'm not sure whether this comment is helpful; maybe it could be removed.
RobertoCN
2016/02/01 17:29:50
Yep, I was supposed to remove it and forgot.
Done.
|
| def _do_test(self): |
| """Posts a request to buildbot to download and perf-test this build.""" |
| if self.bisector.dummy_builds: |
| @@ -179,6 +190,12 @@ class PerfRevisionState(revision_state.RevisionState): |
| 'job_name': self.job_name, |
| } |
| + def retest(self, target_sample_size): # pragma: no cover |
| + self.status = PerfRevisionState.NEED_MORE_DATA |
| + self.custom_reps = target_sample_size - len(self.values) |
| + self.start_job() |
| + self.bisector.wait_for_any([self]) |
|
qyearsley
2016/01/26 19:26:00
This triggers the bisect to run another |self.custom_reps| repetitions of the test for this revision, right?
RobertoCN
2016/02/01 17:29:50
Yeah.
|
| + |
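As the exchange above confirms, retest() only schedules the shortfall: the number of repetitions needed to bring this revision up to the target sample size, then blocks on wait_for_any(). In plain numbers (the values are made up):

    # A revision with 4 samples and a target of 9 asks for 5 more reps.
    existing_values = [1.0, 1.1, 0.9, 1.0]   # stand-in for self.values
    target_sample_size = 9
    custom_reps = target_sample_size - len(existing_values)
    assert custom_reps == 5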
| def _get_test_results(self): |
| """Tries to get the results of a test job from cloud storage.""" |
| api = self.bisector.api |
| @@ -193,23 +210,42 @@ class PerfRevisionState(revision_state.RevisionState): |
| else: |
| return json.loads(step_result.stdout) |
| + def _calculate_target_sample_size(self): # pragma: no cover |
| + # Make the smallest bigger than the largest, by increasing it at least 33% |
| + sizes = map(lambda x: len(x.values), [self.bisector.lkgr, self, |
|
qyearsley
2016/01/26 19:26:00
Extra space after the comma.
RobertoCN
2016/02/01 17:29:50
Done.
|
| + self.bisector.fkbr]) |
| + smallest_sample_size = min(sizes) |
| + target_sample_size = max(sizes) + 1 |
| + delta = target_sample_size - smallest_sample_size |
|
dtu
2016/02/05 00:18:01
delta is unused.
|
| + min_increase = math.ceil(smallest_sample_size/3.0) |
|
qyearsley
2016/01/26 19:26:00
Optional: could add spaces around the / operator.
RobertoCN
2016/02/01 17:29:50
Done.
|
| + return max(smallest_sample_size + min_increase, target_sample_size) |
| + |
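A worked example of the arithmetic in _calculate_target_sample_size, using plain numbers in place of the three revision states (the sizes are invented for illustration):

    import math

    # Suppose lkgr, this revision, and fkbr have this many values each.
    sizes = [6, 9, 12]
    smallest_sample_size = min(sizes)             # 6
    target_sample_size = max(sizes) + 1           # 13
    # Grow the smallest sample by at least roughly a third.
    min_increase = math.ceil(smallest_sample_size / 3.0)   # 2.0
    result = max(smallest_sample_size + min_increase, target_sample_size)
    assert result == 13   # the "+1 over the largest" term dominates here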
| def _check_revision_good(self): |
|
qyearsley
2016/01/26 19:26:00
Parts of the previous docstring could be kept here.
RobertoCN
2016/02/01 17:29:51
Done.
|
| - """Determines if a revision is good or bad. |
| - |
| - Note that our current approach is to determine whether it is closer to |
| - either the 'good' and 'bad' revisions given for the bisect job. |
| - |
| - Returns: |
| - True if this revision is closer to the initial good revision's value than |
| - to the initial bad revision's value. False otherwise. |
| - """ |
| - # TODO: Reevaluate this approach |
| - bisector = self.bisector |
| - distance_to_good = abs(self.mean_value - bisector.good_rev.mean_value) |
| - distance_to_bad = abs(self.mean_value - bisector.bad_rev.mean_value) |
| - if distance_to_good < distance_to_bad: |
| - return True |
| - return False |
| + diff_from_good = self.bisector.significantly_different( |
| + self.bisector.lkgr.values, self.values) |
| + diff_from_bad = self.bisector.significantly_different( |
| + self.bisector.fkbr.values, self.values) |
| + while not (diff_from_good or diff_from_bad): # pragma: no cover |
| + target_sample_size = self._calculate_target_sample_size() |
| + min(self.bisector.lkgr, self, self.bisector.fkbr, |
| + key=lambda x: len(x.values)).retest(target_sample_size) |
| + diff_from_good = self.bisector.significantly_different( |
| + self.bisector.lkgr.values, self.values) |
| + diff_from_bad = self.bisector.significantly_different( |
| + self.bisector.fkbr.values, self.values) |
| + if diff_from_good and diff_from_bad: |
|
qyearsley
2016/01/26 19:26:00
A blank line above this if block might slightly help readability.
RobertoCN
2016/02/01 17:29:50
Done.
|
| + # Multiple regressions. |
| + # For now, proceed bisecting the biggest difference of the means, and |
| + # handle the secondary regression by spinning a second job. |
|
qyearsley
2016/01/26 19:26:00
No second job is started as of this CL, right?
RobertoCN
2016/02/01 17:29:50
Correct, for now we'll go with the larger regression.
|
| + dist_from_good = abs(self.mean_value - self.bisector.lkgr.mean_value) |
| + dist_from_bad = abs(self.mean_value - self.bisector.fkbr.mean_value) |
| + if dist_from_good > dist_from_bad: |
| + #self.bisector.handle_secondary_regression(self, self.bisector.fkbr) |
| + return False |
| + else: |
| + #self.bisector.handle_secondary_regression(self.bisector.lkgr, self) |
| + return True |
| + return diff_from_bad # pragma: no cover |
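The overall shape of the new check: keep enlarging the smallest sample until this revision is statistically distinguishable from at least one end of the range, then classify it. The sketch below stubs significantly_different with a hypothetical means-vs-pooled-spread test; the real implementation lives on the bisector and is not part of this CL (Python 3's statistics module is used for brevity):

    import statistics

    def significantly_different(sample_a, sample_b, threshold=1.0):
        # Hypothetical stand-in for bisector.significantly_different:
        # call the samples different when their means are more than one
        # pooled standard deviation apart.
        gap = abs(statistics.mean(sample_a) - statistics.mean(sample_b))
        spread = statistics.pstdev(sample_a + sample_b) or 1e-9
        return gap / spread > threshold

    lkgr = [10.0, 10.2, 10.1]   # known-good samples
    fkbr = [15.0, 15.3, 14.9]   # first-known-bad samples
    rev = [15.1, 14.8, 15.2]    # revision under test

    diff_from_good = significantly_different(lkgr, rev)
    diff_from_bad = significantly_different(fkbr, rev)
    # Distinguishable from the good end only: _check_revision_good
    # returns diff_from_bad, i.e. False, classifying this revision as bad.
    assert diff_from_good and not diff_from_bad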
| def __repr__(self): |
| return ('PerfRevisionState(cp=%s, values=%r, mean_value=%r, std_dev=%r)' % |