Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import json | 5 import json |
| 6 import math | |
| 6 import tempfile | 7 import tempfile |
| 7 import os | 8 import os |
| 8 import uuid | 9 import uuid |
| 9 | 10 |
| 10 from . import revision_state | 11 from . import revision_state |
| 11 | 12 |
| 12 if 'CACHE_TEST_RESULTS' in os.environ: # pragma: no cover | 13 if 'CACHE_TEST_RESULTS' in os.environ: # pragma: no cover |
| 13 from . import test_results_cache | 14 from . import test_results_cache |
| 14 | 15 |
| 15 | 16 |
| 16 class PerfRevisionState(revision_state.RevisionState): | 17 class PerfRevisionState(revision_state.RevisionState): |
| 17 | 18 |
| 18 """Contains the state and results for one revision in a perf bisect job.""" | 19 """Contains the state and results for one revision in a perf bisect job.""" |
| 19 | 20 |
| 20 def __init__(self, *args, **kwargs): | 21 def __init__(self, *args, **kwargs): |
| 21 super(PerfRevisionState, self).__init__(*args, **kwargs) | 22 super(PerfRevisionState, self).__init__(*args, **kwargs) |
| 22 self.values = [] | 23 self.values = [] |
| 23 self.mean_value = None | 24 self.mean_value = None |
| 24 self.std_dev = None | 25 self.std_dev = None |
| 26 self.custom_reps = None | |
| 25 self._test_config = None | 27 self._test_config = None |
| 26 | 28 |
| 27 def _read_test_results(self): | 29 def _read_test_results(self, check_revision_goodness=True): |
| 28 """Gets the test results from GS and checks if the rev is good or bad.""" | 30 """Gets the test results from GS and checks if the rev is good or bad.""" |
| 29 test_results = self._get_test_results() | 31 test_results = self._get_test_results() |
| 30 # Results will contain the keys 'results' and 'output' where output is the | 32 # Results will contain the keys 'results' and 'output' where output is the |
| 31 # stdout of the command, and 'results' is itself a dict with the key | 33 # stdout of the command, and 'results' is itself a dict with the key |
| 32 # 'values' unless the test failed, in which case 'results' will contain | 34 # 'values' unless the test failed, in which case 'results' will contain |
| 33 # the 'error' key explaining the type of error. | 35 # the 'error' key explaining the type of error. |
| 34 results = test_results['results'] | 36 results = test_results['results'] |
| 35 if results.get('errors'): | 37 if results.get('errors'): |
| 36 self.status = PerfRevisionState.FAILED | 38 self.status = PerfRevisionState.FAILED |
| 37 if 'MISSING_METRIC' in results.get('errors'): # pragma: no cover | 39 if 'MISSING_METRIC' in results.get('errors'): # pragma: no cover |
| 38 self.bisector.surface_result('MISSING_METRIC') | 40 self.bisector.surface_result('MISSING_METRIC') |
| 39 return | 41 return |
| 40 self.values = results['values'] | 42 self.values = self.values or [] + results['values'] |
|
qyearsley
2016/01/26 19:26:00
`or` is less tightly binding than `+`, so this is parsed as `self.values or ([] + results['values'])`, which is probably not what was intended.
RobertoCN
2016/02/01 17:29:50
Done.
| |
| 41 if self.bisector.is_return_code_mode(): | 43 if self.bisector.is_return_code_mode(): |
| 42 retcodes = test_results['retcodes'] | 44 retcodes = test_results['retcodes'] |
| 43 overall_return_code = 0 if all(v == 0 for v in retcodes) else 1 | 45 overall_return_code = 0 if all(v == 0 for v in retcodes) else 1 |
| 44 self.mean_value = overall_return_code | 46 self.mean_value = overall_return_code |
| 45 elif self.values: | 47 elif self.values: |
| 46 api = self.bisector.api | 48 api = self.bisector.api |
| 47 self.mean_value = api.m.math_utils.mean(self.values) | 49 self.mean_value = api.m.math_utils.mean(self.values) |
| 48 self.std_dev = api.m.math_utils.standard_deviation(self.values) | 50 self.std_dev = api.m.math_utils.standard_deviation(self.values) |
| 49 # Values were not found, but the test did not otherwise fail. | 51 # Values were not found, but the test did not otherwise fail. |
| 50 else: | 52 else: |
| 51 self.status = PerfRevisionState.FAILED | 53 self.status = PerfRevisionState.FAILED |
| 52 self.bisector.surface_result('MISSING_METRIC') | 54 self.bisector.surface_result('MISSING_METRIC') |
| 53 return | 55 return |
| 54 # We cannot test the goodness of the initial rev range. | 56 # We cannot test the goodness of the initial rev range. |
| 55 if self.bisector.good_rev != self and self.bisector.bad_rev != self: | 57 if (self.bisector.good_rev != self and self.bisector.bad_rev != self and |
| 58 check_revision_goodness): | |
| 56 if self._check_revision_good(): | 59 if self._check_revision_good(): |
| 57 self.good = True | 60 self.good = True |
| 58 else: | 61 else: |
| 59 self.bad = True | 62 self.bad = True |
| 60 | 63 |
| 61 def _write_deps_patch_file(self, build_name): | 64 def _write_deps_patch_file(self, build_name): |
| 62 api = self.bisector.api | 65 api = self.bisector.api |
| 63 file_name = str(api.m.path['tmp_base'].join(build_name + '.diff')) | 66 file_name = str(api.m.path['tmp_base'].join(build_name + '.diff')) |
| 64 api.m.file.write('Saving diff patch for ' + str(self.revision_string), | 67 api.m.file.write('Saving diff patch for ' + str(self.revision_string), |
| 65 file_name, self.deps_patch + self.deps_sha_patch) | 68 file_name, self.deps_patch + self.deps_sha_patch) |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 115 'metric', | 118 'metric', |
| 116 'max_time_minutes', | 119 'max_time_minutes', |
| 117 'command', | 120 'command', |
| 118 'repeat_count', | 121 'repeat_count', |
| 119 'test_type' | 122 'test_type' |
| 120 } | 123 } |
| 121 for k, v in self.bisector.bisect_config.iteritems(): | 124 for k, v in self.bisector.bisect_config.iteritems(): |
| 122 if k in required_test_properties: | 125 if k in required_test_properties: |
| 123 result[k] = v | 126 result[k] = v |
| 124 self._test_config = result | 127 self._test_config = result |
| 128 if self.custom_reps: # pragma: no cover | |
|
qyearsley
2016/01/26 19:26:00
Two spaces before #.
RobertoCN
2016/02/01 17:29:50
Done.
| |
| 129 result['repeat_count'] = self.custom_reps | |
| 130 else: | |
| 131 # We need at the very least 3 data points. (e.g. that's the minimum for | |
| 132 # shapiro's test for normality [even though we don't really use it]) | |
|
qyearsley
2016/01/26 19:26:00
The parenthetical part of this comment could probably be removed.
RobertoCN
2016/02/01 17:29:50
Done.
| |
| 133 result['repeat_count'] = max(int(result['repeat_count']), 3) | |
| 125 return result | 134 return result |
| 126 | 135 |
| 136 # It seems it should be okay to call this again to test the revision some | |
| 137 # more. | |
|
qyearsley
2016/01/26 19:26:00
I'm not sure whether this comment is helpful; maybe it could be removed.
RobertoCN
2016/02/01 17:29:50
Yep, I was supposed to remove it and forgot.
Done
| |
| 127 def _do_test(self): | 138 def _do_test(self): |
| 128 """Posts a request to buildbot to download and perf-test this build.""" | 139 """Posts a request to buildbot to download and perf-test this build.""" |
| 129 if self.bisector.dummy_builds: | 140 if self.bisector.dummy_builds: |
| 130 self.job_name = self.commit_hash + '-test' | 141 self.job_name = self.commit_hash + '-test' |
| 131 elif 'CACHE_TEST_RESULTS' in os.environ: # pragma: no cover | 142 elif 'CACHE_TEST_RESULTS' in os.environ: # pragma: no cover |
| 132 self.job_name = test_results_cache.make_id( | 143 self.job_name = test_results_cache.make_id( |
| 133 self.revision_string, self._get_bisect_config_for_tester()) | 144 self.revision_string, self._get_bisect_config_for_tester()) |
| 134 else: # pragma: no cover | 145 else: # pragma: no cover |
| 135 self.job_name = uuid.uuid4().hex | 146 self.job_name = uuid.uuid4().hex |
| 136 api = self.bisector.api | 147 api = self.bisector.api |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 172 builder = self.bisector.get_builder_bot_for_this_platform() | 183 builder = self.bisector.get_builder_bot_for_this_platform() |
| 173 if self.status == PerfRevisionState.TESTING: | 184 if self.status == PerfRevisionState.TESTING: |
| 174 builder = self.bisector.get_perf_tester_name() | 185 builder = self.bisector.get_perf_tester_name() |
| 175 return { | 186 return { |
| 176 'type': 'buildbot', | 187 'type': 'buildbot', |
| 177 'master': master, | 188 'master': master, |
| 178 'builder': builder, | 189 'builder': builder, |
| 179 'job_name': self.job_name, | 190 'job_name': self.job_name, |
| 180 } | 191 } |
| 181 | 192 |
| 193 def retest(self, target_sample_size): # pragma: no cover | |
| 194 self.status = PerfRevisionState.NEED_MORE_DATA | |
| 195 self.custom_reps = target_sample_size - len(self.values) | |
| 196 self.start_job() | |
| 197 self.bisector.wait_for_any([self]) | |
|
qyearsley
2016/01/26 19:26:00
This triggers the bisect to run another |self.custom_reps| repetitions of the test for this revision, right?
RobertoCN
2016/02/01 17:29:50
Yeah.
| |
| 198 | |
| 182 def _get_test_results(self): | 199 def _get_test_results(self): |
| 183 """Tries to get the results of a test job from cloud storage.""" | 200 """Tries to get the results of a test job from cloud storage.""" |
| 184 api = self.bisector.api | 201 api = self.bisector.api |
| 185 try: | 202 try: |
| 186 stdout = api.m.raw_io.output() | 203 stdout = api.m.raw_io.output() |
| 187 name = 'Get test results for build ' + self.commit_hash | 204 name = 'Get test results for build ' + self.commit_hash |
| 188 step_result = api.m.gsutil.cat(self.test_results_url, stdout=stdout, | 205 step_result = api.m.gsutil.cat(self.test_results_url, stdout=stdout, |
| 189 name=name) | 206 name=name) |
| 190 except api.m.step.StepFailure: # pragma: no cover | 207 except api.m.step.StepFailure: # pragma: no cover |
| 191 self.bisector.surface_result('TEST_FAILURE') | 208 self.bisector.surface_result('TEST_FAILURE') |
| 192 return None | 209 return None |
| 193 else: | 210 else: |
| 194 return json.loads(step_result.stdout) | 211 return json.loads(step_result.stdout) |
| 195 | 212 |
| 213 def _calculate_target_sample_size(self): # pragma: no cover | |
| 214 # Make the smallest bigger than the largest, by increasing it at least 33% | |
| 215 sizes = map(lambda x: len(x.values), [self.bisector.lkgr, self, | |
|
qyearsley
2016/01/26 19:26:00
Extra space after the comma.
RobertoCN
2016/02/01 17:29:50
Done.
| |
| 216 self.bisector.fkbr]) | |
| 217 smallest_sample_size = min(sizes) | |
| 218 target_sample_size = max(sizes) + 1 | |
| 219 delta = target_sample_size - smallest_sample_size | |
|
dtu
2016/02/05 00:18:01
delta is unused.
| |
| 220 min_increase = math.ceil(smallest_sample_size/3.0) | |
|
qyearsley
2016/01/26 19:26:00
Optional: could add spaces around the / operator.
RobertoCN
2016/02/01 17:29:50
Done.
| |
| 221 return max(smallest_sample_size + min_increase, target_sample_size) | |
| 222 | |
| 196 def _check_revision_good(self): | 223 def _check_revision_good(self): |
|
qyearsley
2016/01/26 19:26:00
Parts of the previous docstring could be kept here
RobertoCN
2016/02/01 17:29:51
Done.
| |
| 197 """Determines if a revision is good or bad. | 224 diff_from_good = self.bisector.significantly_different( |
| 198 | 225 self.bisector.lkgr.values, self.values) |
| 199 Note that our current approach is to determine whether it is closer to | 226 diff_from_bad = self.bisector.significantly_different( |
| 200 either the 'good' and 'bad' revisions given for the bisect job. | 227 self.bisector.fkbr.values, self.values) |
| 201 | 228 while not (diff_from_good or diff_from_bad): # pragma: no cover |
| 202 Returns: | 229 target_sample_size = self._calculate_target_sample_size() |
| 203 True if this revision is closer to the initial good revision's value than | 230 min(self.bisector.lkgr, self, self.bisector.fkbr, |
| 204 to the initial bad revision's value. False otherwise. | 231 key=lambda x: len(x.values)).retest(target_sample_size) |
| 205 """ | 232 diff_from_good = self.bisector.significantly_different( |
| 206 # TODO: Reevaluate this approach | 233 self.bisector.lkgr.values, self.values) |
| 207 bisector = self.bisector | 234 diff_from_bad = self.bisector.significantly_different( |
| 208 distance_to_good = abs(self.mean_value - bisector.good_rev.mean_value) | 235 self.bisector.fkbr.values, self.values) |
| 209 distance_to_bad = abs(self.mean_value - bisector.bad_rev.mean_value) | 236 if diff_from_good and diff_from_bad: |
|
qyearsley
2016/01/26 19:26:00
A blank line above this if block might slightly help readability.
RobertoCN
2016/02/01 17:29:50
Done.
| |
| 210 if distance_to_good < distance_to_bad: | 237 # Multiple regressions. |
| 211 return True | 238 # For now, proceed bisecting the biggest difference of the means, and |
| 212 return False | 239 # handle the secondary regression by spinning a second job. |
|
qyearsley
2016/01/26 19:26:00
No second job is started as of this CL, right?
RobertoCN
2016/02/01 17:29:50
Correct, for now we'll go with the larger regression.
| |
| 240 dist_from_good = abs(self.mean_value - self.bisector.lkgr.mean_value) | |
| 241 dist_from_bad = abs(self.mean_value - self.bisector.fkbr.mean_value) | |
| 242 if dist_from_good > dist_from_bad: | |
| 243 #self.bisector.handle_secondary_regression(self, self.bisector.fkbr) | |
| 244 return False | |
| 245 else: | |
| 246 #self.bisector.handle_secondary_regression(self.bisector.lkgr, self) | |
| 247 return True | |
| 248 return diff_from_bad # pragma: no cover | |
| 213 | 249 |
| 214 def __repr__(self): | 250 def __repr__(self): |
| 215 return ('PerfRevisionState(cp=%s, values=%r, mean_value=%r, std_dev=%r)' % | 251 return ('PerfRevisionState(cp=%s, values=%r, mean_value=%r, std_dev=%r)' % |
| 216 (self.commit_pos, self.values, self.mean_value, self.std_dev)) | 252 (self.commit_pos, self.values, self.mean_value, self.std_dev)) |
| OLD | NEW |