Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import json | 5 import json |
| 6 import re | 6 import re |
| 7 import time | 7 import time |
| 8 import urllib | 8 import urllib |
| 9 | 9 |
| 10 from . import config_validation | 10 from . import config_validation |
| (...skipping 29 matching lines...) Expand all Loading... | |
| 40 ) | 40 ) |
| 41 | 41 |
| 42 # When we look for the next revision to build, we search nearby revisions | 42 # When we look for the next revision to build, we search nearby revisions |
| 43 # looking for a revision that's already been archived. Since we don't want | 43 # looking for a revision that's already been archived. Since we don't want |
| 44 # to move *too* far from the original revision, we'll cap the search at 25%. | 44 # to move *too* far from the original revision, we'll cap the search at 25%. |
| 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 | 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 |
| 46 | 46 |
| 47 # How long to re-test the initial good-bad range for until significant | 47 # How long to re-test the initial good-bad range for until significant |
| 48 # difference is established. | 48 # difference is established. |
| 49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 | 49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 |
| 50 # If we reach this number of samples on the reference range and have not | |
| 51 # achieved statistical significance, bail. | |
| 52 MAX_REQUIRED_SAMPLES = 15 | |
| 53 | |
| 54 # Significance level to use for determining difference between revisions via | |
| 55 # hypothesis testing. | |
| 56 SIGNIFICANCE_LEVEL = 0.01 | |
| 57 | 50 |
| 58 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( | 51 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( |
| 59 'The metric values for the initial "good" and "bad" revisions ' | 52 'The metric values for the initial "good" and "bad" revisions ' |
| 60 'do not represent a clear regression.') | 53 'do not represent a clear regression.') |
| 61 | 54 |
| 62 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( | 55 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( |
| 63 'The metric values for the initial "good" and "bad" revisions match the ' | 56 'The metric values for the initial "good" and "bad" revisions match the ' |
| 64 'expected direction of improvement. Thus, likely represent an improvement ' | 57 'expected direction of improvement. Thus, likely represent an improvement ' |
| 65 'and not a regression.') | 58 'and not a regression.') |
| 66 | 59 |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 148 | 141 |
| 149 Returns: | 142 Returns: |
| 150 A 40-digit git commit hash string. | 143 A 40-digit git commit hash string. |
| 151 """ | 144 """ |
| 152 if self._is_sha1(rev): # pragma: no cover | 145 if self._is_sha1(rev): # pragma: no cover |
| 153 return rev | 146 return rev |
| 154 if rev.isdigit(): | 147 if rev.isdigit(): |
| 155 commit_position = self._api.m.commit_position.construct( | 148 commit_position = self._api.m.commit_position.construct( |
| 156 branch='refs/heads/master', value=rev) | 149 branch='refs/heads/master', value=rev) |
| 157 try: | 150 try: |
| 158 return self._api.m.crrev.to_commit_hash(commit_position) | 151 return self._api.m.crrev.to_commit_hash( |
| 152 commit_position, | |
| 153 step_test_data=lambda: self.api._test_data['hash_cp_map'][rev]) | |
| 159 except self.api.m.step.StepFailure: # pragma: no cover | 154 except self.api.m.step.StepFailure: # pragma: no cover |
| 160 self.surface_result('BAD_REV') | 155 self.surface_result('BAD_REV') |
| 161 raise | 156 raise |
| 162 self.surface_result('BAD_REV') # pragma: no cover | 157 self.surface_result('BAD_REV') # pragma: no cover |
| 163 raise self.api.m.step.StepFailure( | 158 raise self.api.m.step.StepFailure( |
| 164 'Invalid input revision: %r' % (rev,)) # pragma: no cover | 159 'Invalid input revision: %r' % (rev,)) # pragma: no cover |
| 165 | 160 |
| 166 @staticmethod | 161 @staticmethod |
| 167 def _is_sha1(s): | 162 def _is_sha1(s): |
| 168 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) | 163 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) |
| 169 | 164 |
| 170 def significantly_different( | 165 def compare_revisions(self, revision_a, revision_b): |
| 171 self, list_a, list_b, | 166 """ |
| 172 significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover | 167 Returns: |
| 173 """Uses an external script to run hypothesis testing with scipy. | 168 True if the samples are significantly different. |
| 169 None if there is not enough data to tell. | |
| 170 False if there's enough data but still can't tell the samples apart. | |
| 171 """ | |
| 172 output_format = 'chartjson' | |
| 173 values_a = revision_a.chartjson_paths | |
| 174 values_b = revision_b.chartjson_paths | |
| 175 if revision_a.valueset_paths and revision_b.valueset_paths: | |
| 176 output_format = 'valueset' | |
| 174 | 177 |
| 175 The reason why we need an external script is that scipy is not available to | 178 result = self.api.stat_compare( |
| 176 the default python installed in all platforms. We instead rely on an | 179 values_a, |
| 177 anaconda environment to provide those packages. | 180 values_b, |
| 181 self.bisect_config['metric'], | |
| 182 output_format=output_format, | |
| 183 step_test_data=lambda: self.api.test_api.compare_samples_data( | |
| 184 self.api._test_data.get('revision_data'), revision_a, revision_b)) | |
| 178 | 185 |
| 179 Args: | 186 revision_a.debug_values = result['sample_a']['debug_values'] |
| 180 list_a, list_b: Two lists representing samples to be compared. | 187 revision_b.debug_values = result['sample_b']['debug_values'] |
| 181 significance_level: Self-describing. As a decimal fraction. | 188 revision_a.mean = result['sample_a']['mean'] |
|
RobertoCN
2016/08/23 00:26:24
Make mean and std_dev properties of revision_state
RobertoCN
2016/09/07 00:33:24
Done.
| |
| 189 revision_b.mean = result['sample_b']['mean'] | |
| 190 revision_a.std_dev = result['sample_a']['std_dev'] | |
| 191 revision_b.std_dev = result['sample_b']['std_dev'] | |
| 182 | 192 |
| 183 Returns: | 193 if result['result'] == 'needMoreData': |
|
RobertoCN
2016/08/23 00:26:24
Make 3 constants for true, false and needMore.
RobertoCN
2016/09/07 00:33:24
Done.
| |
| 184 A boolean indicating whether the null hypothesis ~(that the lists are | 194 return None |
| 185 samples from the same population) can be rejected at the specified | 195 return bool(result['result']) |
| 186 significance level. | |
| 187 """ | |
| 188 step_result = self.api.m.python( | |
| 189 'Checking sample difference', | |
| 190 self.api.resource('significantly_different.py'), | |
| 191 [json.dumps(list_a), json.dumps(list_b), str(significance_level)], | |
| 192 stdout=self.api.m.json.output()) | |
| 193 results = step_result.stdout | |
| 194 if results is None: | |
| 195 assert self.dummy_builds | |
| 196 return True | |
| 197 significantly_different = results['significantly_different'] | |
| 198 step_result.presentation.logs[str(significantly_different)] = [ | |
| 199 'See json.output for details'] | |
| 200 return significantly_different | |
| 201 | 196 |
| 202 def config_step(self): | 197 def config_step(self): |
| 203 """Yields a step that prints the bisect config.""" | 198 """Yields a step that prints the bisect config.""" |
| 204 api = self.api | 199 api = self.api |
| 205 | 200 |
| 206 # bisect_config may come as a FrozenDict (which is not serializable). | 201 # bisect_config may come as a FrozenDict (which is not serializable). |
| 207 bisect_config = dict(self.bisect_config) | 202 bisect_config = dict(self.bisect_config) |
| 208 | 203 |
| 209 def fix_windows_backslashes(s): | 204 def fix_windows_backslashes(s): |
| 210 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') | 205 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 228 except config_validation.ValidationFail as error: | 223 except config_validation.ValidationFail as error: |
| 229 self.surface_result('BAD_CONFIG') | 224 self.surface_result('BAD_CONFIG') |
| 230 self.api.m.halt(error.message) | 225 self.api.m.halt(error.message) |
| 231 raise self.api.m.step.StepFailure(error.message) | 226 raise self.api.m.step.StepFailure(error.message) |
| 232 | 227 |
| 233 @property | 228 @property |
| 234 def api(self): | 229 def api(self): |
| 235 return self._api | 230 return self._api |
| 236 | 231 |
| 237 def compute_relative_change(self): | 232 def compute_relative_change(self): |
| 238 old_value = float(self.good_rev.mean_value) | 233 old_value = float(self.good_rev.mean) |
| 239 new_value = float(self.bad_rev.mean_value) | 234 new_value = float(self.bad_rev.mean) |
| 240 | 235 |
| 241 if new_value and not old_value: # pragma: no cover | 236 if new_value and not old_value: # pragma: no cover |
| 242 self.relative_change = ZERO_TO_NON_ZERO | 237 self.relative_change = ZERO_TO_NON_ZERO |
| 243 return | 238 return |
| 244 | 239 |
| 245 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) | 240 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) |
| 246 self.relative_change = '%.2f%%' % (100 * rel_change) | 241 self.relative_change = '%.2f%%' % (100 * rel_change) |
| 247 | 242 |
| 248 def make_deps_sha_file(self, deps_sha): | 243 def make_deps_sha_file(self, deps_sha): |
| 249 """Make a diff patch that creates DEPS.sha. | 244 """Make a diff patch that creates DEPS.sha. |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 266 file is to be written to. | 261 file is to be written to. |
| 267 commit_hash (str): An identifier for the step. | 262 commit_hash (str): An identifier for the step. |
| 268 | 263 |
| 269 Returns: | 264 Returns: |
| 270 A string containing the hash of the interned object. | 265 A string containing the hash of the interned object. |
| 271 """ | 266 """ |
| 272 cmd = 'hash-object -t blob -w --stdin'.split(' ') | 267 cmd = 'hash-object -t blob -w --stdin'.split(' ') |
| 273 stdin = self.api.m.raw_io.input(file_contents) | 268 stdin = self.api.m.raw_io.input(file_contents) |
| 274 stdout = self.api.m.raw_io.output() | 269 stdout = self.api.m.raw_io.output() |
| 275 step_name = 'Hashing modified DEPS file with revision ' + commit_hash | 270 step_name = 'Hashing modified DEPS file with revision ' + commit_hash |
| 276 step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout, | 271 step_result = self.api.m.git( |
| 277 name=step_name) | 272 *cmd, cwd=cwd, stdin=stdin, stdout=stdout, name=step_name, |
| 273 step_test_data=lambda: | |
| 274 self.api.m.raw_io.test_api.stream_output(commit_hash)) | |
| 278 hash_string = step_result.stdout.splitlines()[0] | 275 hash_string = step_result.stdout.splitlines()[0] |
| 279 try: | 276 try: |
| 280 if hash_string: | 277 if hash_string: |
| 281 int(hash_string, 16) | 278 int(hash_string, 16) |
| 282 return hash_string | 279 return hash_string |
| 283 except ValueError: # pragma: no cover | 280 except ValueError: # pragma: no cover |
| 284 reason = 'Git did not output a valid hash for the interned file.' | 281 reason = 'Git did not output a valid hash for the interned file.' |
| 285 self.api.m.halt(reason) | 282 self.api.m.halt(reason) |
| 286 raise self.api.m.step.StepFailure(reason) | 283 raise self.api.m.step.StepFailure(reason) |
| 287 | 284 |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 302 Returns: | 299 Returns: |
| 303 A string containing the diff patch as produced by the 'git diff' command. | 300 A string containing the diff patch as produced by the 'git diff' command. |
| 304 """ | 301 """ |
| 305 # The prefixes used in the command below are used to find and replace the | 302 # The prefixes used in the command below are used to find and replace the |
| 306 # tree-ish git object id's on the diff output more easily. | 303 # tree-ish git object id's on the diff output more easily. |
| 307 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' | 304 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' |
| 308 cmd %= (git_object_a, git_object_b) | 305 cmd %= (git_object_a, git_object_b) |
| 309 cmd = cmd.split(' ') | 306 cmd = cmd.split(' ') |
| 310 stdout = self.api.m.raw_io.output() | 307 stdout = self.api.m.raw_io.output() |
| 311 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) | 308 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) |
| 312 step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name) | 309 step_result = self.api.m.git( |
| 310 *cmd, cwd=cwd, stdout=stdout, name=step_name, | |
| 311 step_test_data=lambda: self.api._test_data['diff_patch']) | |
| 313 patch_text = step_result.stdout | 312 patch_text = step_result.stdout |
| 314 src_string = 'IAMSRC:' + git_object_a | 313 src_string = 'IAMSRC:' + git_object_a |
| 315 dst_string = 'IAMDST:' + git_object_b | 314 dst_string = 'IAMDST:' + git_object_b |
| 316 patch_text = patch_text.replace(src_string, src_alias) | 315 patch_text = patch_text.replace(src_string, src_alias) |
| 317 patch_text = patch_text.replace(dst_string, dst_alias) | 316 patch_text = patch_text.replace(dst_string, dst_alias) |
| 318 return patch_text | 317 return patch_text |
| 319 | 318 |
| 320 def make_deps_patch(self, base_revision, base_file_contents, | 319 def make_deps_patch(self, base_revision, base_file_contents, |
| 321 depot, new_commit_hash): | 320 depot, new_commit_hash): |
| 322 """Make a diff patch that updates a specific dependency revision. | 321 """Make a diff patch that updates a specific dependency revision. |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 365 interned_deps_hash, deps_file, deps_file, | 364 interned_deps_hash, deps_file, deps_file, |
| 366 cwd=cwd, | 365 cwd=cwd, |
| 367 deps_rev=new_commit_hash) | 366 deps_rev=new_commit_hash) |
| 368 return patch_text, patched_contents | 367 return patch_text, patched_contents |
| 369 | 368 |
| 370 def _expand_initial_revision_range(self): | 369 def _expand_initial_revision_range(self): |
| 371 """Sets the initial contents of |self.revisions|.""" | 370 """Sets the initial contents of |self.revisions|.""" |
| 372 with self.api.m.step.nest('Expanding revision range'): | 371 with self.api.m.step.nest('Expanding revision range'): |
| 373 good_hash = self.good_rev.commit_hash | 372 good_hash = self.good_rev.commit_hash |
| 374 bad_hash = self.bad_rev.commit_hash | 373 bad_hash = self.bad_rev.commit_hash |
| 374 depot = self.good_rev.depot_name | |
| 375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) | 375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) |
| 376 revisions = self._revision_range( | 376 revisions = self._revision_range( |
| 377 start=good_hash, | 377 start=good_hash, |
| 378 end=bad_hash, | 378 end=bad_hash, |
| 379 depot_name=self.base_depot, | 379 depot_name=self.base_depot, |
| 380 step_name=step_name) | 380 step_name=step_name, |
| 381 step_test_data=lambda: self.api._test_data['revision_list'][depot] | |
| 382 ) | |
| 381 self.revisions = [self.good_rev] + revisions + [self.bad_rev] | 383 self.revisions = [self.good_rev] + revisions + [self.bad_rev] |
| 382 self._update_revision_list_indexes() | 384 self._update_revision_list_indexes() |
| 383 | 385 |
| 384 def _revision_range(self, start, end, depot_name, base_revision=None, | 386 def _revision_range(self, start, end, depot_name, base_revision=None, |
| 385 step_name=None): | 387 step_name=None, **kwargs): |
| 386 """Returns a list of RevisionState objects between |start| and |end|. | 388 """Returns a list of RevisionState objects between |start| and |end|. |
| 387 | 389 |
| 388 Args: | 390 Args: |
| 389 start (str): Start commit hash. | 391 start (str): Start commit hash. |
| 390 end (str): End commit hash. | 392 end (str): End commit hash. |
| 391 depot_name (str): Short string name of repo, e.g. chromium or v8. | 393 depot_name (str): Short string name of repo, e.g. chromium or v8. |
| 392 base_revision (str): Base revision in the downstream repo (e.g. chromium). | 394 base_revision (str): Base revision in the downstream repo (e.g. chromium). |
| 393 step_name (str): Optional step name. | 395 step_name (str): Optional step name. |
| 394 | 396 |
| 395 Returns: | 397 Returns: |
| 396 A list of RevisionState objects, not including the given start or end. | 398 A list of RevisionState objects, not including the given start or end. |
| 397 """ | 399 """ |
| 398 if self.internal_bisect: # pragma: no cover | 400 if self.internal_bisect: # pragma: no cover |
| 399 return self._revision_range_with_gitiles( | 401 return self._revision_range_with_gitiles( |
| 400 start, end, depot_name, base_revision, step_name) | 402 start, end, depot_name, base_revision, step_name) |
| 401 try: | 403 try: |
| 402 step_result = self.api.m.python( | 404 step_result = self.api.m.python( |
| 403 step_name, | 405 step_name, |
| 404 self.api.resource('fetch_intervening_revisions.py'), | 406 self.api.resource('fetch_intervening_revisions.py'), |
| 405 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], | 407 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], |
| 406 stdout=self.api.m.json.output()) | 408 stdout=self.api.m.json.output(), **kwargs) |
| 407 except self.api.m.step.StepFailure: # pragma: no cover | 409 except self.api.m.step.StepFailure: # pragma: no cover |
| 408 self.surface_result('BAD_REV') | 410 self.surface_result('BAD_REV') |
| 409 raise | 411 raise |
| 410 revisions = [] | 412 revisions = [] |
| 411 for commit_hash, _ in step_result.stdout: | 413 for commit_hash, _ in step_result.stdout: |
| 412 revisions.append(self.revision_class( | 414 revisions.append(self.revision_class( |
| 413 bisector=self, | 415 bisector=self, |
| 414 commit_hash=commit_hash, | 416 commit_hash=commit_hash, |
| 415 depot_name=depot_name, | 417 depot_name=depot_name, |
| 416 base_revision=base_revision)) | 418 base_revision=base_revision)) |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 502 dep_revision_max = max_revision.deps[depot_name] | 504 dep_revision_max = max_revision.deps[depot_name] |
| 503 if (dep_revision_min and dep_revision_max and | 505 if (dep_revision_min and dep_revision_max and |
| 504 dep_revision_min != dep_revision_max): | 506 dep_revision_min != dep_revision_max): |
| 505 step_name = ('Expanding revision range for revision %s' | 507 step_name = ('Expanding revision range for revision %s' |
| 506 ' on depot %s' % (dep_revision_max, depot_name)) | 508 ' on depot %s' % (dep_revision_max, depot_name)) |
| 507 rev_list = self._revision_range( | 509 rev_list = self._revision_range( |
| 508 start=dep_revision_min, | 510 start=dep_revision_min, |
| 509 end=dep_revision_max, | 511 end=dep_revision_max, |
| 510 depot_name=depot_name, | 512 depot_name=depot_name, |
| 511 base_revision=min_revision, | 513 base_revision=min_revision, |
| 512 step_name=step_name) | 514 step_name=step_name, |
| 515 step_test_data=lambda: | |
| 516 self.api._test_data['revision_list'][depot_name]) | |
| 513 new_revisions = self.revisions[:max_revision.list_index] | 517 new_revisions = self.revisions[:max_revision.list_index] |
| 514 new_revisions += rev_list | 518 new_revisions += rev_list |
| 515 new_revisions += self.revisions[max_revision.list_index:] | 519 new_revisions += self.revisions[max_revision.list_index:] |
| 516 self.revisions = new_revisions | 520 self.revisions = new_revisions |
| 517 self._update_revision_list_indexes() | 521 self._update_revision_list_indexes() |
| 518 return True | 522 return True |
| 519 except RuntimeError: # pragma: no cover | 523 except RuntimeError: # pragma: no cover |
| 520 warning_text = ('Could not expand dependency revisions for ' + | 524 warning_text = ('Could not expand dependency revisions for ' + |
| 521 revision_to_expand.commit_hash) | 525 revision_to_expand.commit_hash) |
| 522 self.surface_result('BAD_REV') | 526 self.surface_result('BAD_REV') |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 539 | 543 |
| 540 The change between the test results obtained for the given 'good' and | 544 The change between the test results obtained for the given 'good' and |
| 541 'bad' revisions is expected to be considered a regression. The | 545 'bad' revisions is expected to be considered a regression. The |
| 542 `improvement_direction` attribute is positive if a larger number is | 546 `improvement_direction` attribute is positive if a larger number is |
| 543 considered better, and negative if a smaller number is considered better. | 547 considered better, and negative if a smaller number is considered better. |
| 544 | 548 |
| 545 Returns: | 549 Returns: |
| 546 True if the check passes (i.e. no problem), False if the change is not | 550 True if the check passes (i.e. no problem), False if the change is not |
| 547 a regression according to the improvement direction. | 551 a regression according to the improvement direction. |
| 548 """ | 552 """ |
| 549 good = self.good_rev.mean_value | 553 good = self.good_rev.mean |
| 550 bad = self.bad_rev.mean_value | 554 bad = self.bad_rev.mean |
| 551 | 555 |
| 552 if self.is_return_code_mode(): | 556 if self.is_return_code_mode(): |
| 553 return True | 557 return True |
| 554 | 558 |
| 555 direction = self.improvement_direction | 559 direction = self.improvement_direction |
| 556 if direction is None: | 560 if direction is None: |
| 557 return True | 561 return True |
| 558 if (bad > good and direction > 0) or (bad < good and direction < 0): | 562 if (bad > good and direction > 0) or (bad < good and direction < 0): |
| 559 self._set_failed_direction_results() | 563 self._set_failed_direction_results() |
| 560 return False | 564 return False |
| (...skipping 18 matching lines...) Expand all Loading... | |
| 579 or REGRESSION_CHECK_TIMEOUT seconds have elapsed. | 583 or REGRESSION_CHECK_TIMEOUT seconds have elapsed. |
| 580 | 584 |
| 581 Returns: True if the revisions produced results that differ from each | 585 Returns: True if the revisions produced results that differ from each |
| 582 other in a statistically significant manner. False if such difference could | 586 other in a statistically significant manner. False if such difference could |
| 583 not be established in the time or sample size allowed. | 587 not be established in the time or sample size allowed. |
| 584 """ | 588 """ |
| 585 if self.test_type == 'return_code': | 589 if self.test_type == 'return_code': |
| 586 return (self.good_rev.overall_return_code != | 590 return (self.good_rev.overall_return_code != |
| 587 self.bad_rev.overall_return_code) | 591 self.bad_rev.overall_return_code) |
| 588 | 592 |
| 589 if self.bypass_stats_check: | 593 if self.bypass_stats_check: |
|
RobertoCN
2016/08/23 00:26:24
Remove this flag
RobertoCN
2016/09/07 00:33:24
Acknowledged.
| |
| 590 dummy_result = self.good_rev.values != self.bad_rev.values | 594 self.compare_revisions(self.good_rev, self.bad_rev) |
| 595 dummy_result = self.good_rev.mean != self.bad_rev.mean | |
| 591 if not dummy_result: | 596 if not dummy_result: |
| 592 self._set_insufficient_confidence_warning() | 597 self._set_insufficient_confidence_warning() |
| 593 return dummy_result | 598 return dummy_result |
| 594 | 599 |
| 600 # TODO(robertocn): This step should not be necessary in some cases. | |
| 595 with self.api.m.step.nest('Re-testing reference range'): | 601 with self.api.m.step.nest('Re-testing reference range'): |
| 596 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT | 602 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT |
| 597 while time.time() < expiration_time: | 603 while time.time() < expiration_time: |
| 598 if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5: | 604 if (self.good_rev.test_run_count >= 5 |
| 599 if self.significantly_different(self.good_rev.values, | 605 and self.bad_rev.test_run_count >= 5): |
| 600 self.bad_rev.values): | 606 if self.compare_revisions(self.good_rev, self.bad_rev): |
| 601 return True | 607 return True |
| 602 if len(self.good_rev.values) == len(self.bad_rev.values): | 608 if self.good_rev.test_run_count == self.bad_rev.test_run_count: |
| 603 revision_to_retest = self.last_tested_revision | 609 revision_to_retest = self.last_tested_revision |
| 604 else: | 610 else: |
| 605 revision_to_retest = min(self.good_rev, self.bad_rev, | 611 revision_to_retest = min(self.good_rev, self.bad_rev, |
| 606 key=lambda x: len(x.values)) | 612 key=lambda x: x.test_run_count) |
| 607 if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES: | |
| 608 revision_to_retest.retest() | |
| 609 else: | |
| 610 break | |
| 611 self._set_insufficient_confidence_warning() | 613 self._set_insufficient_confidence_warning() |
| 612 return False | 614 return False |
| 613 | 615 |
| 614 | 616 |
| 615 def get_exception(self): | 617 def get_exception(self): |
| 616 raise NotImplementedError() # pragma: no cover | 618 raise NotImplementedError() # pragma: no cover |
| 617 # TODO: should return an exception with the details of the failure. | 619 # TODO: should return an exception with the details of the failure. |
| 618 | 620 |
| 619 def _set_insufficient_confidence_warning( | 621 def _set_insufficient_confidence_warning( |
| 620 self): # pragma: no cover | 622 self): # pragma: no cover |
| 621 """Adds a warning about the lack of initial regression confidence.""" | 623 """Adds a warning about the lack of initial regression confidence.""" |
| 622 self.failed_initial_confidence = True | 624 self.failed_initial_confidence = True |
| 623 self.surface_result('LO_INIT_CONF') | 625 self.surface_result('LO_INIT_CONF') |
| 624 self.warnings.append( | 626 self.warnings.append( |
| 625 'Bisect failed to reproduce the regression with enough confidence.') | 627 'Bisect failed to reproduce the regression with enough confidence.') |
| 626 | 628 |
| 627 def _results_debug_message(self): | 629 def _results_debug_message(self): |
| 628 """Returns a string with values used to debug a bisect result.""" | 630 """Returns a string with values used to debug a bisect result.""" |
| 629 result = 'bisector.lkgr: %r\n' % self.lkgr | 631 result = 'bisector.lkgr: %r\n' % self.lkgr |
| 630 result += 'bisector.fkbr: %r\n\n' % self.fkbr | 632 result += 'bisector.fkbr: %r\n\n' % self.fkbr |
| 631 result += self._revision_value_table() | 633 result += self._revision_value_table() |
| 632 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values): | 634 if (self.lkgr and self.lkgr.test_run_count and self.fkbr and |
| 633 result += '\n' + self._t_test_results() | 635 self.fkbr.test_run_count): |
| 636 result += '\n' + '\n'.join([ | |
| 637 'LKGR values: %r' % list(self.lkgr.debug_values), | |
| 638 'FKBR values: %r' % list(self.fkbr.debug_values), | |
| 639 ]) | |
| 634 return result | 640 return result |
| 635 | 641 |
| 636 def _revision_value_table(self): | 642 def _revision_value_table(self): |
| 637 """Returns a string table showing revisions and their values.""" | 643 """Returns a string table showing revisions and their values.""" |
| 638 header = [['Revision', 'Values']] | 644 header = [['Revision', 'Values']] |
| 639 rows = [[r.revision_string(), str(r.values)] for r in self.revisions] | 645 rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions] |
| 640 return self._pretty_table(header + rows) | 646 return self._pretty_table(header + rows) |
| 641 | 647 |
| 642 def _pretty_table(self, data): | 648 def _pretty_table(self, data): |
| 643 results = [] | 649 results = [] |
| 644 for row in data: | 650 for row in data: |
| 645 results.append('%-15s' * len(row) % tuple(row)) | 651 results.append('%-15s' * len(row) % tuple(row)) |
| 646 return '\n'.join(results) | 652 return '\n'.join(results) |
| 647 | 653 |
| 648 def _t_test_results(self): | |
| 649 """Returns a string showing t-test results for lkgr and fkbr.""" | |
| 650 t, df, p = self.api.m.math_utils.welchs_t_test( | |
| 651 self.lkgr.values, self.fkbr.values) | |
| 652 lines = [ | |
| 653 'LKGR values: %r' % self.lkgr.values, | |
| 654 'FKBR values: %r' % self.fkbr.values, | |
| 655 't-statistic: %r' % t, | |
| 656 'deg. of freedom: %r' % df, | |
| 657 'p-value: %r' % p, | |
| 658 'Confidence score: %r' % (100 * (1 - p)) | |
| 659 ] | |
| 660 return '\n'.join(lines) | |
| 661 | |
| 662 def print_result_debug_info(self): | 654 def print_result_debug_info(self): |
| 663 """Prints extra debug info at the end of the bisect process.""" | 655 """Prints extra debug info at the end of the bisect process.""" |
| 664 lines = self._results_debug_message().splitlines() | 656 lines = self._results_debug_message().splitlines() |
| 665 # If we emit a null step then add a log to it, the log should be kept | 657 # If we emit a null step then add a log to it, the log should be kept |
| 666 # longer than 7 days (which is often needed to debug some issues). | 658 # longer than 7 days (which is often needed to debug some issues). |
| 667 self.api.m.step('Debug Info', []) | 659 self.api.m.step('Debug Info', []) |
| 668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines | 660 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines |
| 669 | 661 |
| 670 def post_result(self, halt_on_failure=False): | 662 def post_result(self, halt_on_failure=False): |
| 671 """Posts bisect results to Perf Dashboard.""" | 663 """Posts bisect results to Perf Dashboard.""" |
| 672 self.api.m.perf_dashboard.set_default_config() | 664 self.api.m.perf_dashboard.set_default_config() |
| 673 self.api.m.perf_dashboard.post_bisect_results( | 665 self.api.m.perf_dashboard.post_bisect_results( |
| 674 self.get_result(), halt_on_failure) | 666 self.get_result(), halt_on_failure) |
| 675 | 667 |
| 676 def get_revision_to_eval(self): | 668 def get_revision_to_eval(self): |
| 677 """Gets the next RevisionState object in the candidate range. | 669 """Gets the next RevisionState object in the candidate range. |
| 678 | 670 |
| 679 Returns: | 671 Returns: |
| 680 The next Revision object in a list. | 672 The next Revision object in a list. |
| 681 """ | 673 """ |
| 682 self._update_candidate_range() | 674 self._update_candidate_range() |
| 683 candidate_range = [revision for revision in | 675 candidate_range = [revision for revision in |
| 684 self.revisions[self.lkgr.list_index + 1: | 676 self.revisions[self.lkgr.list_index + 1: |
| 685 self.fkbr.list_index] | 677 self.fkbr.list_index] |
| 686 if not revision.tested and not revision.failed] | 678 if not revision.failed] |
| 687 if len(candidate_range) == 1: | 679 if len(candidate_range) == 1: |
| 688 return candidate_range[0] | 680 return candidate_range[0] |
| 689 if len(candidate_range) == 0: | 681 if len(candidate_range) == 0: |
| 690 return None | 682 return None |
| 691 | 683 |
| 692 default_revision = candidate_range[len(candidate_range) / 2] | 684 default_revision = candidate_range[len(candidate_range) / 2] |
| 693 | 685 |
| 694 with self.api.m.step.nest( | 686 with self.api.m.step.nest( |
| 695 'Wiggling revision ' + default_revision.revision_string()): | 687 'Wiggling revision ' + default_revision.revision_string()): |
| 696 # We'll search up to 25% of the range (in either direction) to try and | 688 # We'll search up to 25% of the range (in either direction) to try and |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 733 return True | 725 return True |
| 734 if (revision.good and revision.next_revision and | 726 if (revision.good and revision.next_revision and |
| 735 revision.next_revision.bad): | 727 revision.next_revision.bad): |
| 736 if (revision.next_revision.deps_change() | 728 if (revision.next_revision.deps_change() |
| 737 and self._expand_deps_revisions(revision.next_revision)): | 729 and self._expand_deps_revisions(revision.next_revision)): |
| 738 return False | 730 return False |
| 739 self.culprit = revision.next_revision | 731 self.culprit = revision.next_revision |
| 740 return True | 732 return True |
| 741 return False | 733 return False |
| 742 | 734 |
| 743 def wait_for_all(self, revision_list): | |
| 744 """Waits for all revisions in list to finish.""" | |
| 745 for r in revision_list: | |
| 746 self.wait_for(r) | |
| 747 | |
| 748 def wait_for(self, revision, nest_check=True): | |
| 749 """Waits for the revision to finish its job.""" | |
| 750 if nest_check and not self.flags.get( | |
| 751 'do_not_nest_wait_for_revision'): # pragma: no cover | |
| 752 with self.api.m.step.nest('Waiting for ' + revision.revision_string()): | |
| 753 return self.wait_for(revision, nest_check=False) | |
| 754 while True: | |
| 755 revision.update_status() | |
| 756 if revision.in_progress: | |
| 757 self.api.m.python.inline( | |
| 758 'sleeping', | |
| 759 """ | |
| 760 import sys | |
| 761 import time | |
| 762 time.sleep(20*60) | |
| 763 sys.exit(0) | |
| 764 """) | |
| 765 else: | |
| 766 break | |
| 767 | |
| 768 def _update_candidate_range(self): | 735 def _update_candidate_range(self): |
| 769 """Updates lkgr and fkbr (last known good/first known bad) revisions. | 736 """Updates lkgr and fkbr (last known good/first known bad) revisions. |
| 770 | 737 |
| 771 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in | 738 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in |
| 772 bisectors.revisions.""" | 739 bisectors.revisions.""" |
| 773 for r in self.revisions: | 740 for r in self.revisions: |
| 774 if r.tested: | 741 if r.test_run_count: |
| 775 if r.good: | 742 if r.good: |
| 776 self.lkgr = r | 743 self.lkgr = r |
| 777 elif r.bad: | 744 elif r.bad: |
| 778 self.fkbr = r | 745 self.fkbr = r |
| 779 break | 746 break |
| 780 assert self.lkgr and self.fkbr | 747 assert self.lkgr and self.fkbr |
| 781 | 748 |
| 782 def get_perf_tester_name(self): | 749 def get_perf_tester_name(self): |
| 783 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. | 750 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. |
| 784 | 751 |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 869 assert len(result_code) <= 20 | 836 assert len(result_code) <= 20 |
| 870 if result_code not in self.result_codes: | 837 if result_code not in self.result_codes: |
| 871 self.result_codes.add(result_code) | 838 self.result_codes.add(result_code) |
| 872 properties = self.api.m.step.active_result.presentation.properties | 839 properties = self.api.m.step.active_result.presentation.properties |
| 873 properties['extra_result_code'] = sorted(self.result_codes) | 840 properties['extra_result_code'] = sorted(self.result_codes) |
| 874 | 841 |
def get_result(self):
  """Returns the bisect job's current state as a JSON-serializable dict.

  The dict is the payload posted to the perf dashboard: try job/bug ids,
  overall status ('failed' / 'completed' / 'started'), the tester bot and
  command under test, the good/bad endpoint hashes, any abort reason, and
  per-revision result rows from _revision_data().
  """
  config = self.bisect_config
  # NOTE: the dead `results_confidence = 0` local was removed; the 'score'
  # field it fed is no longer part of the posted payload.

  if self.failed:
    status = 'failed'
  elif self.bisect_over:
    status = 'completed'
  else:
    status = 'started'

  aborted_reason = None
  if self.failed_initial_confidence:
    aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON
  elif self.failed_direction:
    aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON
  return {
      'try_job_id': config.get('try_job_id'),
      'bug_id': config.get('bug_id'),
      'status': status,
      'buildbot_log_url': self._get_build_url(),
      'bisect_bot': self.get_perf_tester_name(),
      'command': config['command'],
      'test_type': config['test_type'],
      'metric': config['metric'],
      'change': self.relative_change,
      'good_revision': self.good_rev.commit_hash,
      'bad_revision': self.bad_rev.commit_hash,
      'warnings': self.warnings,
      'aborted_reason': aborted_reason,
      'culprit_data': self._culprit_data(),
      'revision_data': self._revision_data()
  }
| 913 | 876 |
| 914 def _culprit_data(self): | 877 def _culprit_data(self): |
| 915 culprit = self.culprit | 878 culprit = self.culprit |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 927 'email': culprit_info['email'], | 890 'email': culprit_info['email'], |
| 928 'cl_date': culprit_info['date'], | 891 'cl_date': culprit_info['date'], |
| 929 'commit_info': culprit_info['body'], | 892 'commit_info': culprit_info['body'], |
| 930 'revisions_links': [], | 893 'revisions_links': [], |
| 931 'cl': culprit.commit_hash | 894 'cl': culprit.commit_hash |
| 932 } | 895 } |
| 933 | 896 |
| 934 def _revision_data(self): | 897 def _revision_data(self): |
| 935 revision_rows = [] | 898 revision_rows = [] |
| 936 for r in self.revisions: | 899 for r in self.revisions: |
| 937 if r.tested or r.aborted: | 900 if r.test_run_count: |
| 938 revision_rows.append({ | 901 revision_rows.append({ |
| 939 'depot_name': r.depot_name, | 902 'depot_name': r.depot_name, |
| 940 'commit_hash': r.commit_hash, | 903 'commit_hash': r.commit_hash, |
| 941 'revision_string': r.revision_string(), | 904 'revision_string': r.revision_string(), |
| 942 'mean_value': r.mean_value, | 905 'mean_value': r.mean, |
| 943 'std_dev': r.std_dev, | 906 'std_dev': r.std_dev, |
| 944 'values': r.values, | 907 'values': r.debug_values, |
| 945 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', | 908 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', |
| 946 }) | 909 }) |
| 947 return revision_rows | 910 return revision_rows |
| 948 | 911 |
def _get_build_url(self):
  """Assembles the buildbot URL for the currently running build."""
  props = self.api.m.properties
  base_url = props.get('buildbotURL',
                       'http://build.chromium.org/p/chromium/')
  # The builder name may contain spaces; URL-escape it. (urllib.quote is
  # the Python 2 spelling of urllib.parse.quote.)
  builder = urllib.quote(props.get('buildername', ''))
  build_number = str(props.get('buildnumber', ''))
  return '{0}builders/{1}/builds/{2}'.format(base_url, builder, build_number)
| OLD | NEW |