Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import json | 5 import json |
| 6 import re | 6 import re |
| 7 import time | 7 import time |
| 8 import urllib | 8 import urllib |
| 9 | 9 |
| 10 from . import config_validation | 10 from . import config_validation |
| (...skipping 29 matching lines...) Expand all Loading... | |
| 40 ) | 40 ) |
| 41 | 41 |
| 42 # When we look for the next revision to build, we search nearby revisions | 42 # When we look for the next revision to build, we search nearby revisions |
| 43 # looking for a revision that's already been archived. Since we don't want | 43 # looking for a revision that's already been archived. Since we don't want |
| 44 # to move *too* far from the original revision, we'll cap the search at 25%. | 44 # to move *too* far from the original revision, we'll cap the search at 25%. |
| 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 | 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 |
| 46 | 46 |
| 47 # How long to re-test the initial good-bad range for until significant | 47 # How long to re-test the initial good-bad range for until significant |
| 48 # difference is established. | 48 # difference is established. |
| 49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 | 49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 |
| 50 # If we reach this number of samples on the reference range and have not | |
| 51 # achieved statistical significance, bail. | |
| 52 MAX_REQUIRED_SAMPLES = 15 | |
| 53 | |
| 54 # Significance level to use for determining difference between revisions via | |
| 55 # hypothesis testing. | |
| 56 SIGNIFICANCE_LEVEL = 0.01 | |
| 57 | 50 |
| 58 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( | 51 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( |
| 59 'The metric values for the initial "good" and "bad" revisions ' | 52 'The metric values for the initial "good" and "bad" revisions ' |
| 60 'do not represent a clear regression.') | 53 'do not represent a clear regression.') |
| 61 | 54 |
| 62 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( | 55 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( |
| 63 'The metric values for the initial "good" and "bad" revisions match the ' | 56 'The metric values for the initial "good" and "bad" revisions match the ' |
| 64 'expected direction of improvement. Thus, likely represent an improvement ' | 57 'expected direction of improvement. Thus, likely represent an improvement ' |
| 65 'and not a regression.') | 58 'and not a regression.') |
| 66 | 59 |
| 67 | 60 |
| 68 class Bisector(object): | 61 class Bisector(object): |
| 69 """This class abstracts an ongoing bisect (or n-sect) job.""" | 62 """This class abstracts an ongoing bisect (or n-sect) job.""" |
| 70 | 63 |
| 71 def __init__(self, api, bisect_config, revision_class, init_revisions=True, | 64 def __init__(self, api, bisect_config, revision_class, init_revisions=True, |
| 72 **flags): | 65 **flags): |
| 73 """Initializes the state of a new bisect job from a dictionary. | 66 """Initializes the state of a new bisect job from a dictionary. |
| 74 | 67 |
| 75 Note that the initial good_rev and bad_rev MUST resolve to a commit position | 68 Note that the initial good_rev and bad_rev MUST resolve to a commit position |
| 76 in the chromium repo. | 69 in the chromium repo. |
| 77 """ | 70 """ |
| 78 super(Bisector, self).__init__() | 71 super(Bisector, self).__init__() |
| 72 self.loopCHECK = {} | |
| 79 self.flags = flags | 73 self.flags = flags |
| 80 self._api = api | 74 self._api = api |
| 81 self.result_codes = set() | 75 self.result_codes = set() |
| 82 self.ensure_sync_master_branch() | 76 self.ensure_sync_master_branch() |
| 83 self.bisect_config = bisect_config | 77 self.bisect_config = bisect_config |
| 84 self.config_step() | 78 self.config_step() |
| 85 self._validate_config() | 79 self._validate_config() |
| 86 self.revision_class = revision_class | 80 self.revision_class = revision_class |
| 87 self.last_tested_revision = None | 81 self.last_tested_revision = None |
| 88 | 82 |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 148 | 142 |
| 149 Returns: | 143 Returns: |
| 150 A 40-digit git commit hash string. | 144 A 40-digit git commit hash string. |
| 151 """ | 145 """ |
| 152 if self._is_sha1(rev): # pragma: no cover | 146 if self._is_sha1(rev): # pragma: no cover |
| 153 return rev | 147 return rev |
| 154 if rev.isdigit(): | 148 if rev.isdigit(): |
| 155 commit_position = self._api.m.commit_position.construct( | 149 commit_position = self._api.m.commit_position.construct( |
| 156 branch='refs/heads/master', value=rev) | 150 branch='refs/heads/master', value=rev) |
| 157 try: | 151 try: |
| 158 return self._api.m.crrev.to_commit_hash(commit_position) | 152 return self._api.m.crrev.to_commit_hash( |
| 153 commit_position, | |
| 154 step_test_data=lambda: self.api._test_data['hash_cp_map'][rev]) | |
| 159 except self.api.m.step.StepFailure: # pragma: no cover | 155 except self.api.m.step.StepFailure: # pragma: no cover |
| 160 self.surface_result('BAD_REV') | 156 self.surface_result('BAD_REV') |
| 161 raise | 157 raise |
| 162 self.surface_result('BAD_REV') # pragma: no cover | 158 self.surface_result('BAD_REV') # pragma: no cover |
| 163 raise self.api.m.step.StepFailure( | 159 raise self.api.m.step.StepFailure( |
| 164 'Invalid input revision: %r' % (rev,)) # pragma: no cover | 160 'Invalid input revision: %r' % (rev,)) # pragma: no cover |
| 165 | 161 |
| 166 @staticmethod | 162 @staticmethod |
| 167 def _is_sha1(s): | 163 def _is_sha1(s): |
| 168 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) | 164 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) |
| 169 | 165 |
| 170 def significantly_different( | 166 def compare_revisions(self, revision_a, revision_b): |
| 171 self, list_a, list_b, | 167 """ |
| 172 significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover | 168 Returns: |
| 173 """Uses an external script to run hypothesis testing with scipy. | 169 True if the samples are significantly different. |
| 170 None if there is not enough data to tell. | |
| 171 False if there's enough data but still can't tell the samples apart. | |
| 172 """ | |
| 173 output_format = 'chartjson' | |
| 174 values_a = revision_a.chartjson_paths | |
| 175 values_b = revision_b.chartjson_paths | |
| 176 if revision_a.valueset_paths and revision_b.valueset_paths: | |
| 177 output_format = 'valueset' | |
| 174 | 178 |
| 175 The reason why we need an external script is that scipy is not available to | 179 result = self.api.stat_compare( |
| 176 the default python installed in all platforms. We instead rely on an | 180 values_a, |
| 177 anaconda environment to provide those packages. | 181 values_b, |
| 182 self.bisect_config['metric'], | |
| 183 output_format=output_format, | |
| 184 step_test_data=lambda: self.api.test_api.compare_samples_data( | |
| 185 self.api._test_data.get('revision_data'), revision_a, revision_b)) | |
| 178 | 186 |
| 179 Args: | 187 revision_a.debug_values = result['sample_a']['debug_values'] |
| 180 list_a, list_b: Two lists representing samples to be compared. | 188 revision_b.debug_values = result['sample_b']['debug_values'] |
| 181 significance_level: Self-describing. As a decimal fraction. | 189 revision_a.mean = result['sample_a']['mean'] |
| 190 revision_b.mean = result['sample_b']['mean'] | |
| 191 revision_a.std_dev = result['sample_a']['std_dev'] | |
| 192 revision_b.std_dev = result['sample_b']['std_dev'] | |
| 182 | 193 |
| 183 Returns: | 194 if result['result'] == 'needMoreData': |
| 184 A boolean indicating whether the null hypothesis ~(that the lists are | 195 key = tuple(values_a), tuple(values_b) |
| 185 samples from the same population) can be rejected at the specified | 196 self.loopCHECK.setdefault(key, 0) |
| 186 significance level. | 197 self.loopCHECK[key] += 1 |
|
RobertoCN
2016/08/23 00:26:23
Remove loop check and debug prints
RobertoCN
2016/09/07 00:33:24
Done.
| |
| 187 """ | 198 if self.loopCHECK[key] > 10: |
| 188 step_result = self.api.m.python( | 199 raise Exception('loopCHECK!@') |
| 189 'Checking sample difference', | 200 print result['result'], revision_a.debug_values, revision_b.debug_values |
| 190 self.api.resource('significantly_different.py'), | 201 print revision_a.bisector.revisions |
| 191 [json.dumps(list_a), json.dumps(list_b), str(significance_level)], | 202 return None |
| 192 stdout=self.api.m.json.output()) | 203 return bool(result['result']) |
| 193 results = step_result.stdout | |
| 194 if results is None: | |
| 195 assert self.dummy_builds | |
| 196 return True | |
| 197 significantly_different = results['significantly_different'] | |
| 198 step_result.presentation.logs[str(significantly_different)] = [ | |
| 199 'See json.output for details'] | |
| 200 return significantly_different | |
| 201 | 204 |
| 202 def config_step(self): | 205 def config_step(self): |
| 203 """Yields a step that prints the bisect config.""" | 206 """Yields a step that prints the bisect config.""" |
| 204 api = self.api | 207 api = self.api |
| 205 | 208 |
| 206 # bisect_config may come as a FrozenDict (which is not serializable). | 209 # bisect_config may come as a FrozenDict (which is not serializable). |
| 207 bisect_config = dict(self.bisect_config) | 210 bisect_config = dict(self.bisect_config) |
| 208 | 211 |
| 209 def fix_windows_backslashes(s): | 212 def fix_windows_backslashes(s): |
| 210 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') | 213 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 228 except config_validation.ValidationFail as error: | 231 except config_validation.ValidationFail as error: |
| 229 self.surface_result('BAD_CONFIG') | 232 self.surface_result('BAD_CONFIG') |
| 230 self.api.m.halt(error.message) | 233 self.api.m.halt(error.message) |
| 231 raise self.api.m.step.StepFailure(error.message) | 234 raise self.api.m.step.StepFailure(error.message) |
| 232 | 235 |
| 233 @property | 236 @property |
| 234 def api(self): | 237 def api(self): |
| 235 return self._api | 238 return self._api |
| 236 | 239 |
| 237 def compute_relative_change(self): | 240 def compute_relative_change(self): |
| 238 old_value = float(self.good_rev.mean_value) | 241 old_value = float(self.good_rev.mean) |
| 239 new_value = float(self.bad_rev.mean_value) | 242 new_value = float(self.bad_rev.mean) |
| 240 | 243 |
| 241 if new_value and not old_value: # pragma: no cover | 244 if new_value and not old_value: # pragma: no cover |
| 242 self.relative_change = ZERO_TO_NON_ZERO | 245 self.relative_change = ZERO_TO_NON_ZERO |
| 243 return | 246 return |
| 244 | 247 |
| 245 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) | 248 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) |
| 246 self.relative_change = '%.2f%%' % (100 * rel_change) | 249 self.relative_change = '%.2f%%' % (100 * rel_change) |
| 247 | 250 |
| 248 def make_deps_sha_file(self, deps_sha): | 251 def make_deps_sha_file(self, deps_sha): |
| 249 """Make a diff patch that creates DEPS.sha. | 252 """Make a diff patch that creates DEPS.sha. |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 266 file is to be written to. | 269 file is to be written to. |
| 267 commit_hash (str): An identifier for the step. | 270 commit_hash (str): An identifier for the step. |
| 268 | 271 |
| 269 Returns: | 272 Returns: |
| 270 A string containing the hash of the interned object. | 273 A string containing the hash of the interned object. |
| 271 """ | 274 """ |
| 272 cmd = 'hash-object -t blob -w --stdin'.split(' ') | 275 cmd = 'hash-object -t blob -w --stdin'.split(' ') |
| 273 stdin = self.api.m.raw_io.input(file_contents) | 276 stdin = self.api.m.raw_io.input(file_contents) |
| 274 stdout = self.api.m.raw_io.output() | 277 stdout = self.api.m.raw_io.output() |
| 275 step_name = 'Hashing modified DEPS file with revision ' + commit_hash | 278 step_name = 'Hashing modified DEPS file with revision ' + commit_hash |
| 276 step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout, | 279 step_result = self.api.m.git( |
| 277 name=step_name) | 280 *cmd, cwd=cwd, stdin=stdin, stdout=stdout, name=step_name, |
| 281 step_test_data=lambda: | |
| 282 self.api.m.raw_io.test_api.stream_output(commit_hash)) | |
| 278 hash_string = step_result.stdout.splitlines()[0] | 283 hash_string = step_result.stdout.splitlines()[0] |
| 279 try: | 284 try: |
| 280 if hash_string: | 285 if hash_string: |
| 281 int(hash_string, 16) | 286 int(hash_string, 16) |
| 282 return hash_string | 287 return hash_string |
| 283 except ValueError: # pragma: no cover | 288 except ValueError: # pragma: no cover |
| 284 reason = 'Git did not output a valid hash for the interned file.' | 289 reason = 'Git did not output a valid hash for the interned file.' |
| 285 self.api.m.halt(reason) | 290 self.api.m.halt(reason) |
| 286 raise self.api.m.step.StepFailure(reason) | 291 raise self.api.m.step.StepFailure(reason) |
| 287 | 292 |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 302 Returns: | 307 Returns: |
| 303 A string containing the diff patch as produced by the 'git diff' command. | 308 A string containing the diff patch as produced by the 'git diff' command. |
| 304 """ | 309 """ |
| 305 # The prefixes used in the command below are used to find and replace the | 310 # The prefixes used in the command below are used to find and replace the |
| 306 # tree-ish git object id's on the diff output more easily. | 311 # tree-ish git object id's on the diff output more easily. |
| 307 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' | 312 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' |
| 308 cmd %= (git_object_a, git_object_b) | 313 cmd %= (git_object_a, git_object_b) |
| 309 cmd = cmd.split(' ') | 314 cmd = cmd.split(' ') |
| 310 stdout = self.api.m.raw_io.output() | 315 stdout = self.api.m.raw_io.output() |
| 311 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) | 316 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) |
| 312 step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name) | 317 step_result = self.api.m.git( |
| 318 *cmd, cwd=cwd, stdout=stdout, name=step_name, | |
| 319 step_test_data=lambda: self.api._test_data['diff_patch']) | |
| 313 patch_text = step_result.stdout | 320 patch_text = step_result.stdout |
| 314 src_string = 'IAMSRC:' + git_object_a | 321 src_string = 'IAMSRC:' + git_object_a |
| 315 dst_string = 'IAMDST:' + git_object_b | 322 dst_string = 'IAMDST:' + git_object_b |
| 316 patch_text = patch_text.replace(src_string, src_alias) | 323 patch_text = patch_text.replace(src_string, src_alias) |
| 317 patch_text = patch_text.replace(dst_string, dst_alias) | 324 patch_text = patch_text.replace(dst_string, dst_alias) |
| 318 return patch_text | 325 return patch_text |
| 319 | 326 |
| 320 def make_deps_patch(self, base_revision, base_file_contents, | 327 def make_deps_patch(self, base_revision, base_file_contents, |
| 321 depot, new_commit_hash): | 328 depot, new_commit_hash): |
| 322 """Make a diff patch that updates a specific dependency revision. | 329 """Make a diff patch that updates a specific dependency revision. |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 365 interned_deps_hash, deps_file, deps_file, | 372 interned_deps_hash, deps_file, deps_file, |
| 366 cwd=cwd, | 373 cwd=cwd, |
| 367 deps_rev=new_commit_hash) | 374 deps_rev=new_commit_hash) |
| 368 return patch_text, patched_contents | 375 return patch_text, patched_contents |
| 369 | 376 |
| 370 def _expand_initial_revision_range(self): | 377 def _expand_initial_revision_range(self): |
| 371 """Sets the initial contents of |self.revisions|.""" | 378 """Sets the initial contents of |self.revisions|.""" |
| 372 with self.api.m.step.nest('Expanding revision range'): | 379 with self.api.m.step.nest('Expanding revision range'): |
| 373 good_hash = self.good_rev.commit_hash | 380 good_hash = self.good_rev.commit_hash |
| 374 bad_hash = self.bad_rev.commit_hash | 381 bad_hash = self.bad_rev.commit_hash |
| 382 depot = self.good_rev.depot_name | |
| 375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) | 383 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) |
| 376 revisions = self._revision_range( | 384 revisions = self._revision_range( |
| 377 start=good_hash, | 385 start=good_hash, |
| 378 end=bad_hash, | 386 end=bad_hash, |
| 379 depot_name=self.base_depot, | 387 depot_name=self.base_depot, |
| 380 step_name=step_name) | 388 step_name=step_name, |
| 389 step_test_data=lambda: self.api._test_data['revision_list'][depot] | |
| 390 ) | |
| 381 self.revisions = [self.good_rev] + revisions + [self.bad_rev] | 391 self.revisions = [self.good_rev] + revisions + [self.bad_rev] |
| 382 self._update_revision_list_indexes() | 392 self._update_revision_list_indexes() |
| 383 | 393 |
| 384 def _revision_range(self, start, end, depot_name, base_revision=None, | 394 def _revision_range(self, start, end, depot_name, base_revision=None, |
| 385 step_name=None): | 395 step_name=None, **kwargs): |
| 386 """Returns a list of RevisionState objects between |start| and |end|. | 396 """Returns a list of RevisionState objects between |start| and |end|. |
| 387 | 397 |
| 388 Args: | 398 Args: |
| 389 start (str): Start commit hash. | 399 start (str): Start commit hash. |
| 390 end (str): End commit hash. | 400 end (str): End commit hash. |
| 391 depot_name (str): Short string name of repo, e.g. chromium or v8. | 401 depot_name (str): Short string name of repo, e.g. chromium or v8. |
| 392 base_revision (str): Base revision in the downstream repo (e.g. chromium). | 402 base_revision (str): Base revision in the downstream repo (e.g. chromium). |
| 393 step_name (str): Optional step name. | 403 step_name (str): Optional step name. |
| 394 | 404 |
| 395 Returns: | 405 Returns: |
| 396 A list of RevisionState objects, not including the given start or end. | 406 A list of RevisionState objects, not including the given start or end. |
| 397 """ | 407 """ |
| 398 if self.internal_bisect: # pragma: no cover | 408 if self.internal_bisect: # pragma: no cover |
| 399 return self._revision_range_with_gitiles( | 409 return self._revision_range_with_gitiles( |
| 400 start, end, depot_name, base_revision, step_name) | 410 start, end, depot_name, base_revision, step_name) |
| 401 try: | 411 try: |
| 402 step_result = self.api.m.python( | 412 step_result = self.api.m.python( |
| 403 step_name, | 413 step_name, |
| 404 self.api.resource('fetch_intervening_revisions.py'), | 414 self.api.resource('fetch_intervening_revisions.py'), |
| 405 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], | 415 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], |
| 406 stdout=self.api.m.json.output()) | 416 stdout=self.api.m.json.output(), **kwargs) |
| 407 except self.api.m.step.StepFailure: # pragma: no cover | 417 except self.api.m.step.StepFailure: # pragma: no cover |
| 408 self.surface_result('BAD_REV') | 418 self.surface_result('BAD_REV') |
| 409 raise | 419 raise |
| 410 revisions = [] | 420 revisions = [] |
| 411 for commit_hash, _ in step_result.stdout: | 421 for commit_hash, _ in step_result.stdout: |
| 412 revisions.append(self.revision_class( | 422 revisions.append(self.revision_class( |
| 413 bisector=self, | 423 bisector=self, |
| 414 commit_hash=commit_hash, | 424 commit_hash=commit_hash, |
| 415 depot_name=depot_name, | 425 depot_name=depot_name, |
| 416 base_revision=base_revision)) | 426 base_revision=base_revision)) |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 502 dep_revision_max = max_revision.deps[depot_name] | 512 dep_revision_max = max_revision.deps[depot_name] |
| 503 if (dep_revision_min and dep_revision_max and | 513 if (dep_revision_min and dep_revision_max and |
| 504 dep_revision_min != dep_revision_max): | 514 dep_revision_min != dep_revision_max): |
| 505 step_name = ('Expanding revision range for revision %s' | 515 step_name = ('Expanding revision range for revision %s' |
| 506 ' on depot %s' % (dep_revision_max, depot_name)) | 516 ' on depot %s' % (dep_revision_max, depot_name)) |
| 507 rev_list = self._revision_range( | 517 rev_list = self._revision_range( |
| 508 start=dep_revision_min, | 518 start=dep_revision_min, |
| 509 end=dep_revision_max, | 519 end=dep_revision_max, |
| 510 depot_name=depot_name, | 520 depot_name=depot_name, |
| 511 base_revision=min_revision, | 521 base_revision=min_revision, |
| 512 step_name=step_name) | 522 step_name=step_name, |
| 523 step_test_data=lambda: | |
| 524 self.api._test_data['revision_list'][depot_name]) | |
| 513 new_revisions = self.revisions[:max_revision.list_index] | 525 new_revisions = self.revisions[:max_revision.list_index] |
| 514 new_revisions += rev_list | 526 new_revisions += rev_list |
| 515 new_revisions += self.revisions[max_revision.list_index:] | 527 new_revisions += self.revisions[max_revision.list_index:] |
| 516 self.revisions = new_revisions | 528 self.revisions = new_revisions |
| 517 self._update_revision_list_indexes() | 529 self._update_revision_list_indexes() |
| 518 return True | 530 return True |
| 519 except RuntimeError: # pragma: no cover | 531 except RuntimeError: # pragma: no cover |
| 520 warning_text = ('Could not expand dependency revisions for ' + | 532 warning_text = ('Could not expand dependency revisions for ' + |
| 521 revision_to_expand.commit_hash) | 533 revision_to_expand.commit_hash) |
| 522 self.surface_result('BAD_REV') | 534 self.surface_result('BAD_REV') |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 539 | 551 |
| 540 The change between the test results obtained for the given 'good' and | 552 The change between the test results obtained for the given 'good' and |
| 541 'bad' revisions is expected to be considered a regression. The | 553 'bad' revisions is expected to be considered a regression. The |
| 542 `improvement_direction` attribute is positive if a larger number is | 554 `improvement_direction` attribute is positive if a larger number is |
| 543 considered better, and negative if a smaller number is considered better. | 555 considered better, and negative if a smaller number is considered better. |
| 544 | 556 |
| 545 Returns: | 557 Returns: |
| 546 True if the check passes (i.e. no problem), False if the change is not | 558 True if the check passes (i.e. no problem), False if the change is not |
| 547 a regression according to the improvement direction. | 559 a regression according to the improvement direction. |
| 548 """ | 560 """ |
| 549 good = self.good_rev.mean_value | 561 good = self.good_rev.mean |
| 550 bad = self.bad_rev.mean_value | 562 bad = self.bad_rev.mean |
| 551 | 563 |
| 552 if self.is_return_code_mode(): | 564 if self.is_return_code_mode(): |
| 553 return True | 565 return True |
| 554 | 566 |
| 555 direction = self.improvement_direction | 567 direction = self.improvement_direction |
| 556 if direction is None: | 568 if direction is None: |
| 557 return True | 569 return True |
| 558 if (bad > good and direction > 0) or (bad < good and direction < 0): | 570 if (bad > good and direction > 0) or (bad < good and direction < 0): |
| 559 self._set_failed_direction_results() | 571 self._set_failed_direction_results() |
| 560 return False | 572 return False |
| (...skipping 19 matching lines...) Expand all Loading... | |
| 580 | 592 |
| 581 Returns: True if the revisions produced results that differ from each | 593 Returns: True if the revisions produced results that differ from each |
| 582 other in a statistically significant manner. False if such difference could | 594 other in a statistically significant manner. False if such difference could |
| 583 not be established in the time or sample size allowed. | 595 not be established in the time or sample size allowed. |
| 584 """ | 596 """ |
| 585 if self.test_type == 'return_code': | 597 if self.test_type == 'return_code': |
| 586 return (self.good_rev.overall_return_code != | 598 return (self.good_rev.overall_return_code != |
| 587 self.bad_rev.overall_return_code) | 599 self.bad_rev.overall_return_code) |
| 588 | 600 |
| 589 if self.bypass_stats_check: | 601 if self.bypass_stats_check: |
| 590 dummy_result = self.good_rev.values != self.bad_rev.values | 602 self.compare_revisions(self.good_rev, self.bad_rev) |
| 603 dummy_result = self.good_rev.mean != self.bad_rev.mean | |
| 591 if not dummy_result: | 604 if not dummy_result: |
| 592 self._set_insufficient_confidence_warning() | 605 self._set_insufficient_confidence_warning() |
| 593 return dummy_result | 606 return dummy_result |
| 594 | 607 |
| 608 # TODO(robertocn): This step should not be necessary in some cases. | |
| 595 with self.api.m.step.nest('Re-testing reference range'): | 609 with self.api.m.step.nest('Re-testing reference range'): |
| 596 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT | 610 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT |
| 597 while time.time() < expiration_time: | 611 while time.time() < expiration_time: |
| 598 if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5: | 612 if (self.good_rev.test_run_count >= 5 |
| 599 if self.significantly_different(self.good_rev.values, | 613 and self.bad_rev.test_run_count >= 5): |
| 600 self.bad_rev.values): | 614 if self.compare_revisions(self.good_rev, self.bad_rev): |
| 601 return True | 615 return True |
| 602 if len(self.good_rev.values) == len(self.bad_rev.values): | 616 if self.good_rev.test_run_count == self.bad_rev.test_run_count: |
| 603 revision_to_retest = self.last_tested_revision | 617 revision_to_retest = self.last_tested_revision |
| 604 else: | 618 else: |
| 605 revision_to_retest = min(self.good_rev, self.bad_rev, | 619 revision_to_retest = min(self.good_rev, self.bad_rev, |
| 606 key=lambda x: len(x.values)) | 620 key=lambda x: x.test_run_count) |
| 607 if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES: | |
| 608 revision_to_retest.retest() | |
| 609 else: | |
| 610 break | |
| 611 self._set_insufficient_confidence_warning() | 621 self._set_insufficient_confidence_warning() |
| 612 return False | 622 return False |
| 613 | 623 |
| 614 | 624 |
| 615 def get_exception(self): | 625 def get_exception(self): |
| 616 raise NotImplementedError() # pragma: no cover | 626 raise NotImplementedError() # pragma: no cover |
| 617 # TODO: should return an exception with the details of the failure. | 627 # TODO: should return an exception with the details of the failure. |
| 618 | 628 |
| 619 def _set_insufficient_confidence_warning( | 629 def _set_insufficient_confidence_warning( |
| 620 self): # pragma: no cover | 630 self): # pragma: no cover |
| 621 """Adds a warning about the lack of initial regression confidence.""" | 631 """Adds a warning about the lack of initial regression confidence.""" |
| 622 self.failed_initial_confidence = True | 632 self.failed_initial_confidence = True |
| 623 self.surface_result('LO_INIT_CONF') | 633 self.surface_result('LO_INIT_CONF') |
| 624 self.warnings.append( | 634 self.warnings.append( |
| 625 'Bisect failed to reproduce the regression with enough confidence.') | 635 'Bisect failed to reproduce the regression with enough confidence.') |
| 626 | 636 |
| 627 def _results_debug_message(self): | 637 def _results_debug_message(self): |
| 628 """Returns a string with values used to debug a bisect result.""" | 638 """Returns a string with values used to debug a bisect result.""" |
| 629 result = 'bisector.lkgr: %r\n' % self.lkgr | 639 result = 'bisector.lkgr: %r\n' % self.lkgr |
| 630 result += 'bisector.fkbr: %r\n\n' % self.fkbr | 640 result += 'bisector.fkbr: %r\n\n' % self.fkbr |
| 631 result += self._revision_value_table() | 641 result += self._revision_value_table() |
| 632 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values): | 642 if (self.lkgr and self.lkgr.test_run_count and self.fkbr and |
| 633 result += '\n' + self._t_test_results() | 643 self.fkbr.test_run_count): |
| 644 result += '\n' + '\n'.join([ | |
| 645 'LKGR values: %r' % list(self.lkgr.debug_values), | |
| 646 'FKBR values: %r' % list(self.fkbr.debug_values), | |
| 647 ]) | |
| 634 return result | 648 return result |
| 635 | 649 |
| 636 def _revision_value_table(self): | 650 def _revision_value_table(self): |
| 637 """Returns a string table showing revisions and their values.""" | 651 """Returns a string table showing revisions and their values.""" |
| 638 header = [['Revision', 'Values']] | 652 header = [['Revision', 'Values']] |
| 639 rows = [[r.revision_string(), str(r.values)] for r in self.revisions] | 653 rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions] |
| 640 return self._pretty_table(header + rows) | 654 return self._pretty_table(header + rows) |
| 641 | 655 |
| 642 def _pretty_table(self, data): | 656 def _pretty_table(self, data): |
| 643 results = [] | 657 results = [] |
| 644 for row in data: | 658 for row in data: |
| 645 results.append('%-15s' * len(row) % tuple(row)) | 659 results.append('%-15s' * len(row) % tuple(row)) |
| 646 return '\n'.join(results) | 660 return '\n'.join(results) |
| 647 | 661 |
| 648 def _t_test_results(self): | |
| 649 """Returns a string showing t-test results for lkgr and fkbr.""" | |
| 650 t, df, p = self.api.m.math_utils.welchs_t_test( | |
| 651 self.lkgr.values, self.fkbr.values) | |
| 652 lines = [ | |
| 653 'LKGR values: %r' % self.lkgr.values, | |
| 654 'FKBR values: %r' % self.fkbr.values, | |
| 655 't-statistic: %r' % t, | |
| 656 'deg. of freedom: %r' % df, | |
| 657 'p-value: %r' % p, | |
| 658 'Confidence score: %r' % (100 * (1 - p)) | |
| 659 ] | |
| 660 return '\n'.join(lines) | |
| 661 | |
| 662 def print_result_debug_info(self): | 662 def print_result_debug_info(self): |
| 663 """Prints extra debug info at the end of the bisect process.""" | 663 """Prints extra debug info at the end of the bisect process.""" |
| 664 lines = self._results_debug_message().splitlines() | 664 lines = self._results_debug_message().splitlines() |
| 665 # If we emit a null step then add a log to it, the log should be kept | 665 # If we emit a null step then add a log to it, the log should be kept |
| 666 # longer than 7 days (which is often needed to debug some issues). | 666 # longer than 7 days (which is often needed to debug some issues). |
| 667 self.api.m.step('Debug Info', []) | 667 self.api.m.step('Debug Info', []) |
| 668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines | 668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines |
| 669 | 669 |
| 670 def post_result(self, halt_on_failure=False): | 670 def post_result(self, halt_on_failure=False): |
| 671 """Posts bisect results to Perf Dashboard.""" | 671 """Posts bisect results to Perf Dashboard.""" |
| 672 self.api.m.perf_dashboard.set_default_config() | 672 self.api.m.perf_dashboard.set_default_config() |
| 673 self.api.m.perf_dashboard.post_bisect_results( | 673 self.api.m.perf_dashboard.post_bisect_results( |
| 674 self.get_result(), halt_on_failure) | 674 self.get_result(), halt_on_failure) |
| 675 | 675 |
| 676 def get_revision_to_eval(self): | 676 def get_revision_to_eval(self): |
| 677 """Gets the next RevisionState object in the candidate range. | 677 """Gets the next RevisionState object in the candidate range. |
| 678 | 678 |
| 679 Returns: | 679 Returns: |
| 680 The next Revision object in a list. | 680 The next Revision object in a list. |
| 681 """ | 681 """ |
| 682 self._update_candidate_range() | 682 self._update_candidate_range() |
| 683 candidate_range = [revision for revision in | 683 candidate_range = [revision for revision in |
| 684 self.revisions[self.lkgr.list_index + 1: | 684 self.revisions[self.lkgr.list_index + 1: |
| 685 self.fkbr.list_index] | 685 self.fkbr.list_index] |
| 686 if not revision.tested and not revision.failed] | 686 if not revision.failed] |
| 687 if len(candidate_range) == 1: | 687 if len(candidate_range) == 1: |
| 688 return candidate_range[0] | 688 return candidate_range[0] |
| 689 if len(candidate_range) == 0: | 689 if len(candidate_range) == 0: |
| 690 return None | 690 return None |
| 691 | 691 |
| 692 default_revision = candidate_range[len(candidate_range) / 2] | 692 default_revision = candidate_range[len(candidate_range) / 2] |
| 693 | 693 |
| 694 with self.api.m.step.nest( | 694 with self.api.m.step.nest( |
| 695 'Wiggling revision ' + default_revision.revision_string()): | 695 'Wiggling revision ' + default_revision.revision_string()): |
| 696 # We'll search up to 25% of the range (in either direction) to try and | 696 # We'll search up to 25% of the range (in either direction) to try and |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 733 return True | 733 return True |
| 734 if (revision.good and revision.next_revision and | 734 if (revision.good and revision.next_revision and |
| 735 revision.next_revision.bad): | 735 revision.next_revision.bad): |
| 736 if (revision.next_revision.deps_change() | 736 if (revision.next_revision.deps_change() |
| 737 and self._expand_deps_revisions(revision.next_revision)): | 737 and self._expand_deps_revisions(revision.next_revision)): |
| 738 return False | 738 return False |
| 739 self.culprit = revision.next_revision | 739 self.culprit = revision.next_revision |
| 740 return True | 740 return True |
| 741 return False | 741 return False |
| 742 | 742 |
| 743 def wait_for_all(self, revision_list): | |
| 744 """Waits for all revisions in list to finish.""" | |
| 745 for r in revision_list: | |
| 746 self.wait_for(r) | |
| 747 | |
| 748 def wait_for(self, revision, nest_check=True): | |
| 749 """Waits for the revision to finish its job.""" | |
| 750 if nest_check and not self.flags.get( | |
| 751 'do_not_nest_wait_for_revision'): # pragma: no cover | |
| 752 with self.api.m.step.nest('Waiting for ' + revision.revision_string()): | |
| 753 return self.wait_for(revision, nest_check=False) | |
| 754 while True: | |
| 755 revision.update_status() | |
| 756 if revision.in_progress: | |
| 757 self.api.m.python.inline( | |
| 758 'sleeping', | |
| 759 """ | |
| 760 import sys | |
| 761 import time | |
| 762 time.sleep(20*60) | |
| 763 sys.exit(0) | |
| 764 """) | |
| 765 else: | |
| 766 break | |
| 767 | |
| 768 def _update_candidate_range(self): | 743 def _update_candidate_range(self): |
| 769 """Updates lkgr and fkbr (last known good/first known bad) revisions. | 744 """Updates lkgr and fkbr (last known good/first known bad) revisions. |
| 770 | 745 |
| 771 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in | 746 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in |
| 772 bisectors.revisions.""" | 747 bisectors.revisions.""" |
| 773 for r in self.revisions: | 748 for r in self.revisions: |
| 774 if r.tested: | 749 if r.test_run_count: |
| 775 if r.good: | 750 if r.good: |
| 776 self.lkgr = r | 751 self.lkgr = r |
| 777 elif r.bad: | 752 elif r.bad: |
| 778 self.fkbr = r | 753 self.fkbr = r |
| 779 break | 754 break |
| 780 assert self.lkgr and self.fkbr | 755 assert self.lkgr and self.fkbr |
| 781 | 756 |
| 782 def get_perf_tester_name(self): | 757 def get_perf_tester_name(self): |
| 783 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. | 758 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. |
| 784 | 759 |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 869 assert len(result_code) <= 20 | 844 assert len(result_code) <= 20 |
| 870 if result_code not in self.result_codes: | 845 if result_code not in self.result_codes: |
| 871 self.result_codes.add(result_code) | 846 self.result_codes.add(result_code) |
| 872 properties = self.api.m.step.active_result.presentation.properties | 847 properties = self.api.m.step.active_result.presentation.properties |
| 873 properties['extra_result_code'] = sorted(self.result_codes) | 848 properties['extra_result_code'] = sorted(self.result_codes) |
| 874 | 849 |
| 875 def get_result(self): | 850 def get_result(self): |
| 876 """Returns the results as a jsonable object.""" | 851 """Returns the results as a jsonable object.""" |
| 877 config = self.bisect_config | 852 config = self.bisect_config |
| 878 results_confidence = 0 | 853 results_confidence = 0 |
| 879 if self.culprit: | |
| 880 results_confidence = self.api.m.math_utils.confidence_score( | |
| 881 self.lkgr.values, self.fkbr.values) | |
| 882 | 854 |
| 883 if self.failed: | 855 if self.failed: |
| 884 status = 'failed' | 856 status = 'failed' |
| 885 elif self.bisect_over: | 857 elif self.bisect_over: |
| 886 status = 'completed' | 858 status = 'completed' |
| 887 else: | 859 else: |
| 888 status = 'started' | 860 status = 'started' |
| 889 | 861 |
| 890 aborted_reason = None | 862 aborted_reason = None |
| 891 if self.failed_initial_confidence: | 863 if self.failed_initial_confidence: |
| 892 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON | 864 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON |
| 893 elif self.failed_direction: | 865 elif self.failed_direction: |
| 894 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON | 866 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON |
| 895 return { | 867 return { |
| 896 'try_job_id': config.get('try_job_id'), | 868 'try_job_id': config.get('try_job_id'), |
| 897 'bug_id': config.get('bug_id'), | 869 'bug_id': config.get('bug_id'), |
| 898 'status': status, | 870 'status': status, |
| 899 'buildbot_log_url': self._get_build_url(), | 871 'buildbot_log_url': self._get_build_url(), |
| 900 'bisect_bot': self.get_perf_tester_name(), | 872 'bisect_bot': self.get_perf_tester_name(), |
| 901 'command': config['command'], | 873 'command': config['command'], |
| 902 'test_type': config['test_type'], | 874 'test_type': config['test_type'], |
| 903 'metric': config['metric'], | 875 'metric': config['metric'], |
| 904 'change': self.relative_change, | 876 'change': self.relative_change, |
| 905 'score': results_confidence, | |
| 906 'good_revision': self.good_rev.commit_hash, | 877 'good_revision': self.good_rev.commit_hash, |
| 907 'bad_revision': self.bad_rev.commit_hash, | 878 'bad_revision': self.bad_rev.commit_hash, |
| 908 'warnings': self.warnings, | 879 'warnings': self.warnings, |
| 909 'aborted_reason': aborted_reason, | 880 'aborted_reason': aborted_reason, |
| 910 'culprit_data': self._culprit_data(), | 881 'culprit_data': self._culprit_data(), |
| 911 'revision_data': self._revision_data() | 882 'revision_data': self._revision_data() |
| 912 } | 883 } |
| 913 | 884 |
| 914 def _culprit_data(self): | 885 def _culprit_data(self): |
| 915 culprit = self.culprit | 886 culprit = self.culprit |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 927 'email': culprit_info['email'], | 898 'email': culprit_info['email'], |
| 928 'cl_date': culprit_info['date'], | 899 'cl_date': culprit_info['date'], |
| 929 'commit_info': culprit_info['body'], | 900 'commit_info': culprit_info['body'], |
| 930 'revisions_links': [], | 901 'revisions_links': [], |
| 931 'cl': culprit.commit_hash | 902 'cl': culprit.commit_hash |
| 932 } | 903 } |
| 933 | 904 |
| 934 def _revision_data(self): | 905 def _revision_data(self): |
| 935 revision_rows = [] | 906 revision_rows = [] |
| 936 for r in self.revisions: | 907 for r in self.revisions: |
| 937 if r.tested or r.aborted: | 908 if r.test_run_count: |
| 938 revision_rows.append({ | 909 revision_rows.append({ |
| 939 'depot_name': r.depot_name, | 910 'depot_name': r.depot_name, |
| 940 'commit_hash': r.commit_hash, | 911 'commit_hash': r.commit_hash, |
| 941 'revision_string': r.revision_string(), | 912 'revision_string': r.revision_string(), |
| 942 'mean_value': r.mean_value, | 913 'mean_value': r.mean, |
| 943 'std_dev': r.std_dev, | 914 'std_dev': r.std_dev, |
| 944 'values': r.values, | 915 'values': r.debug_values, |
| 945 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', | 916 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', |
| 946 }) | 917 }) |
| 947 return revision_rows | 918 return revision_rows |
| 948 | 919 |
| 949 def _get_build_url(self): | 920 def _get_build_url(self): |
| 950 properties = self.api.m.properties | 921 properties = self.api.m.properties |
| 951 bot_url = properties.get('buildbotURL', | 922 bot_url = properties.get('buildbotURL', |
| 952 'http://build.chromium.org/p/chromium/') | 923 'http://build.chromium.org/p/chromium/') |
| 953 builder_name = urllib.quote(properties.get('buildername', '')) | 924 builder_name = urllib.quote(properties.get('buildername', '')) |
| 954 builder_number = str(properties.get('buildnumber', '')) | 925 builder_number = str(properties.get('buildnumber', '')) |
| 955 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) | 926 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) |
| OLD | NEW |