OLD | NEW |
---|---|
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import json | 5 import json |
6 import re | 6 import re |
7 import time | 7 import time |
8 import urllib | 8 import urllib |
9 | 9 |
10 from . import config_validation | 10 from . import config_validation |
(...skipping 29 matching lines...) | |
40 ) | 40 ) |
41 | 41 |
42 # When we look for the next revision to build, we search nearby revisions | 42 # When we look for the next revision to build, we search nearby revisions |
43 # looking for a revision that's already been archived. Since we don't want | 43 # looking for a revision that's already been archived. Since we don't want |
44 # to move *too* far from the original revision, we'll cap the search at 25%. | 44 # to move *too* far from the original revision, we'll cap the search at 25%. |
45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 | 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 |
46 | 46 |
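To make the 25% cap concrete, here is an illustrative standalone sketch of turning the percentage into a bounded set of offsets to probe (nearby_offsets is a hypothetical helper, not part of this module):

def nearby_offsets(range_length, cap_fraction=0.25):
    # Never wander further than cap_fraction of the whole range away from
    # the starting revision.
    max_offset = max(1, int(range_length * cap_fraction))
    for offset in range(1, max_offset + 1):
        # Alternate between looking ahead of and behind the starting point.
        yield offset
        yield -offset

print(list(nearby_offsets(20)))  # [1, -1, 2, -2, 3, -3, 4, -4, 5, -5]
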
47 # How long to keep re-testing the initial good-bad range until a significant | 47 # How long to keep re-testing the initial good-bad range until a significant |
48 # difference is established. | 48 # difference is established. |
49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 | 49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 |
50 # If we reach this number of samples on the reference range and have not | |
51 # achieved statistical significance, bail. | |
52 MAX_REQUIRED_SAMPLES = 15 | |
53 | |
54 # Significance level to use for determining difference between revisions via | |
55 # hypothesis testing. | |
56 SIGNIFICANCE_LEVEL = 0.01 | |
57 | 50 |
58 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( | 51 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( |
59 'The metric values for the initial "good" and "bad" revisions ' | 52 'The metric values for the initial "good" and "bad" revisions ' |
60 'do not represent a clear regression.') | 53 'do not represent a clear regression.') |
61 | 54 |
62 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( | 55 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( |
63 'The metric values for the initial "good" and "bad" revisions match the ' | 56 'The metric values for the initial "good" and "bad" revisions match the ' |
64 'expected direction of improvement. Thus, they likely represent an improvement ' | 57 'expected direction of improvement. Thus, they likely represent an improvement ' |
65 'and not a regression.') | 58 'and not a regression.') |
66 | 59 |
(...skipping 81 matching lines...) | |
148 | 141 |
149 Returns: | 142 Returns: |
150 A 40-digit git commit hash string. | 143 A 40-digit git commit hash string. |
151 """ | 144 """ |
152 if self._is_sha1(rev): # pragma: no cover | 145 if self._is_sha1(rev): # pragma: no cover |
153 return rev | 146 return rev |
154 if rev.isdigit(): | 147 if rev.isdigit(): |
155 commit_position = self._api.m.commit_position.construct( | 148 commit_position = self._api.m.commit_position.construct( |
156 branch='refs/heads/master', value=rev) | 149 branch='refs/heads/master', value=rev) |
157 try: | 150 try: |
158 return self._api.m.crrev.to_commit_hash(commit_position) | 151 return self._api.m.crrev.to_commit_hash( |
152 commit_position, | |
153 step_test_data=lambda: self.api._test_data['hash_cp_map'][rev]) | |
159 except self.api.m.step.StepFailure: # pragma: no cover | 154 except self.api.m.step.StepFailure: # pragma: no cover |
160 self.surface_result('BAD_REV') | 155 self.surface_result('BAD_REV') |
161 raise | 156 raise |
162 self.surface_result('BAD_REV') # pragma: no cover | 157 self.surface_result('BAD_REV') # pragma: no cover |
163 raise self.api.m.step.StepFailure( | 158 raise self.api.m.step.StepFailure( |
164 'Invalid input revision: %r' % (rev,)) # pragma: no cover | 159 'Invalid input revision: %r' % (rev,)) # pragma: no cover |
165 | 160 |
166 @staticmethod | 161 @staticmethod |
167 def _is_sha1(s): | 162 def _is_sha1(s): |
168 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) | 163 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) |
169 | 164 |
170 def significantly_different( | 165 def compare_revisions(self, revision_a, revision_b): |
171 self, list_a, list_b, | 166 """ |
172 significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover | 167 Returns: |
173 """Uses an external script to run hypothesis testing with scipy. | 168 True if the samples are significantly different. |
169 None if there is not enough data to tell. | |
170 False if there's enough data but still can't tell the samples apart. | |
171 """ | |
172 output_format = 'chartjson' | |
173 values_a = revision_a.chartjson_paths | |
174 values_b = revision_b.chartjson_paths | |
175 if revision_a.valueset_paths and revision_b.valueset_paths: | |
176 output_format = 'valueset' | |
174 | 177 |
175 The reason why we need an external script is that scipy is not available to | 178 result = self.api.stat_compare( |
176 the default python installed in all platforms. We instead rely on an | 179 values_a, |
177 anaconda environment to provide those packages. | 180 values_b, |
181 self.bisect_config['metric'], | |
182 output_format=output_format, | |
183 step_test_data=lambda: self.api.test_api.compare_samples_data( | |
184 self.api._test_data.get('revision_data'), revision_a, revision_b)) | |
178 | 185 |
179 Args: | 186 revision_a.debug_values = result['sample_a']['debug_values'] |
180 list_a, list_b: Two lists representing samples to be compared. | 187 revision_b.debug_values = result['sample_b']['debug_values'] |
181 significance_level: Self-describing. As a decimal fraction. | 188 revision_a.mean = result['sample_a']['mean'] |
RobertoCN 2016/08/23 00:26:24: Make mean and std_dev properties of revision_state
RobertoCN 2016/09/07 00:33:24: Done.
189 revision_b.mean = result['sample_b']['mean'] | |
190 revision_a.std_dev = result['sample_a']['std_dev'] | |
191 revision_b.std_dev = result['sample_b']['std_dev'] | |
182 | 192 |
183 Returns: | 193 if result['result'] == 'needMoreData': |
RobertoCN 2016/08/23 00:26:24: Make 3 constants for true, false and needMore.
RobertoCN 2016/09/07 00:33:24: Done.
184 A boolean indicating whether the null hypothesis ~(that the lists are | 194 return None |
185 samples from the same population) can be rejected at the specified | 195 return bool(result['result']) |
186 significance level. | |
187 """ | |
188 step_result = self.api.m.python( | |
189 'Checking sample difference', | |
190 self.api.resource('significantly_different.py'), | |
191 [json.dumps(list_a), json.dumps(list_b), str(significance_level)], | |
192 stdout=self.api.m.json.output()) | |
193 results = step_result.stdout | |
194 if results is None: | |
195 assert self.dummy_builds | |
196 return True | |
197 significantly_different = results['significantly_different'] | |
198 step_result.presentation.logs[str(significantly_different)] = [ | |
199 'See json.output for details'] | |
200 return significantly_different | |
201 | 196 |
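For context on the removed step: the hypothesis test that significantly_different.py delegates to scipy for can be sketched standalone roughly as follows (assuming Welch's t-test, as the _t_test_results helper below uses; the actual resource script may differ):

from scipy import stats

def significantly_different(list_a, list_b, significance_level=0.01):
    # Reject the null hypothesis (both lists sampled from the same
    # population) when the p-value falls below the significance level.
    # equal_var=False selects Welch's t-test.
    _, p_value = stats.ttest_ind(list_a, list_b, equal_var=False)
    return p_value < significance_level

print(significantly_different([10.1, 10.3, 10.2, 10.4, 10.2],
                              [12.0, 12.2, 12.1, 12.3, 12.2]))  # True
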
202 def config_step(self): | 197 def config_step(self): |
203 """Yields a step that prints the bisect config.""" | 198 """Yields a step that prints the bisect config.""" |
204 api = self.api | 199 api = self.api |
205 | 200 |
206 # bisect_config may come as a FrozenDict (which is not serializable). | 201 # bisect_config may come as a FrozenDict (which is not serializable). |
207 bisect_config = dict(self.bisect_config) | 202 bisect_config = dict(self.bisect_config) |
208 | 203 |
209 def fix_windows_backslashes(s): | 204 def fix_windows_backslashes(s): |
210 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') | 205 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') |
(...skipping 17 matching lines...) | |
228 except config_validation.ValidationFail as error: | 223 except config_validation.ValidationFail as error: |
229 self.surface_result('BAD_CONFIG') | 224 self.surface_result('BAD_CONFIG') |
230 self.api.m.halt(error.message) | 225 self.api.m.halt(error.message) |
231 raise self.api.m.step.StepFailure(error.message) | 226 raise self.api.m.step.StepFailure(error.message) |
232 | 227 |
233 @property | 228 @property |
234 def api(self): | 229 def api(self): |
235 return self._api | 230 return self._api |
236 | 231 |
237 def compute_relative_change(self): | 232 def compute_relative_change(self): |
238 old_value = float(self.good_rev.mean_value) | 233 old_value = float(self.good_rev.mean) |
239 new_value = float(self.bad_rev.mean_value) | 234 new_value = float(self.bad_rev.mean) |
240 | 235 |
241 if new_value and not old_value: # pragma: no cover | 236 if new_value and not old_value: # pragma: no cover |
242 self.relative_change = ZERO_TO_NON_ZERO | 237 self.relative_change = ZERO_TO_NON_ZERO |
243 return | 238 return |
244 | 239 |
245 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) | 240 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) |
246 self.relative_change = '%.2f%%' % (100 * rel_change) | 241 self.relative_change = '%.2f%%' % (100 * rel_change) |
247 | 242 |
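A worked example of the formatting in compute_relative_change, assuming math_utils.relative_change computes (new - old) / old (an assumption; the real helper lives in the math_utils recipe module):

def relative_change(old_value, new_value):
    # Hypothetical stand-in for api.m.math_utils.relative_change.
    return (new_value - old_value) / old_value

old_value, new_value = 200.0, 230.0
print('%.2f%%' % (100 * relative_change(old_value, new_value)))  # 15.00%
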
248 def make_deps_sha_file(self, deps_sha): | 243 def make_deps_sha_file(self, deps_sha): |
249 """Make a diff patch that creates DEPS.sha. | 244 """Make a diff patch that creates DEPS.sha. |
(...skipping 16 matching lines...) | |
266 file is to be written to. | 261 file is to be written to. |
267 commit_hash (str): An identifier for the step. | 262 commit_hash (str): An identifier for the step. |
268 | 263 |
269 Returns: | 264 Returns: |
270 A string containing the hash of the interned object. | 265 A string containing the hash of the interned object. |
271 """ | 266 """ |
272 cmd = 'hash-object -t blob -w --stdin'.split(' ') | 267 cmd = 'hash-object -t blob -w --stdin'.split(' ') |
273 stdin = self.api.m.raw_io.input(file_contents) | 268 stdin = self.api.m.raw_io.input(file_contents) |
274 stdout = self.api.m.raw_io.output() | 269 stdout = self.api.m.raw_io.output() |
275 step_name = 'Hashing modified DEPS file with revision ' + commit_hash | 270 step_name = 'Hashing modified DEPS file with revision ' + commit_hash |
276 step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout, | 271 step_result = self.api.m.git( |
277 name=step_name) | 272 *cmd, cwd=cwd, stdin=stdin, stdout=stdout, name=step_name, |
273 step_test_data=lambda: | |
274 self.api.m.raw_io.test_api.stream_output(commit_hash)) | |
278 hash_string = step_result.stdout.splitlines()[0] | 275 hash_string = step_result.stdout.splitlines()[0] |
279 try: | 276 try: |
280 if hash_string: | 277 if hash_string: |
281 int(hash_string, 16) | 278 int(hash_string, 16) |
282 return hash_string | 279 return hash_string |
283 except ValueError: # pragma: no cover | 280 except ValueError: # pragma: no cover |
284 reason = 'Git did not output a valid hash for the interned file.' | 281 reason = 'Git did not output a valid hash for the interned file.' |
285 self.api.m.halt(reason) | 282 self.api.m.halt(reason) |
286 raise self.api.m.step.StepFailure(reason) | 283 raise self.api.m.step.StepFailure(reason) |
287 | 284 |
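Stripped of the recipe API, the interning step in _git_intern_file amounts to something like this (a sketch that assumes it runs inside a git checkout and uses plain subprocess in place of the recipe's git wrapper):

import subprocess

def git_intern_file(file_contents, cwd):
    # Mirrors 'git hash-object -t blob -w --stdin': write the contents into
    # git's object database and return the resulting blob hash.
    proc = subprocess.Popen(
        ['git', 'hash-object', '-t', 'blob', '-w', '--stdin'],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE, cwd=cwd)
    stdout, _ = proc.communicate(file_contents)
    blob_hash = stdout.splitlines()[0].strip()
    int(blob_hash, 16)  # Sanity-check that git printed a hex object id.
    return blob_hash
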
(...skipping 14 matching lines...) | |
302 Returns: | 299 Returns: |
303 A string containing the diff patch as produced by the 'git diff' command. | 300 A string containing the diff patch as produced by the 'git diff' command. |
304 """ | 301 """ |
305 # The prefixes in the command below make it easier to find and replace the | 302 # The prefixes in the command below make it easier to find and replace the |
306 # tree-ish git object ids in the diff output. | 303 # tree-ish git object ids in the diff output. |
307 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' | 304 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' |
308 cmd %= (git_object_a, git_object_b) | 305 cmd %= (git_object_a, git_object_b) |
309 cmd = cmd.split(' ') | 306 cmd = cmd.split(' ') |
310 stdout = self.api.m.raw_io.output() | 307 stdout = self.api.m.raw_io.output() |
311 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) | 308 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) |
312 step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name) | 309 step_result = self.api.m.git( |
310 *cmd, cwd=cwd, stdout=stdout, name=step_name, | |
311 step_test_data=lambda: self.api._test_data['diff_patch']) | |
313 patch_text = step_result.stdout | 312 patch_text = step_result.stdout |
314 src_string = 'IAMSRC:' + git_object_a | 313 src_string = 'IAMSRC:' + git_object_a |
315 dst_string = 'IAMDST:' + git_object_b | 314 dst_string = 'IAMDST:' + git_object_b |
316 patch_text = patch_text.replace(src_string, src_alias) | 315 patch_text = patch_text.replace(src_string, src_alias) |
317 patch_text = patch_text.replace(dst_string, dst_alias) | 316 patch_text = patch_text.replace(dst_string, dst_alias) |
318 return patch_text | 317 return patch_text |
319 | 318 |
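The IAMSRC/IAMDST prefix trick is easiest to see with the git plumbing stripped away (a simplified sketch; the diff header below is fabricated and 'DEPS' stands in for the src/dst aliases):

# Fabricated sample of a blob-to-blob diff header with the custom prefixes;
# real 'git diff' output would also contain the patch body.
git_object_a = 'a' * 40
git_object_b = 'b' * 40
patch_text = ('--- IAMSRC:%s\n'
              '+++ IAMDST:%s\n') % (git_object_a, git_object_b)

# Swap the tree-ish object ids for readable path aliases, as done above.
patch_text = patch_text.replace('IAMSRC:' + git_object_a, 'DEPS')
patch_text = patch_text.replace('IAMDST:' + git_object_b, 'DEPS')
print(patch_text)  # --- DEPS
                   # +++ DEPS
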
320 def make_deps_patch(self, base_revision, base_file_contents, | 319 def make_deps_patch(self, base_revision, base_file_contents, |
321 depot, new_commit_hash): | 320 depot, new_commit_hash): |
322 """Make a diff patch that updates a specific dependency revision. | 321 """Make a diff patch that updates a specific dependency revision. |
(...skipping 42 matching lines...) | |
365 interned_deps_hash, deps_file, deps_file, | 364 interned_deps_hash, deps_file, deps_file, |
366 cwd=cwd, | 365 cwd=cwd, |
367 deps_rev=new_commit_hash) | 366 deps_rev=new_commit_hash) |
368 return patch_text, patched_contents | 367 return patch_text, patched_contents |
369 | 368 |
370 def _expand_initial_revision_range(self): | 369 def _expand_initial_revision_range(self): |
371 """Sets the initial contents of |self.revisions|.""" | 370 """Sets the initial contents of |self.revisions|.""" |
372 with self.api.m.step.nest('Expanding revision range'): | 371 with self.api.m.step.nest('Expanding revision range'): |
373 good_hash = self.good_rev.commit_hash | 372 good_hash = self.good_rev.commit_hash |
374 bad_hash = self.bad_rev.commit_hash | 373 bad_hash = self.bad_rev.commit_hash |
374 depot = self.good_rev.depot_name | |
375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) | 375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) |
376 revisions = self._revision_range( | 376 revisions = self._revision_range( |
377 start=good_hash, | 377 start=good_hash, |
378 end=bad_hash, | 378 end=bad_hash, |
379 depot_name=self.base_depot, | 379 depot_name=self.base_depot, |
380 step_name=step_name) | 380 step_name=step_name, |
381 step_test_data=lambda: self.api._test_data['revision_list'][depot] | |
382 ) | |
381 self.revisions = [self.good_rev] + revisions + [self.bad_rev] | 383 self.revisions = [self.good_rev] + revisions + [self.bad_rev] |
382 self._update_revision_list_indexes() | 384 self._update_revision_list_indexes() |
383 | 385 |
384 def _revision_range(self, start, end, depot_name, base_revision=None, | 386 def _revision_range(self, start, end, depot_name, base_revision=None, |
385 step_name=None): | 387 step_name=None, **kwargs): |
386 """Returns a list of RevisionState objects between |start| and |end|. | 388 """Returns a list of RevisionState objects between |start| and |end|. |
387 | 389 |
388 Args: | 390 Args: |
389 start (str): Start commit hash. | 391 start (str): Start commit hash. |
390 end (str): End commit hash. | 392 end (str): End commit hash. |
391 depot_name (str): Short string name of repo, e.g. chromium or v8. | 393 depot_name (str): Short string name of repo, e.g. chromium or v8. |
392 base_revision (str): Base revision in the downstream repo (e.g. chromium). | 394 base_revision (str): Base revision in the downstream repo (e.g. chromium). |
393 step_name (str): Optional step name. | 395 step_name (str): Optional step name. |
394 | 396 |
395 Returns: | 397 Returns: |
396 A list of RevisionState objects, not including the given start or end. | 398 A list of RevisionState objects, not including the given start or end. |
397 """ | 399 """ |
398 if self.internal_bisect: # pragma: no cover | 400 if self.internal_bisect: # pragma: no cover |
399 return self._revision_range_with_gitiles( | 401 return self._revision_range_with_gitiles( |
400 start, end, depot_name, base_revision, step_name) | 402 start, end, depot_name, base_revision, step_name) |
401 try: | 403 try: |
402 step_result = self.api.m.python( | 404 step_result = self.api.m.python( |
403 step_name, | 405 step_name, |
404 self.api.resource('fetch_intervening_revisions.py'), | 406 self.api.resource('fetch_intervening_revisions.py'), |
405 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], | 407 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], |
406 stdout=self.api.m.json.output()) | 408 stdout=self.api.m.json.output(), **kwargs) |
407 except self.api.m.step.StepFailure: # pragma: no cover | 409 except self.api.m.step.StepFailure: # pragma: no cover |
408 self.surface_result('BAD_REV') | 410 self.surface_result('BAD_REV') |
409 raise | 411 raise |
410 revisions = [] | 412 revisions = [] |
411 for commit_hash, _ in step_result.stdout: | 413 for commit_hash, _ in step_result.stdout: |
412 revisions.append(self.revision_class( | 414 revisions.append(self.revision_class( |
413 bisector=self, | 415 bisector=self, |
414 commit_hash=commit_hash, | 416 commit_hash=commit_hash, |
415 depot_name=depot_name, | 417 depot_name=depot_name, |
416 base_revision=base_revision)) | 418 base_revision=base_revision)) |
(...skipping 85 matching lines...) | |
502 dep_revision_max = max_revision.deps[depot_name] | 504 dep_revision_max = max_revision.deps[depot_name] |
503 if (dep_revision_min and dep_revision_max and | 505 if (dep_revision_min and dep_revision_max and |
504 dep_revision_min != dep_revision_max): | 506 dep_revision_min != dep_revision_max): |
505 step_name = ('Expanding revision range for revision %s' | 507 step_name = ('Expanding revision range for revision %s' |
506 ' on depot %s' % (dep_revision_max, depot_name)) | 508 ' on depot %s' % (dep_revision_max, depot_name)) |
507 rev_list = self._revision_range( | 509 rev_list = self._revision_range( |
508 start=dep_revision_min, | 510 start=dep_revision_min, |
509 end=dep_revision_max, | 511 end=dep_revision_max, |
510 depot_name=depot_name, | 512 depot_name=depot_name, |
511 base_revision=min_revision, | 513 base_revision=min_revision, |
512 step_name=step_name) | 514 step_name=step_name, |
515 step_test_data=lambda: | |
516 self.api._test_data['revision_list'][depot_name]) | |
513 new_revisions = self.revisions[:max_revision.list_index] | 517 new_revisions = self.revisions[:max_revision.list_index] |
514 new_revisions += rev_list | 518 new_revisions += rev_list |
515 new_revisions += self.revisions[max_revision.list_index:] | 519 new_revisions += self.revisions[max_revision.list_index:] |
516 self.revisions = new_revisions | 520 self.revisions = new_revisions |
517 self._update_revision_list_indexes() | 521 self._update_revision_list_indexes() |
518 return True | 522 return True |
519 except RuntimeError: # pragma: no cover | 523 except RuntimeError: # pragma: no cover |
520 warning_text = ('Could not expand dependency revisions for ' + | 524 warning_text = ('Could not expand dependency revisions for ' + |
521 revision_to_expand.commit_hash) | 525 revision_to_expand.commit_hash) |
522 self.surface_result('BAD_REV') | 526 self.surface_result('BAD_REV') |
(...skipping 16 matching lines...) | |
539 | 543 |
540 The change between the test results obtained for the given 'good' and | 544 The change between the test results obtained for the given 'good' and |
541 'bad' revisions is expected to be considered a regression. The | 545 'bad' revisions is expected to be considered a regression. The |
542 `improvement_direction` attribute is positive if a larger number is | 546 `improvement_direction` attribute is positive if a larger number is |
543 considered better, and negative if a smaller number is considered better. | 547 considered better, and negative if a smaller number is considered better. |
544 | 548 |
545 Returns: | 549 Returns: |
546 True if the check passes (i.e. no problem), False if the change is not | 550 True if the check passes (i.e. no problem), False if the change is not |
547 a regression according to the improvement direction. | 551 a regression according to the improvement direction. |
548 """ | 552 """ |
549 good = self.good_rev.mean_value | 553 good = self.good_rev.mean |
550 bad = self.bad_rev.mean_value | 554 bad = self.bad_rev.mean |
551 | 555 |
552 if self.is_return_code_mode(): | 556 if self.is_return_code_mode(): |
553 return True | 557 return True |
554 | 558 |
555 direction = self.improvement_direction | 559 direction = self.improvement_direction |
556 if direction is None: | 560 if direction is None: |
557 return True | 561 return True |
558 if (bad > good and direction > 0) or (bad < good and direction < 0): | 562 if (bad > good and direction > 0) or (bad < good and direction < 0): |
559 self._set_failed_direction_results() | 563 self._set_failed_direction_results() |
560 return False | 564 return False |
(...skipping 18 matching lines...) | |
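Read in isolation, the direction check above reduces to the following standalone sketch (the real method also short-circuits for return-code mode and records the failure):

def passes_direction_check(good_mean, bad_mean, direction):
    # The change only counts as a regression when it moves against the
    # configured direction of improvement.
    if direction is None:
        return True
    if (bad_mean > good_mean and direction > 0) or (
        bad_mean < good_mean and direction < 0):
        return False  # Looks like an improvement, not a regression.
    return True

print(passes_direction_check(100.0, 90.0, direction=1))   # True: metric got worse.
print(passes_direction_check(100.0, 120.0, direction=1))  # False: metric improved.
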
579 or REGRESSION_CHECK_TIMEOUT seconds have elapsed. | 583 or REGRESSION_CHECK_TIMEOUT seconds have elapsed. |
580 | 584 |
581 Returns: True if the revisions produced results that differ from each | 585 Returns: True if the revisions produced results that differ from each |
582 other in a statistically significant manner. False if such a difference could | 586 other in a statistically significant manner. False if such a difference could |
583 not be established in the time or sample size allowed. | 587 not be established in the time or sample size allowed. |
584 """ | 588 """ |
585 if self.test_type == 'return_code': | 589 if self.test_type == 'return_code': |
586 return (self.good_rev.overall_return_code != | 590 return (self.good_rev.overall_return_code != |
587 self.bad_rev.overall_return_code) | 591 self.bad_rev.overall_return_code) |
588 | 592 |
589 if self.bypass_stats_check: | 593 if self.bypass_stats_check: |
RobertoCN 2016/08/23 00:26:24: Remove this flag
RobertoCN 2016/09/07 00:33:24: Acknowledged.
590 dummy_result = self.good_rev.values != self.bad_rev.values | 594 self.compare_revisions(self.good_rev, self.bad_rev) |
595 dummy_result = self.good_rev.mean != self.bad_rev.mean | |
591 if not dummy_result: | 596 if not dummy_result: |
592 self._set_insufficient_confidence_warning() | 597 self._set_insufficient_confidence_warning() |
593 return dummy_result | 598 return dummy_result |
594 | 599 |
600 # TODO(robertocn): This step should not be necessary in some cases. | |
595 with self.api.m.step.nest('Re-testing reference range'): | 601 with self.api.m.step.nest('Re-testing reference range'): |
596 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT | 602 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT |
597 while time.time() < expiration_time: | 603 while time.time() < expiration_time: |
598 if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5: | 604 if (self.good_rev.test_run_count >= 5 |
599 if self.significantly_different(self.good_rev.values, | 605 and self.bad_rev.test_run_count >= 5): |
600 self.bad_rev.values): | 606 if self.compare_revisions(self.good_rev, self.bad_rev): |
601 return True | 607 return True |
602 if len(self.good_rev.values) == len(self.bad_rev.values): | 608 if self.good_rev.test_run_count == self.bad_rev.test_run_count: |
603 revision_to_retest = self.last_tested_revision | 609 revision_to_retest = self.last_tested_revision |
604 else: | 610 else: |
605 revision_to_retest = min(self.good_rev, self.bad_rev, | 611 revision_to_retest = min(self.good_rev, self.bad_rev, |
606 key=lambda x: len(x.values)) | 612 key=lambda x: x.test_run_count) |
607 if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES: | |
608 revision_to_retest.retest() | |
609 else: | |
610 break | |
611 self._set_insufficient_confidence_warning() | 613 self._set_insufficient_confidence_warning() |
612 return False | 614 return False |
613 | 615 |
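Stripped of the recipe plumbing, the re-testing loop above has roughly this shape (a simplified sketch; compare, retest and the revision objects stand in for the real API, and the equal-sample-count tie-break is omitted):

import time

def reproduce_regression(good, bad, compare, timeout_s=2 * 60 * 60):
    # Keep collecting samples until the comparison is conclusive or the
    # timeout expires; compare() returns True, False or None ("need more").
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        if good.test_run_count >= 5 and bad.test_run_count >= 5:
            if compare(good, bad):
                return True
        # Re-test whichever side has fewer samples so both grow evenly.
        min(good, bad, key=lambda r: r.test_run_count).retest()
    return False
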
614 | 616 |
615 def get_exception(self): | 617 def get_exception(self): |
616 raise NotImplementedError() # pragma: no cover | 618 raise NotImplementedError() # pragma: no cover |
617 # TODO: should return an exception with the details of the failure. | 619 # TODO: should return an exception with the details of the failure. |
618 | 620 |
619 def _set_insufficient_confidence_warning( | 621 def _set_insufficient_confidence_warning( |
620 self): # pragma: no cover | 622 self): # pragma: no cover |
621 """Adds a warning about the lack of initial regression confidence.""" | 623 """Adds a warning about the lack of initial regression confidence.""" |
622 self.failed_initial_confidence = True | 624 self.failed_initial_confidence = True |
623 self.surface_result('LO_INIT_CONF') | 625 self.surface_result('LO_INIT_CONF') |
624 self.warnings.append( | 626 self.warnings.append( |
625 'Bisect failed to reproduce the regression with enough confidence.') | 627 'Bisect failed to reproduce the regression with enough confidence.') |
626 | 628 |
627 def _results_debug_message(self): | 629 def _results_debug_message(self): |
628 """Returns a string with values used to debug a bisect result.""" | 630 """Returns a string with values used to debug a bisect result.""" |
629 result = 'bisector.lkgr: %r\n' % self.lkgr | 631 result = 'bisector.lkgr: %r\n' % self.lkgr |
630 result += 'bisector.fkbr: %r\n\n' % self.fkbr | 632 result += 'bisector.fkbr: %r\n\n' % self.fkbr |
631 result += self._revision_value_table() | 633 result += self._revision_value_table() |
632 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values): | 634 if (self.lkgr and self.lkgr.test_run_count and self.fkbr and |
633 result += '\n' + self._t_test_results() | 635 self.fkbr.test_run_count): |
636 result += '\n' + '\n'.join([ | |
637 'LKGR values: %r' % list(self.lkgr.debug_values), | |
638 'FKBR values: %r' % list(self.fkbr.debug_values), | |
639 ]) | |
634 return result | 640 return result |
635 | 641 |
636 def _revision_value_table(self): | 642 def _revision_value_table(self): |
637 """Returns a string table showing revisions and their values.""" | 643 """Returns a string table showing revisions and their values.""" |
638 header = [['Revision', 'Values']] | 644 header = [['Revision', 'Values']] |
639 rows = [[r.revision_string(), str(r.values)] for r in self.revisions] | 645 rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions] |
640 return self._pretty_table(header + rows) | 646 return self._pretty_table(header + rows) |
641 | 647 |
642 def _pretty_table(self, data): | 648 def _pretty_table(self, data): |
643 results = [] | 649 results = [] |
644 for row in data: | 650 for row in data: |
645 results.append('%-15s' * len(row) % tuple(row)) | 651 results.append('%-15s' * len(row) % tuple(row)) |
646 return '\n'.join(results) | 652 return '\n'.join(results) |
647 | 653 |
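For instance, the fixed-width formatting in _pretty_table renders a small table like this (illustrative values; output shown in the trailing comments):

def pretty_table(data):
    # Each cell is left-aligned and padded to 15 characters.
    return '\n'.join('%-15s' * len(row) % tuple(row) for row in data)

print(pretty_table([['Revision', 'Values'],
                    ['chromium@3000', '[10.1, 10.2]'],
                    ['chromium@3001', '[12.0, 12.3]']]))
# Revision       Values
# chromium@3000  [10.1, 10.2]
# chromium@3001  [12.0, 12.3]
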
648 def _t_test_results(self): | |
649 """Returns a string showing t-test results for lkgr and fkbr.""" | |
650 t, df, p = self.api.m.math_utils.welchs_t_test( | |
651 self.lkgr.values, self.fkbr.values) | |
652 lines = [ | |
653 'LKGR values: %r' % self.lkgr.values, | |
654 'FKBR values: %r' % self.fkbr.values, | |
655 't-statistic: %r' % t, | |
656 'deg. of freedom: %r' % df, | |
657 'p-value: %r' % p, | |
658 'Confidence score: %r' % (100 * (1 - p)) | |
659 ] | |
660 return '\n'.join(lines) | |
661 | |
662 def print_result_debug_info(self): | 654 def print_result_debug_info(self): |
663 """Prints extra debug info at the end of the bisect process.""" | 655 """Prints extra debug info at the end of the bisect process.""" |
664 lines = self._results_debug_message().splitlines() | 656 lines = self._results_debug_message().splitlines() |
665 # If we emit a null step and then add a log to it, the log should be kept | 657 # If we emit a null step and then add a log to it, the log should be kept |
666 # longer than 7 days (which is often needed to debug some issues). | 658 # longer than 7 days (which is often needed to debug some issues). |
667 self.api.m.step('Debug Info', []) | 659 self.api.m.step('Debug Info', []) |
668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines | 660 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines |
669 | 661 |
670 def post_result(self, halt_on_failure=False): | 662 def post_result(self, halt_on_failure=False): |
671 """Posts bisect results to Perf Dashboard.""" | 663 """Posts bisect results to Perf Dashboard.""" |
672 self.api.m.perf_dashboard.set_default_config() | 664 self.api.m.perf_dashboard.set_default_config() |
673 self.api.m.perf_dashboard.post_bisect_results( | 665 self.api.m.perf_dashboard.post_bisect_results( |
674 self.get_result(), halt_on_failure) | 666 self.get_result(), halt_on_failure) |
675 | 667 |
676 def get_revision_to_eval(self): | 668 def get_revision_to_eval(self): |
677 """Gets the next RevisionState object in the candidate range. | 669 """Gets the next RevisionState object in the candidate range. |
678 | 670 |
679 Returns: | 671 Returns: |
680 The next Revision object in a list. | 672 The next Revision object in a list. |
681 """ | 673 """ |
682 self._update_candidate_range() | 674 self._update_candidate_range() |
683 candidate_range = [revision for revision in | 675 candidate_range = [revision for revision in |
684 self.revisions[self.lkgr.list_index + 1: | 676 self.revisions[self.lkgr.list_index + 1: |
685 self.fkbr.list_index] | 677 self.fkbr.list_index] |
686 if not revision.tested and not revision.failed] | 678 if not revision.failed] |
687 if len(candidate_range) == 1: | 679 if len(candidate_range) == 1: |
688 return candidate_range[0] | 680 return candidate_range[0] |
689 if len(candidate_range) == 0: | 681 if len(candidate_range) == 0: |
690 return None | 682 return None |
691 | 683 |
692 default_revision = candidate_range[len(candidate_range) / 2] | 684 default_revision = candidate_range[len(candidate_range) / 2] |
693 | 685 |
694 with self.api.m.step.nest( | 686 with self.api.m.step.nest( |
695 'Wiggling revision ' + default_revision.revision_string()): | 687 'Wiggling revision ' + default_revision.revision_string()): |
696 # We'll search up to 25% of the range (in either direction) to try and | 688 # We'll search up to 25% of the range (in either direction) to try and |
(...skipping 36 matching lines...) | |
733 return True | 725 return True |
734 if (revision.good and revision.next_revision and | 726 if (revision.good and revision.next_revision and |
735 revision.next_revision.bad): | 727 revision.next_revision.bad): |
736 if (revision.next_revision.deps_change() | 728 if (revision.next_revision.deps_change() |
737 and self._expand_deps_revisions(revision.next_revision)): | 729 and self._expand_deps_revisions(revision.next_revision)): |
738 return False | 730 return False |
739 self.culprit = revision.next_revision | 731 self.culprit = revision.next_revision |
740 return True | 732 return True |
741 return False | 733 return False |
742 | 734 |
743 def wait_for_all(self, revision_list): | |
744 """Waits for all revisions in list to finish.""" | |
745 for r in revision_list: | |
746 self.wait_for(r) | |
747 | |
748 def wait_for(self, revision, nest_check=True): | |
749 """Waits for the revision to finish its job.""" | |
750 if nest_check and not self.flags.get( | |
751 'do_not_nest_wait_for_revision'): # pragma: no cover | |
752 with self.api.m.step.nest('Waiting for ' + revision.revision_string()): | |
753 return self.wait_for(revision, nest_check=False) | |
754 while True: | |
755 revision.update_status() | |
756 if revision.in_progress: | |
757 self.api.m.python.inline( | |
758 'sleeping', | |
759 """ | |
760 import sys | |
761 import time | |
762 time.sleep(20*60) | |
763 sys.exit(0) | |
764 """) | |
765 else: | |
766 break | |
767 | |
768 def _update_candidate_range(self): | 735 def _update_candidate_range(self): |
769 """Updates lkgr and fkbr (last known good/first known bad) revisions. | 736 """Updates lkgr and fkbr (last known good/first known bad) revisions. |
770 | 737 |
771 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in | 738 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in |
772 bisector.revisions.""" | 739 bisector.revisions.""" |
773 for r in self.revisions: | 740 for r in self.revisions: |
774 if r.tested: | 741 if r.test_run_count: |
775 if r.good: | 742 if r.good: |
776 self.lkgr = r | 743 self.lkgr = r |
777 elif r.bad: | 744 elif r.bad: |
778 self.fkbr = r | 745 self.fkbr = r |
779 break | 746 break |
780 assert self.lkgr and self.fkbr | 747 assert self.lkgr and self.fkbr |
781 | 748 |
782 def get_perf_tester_name(self): | 749 def get_perf_tester_name(self): |
783 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. | 750 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. |
784 | 751 |
(...skipping 84 matching lines...) | |
869 assert len(result_code) <= 20 | 836 assert len(result_code) <= 20 |
870 if result_code not in self.result_codes: | 837 if result_code not in self.result_codes: |
871 self.result_codes.add(result_code) | 838 self.result_codes.add(result_code) |
872 properties = self.api.m.step.active_result.presentation.properties | 839 properties = self.api.m.step.active_result.presentation.properties |
873 properties['extra_result_code'] = sorted(self.result_codes) | 840 properties['extra_result_code'] = sorted(self.result_codes) |
874 | 841 |
875 def get_result(self): | 842 def get_result(self): |
876 """Returns the results as a jsonable object.""" | 843 """Returns the results as a jsonable object.""" |
877 config = self.bisect_config | 844 config = self.bisect_config |
878 results_confidence = 0 | 845 results_confidence = 0 |
879 if self.culprit: | |
880 results_confidence = self.api.m.math_utils.confidence_score( | |
881 self.lkgr.values, self.fkbr.values) | |
882 | 846 |
883 if self.failed: | 847 if self.failed: |
884 status = 'failed' | 848 status = 'failed' |
885 elif self.bisect_over: | 849 elif self.bisect_over: |
886 status = 'completed' | 850 status = 'completed' |
887 else: | 851 else: |
888 status = 'started' | 852 status = 'started' |
889 | 853 |
890 aborted_reason = None | 854 aborted_reason = None |
891 if self.failed_initial_confidence: | 855 if self.failed_initial_confidence: |
892 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON | 856 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON |
893 elif self.failed_direction: | 857 elif self.failed_direction: |
894 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON | 858 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON |
895 return { | 859 return { |
896 'try_job_id': config.get('try_job_id'), | 860 'try_job_id': config.get('try_job_id'), |
897 'bug_id': config.get('bug_id'), | 861 'bug_id': config.get('bug_id'), |
898 'status': status, | 862 'status': status, |
899 'buildbot_log_url': self._get_build_url(), | 863 'buildbot_log_url': self._get_build_url(), |
900 'bisect_bot': self.get_perf_tester_name(), | 864 'bisect_bot': self.get_perf_tester_name(), |
901 'command': config['command'], | 865 'command': config['command'], |
902 'test_type': config['test_type'], | 866 'test_type': config['test_type'], |
903 'metric': config['metric'], | 867 'metric': config['metric'], |
904 'change': self.relative_change, | 868 'change': self.relative_change, |
905 'score': results_confidence, | |
906 'good_revision': self.good_rev.commit_hash, | 869 'good_revision': self.good_rev.commit_hash, |
907 'bad_revision': self.bad_rev.commit_hash, | 870 'bad_revision': self.bad_rev.commit_hash, |
908 'warnings': self.warnings, | 871 'warnings': self.warnings, |
909 'aborted_reason': aborted_reason, | 872 'aborted_reason': aborted_reason, |
910 'culprit_data': self._culprit_data(), | 873 'culprit_data': self._culprit_data(), |
911 'revision_data': self._revision_data() | 874 'revision_data': self._revision_data() |
912 } | 875 } |
913 | 876 |
914 def _culprit_data(self): | 877 def _culprit_data(self): |
915 culprit = self.culprit | 878 culprit = self.culprit |
(...skipping 11 matching lines...) | |
927 'email': culprit_info['email'], | 890 'email': culprit_info['email'], |
928 'cl_date': culprit_info['date'], | 891 'cl_date': culprit_info['date'], |
929 'commit_info': culprit_info['body'], | 892 'commit_info': culprit_info['body'], |
930 'revisions_links': [], | 893 'revisions_links': [], |
931 'cl': culprit.commit_hash | 894 'cl': culprit.commit_hash |
932 } | 895 } |
933 | 896 |
934 def _revision_data(self): | 897 def _revision_data(self): |
935 revision_rows = [] | 898 revision_rows = [] |
936 for r in self.revisions: | 899 for r in self.revisions: |
937 if r.tested or r.aborted: | 900 if r.test_run_count: |
938 revision_rows.append({ | 901 revision_rows.append({ |
939 'depot_name': r.depot_name, | 902 'depot_name': r.depot_name, |
940 'commit_hash': r.commit_hash, | 903 'commit_hash': r.commit_hash, |
941 'revision_string': r.revision_string(), | 904 'revision_string': r.revision_string(), |
942 'mean_value': r.mean_value, | 905 'mean_value': r.mean, |
943 'std_dev': r.std_dev, | 906 'std_dev': r.std_dev, |
944 'values': r.values, | 907 'values': r.debug_values, |
945 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', | 908 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', |
946 }) | 909 }) |
947 return revision_rows | 910 return revision_rows |
948 | 911 |
949 def _get_build_url(self): | 912 def _get_build_url(self): |
950 properties = self.api.m.properties | 913 properties = self.api.m.properties |
951 bot_url = properties.get('buildbotURL', | 914 bot_url = properties.get('buildbotURL', |
952 'http://build.chromium.org/p/chromium/') | 915 'http://build.chromium.org/p/chromium/') |
953 builder_name = urllib.quote(properties.get('buildername', '')) | 916 builder_name = urllib.quote(properties.get('buildername', '')) |
954 builder_number = str(properties.get('buildnumber', '')) | 917 builder_number = str(properties.get('buildnumber', '')) |
955 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) | 918 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) |