OLD | NEW |
---|---|
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import json | 5 import json |
6 import re | 6 import re |
7 import time | 7 import time |
8 import urllib | 8 import urllib |
9 | 9 |
10 from . import config_validation | 10 from . import config_validation |
(...skipping 29 matching lines...) Expand all Loading... | |
40 ) | 40 ) |
41 | 41 |
42 # When we look for the next revision to build, we search nearby revisions | 42 # When we look for the next revision to build, we search nearby revisions |
43 # looking for a revision that's already been archived. Since we don't want | 43 # looking for a revision that's already been archived. Since we don't want |
44 # to move *too* far from the original revision, we'll cap the search at 25%. | 44 # to move *too* far from the original revision, we'll cap the search at 25%. |
45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 | 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 |
46 | 46 |
47 # How long to re-test the initial good-bad range for until significant | 47 # How long to re-test the initial good-bad range for until significant |
48 # difference is established. | 48 # difference is established. |
49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 | 49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 |
50 # If we reach this number of samples on the reference range and have not | |
51 # achieved statistical significance, bail. | |
52 MAX_REQUIRED_SAMPLES = 15 | |
53 | |
54 # Significance level to use for determining difference between revisions via | |
55 # hypothesis testing. | |
56 SIGNIFICANCE_LEVEL = 0.01 | |
57 | 50 |
58 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( | 51 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( |
59 'The metric values for the initial "good" and "bad" revisions ' | 52 'The metric values for the initial "good" and "bad" revisions ' |
60 'do not represent a clear regression.') | 53 'do not represent a clear regression.') |
61 | 54 |
62 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( | 55 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( |
63 'The metric values for the initial "good" and "bad" revisions match the ' | 56 'The metric values for the initial "good" and "bad" revisions match the ' |
64 'expected direction of improvement. Thus, likely represent an improvement ' | 57 'expected direction of improvement. Thus, likely represent an improvement ' |
65 'and not a regression.') | 58 'and not a regression.') |
66 | 59 |
67 | 60 |
68 class Bisector(object): | 61 class Bisector(object): |
69 """This class abstracts an ongoing bisect (or n-sect) job.""" | 62 """This class abstracts an ongoing bisect (or n-sect) job.""" |
70 | 63 |
71 def __init__(self, api, bisect_config, revision_class, init_revisions=True, | 64 def __init__(self, api, bisect_config, revision_class, init_revisions=True, |
72 **flags): | 65 **flags): |
73 """Initializes the state of a new bisect job from a dictionary. | 66 """Initializes the state of a new bisect job from a dictionary. |
74 | 67 |
75 Note that the initial good_rev and bad_rev MUST resolve to a commit position | 68 Note that the initial good_rev and bad_rev MUST resolve to a commit position |
76 in the chromium repo. | 69 in the chromium repo. |
77 """ | 70 """ |
78 super(Bisector, self).__init__() | 71 super(Bisector, self).__init__() |
72 self.loopCHECK = {} | |
79 self.flags = flags | 73 self.flags = flags |
80 self._api = api | 74 self._api = api |
81 self.result_codes = set() | 75 self.result_codes = set() |
82 self.ensure_sync_master_branch() | 76 self.ensure_sync_master_branch() |
83 self.bisect_config = bisect_config | 77 self.bisect_config = bisect_config |
84 self.config_step() | 78 self.config_step() |
85 self._validate_config() | 79 self._validate_config() |
86 self.revision_class = revision_class | 80 self.revision_class = revision_class |
87 self.last_tested_revision = None | 81 self.last_tested_revision = None |
88 | 82 |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
148 | 142 |
149 Returns: | 143 Returns: |
150 A 40-digit git commit hash string. | 144 A 40-digit git commit hash string. |
151 """ | 145 """ |
152 if self._is_sha1(rev): # pragma: no cover | 146 if self._is_sha1(rev): # pragma: no cover |
153 return rev | 147 return rev |
154 if rev.isdigit(): | 148 if rev.isdigit(): |
155 commit_position = self._api.m.commit_position.construct( | 149 commit_position = self._api.m.commit_position.construct( |
156 branch='refs/heads/master', value=rev) | 150 branch='refs/heads/master', value=rev) |
157 try: | 151 try: |
158 return self._api.m.crrev.to_commit_hash(commit_position) | 152 return self._api.m.crrev.to_commit_hash( |
153 commit_position, | |
154 step_test_data=lambda: self.api._test_data['hash_cp_map'][rev]) | |
159 except self.api.m.step.StepFailure: # pragma: no cover | 155 except self.api.m.step.StepFailure: # pragma: no cover |
160 self.surface_result('BAD_REV') | 156 self.surface_result('BAD_REV') |
161 raise | 157 raise |
162 self.surface_result('BAD_REV') # pragma: no cover | 158 self.surface_result('BAD_REV') # pragma: no cover |
163 raise self.api.m.step.StepFailure( | 159 raise self.api.m.step.StepFailure( |
164 'Invalid input revision: %r' % (rev,)) # pragma: no cover | 160 'Invalid input revision: %r' % (rev,)) # pragma: no cover |
165 | 161 |
166 @staticmethod | 162 @staticmethod |
167 def _is_sha1(s): | 163 def _is_sha1(s): |
168 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) | 164 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) |
169 | 165 |
170 def significantly_different( | 166 def compare_revisions(self, revision_a, revision_b): |
171 self, list_a, list_b, | 167 """ |
172 significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover | 168 Returns: |
173 """Uses an external script to run hypothesis testing with scipy. | 169 True if the samples are significantly different. |
170 None if there is not enough data to tell. | |
171 False if there's enough data but still can't tell the samples apart. | |
172 """ | |
173 output_format = 'chartjson' | |
174 values_a = revision_a.chartjson_paths | |
175 values_b = revision_b.chartjson_paths | |
176 if revision_a.valueset_paths and revision_b.valueset_paths: | |
177 output_format = 'valueset' | |
174 | 178 |
175 The reason why we need an external script is that scipy is not available to | 179 result = self.api.stat_compare( |
176 the default python installed in all platforms. We instead rely on an | 180 values_a, |
177 anaconda environment to provide those packages. | 181 values_b, |
182 self.bisect_config['metric'], | |
183 output_format=output_format, | |
184 step_test_data=lambda: self.api.test_api.compare_samples_data( | |
185 self.api._test_data.get('revision_data'), revision_a, revision_b)) | |
178 | 186 |
179 Args: | 187 revision_a.debug_values = result['sample_a']['debug_values'] |
180 list_a, list_b: Two lists representing samples to be compared. | 188 revision_b.debug_values = result['sample_b']['debug_values'] |
181 significance_level: Self-describing. As a decimal fraction. | 189 revision_a.mean = result['sample_a']['mean'] |
190 revision_b.mean = result['sample_b']['mean'] | |
191 revision_a.std_dev = result['sample_a']['std_dev'] | |
192 revision_b.std_dev = result['sample_b']['std_dev'] | |
182 | 193 |
183 Returns: | 194 if result['result'] == 'needMoreData': |
184 A boolean indicating whether the null hypothesis ~(that the lists are | 195 key = tuple(values_a), tuple(values_b) |
185 samples from the same population) can be rejected at the specified | 196 self.loopCHECK.setdefault(key, 0) |
186 significance level. | 197 self.loopCHECK[key] += 1 |
RobertoCN
2016/08/23 00:26:23
Remove loop check and debug prints
RobertoCN
2016/09/07 00:33:24
Done.
| |
187 """ | 198 if self.loopCHECK[key] > 10: |
188 step_result = self.api.m.python( | 199 raise Exception('loopCHECK!@') |
189 'Checking sample difference', | 200 print result['result'], revision_a.debug_values, revision_b.debug_values |
190 self.api.resource('significantly_different.py'), | 201 print revision_a.bisector.revisions |
191 [json.dumps(list_a), json.dumps(list_b), str(significance_level)], | 202 return None |
192 stdout=self.api.m.json.output()) | 203 return bool(result['result']) |
193 results = step_result.stdout | |
194 if results is None: | |
195 assert self.dummy_builds | |
196 return True | |
197 significantly_different = results['significantly_different'] | |
198 step_result.presentation.logs[str(significantly_different)] = [ | |
199 'See json.output for details'] | |
200 return significantly_different | |
201 | 204 |
202 def config_step(self): | 205 def config_step(self): |
203 """Yields a step that prints the bisect config.""" | 206 """Yields a step that prints the bisect config.""" |
204 api = self.api | 207 api = self.api |
205 | 208 |
206 # bisect_config may come as a FrozenDict (which is not serializable). | 209 # bisect_config may come as a FrozenDict (which is not serializable). |
207 bisect_config = dict(self.bisect_config) | 210 bisect_config = dict(self.bisect_config) |
208 | 211 |
209 def fix_windows_backslashes(s): | 212 def fix_windows_backslashes(s): |
210 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') | 213 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') |
(...skipping 17 matching lines...) Expand all Loading... | |
228 except config_validation.ValidationFail as error: | 231 except config_validation.ValidationFail as error: |
229 self.surface_result('BAD_CONFIG') | 232 self.surface_result('BAD_CONFIG') |
230 self.api.m.halt(error.message) | 233 self.api.m.halt(error.message) |
231 raise self.api.m.step.StepFailure(error.message) | 234 raise self.api.m.step.StepFailure(error.message) |
232 | 235 |
233 @property | 236 @property |
234 def api(self): | 237 def api(self): |
235 return self._api | 238 return self._api |
236 | 239 |
237 def compute_relative_change(self): | 240 def compute_relative_change(self): |
238 old_value = float(self.good_rev.mean_value) | 241 old_value = float(self.good_rev.mean) |
239 new_value = float(self.bad_rev.mean_value) | 242 new_value = float(self.bad_rev.mean) |
240 | 243 |
241 if new_value and not old_value: # pragma: no cover | 244 if new_value and not old_value: # pragma: no cover |
242 self.relative_change = ZERO_TO_NON_ZERO | 245 self.relative_change = ZERO_TO_NON_ZERO |
243 return | 246 return |
244 | 247 |
245 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) | 248 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) |
246 self.relative_change = '%.2f%%' % (100 * rel_change) | 249 self.relative_change = '%.2f%%' % (100 * rel_change) |
247 | 250 |
248 def make_deps_sha_file(self, deps_sha): | 251 def make_deps_sha_file(self, deps_sha): |
249 """Make a diff patch that creates DEPS.sha. | 252 """Make a diff patch that creates DEPS.sha. |
(...skipping 16 matching lines...) Expand all Loading... | |
266 file is to be written to. | 269 file is to be written to. |
267 commit_hash (str): An identifier for the step. | 270 commit_hash (str): An identifier for the step. |
268 | 271 |
269 Returns: | 272 Returns: |
270 A string containing the hash of the interned object. | 273 A string containing the hash of the interned object. |
271 """ | 274 """ |
272 cmd = 'hash-object -t blob -w --stdin'.split(' ') | 275 cmd = 'hash-object -t blob -w --stdin'.split(' ') |
273 stdin = self.api.m.raw_io.input(file_contents) | 276 stdin = self.api.m.raw_io.input(file_contents) |
274 stdout = self.api.m.raw_io.output() | 277 stdout = self.api.m.raw_io.output() |
275 step_name = 'Hashing modified DEPS file with revision ' + commit_hash | 278 step_name = 'Hashing modified DEPS file with revision ' + commit_hash |
276 step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout, | 279 step_result = self.api.m.git( |
277 name=step_name) | 280 *cmd, cwd=cwd, stdin=stdin, stdout=stdout, name=step_name, |
281 step_test_data=lambda: | |
282 self.api.m.raw_io.test_api.stream_output(commit_hash)) | |
278 hash_string = step_result.stdout.splitlines()[0] | 283 hash_string = step_result.stdout.splitlines()[0] |
279 try: | 284 try: |
280 if hash_string: | 285 if hash_string: |
281 int(hash_string, 16) | 286 int(hash_string, 16) |
282 return hash_string | 287 return hash_string |
283 except ValueError: # pragma: no cover | 288 except ValueError: # pragma: no cover |
284 reason = 'Git did not output a valid hash for the interned file.' | 289 reason = 'Git did not output a valid hash for the interned file.' |
285 self.api.m.halt(reason) | 290 self.api.m.halt(reason) |
286 raise self.api.m.step.StepFailure(reason) | 291 raise self.api.m.step.StepFailure(reason) |
287 | 292 |
(...skipping 14 matching lines...) Expand all Loading... | |
302 Returns: | 307 Returns: |
303 A string containing the diff patch as produced by the 'git diff' command. | 308 A string containing the diff patch as produced by the 'git diff' command. |
304 """ | 309 """ |
305 # The prefixes used in the command below are used to find and replace the | 310 # The prefixes used in the command below are used to find and replace the |
306 # tree-ish git object id's on the diff output more easily. | 311 # tree-ish git object id's on the diff output more easily. |
307 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' | 312 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' |
308 cmd %= (git_object_a, git_object_b) | 313 cmd %= (git_object_a, git_object_b) |
309 cmd = cmd.split(' ') | 314 cmd = cmd.split(' ') |
310 stdout = self.api.m.raw_io.output() | 315 stdout = self.api.m.raw_io.output() |
311 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) | 316 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) |
312 step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name) | 317 step_result = self.api.m.git( |
318 *cmd, cwd=cwd, stdout=stdout, name=step_name, | |
319 step_test_data=lambda: self.api._test_data['diff_patch']) | |
313 patch_text = step_result.stdout | 320 patch_text = step_result.stdout |
314 src_string = 'IAMSRC:' + git_object_a | 321 src_string = 'IAMSRC:' + git_object_a |
315 dst_string = 'IAMDST:' + git_object_b | 322 dst_string = 'IAMDST:' + git_object_b |
316 patch_text = patch_text.replace(src_string, src_alias) | 323 patch_text = patch_text.replace(src_string, src_alias) |
317 patch_text = patch_text.replace(dst_string, dst_alias) | 324 patch_text = patch_text.replace(dst_string, dst_alias) |
318 return patch_text | 325 return patch_text |
319 | 326 |
320 def make_deps_patch(self, base_revision, base_file_contents, | 327 def make_deps_patch(self, base_revision, base_file_contents, |
321 depot, new_commit_hash): | 328 depot, new_commit_hash): |
322 """Make a diff patch that updates a specific dependency revision. | 329 """Make a diff patch that updates a specific dependency revision. |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
365 interned_deps_hash, deps_file, deps_file, | 372 interned_deps_hash, deps_file, deps_file, |
366 cwd=cwd, | 373 cwd=cwd, |
367 deps_rev=new_commit_hash) | 374 deps_rev=new_commit_hash) |
368 return patch_text, patched_contents | 375 return patch_text, patched_contents |
369 | 376 |
370 def _expand_initial_revision_range(self): | 377 def _expand_initial_revision_range(self): |
371 """Sets the initial contents of |self.revisions|.""" | 378 """Sets the initial contents of |self.revisions|.""" |
372 with self.api.m.step.nest('Expanding revision range'): | 379 with self.api.m.step.nest('Expanding revision range'): |
373 good_hash = self.good_rev.commit_hash | 380 good_hash = self.good_rev.commit_hash |
374 bad_hash = self.bad_rev.commit_hash | 381 bad_hash = self.bad_rev.commit_hash |
382 depot = self.good_rev.depot_name | |
375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) | 383 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) |
376 revisions = self._revision_range( | 384 revisions = self._revision_range( |
377 start=good_hash, | 385 start=good_hash, |
378 end=bad_hash, | 386 end=bad_hash, |
379 depot_name=self.base_depot, | 387 depot_name=self.base_depot, |
380 step_name=step_name) | 388 step_name=step_name, |
389 step_test_data=lambda: self.api._test_data['revision_list'][depot] | |
390 ) | |
381 self.revisions = [self.good_rev] + revisions + [self.bad_rev] | 391 self.revisions = [self.good_rev] + revisions + [self.bad_rev] |
382 self._update_revision_list_indexes() | 392 self._update_revision_list_indexes() |
383 | 393 |
384 def _revision_range(self, start, end, depot_name, base_revision=None, | 394 def _revision_range(self, start, end, depot_name, base_revision=None, |
385 step_name=None): | 395 step_name=None, **kwargs): |
386 """Returns a list of RevisionState objects between |start| and |end|. | 396 """Returns a list of RevisionState objects between |start| and |end|. |
387 | 397 |
388 Args: | 398 Args: |
389 start (str): Start commit hash. | 399 start (str): Start commit hash. |
390 end (str): End commit hash. | 400 end (str): End commit hash. |
391 depot_name (str): Short string name of repo, e.g. chromium or v8. | 401 depot_name (str): Short string name of repo, e.g. chromium or v8. |
392 base_revision (str): Base revision in the downstream repo (e.g. chromium). | 402 base_revision (str): Base revision in the downstream repo (e.g. chromium). |
393 step_name (str): Optional step name. | 403 step_name (str): Optional step name. |
394 | 404 |
395 Returns: | 405 Returns: |
396 A list of RevisionState objects, not including the given start or end. | 406 A list of RevisionState objects, not including the given start or end. |
397 """ | 407 """ |
398 if self.internal_bisect: # pragma: no cover | 408 if self.internal_bisect: # pragma: no cover |
399 return self._revision_range_with_gitiles( | 409 return self._revision_range_with_gitiles( |
400 start, end, depot_name, base_revision, step_name) | 410 start, end, depot_name, base_revision, step_name) |
401 try: | 411 try: |
402 step_result = self.api.m.python( | 412 step_result = self.api.m.python( |
403 step_name, | 413 step_name, |
404 self.api.resource('fetch_intervening_revisions.py'), | 414 self.api.resource('fetch_intervening_revisions.py'), |
405 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], | 415 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], |
406 stdout=self.api.m.json.output()) | 416 stdout=self.api.m.json.output(), **kwargs) |
407 except self.api.m.step.StepFailure: # pragma: no cover | 417 except self.api.m.step.StepFailure: # pragma: no cover |
408 self.surface_result('BAD_REV') | 418 self.surface_result('BAD_REV') |
409 raise | 419 raise |
410 revisions = [] | 420 revisions = [] |
411 for commit_hash, _ in step_result.stdout: | 421 for commit_hash, _ in step_result.stdout: |
412 revisions.append(self.revision_class( | 422 revisions.append(self.revision_class( |
413 bisector=self, | 423 bisector=self, |
414 commit_hash=commit_hash, | 424 commit_hash=commit_hash, |
415 depot_name=depot_name, | 425 depot_name=depot_name, |
416 base_revision=base_revision)) | 426 base_revision=base_revision)) |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
502 dep_revision_max = max_revision.deps[depot_name] | 512 dep_revision_max = max_revision.deps[depot_name] |
503 if (dep_revision_min and dep_revision_max and | 513 if (dep_revision_min and dep_revision_max and |
504 dep_revision_min != dep_revision_max): | 514 dep_revision_min != dep_revision_max): |
505 step_name = ('Expanding revision range for revision %s' | 515 step_name = ('Expanding revision range for revision %s' |
506 ' on depot %s' % (dep_revision_max, depot_name)) | 516 ' on depot %s' % (dep_revision_max, depot_name)) |
507 rev_list = self._revision_range( | 517 rev_list = self._revision_range( |
508 start=dep_revision_min, | 518 start=dep_revision_min, |
509 end=dep_revision_max, | 519 end=dep_revision_max, |
510 depot_name=depot_name, | 520 depot_name=depot_name, |
511 base_revision=min_revision, | 521 base_revision=min_revision, |
512 step_name=step_name) | 522 step_name=step_name, |
523 step_test_data=lambda: | |
524 self.api._test_data['revision_list'][depot_name]) | |
513 new_revisions = self.revisions[:max_revision.list_index] | 525 new_revisions = self.revisions[:max_revision.list_index] |
514 new_revisions += rev_list | 526 new_revisions += rev_list |
515 new_revisions += self.revisions[max_revision.list_index:] | 527 new_revisions += self.revisions[max_revision.list_index:] |
516 self.revisions = new_revisions | 528 self.revisions = new_revisions |
517 self._update_revision_list_indexes() | 529 self._update_revision_list_indexes() |
518 return True | 530 return True |
519 except RuntimeError: # pragma: no cover | 531 except RuntimeError: # pragma: no cover |
520 warning_text = ('Could not expand dependency revisions for ' + | 532 warning_text = ('Could not expand dependency revisions for ' + |
521 revision_to_expand.commit_hash) | 533 revision_to_expand.commit_hash) |
522 self.surface_result('BAD_REV') | 534 self.surface_result('BAD_REV') |
(...skipping 16 matching lines...) Expand all Loading... | |
539 | 551 |
540 The change between the test results obtained for the given 'good' and | 552 The change between the test results obtained for the given 'good' and |
541 'bad' revisions is expected to be considered a regression. The | 553 'bad' revisions is expected to be considered a regression. The |
542 `improvement_direction` attribute is positive if a larger number is | 554 `improvement_direction` attribute is positive if a larger number is |
543 considered better, and negative if a smaller number is considered better. | 555 considered better, and negative if a smaller number is considered better. |
544 | 556 |
545 Returns: | 557 Returns: |
546 True if the check passes (i.e. no problem), False if the change is not | 558 True if the check passes (i.e. no problem), False if the change is not |
547 a regression according to the improvement direction. | 559 a regression according to the improvement direction. |
548 """ | 560 """ |
549 good = self.good_rev.mean_value | 561 good = self.good_rev.mean |
550 bad = self.bad_rev.mean_value | 562 bad = self.bad_rev.mean |
551 | 563 |
552 if self.is_return_code_mode(): | 564 if self.is_return_code_mode(): |
553 return True | 565 return True |
554 | 566 |
555 direction = self.improvement_direction | 567 direction = self.improvement_direction |
556 if direction is None: | 568 if direction is None: |
557 return True | 569 return True |
558 if (bad > good and direction > 0) or (bad < good and direction < 0): | 570 if (bad > good and direction > 0) or (bad < good and direction < 0): |
559 self._set_failed_direction_results() | 571 self._set_failed_direction_results() |
560 return False | 572 return False |
(...skipping 19 matching lines...) Expand all Loading... | |
580 | 592 |
581 Returns: True if the revisions produced results that differ from each | 593 Returns: True if the revisions produced results that differ from each |
582 other in a statistically significant manner. False if such difference could | 594 other in a statistically significant manner. False if such difference could |
583 not be established in the time or sample size allowed. | 595 not be established in the time or sample size allowed. |
584 """ | 596 """ |
585 if self.test_type == 'return_code': | 597 if self.test_type == 'return_code': |
586 return (self.good_rev.overall_return_code != | 598 return (self.good_rev.overall_return_code != |
587 self.bad_rev.overall_return_code) | 599 self.bad_rev.overall_return_code) |
588 | 600 |
589 if self.bypass_stats_check: | 601 if self.bypass_stats_check: |
590 dummy_result = self.good_rev.values != self.bad_rev.values | 602 self.compare_revisions(self.good_rev, self.bad_rev) |
603 dummy_result = self.good_rev.mean != self.bad_rev.mean | |
591 if not dummy_result: | 604 if not dummy_result: |
592 self._set_insufficient_confidence_warning() | 605 self._set_insufficient_confidence_warning() |
593 return dummy_result | 606 return dummy_result |
594 | 607 |
608 # TODO(robertocn): This step should not be necessary in some cases. | |
595 with self.api.m.step.nest('Re-testing reference range'): | 609 with self.api.m.step.nest('Re-testing reference range'): |
596 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT | 610 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT |
597 while time.time() < expiration_time: | 611 while time.time() < expiration_time: |
598 if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5: | 612 if (self.good_rev.test_run_count >= 5 |
599 if self.significantly_different(self.good_rev.values, | 613 and self.bad_rev.test_run_count >= 5): |
600 self.bad_rev.values): | 614 if self.compare_revisions(self.good_rev, self.bad_rev): |
601 return True | 615 return True |
602 if len(self.good_rev.values) == len(self.bad_rev.values): | 616 if self.good_rev.test_run_count == self.bad_rev.test_run_count: |
603 revision_to_retest = self.last_tested_revision | 617 revision_to_retest = self.last_tested_revision |
604 else: | 618 else: |
605 revision_to_retest = min(self.good_rev, self.bad_rev, | 619 revision_to_retest = min(self.good_rev, self.bad_rev, |
606 key=lambda x: len(x.values)) | 620 key=lambda x: x.test_run_count) |
607 if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES: | |
608 revision_to_retest.retest() | |
609 else: | |
610 break | |
611 self._set_insufficient_confidence_warning() | 621 self._set_insufficient_confidence_warning() |
612 return False | 622 return False |
613 | 623 |
614 | 624 |
615 def get_exception(self): | 625 def get_exception(self): |
616 raise NotImplementedError() # pragma: no cover | 626 raise NotImplementedError() # pragma: no cover |
617 # TODO: should return an exception with the details of the failure. | 627 # TODO: should return an exception with the details of the failure. |
618 | 628 |
619 def _set_insufficient_confidence_warning( | 629 def _set_insufficient_confidence_warning( |
620 self): # pragma: no cover | 630 self): # pragma: no cover |
621 """Adds a warning about the lack of initial regression confidence.""" | 631 """Adds a warning about the lack of initial regression confidence.""" |
622 self.failed_initial_confidence = True | 632 self.failed_initial_confidence = True |
623 self.surface_result('LO_INIT_CONF') | 633 self.surface_result('LO_INIT_CONF') |
624 self.warnings.append( | 634 self.warnings.append( |
625 'Bisect failed to reproduce the regression with enough confidence.') | 635 'Bisect failed to reproduce the regression with enough confidence.') |
626 | 636 |
627 def _results_debug_message(self): | 637 def _results_debug_message(self): |
628 """Returns a string with values used to debug a bisect result.""" | 638 """Returns a string with values used to debug a bisect result.""" |
629 result = 'bisector.lkgr: %r\n' % self.lkgr | 639 result = 'bisector.lkgr: %r\n' % self.lkgr |
630 result += 'bisector.fkbr: %r\n\n' % self.fkbr | 640 result += 'bisector.fkbr: %r\n\n' % self.fkbr |
631 result += self._revision_value_table() | 641 result += self._revision_value_table() |
632 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values): | 642 if (self.lkgr and self.lkgr.test_run_count and self.fkbr and |
633 result += '\n' + self._t_test_results() | 643 self.fkbr.test_run_count): |
644 result += '\n' + '\n'.join([ | |
645 'LKGR values: %r' % list(self.lkgr.debug_values), | |
646 'FKBR values: %r' % list(self.fkbr.debug_values), | |
647 ]) | |
634 return result | 648 return result |
635 | 649 |
636 def _revision_value_table(self): | 650 def _revision_value_table(self): |
637 """Returns a string table showing revisions and their values.""" | 651 """Returns a string table showing revisions and their values.""" |
638 header = [['Revision', 'Values']] | 652 header = [['Revision', 'Values']] |
639 rows = [[r.revision_string(), str(r.values)] for r in self.revisions] | 653 rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions] |
640 return self._pretty_table(header + rows) | 654 return self._pretty_table(header + rows) |
641 | 655 |
642 def _pretty_table(self, data): | 656 def _pretty_table(self, data): |
643 results = [] | 657 results = [] |
644 for row in data: | 658 for row in data: |
645 results.append('%-15s' * len(row) % tuple(row)) | 659 results.append('%-15s' * len(row) % tuple(row)) |
646 return '\n'.join(results) | 660 return '\n'.join(results) |
647 | 661 |
648 def _t_test_results(self): | |
649 """Returns a string showing t-test results for lkgr and fkbr.""" | |
650 t, df, p = self.api.m.math_utils.welchs_t_test( | |
651 self.lkgr.values, self.fkbr.values) | |
652 lines = [ | |
653 'LKGR values: %r' % self.lkgr.values, | |
654 'FKBR values: %r' % self.fkbr.values, | |
655 't-statistic: %r' % t, | |
656 'deg. of freedom: %r' % df, | |
657 'p-value: %r' % p, | |
658 'Confidence score: %r' % (100 * (1 - p)) | |
659 ] | |
660 return '\n'.join(lines) | |
661 | |
662 def print_result_debug_info(self): | 662 def print_result_debug_info(self): |
663 """Prints extra debug info at the end of the bisect process.""" | 663 """Prints extra debug info at the end of the bisect process.""" |
664 lines = self._results_debug_message().splitlines() | 664 lines = self._results_debug_message().splitlines() |
665 # If we emit a null step then add a log to it, the log should be kept | 665 # If we emit a null step then add a log to it, the log should be kept |
666 # longer than 7 days (which is often needed to debug some issues). | 666 # longer than 7 days (which is often needed to debug some issues). |
667 self.api.m.step('Debug Info', []) | 667 self.api.m.step('Debug Info', []) |
668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines | 668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines |
669 | 669 |
670 def post_result(self, halt_on_failure=False): | 670 def post_result(self, halt_on_failure=False): |
671 """Posts bisect results to Perf Dashboard.""" | 671 """Posts bisect results to Perf Dashboard.""" |
672 self.api.m.perf_dashboard.set_default_config() | 672 self.api.m.perf_dashboard.set_default_config() |
673 self.api.m.perf_dashboard.post_bisect_results( | 673 self.api.m.perf_dashboard.post_bisect_results( |
674 self.get_result(), halt_on_failure) | 674 self.get_result(), halt_on_failure) |
675 | 675 |
676 def get_revision_to_eval(self): | 676 def get_revision_to_eval(self): |
677 """Gets the next RevisionState object in the candidate range. | 677 """Gets the next RevisionState object in the candidate range. |
678 | 678 |
679 Returns: | 679 Returns: |
680 The next Revision object in a list. | 680 The next Revision object in a list. |
681 """ | 681 """ |
682 self._update_candidate_range() | 682 self._update_candidate_range() |
683 candidate_range = [revision for revision in | 683 candidate_range = [revision for revision in |
684 self.revisions[self.lkgr.list_index + 1: | 684 self.revisions[self.lkgr.list_index + 1: |
685 self.fkbr.list_index] | 685 self.fkbr.list_index] |
686 if not revision.tested and not revision.failed] | 686 if not revision.failed] |
687 if len(candidate_range) == 1: | 687 if len(candidate_range) == 1: |
688 return candidate_range[0] | 688 return candidate_range[0] |
689 if len(candidate_range) == 0: | 689 if len(candidate_range) == 0: |
690 return None | 690 return None |
691 | 691 |
692 default_revision = candidate_range[len(candidate_range) / 2] | 692 default_revision = candidate_range[len(candidate_range) / 2] |
693 | 693 |
694 with self.api.m.step.nest( | 694 with self.api.m.step.nest( |
695 'Wiggling revision ' + default_revision.revision_string()): | 695 'Wiggling revision ' + default_revision.revision_string()): |
696 # We'll search up to 25% of the range (in either direction) to try and | 696 # We'll search up to 25% of the range (in either direction) to try and |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
733 return True | 733 return True |
734 if (revision.good and revision.next_revision and | 734 if (revision.good and revision.next_revision and |
735 revision.next_revision.bad): | 735 revision.next_revision.bad): |
736 if (revision.next_revision.deps_change() | 736 if (revision.next_revision.deps_change() |
737 and self._expand_deps_revisions(revision.next_revision)): | 737 and self._expand_deps_revisions(revision.next_revision)): |
738 return False | 738 return False |
739 self.culprit = revision.next_revision | 739 self.culprit = revision.next_revision |
740 return True | 740 return True |
741 return False | 741 return False |
742 | 742 |
743 def wait_for_all(self, revision_list): | |
744 """Waits for all revisions in list to finish.""" | |
745 for r in revision_list: | |
746 self.wait_for(r) | |
747 | |
748 def wait_for(self, revision, nest_check=True): | |
749 """Waits for the revision to finish its job.""" | |
750 if nest_check and not self.flags.get( | |
751 'do_not_nest_wait_for_revision'): # pragma: no cover | |
752 with self.api.m.step.nest('Waiting for ' + revision.revision_string()): | |
753 return self.wait_for(revision, nest_check=False) | |
754 while True: | |
755 revision.update_status() | |
756 if revision.in_progress: | |
757 self.api.m.python.inline( | |
758 'sleeping', | |
759 """ | |
760 import sys | |
761 import time | |
762 time.sleep(20*60) | |
763 sys.exit(0) | |
764 """) | |
765 else: | |
766 break | |
767 | |
768 def _update_candidate_range(self): | 743 def _update_candidate_range(self): |
769 """Updates lkgr and fkbr (last known good/first known bad) revisions. | 744 """Updates lkgr and fkbr (last known good/first known bad) revisions. |
770 | 745 |
771 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in | 746 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in |
772 bisectors.revisions.""" | 747 bisectors.revisions.""" |
773 for r in self.revisions: | 748 for r in self.revisions: |
774 if r.tested: | 749 if r.test_run_count: |
775 if r.good: | 750 if r.good: |
776 self.lkgr = r | 751 self.lkgr = r |
777 elif r.bad: | 752 elif r.bad: |
778 self.fkbr = r | 753 self.fkbr = r |
779 break | 754 break |
780 assert self.lkgr and self.fkbr | 755 assert self.lkgr and self.fkbr |
781 | 756 |
782 def get_perf_tester_name(self): | 757 def get_perf_tester_name(self): |
783 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. | 758 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. |
784 | 759 |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
869 assert len(result_code) <= 20 | 844 assert len(result_code) <= 20 |
870 if result_code not in self.result_codes: | 845 if result_code not in self.result_codes: |
871 self.result_codes.add(result_code) | 846 self.result_codes.add(result_code) |
872 properties = self.api.m.step.active_result.presentation.properties | 847 properties = self.api.m.step.active_result.presentation.properties |
873 properties['extra_result_code'] = sorted(self.result_codes) | 848 properties['extra_result_code'] = sorted(self.result_codes) |
874 | 849 |
875 def get_result(self): | 850 def get_result(self): |
876 """Returns the results as a jsonable object.""" | 851 """Returns the results as a jsonable object.""" |
877 config = self.bisect_config | 852 config = self.bisect_config |
878 results_confidence = 0 | 853 results_confidence = 0 |
879 if self.culprit: | |
880 results_confidence = self.api.m.math_utils.confidence_score( | |
881 self.lkgr.values, self.fkbr.values) | |
882 | 854 |
883 if self.failed: | 855 if self.failed: |
884 status = 'failed' | 856 status = 'failed' |
885 elif self.bisect_over: | 857 elif self.bisect_over: |
886 status = 'completed' | 858 status = 'completed' |
887 else: | 859 else: |
888 status = 'started' | 860 status = 'started' |
889 | 861 |
890 aborted_reason = None | 862 aborted_reason = None |
891 if self.failed_initial_confidence: | 863 if self.failed_initial_confidence: |
892 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON | 864 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON |
893 elif self.failed_direction: | 865 elif self.failed_direction: |
894 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON | 866 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON |
895 return { | 867 return { |
896 'try_job_id': config.get('try_job_id'), | 868 'try_job_id': config.get('try_job_id'), |
897 'bug_id': config.get('bug_id'), | 869 'bug_id': config.get('bug_id'), |
898 'status': status, | 870 'status': status, |
899 'buildbot_log_url': self._get_build_url(), | 871 'buildbot_log_url': self._get_build_url(), |
900 'bisect_bot': self.get_perf_tester_name(), | 872 'bisect_bot': self.get_perf_tester_name(), |
901 'command': config['command'], | 873 'command': config['command'], |
902 'test_type': config['test_type'], | 874 'test_type': config['test_type'], |
903 'metric': config['metric'], | 875 'metric': config['metric'], |
904 'change': self.relative_change, | 876 'change': self.relative_change, |
905 'score': results_confidence, | |
906 'good_revision': self.good_rev.commit_hash, | 877 'good_revision': self.good_rev.commit_hash, |
907 'bad_revision': self.bad_rev.commit_hash, | 878 'bad_revision': self.bad_rev.commit_hash, |
908 'warnings': self.warnings, | 879 'warnings': self.warnings, |
909 'aborted_reason': aborted_reason, | 880 'aborted_reason': aborted_reason, |
910 'culprit_data': self._culprit_data(), | 881 'culprit_data': self._culprit_data(), |
911 'revision_data': self._revision_data() | 882 'revision_data': self._revision_data() |
912 } | 883 } |
913 | 884 |
914 def _culprit_data(self): | 885 def _culprit_data(self): |
915 culprit = self.culprit | 886 culprit = self.culprit |
(...skipping 11 matching lines...) Expand all Loading... | |
927 'email': culprit_info['email'], | 898 'email': culprit_info['email'], |
928 'cl_date': culprit_info['date'], | 899 'cl_date': culprit_info['date'], |
929 'commit_info': culprit_info['body'], | 900 'commit_info': culprit_info['body'], |
930 'revisions_links': [], | 901 'revisions_links': [], |
931 'cl': culprit.commit_hash | 902 'cl': culprit.commit_hash |
932 } | 903 } |
933 | 904 |
934 def _revision_data(self): | 905 def _revision_data(self): |
935 revision_rows = [] | 906 revision_rows = [] |
936 for r in self.revisions: | 907 for r in self.revisions: |
937 if r.tested or r.aborted: | 908 if r.test_run_count: |
938 revision_rows.append({ | 909 revision_rows.append({ |
939 'depot_name': r.depot_name, | 910 'depot_name': r.depot_name, |
940 'commit_hash': r.commit_hash, | 911 'commit_hash': r.commit_hash, |
941 'revision_string': r.revision_string(), | 912 'revision_string': r.revision_string(), |
942 'mean_value': r.mean_value, | 913 'mean_value': r.mean, |
943 'std_dev': r.std_dev, | 914 'std_dev': r.std_dev, |
944 'values': r.values, | 915 'values': r.debug_values, |
945 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', | 916 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', |
946 }) | 917 }) |
947 return revision_rows | 918 return revision_rows |
948 | 919 |
949 def _get_build_url(self): | 920 def _get_build_url(self): |
950 properties = self.api.m.properties | 921 properties = self.api.m.properties |
951 bot_url = properties.get('buildbotURL', | 922 bot_url = properties.get('buildbotURL', |
952 'http://build.chromium.org/p/chromium/') | 923 'http://build.chromium.org/p/chromium/') |
953 builder_name = urllib.quote(properties.get('buildername', '')) | 924 builder_name = urllib.quote(properties.get('buildername', '')) |
954 builder_number = str(properties.get('buildnumber', '')) | 925 builder_number = str(properties.get('buildnumber', '')) |
955 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) | 926 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) |
OLD | NEW |