Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(669)

Side by Side Diff: scripts/slave/recipe_modules/auto_bisect/bisector.py

Issue 2247373002: Refactor stages 1, 2 and test_api overhaul. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master
Patch Set: Got full coverage for new and changed code. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import json 5 import json
6 import re 6 import re
7 import time 7 import time
8 import urllib 8 import urllib
9 9
10 from . import config_validation 10 from . import config_validation
(...skipping 28 matching lines...) Expand all
39 'LO_FINAL_CONF', # The bisect completed without a culprit. 39 'LO_FINAL_CONF', # The bisect completed without a culprit.
40 ) 40 )
41 41
42 # When we look for the next revision to build, we search nearby revisions 42 # When we look for the next revision to build, we search nearby revisions
43 # looking for a revision that's already been archived. Since we don't want 43 # looking for a revision that's already been archived. Since we don't want
44 # to move *too* far from the original revision, we'll cap the search at 25%. 44 # to move *too* far from the original revision, we'll cap the search at 25%.
45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25
46 46
47 # How long to re-test the initial good-bad range for until significant 47 # How long to re-test the initial good-bad range for until significant
48 # difference is established. 48 # difference is established.
49 REGRESSION_CHECK_TIMEOUT = 20 * 60 * 60 # 20 hours. A build times out after 24. 49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60
50 # If we reach this number of samples on the reference range and have not
51 # achieved statistical significance, bail.
52 MAX_REQUIRED_SAMPLES = 15
53
54 # Significance level to use for determining difference between revisions via
55 # hypothesis testing.
56 SIGNIFICANCE_LEVEL = 0.01
57 50
58 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( 51 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = (
59 'The metric values for the initial "good" and "bad" revisions ' 52 'The metric values for the initial "good" and "bad" revisions '
60 'do not represent a clear regression.') 53 'do not represent a clear regression.')
61 54
62 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( 55 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = (
63 'The metric values for the initial "good" and "bad" revisions match the ' 56 'The metric values for the initial "good" and "bad" revisions match the '
64 'expected direction of improvement. Thus, likely represent an improvement ' 57 'expected direction of improvement. Thus, likely represent an improvement '
65 'and not a regression.') 58 'and not a regression.')
66 59
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
148 141
149 Returns: 142 Returns:
150 A 40-digit git commit hash string. 143 A 40-digit git commit hash string.
151 """ 144 """
152 if self._is_sha1(rev): # pragma: no cover 145 if self._is_sha1(rev): # pragma: no cover
153 return rev 146 return rev
154 if rev.isdigit(): 147 if rev.isdigit():
155 commit_position = self._api.m.commit_position.construct( 148 commit_position = self._api.m.commit_position.construct(
156 branch='refs/heads/master', value=rev) 149 branch='refs/heads/master', value=rev)
157 try: 150 try:
158 return self._api.m.crrev.to_commit_hash(commit_position) 151 return self._api.m.crrev.to_commit_hash(
152 commit_position,
153 step_test_data=lambda: self.api._test_data['hash_cp_map'][rev])
159 except self.api.m.step.StepFailure: # pragma: no cover 154 except self.api.m.step.StepFailure: # pragma: no cover
160 self.surface_result('BAD_REV') 155 self.surface_result('BAD_REV')
161 raise 156 raise
162 self.surface_result('BAD_REV') # pragma: no cover 157 self.surface_result('BAD_REV') # pragma: no cover
163 raise self.api.m.step.StepFailure( 158 raise self.api.m.step.StepFailure(
164 'Invalid input revision: %r' % (rev,)) # pragma: no cover 159 'Invalid input revision: %r' % (rev,)) # pragma: no cover
165 160
166 @staticmethod 161 @staticmethod
167 def _is_sha1(s): 162 def _is_sha1(s):
168 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) 163 return bool(re.match('^[0-9A-Fa-f]{40}$', s))
169 164
170 def significantly_different( 165 def compare_revisions(self, revision_a, revision_b):
171 self, list_a, list_b, 166 """
172 significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover 167 Returns:
prasadv 2016/09/09 18:52:30 I think it is better to have a consistent datatype
RobertoCN 2016/09/13 22:11:40 Done.
173 """Uses an external script to run hypothesis testing with scipy. 168 True if the samples are significantly different.
169 None if there is not enough data to tell.
170 False if there's enough data but still can't tell the samples apart.
171 """
172 output_format = 'chartjson'
173 values_a = revision_a.chartjson_paths
174 values_b = revision_b.chartjson_paths
175 if revision_a.valueset_paths and revision_b.valueset_paths:
176 output_format = 'valueset'
174 177
175 The reason why we need an external script is that scipy is not available to 178 result = self.api.stat_compare(
176 the default python installed in all platforms. We instead rely on an 179 values_a,
177 anaconda environment to provide those packages. 180 values_b,
181 self.bisect_config['metric'],
182 output_format=output_format,
183 step_test_data=lambda: self.api.test_api.compare_samples_data(
184 self.api._test_data.get('revision_data'), revision_a, revision_b))
178 185
179 Args: 186 revision_a.debug_values = result['sample_a']['debug_values']
180 list_a, list_b: Two lists representing samples to be compared. 187 revision_b.debug_values = result['sample_b']['debug_values']
181 significance_level: Self-describing. As a decimal fraction. 188 revision_a.mean = result['sample_a']['mean']
189 revision_b.mean = result['sample_b']['mean']
190 revision_a.std_dev = result['sample_a']['std_dev']
191 revision_b.std_dev = result['sample_b']['std_dev']
182 192
183 Returns: 193 if result['result'] == 'needMoreData':
184 A boolean indicating whether the null hypothesis ~(that the lists are 194 return None
185 samples from the same population) can be rejected at the specified 195 return bool(result['result'])
186 significance level.
187 """
188 step_result = self.api.m.python(
189 'Checking sample difference',
190 self.api.resource('significantly_different.py'),
191 [json.dumps(list_a), json.dumps(list_b), str(significance_level)],
192 stdout=self.api.m.json.output())
193 results = step_result.stdout
194 if results is None:
195 assert self.dummy_builds
196 return True
197 significantly_different = results['significantly_different']
198 step_result.presentation.logs[str(significantly_different)] = [
199 'See json.output for details']
200 return significantly_different
201 196
202 def config_step(self): 197 def config_step(self):
203 """Yields a step that prints the bisect config.""" 198 """Yields a step that prints the bisect config."""
204 api = self.api 199 api = self.api
205 200
206 # bisect_config may come as a FrozenDict (which is not serializable). 201 # bisect_config may come as a FrozenDict (which is not serializable).
207 bisect_config = dict(self.bisect_config) 202 bisect_config = dict(self.bisect_config)
208 203
209 def fix_windows_backslashes(s): 204 def fix_windows_backslashes(s):
210 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') 205 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)')
(...skipping 17 matching lines...) Expand all
228 except config_validation.ValidationFail as error: 223 except config_validation.ValidationFail as error:
229 self.surface_result('BAD_CONFIG') 224 self.surface_result('BAD_CONFIG')
230 self.api.m.halt(error.message) 225 self.api.m.halt(error.message)
231 raise self.api.m.step.StepFailure(error.message) 226 raise self.api.m.step.StepFailure(error.message)
232 227
233 @property 228 @property
234 def api(self): 229 def api(self):
235 return self._api 230 return self._api
236 231
237 def compute_relative_change(self): 232 def compute_relative_change(self):
238 old_value = float(self.good_rev.mean_value) 233 old_value = float(self.good_rev.mean or 0)
239 new_value = float(self.bad_rev.mean_value) 234 new_value = float(self.bad_rev.mean or 0)
240 235
241 if new_value and not old_value: # pragma: no cover 236 if new_value and not old_value: # pragma: no cover
242 self.relative_change = ZERO_TO_NON_ZERO 237 self.relative_change = ZERO_TO_NON_ZERO
243 return 238 return
244 239
245 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) 240 rel_change = self.api.m.math_utils.relative_change(old_value, new_value)
246 self.relative_change = '%.2f%%' % (100 * rel_change) 241 self.relative_change = '%.2f%%' % (100 * rel_change)
247 242
248 def make_deps_sha_file(self, deps_sha): 243 def make_deps_sha_file(self, deps_sha):
249 """Make a diff patch that creates DEPS.sha. 244 """Make a diff patch that creates DEPS.sha.
(...skipping 16 matching lines...) Expand all
266 file is to be written to. 261 file is to be written to.
267 commit_hash (str): An identifier for the step. 262 commit_hash (str): An identifier for the step.
268 263
269 Returns: 264 Returns:
270 A string containing the hash of the interned object. 265 A string containing the hash of the interned object.
271 """ 266 """
272 cmd = 'hash-object -t blob -w --stdin'.split(' ') 267 cmd = 'hash-object -t blob -w --stdin'.split(' ')
273 stdin = self.api.m.raw_io.input(file_contents) 268 stdin = self.api.m.raw_io.input(file_contents)
274 stdout = self.api.m.raw_io.output() 269 stdout = self.api.m.raw_io.output()
275 step_name = 'Hashing modified DEPS file with revision ' + commit_hash 270 step_name = 'Hashing modified DEPS file with revision ' + commit_hash
276 step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout, 271 step_result = self.api.m.git(
277 name=step_name) 272 *cmd, cwd=cwd, stdin=stdin, stdout=stdout, name=step_name,
273 step_test_data=lambda:
274 self.api.m.raw_io.test_api.stream_output(commit_hash))
278 hash_string = step_result.stdout.splitlines()[0] 275 hash_string = step_result.stdout.splitlines()[0]
279 try: 276 try:
280 if hash_string: 277 if hash_string:
281 int(hash_string, 16) 278 int(hash_string, 16)
282 return hash_string 279 return hash_string
283 except ValueError: # pragma: no cover 280 except ValueError: # pragma: no cover
284 reason = 'Git did not output a valid hash for the interned file.' 281 reason = 'Git did not output a valid hash for the interned file.'
285 self.api.m.halt(reason) 282 self.api.m.halt(reason)
286 raise self.api.m.step.StepFailure(reason) 283 raise self.api.m.step.StepFailure(reason)
287 284
(...skipping 14 matching lines...) Expand all
302 Returns: 299 Returns:
303 A string containing the diff patch as produced by the 'git diff' command. 300 A string containing the diff patch as produced by the 'git diff' command.
304 """ 301 """
305 # The prefixes used in the command below are used to find and replace the 302 # The prefixes used in the command below are used to find and replace the
306 # tree-ish git object id's on the diff output more easily. 303 # tree-ish git object id's on the diff output more easily.
307 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' 304 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:'
308 cmd %= (git_object_a, git_object_b) 305 cmd %= (git_object_a, git_object_b)
309 cmd = cmd.split(' ') 306 cmd = cmd.split(' ')
310 stdout = self.api.m.raw_io.output() 307 stdout = self.api.m.raw_io.output()
311 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) 308 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev)
312 step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name) 309 step_result = self.api.m.git(
310 *cmd, cwd=cwd, stdout=stdout, name=step_name,
311 step_test_data=lambda: self.api._test_data['diff_patch'])
313 patch_text = step_result.stdout 312 patch_text = step_result.stdout
314 src_string = 'IAMSRC:' + git_object_a 313 src_string = 'IAMSRC:' + git_object_a
315 dst_string = 'IAMDST:' + git_object_b 314 dst_string = 'IAMDST:' + git_object_b
316 patch_text = patch_text.replace(src_string, src_alias) 315 patch_text = patch_text.replace(src_string, src_alias)
317 patch_text = patch_text.replace(dst_string, dst_alias) 316 patch_text = patch_text.replace(dst_string, dst_alias)
318 return patch_text 317 return patch_text
319 318
320 def make_deps_patch(self, base_revision, base_file_contents, 319 def make_deps_patch(self, base_revision, base_file_contents,
321 depot, new_commit_hash): 320 depot, new_commit_hash):
322 """Make a diff patch that updates a specific dependency revision. 321 """Make a diff patch that updates a specific dependency revision.
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
365 interned_deps_hash, deps_file, deps_file, 364 interned_deps_hash, deps_file, deps_file,
366 cwd=cwd, 365 cwd=cwd,
367 deps_rev=new_commit_hash) 366 deps_rev=new_commit_hash)
368 return patch_text, patched_contents 367 return patch_text, patched_contents
369 368
370 def _expand_initial_revision_range(self): 369 def _expand_initial_revision_range(self):
371 """Sets the initial contents of |self.revisions|.""" 370 """Sets the initial contents of |self.revisions|."""
372 with self.api.m.step.nest('Expanding revision range'): 371 with self.api.m.step.nest('Expanding revision range'):
373 good_hash = self.good_rev.commit_hash 372 good_hash = self.good_rev.commit_hash
374 bad_hash = self.bad_rev.commit_hash 373 bad_hash = self.bad_rev.commit_hash
374 depot = self.good_rev.depot_name
375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) 375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash)
376 revisions = self._revision_range( 376 revisions = self._revision_range(
377 start=good_hash, 377 start=good_hash,
378 end=bad_hash, 378 end=bad_hash,
379 depot_name=self.base_depot, 379 depot_name=self.base_depot,
380 step_name=step_name, 380 step_name=step_name,
381 exclude_end=True) 381 exclude_end=True,
382 step_test_data=lambda: self.api._test_data['revision_list'][depot]
383 )
382 self.revisions = [self.good_rev] + revisions + [self.bad_rev] 384 self.revisions = [self.good_rev] + revisions + [self.bad_rev]
383 self._update_revision_list_indexes() 385 self._update_revision_list_indexes()
384 386
385 def _revision_range(self, start, end, depot_name, base_revision=None, 387 def _revision_range(self, start, end, depot_name, base_revision=None,
386 step_name=None, exclude_end=False): 388 step_name=None, exclude_end=False, **kwargs):
387 """Returns a list of RevisionState objects between |start| and |end|. 389 """Returns a list of RevisionState objects between |start| and |end|.
388 390
389 When expanding the initial revision range we want to exclude the last 391 When expanding the initial revision range we want to exclude the last
390 revision, since both good and bad have already been created and tested. 392 revision, since both good and bad have already been created and tested.
391 When bisecting into a roll on the other hand, we want to include the last 393 When bisecting into a roll on the other hand, we want to include the last
392 revision in the roll, because although the code should be equivalent to 394 revision in the roll, because although the code should be equivalent to
393 the roll, we want to blame the right culprit and not the roll. 395 the roll, we want to blame the right culprit and not the roll.
394 396
395 Args: 397 Args:
396 start (str): Start commit hash. 398 start (str): Start commit hash.
397 end (str): End commit hash. 399 end (str): End commit hash.
398 depot_name (str): Short string name of repo, e.g. chromium or v8. 400 depot_name (str): Short string name of repo, e.g. chromium or v8.
399 base_revision (str): Base revision in the downstream repo (e.g. chromium). 401 base_revision (str): Base revision in the downstream repo (e.g. chromium).
400 step_name (str): Optional step name. 402 step_name (str): Optional step name.
401 exclude_end (bool): Whether to exclude the last revision in the range, 403 exclude_end (bool): Whether to exclude the last revision in the range,
402 i.e. the revision given as end. 404 i.e. the revision given as end.
403 405
404 Returns: 406 Returns:
405 A list of RevisionState objects. 407 A list of RevisionState objects.
406 """ 408 """
407 if self.internal_bisect: # pragma: no cover 409 if self.internal_bisect: # pragma: no cover
408 return self._revision_range_with_gitiles( 410 return self._revision_range_with_gitiles(
409 start, end, depot_name, base_revision, step_name) 411 start, end, depot_name, base_revision, step_name)
410 try: 412 try:
411 step_result = self.api.m.python( 413 step_result = self.api.m.python(
412 step_name, 414 step_name,
413 self.api.resource('fetch_intervening_revisions.py'), 415 self.api.resource('fetch_intervening_revisions.py'),
414 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], 416 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']],
415 stdout=self.api.m.json.output()) 417 stdout=self.api.m.json.output(), **kwargs)
416 except self.api.m.step.StepFailure: # pragma: no cover 418 except self.api.m.step.StepFailure: # pragma: no cover
417 self.surface_result('BAD_REV') 419 self.surface_result('BAD_REV')
418 raise 420 raise
419 revisions = [] 421 revisions = []
420 revision_hashes = step_result.stdout 422 revision_hashes = step_result.stdout
421 if exclude_end: 423 if exclude_end:
422 revision_hashes = revision_hashes[:-1] 424 revision_hashes = revision_hashes[:-1]
423 for commit_hash, _ in revision_hashes: 425 for commit_hash, _ in revision_hashes:
424 revisions.append(self.revision_class( 426 revisions.append(self.revision_class(
425 bisector=self, 427 bisector=self,
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
514 dep_revision_max = max_revision.deps[depot_name] 516 dep_revision_max = max_revision.deps[depot_name]
515 if (dep_revision_min and dep_revision_max and 517 if (dep_revision_min and dep_revision_max and
516 dep_revision_min != dep_revision_max): 518 dep_revision_min != dep_revision_max):
517 step_name = ('Expanding revision range for revision %s' 519 step_name = ('Expanding revision range for revision %s'
518 ' on depot %s' % (dep_revision_max, depot_name)) 520 ' on depot %s' % (dep_revision_max, depot_name))
519 rev_list = self._revision_range( 521 rev_list = self._revision_range(
520 start=dep_revision_min, 522 start=dep_revision_min,
521 end=dep_revision_max, 523 end=dep_revision_max,
522 depot_name=depot_name, 524 depot_name=depot_name,
523 base_revision=min_revision, 525 base_revision=min_revision,
524 step_name=step_name) 526 step_name=step_name,
527 step_test_data=lambda:
528 self.api._test_data['revision_list'][depot_name])
525 new_revisions = self.revisions[:max_revision.list_index] 529 new_revisions = self.revisions[:max_revision.list_index]
526 new_revisions += rev_list 530 new_revisions += rev_list
527 new_revisions += self.revisions[max_revision.list_index:] 531 new_revisions += self.revisions[max_revision.list_index:]
528 self.revisions = new_revisions 532 self.revisions = new_revisions
529 self._update_revision_list_indexes() 533 self._update_revision_list_indexes()
530 return True 534 return True
531 except RuntimeError: # pragma: no cover 535 except RuntimeError: # pragma: no cover
532 warning_text = ('Could not expand dependency revisions for ' + 536 warning_text = ('Could not expand dependency revisions for ' +
533 revision_to_expand.commit_hash) 537 revision_to_expand.commit_hash)
534 self.surface_result('BAD_REV') 538 self.surface_result('BAD_REV')
(...skipping 16 matching lines...) Expand all
551 555
552 The change between the test results obtained for the given 'good' and 556 The change between the test results obtained for the given 'good' and
553 'bad' revisions is expected to be considered a regression. The 557 'bad' revisions is expected to be considered a regression. The
554 `improvement_direction` attribute is positive if a larger number is 558 `improvement_direction` attribute is positive if a larger number is
555 considered better, and negative if a smaller number is considered better. 559 considered better, and negative if a smaller number is considered better.
556 560
557 Returns: 561 Returns:
558 True if the check passes (i.e. no problem), False if the change is not 562 True if the check passes (i.e. no problem), False if the change is not
559 a regression according to the improvement direction. 563 a regression according to the improvement direction.
560 """ 564 """
561 good = self.good_rev.mean_value 565 good = self.good_rev.mean
562 bad = self.bad_rev.mean_value 566 bad = self.bad_rev.mean
563 567
564 if self.is_return_code_mode(): 568 if self.is_return_code_mode():
565 return True 569 return True
566 570
567 direction = self.improvement_direction 571 direction = self.improvement_direction
568 if direction is None: 572 if direction is None:
569 return True 573 return True
570 if (bad > good and direction > 0) or (bad < good and direction < 0): 574 if (bad > good and direction > 0) or (bad < good and direction < 0):
571 self._set_failed_direction_results() 575 self._set_failed_direction_results()
572 return False 576 return False
(...skipping 14 matching lines...) Expand all
587 """Checks that the initial range presents a clear enough regression. 591 """Checks that the initial range presents a clear enough regression.
588 592
589 We ensure that the good and bad revisions produce significantly different 593 We ensure that the good and bad revisions produce significantly different
590 results, increasing the sample size until MAX_REQUIRED_SAMPLES is reached 594 results, increasing the sample size until MAX_REQUIRED_SAMPLES is reached
591 or REGRESSION_CHECK_TIMEOUT seconds have elapsed. 595 or REGRESSION_CHECK_TIMEOUT seconds have elapsed.
592 596
593 Returns: True if the revisions produced results that differ from each 597 Returns: True if the revisions produced results that differ from each
594 other in a statistically significant manner. False if such difference could 598 other in a statistically significant manner. False if such difference could
595 not be established in the time or sample size allowed. 599 not be established in the time or sample size allowed.
596 """ 600 """
597 if self.test_type == 'return_code': 601 if self.is_return_code_mode():
598 return (self.good_rev.overall_return_code != 602 return (self.good_rev.overall_return_code !=
599 self.bad_rev.overall_return_code) 603 self.bad_rev.overall_return_code)
600 604
601 if self.bypass_stats_check: 605 if self.bypass_stats_check:
602 dummy_result = self.good_rev.values != self.bad_rev.values 606 self.compare_revisions(self.good_rev, self.bad_rev)
607 dummy_result = self.good_rev.mean != self.bad_rev.mean
603 if not dummy_result: 608 if not dummy_result:
604 self._set_insufficient_confidence_warning() 609 self._set_insufficient_confidence_warning()
605 return dummy_result 610 return dummy_result
606 611
612 # TODO(robertocn): This step should not be necessary in some cases.
607 with self.api.m.step.nest('Re-testing reference range'): 613 with self.api.m.step.nest('Re-testing reference range'):
608 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT 614 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT
609 while time.time() < expiration_time: 615 while time.time() < expiration_time:
610 if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5: 616 if (self.good_rev.test_run_count >= 5
611 if self.significantly_different(self.good_rev.values, 617 and self.bad_rev.test_run_count >= 5):
612 self.bad_rev.values): 618 if self.compare_revisions(self.good_rev, self.bad_rev):
613 return True 619 return True
614 if len(self.good_rev.values) == len(self.bad_rev.values): 620 if self.good_rev.test_run_count == self.bad_rev.test_run_count:
615 revision_to_retest = self.last_tested_revision 621 revision_to_retest = self.last_tested_revision
616 else: 622 else:
617 revision_to_retest = min(self.good_rev, self.bad_rev, 623 revision_to_retest = min(self.good_rev, self.bad_rev,
618 key=lambda x: len(x.values)) 624 key=lambda x: x.test_run_count)
619 if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES: 625 revision_to_retest._do_test()
620 revision_to_retest.retest() 626
621 else:
622 break
623 self._set_insufficient_confidence_warning() 627 self._set_insufficient_confidence_warning()
624 return False 628 return False
625 629
626 630
627 def get_exception(self): 631 def get_exception(self):
628 raise NotImplementedError() # pragma: no cover 632 raise NotImplementedError() # pragma: no cover
629 # TODO: should return an exception with the details of the failure. 633 # TODO: should return an exception with the details of the failure.
630 634
631 def _set_insufficient_confidence_warning( 635 def _set_insufficient_confidence_warning(
632 self): # pragma: no cover 636 self): # pragma: no cover
633 """Adds a warning about the lack of initial regression confidence.""" 637 """Adds a warning about the lack of initial regression confidence."""
634 self.failed_initial_confidence = True 638 self.failed_initial_confidence = True
635 self.surface_result('LO_INIT_CONF') 639 self.surface_result('LO_INIT_CONF')
636 self.warnings.append( 640 self.warnings.append(
637 'Bisect failed to reproduce the regression with enough confidence.') 641 'Bisect failed to reproduce the regression with enough confidence.')
638 642
639 def _results_debug_message(self): 643 def _results_debug_message(self):
640 """Returns a string with values used to debug a bisect result.""" 644 """Returns a string with values used to debug a bisect result."""
641 result = 'bisector.lkgr: %r\n' % self.lkgr 645 result = 'bisector.lkgr: %r\n' % self.lkgr
642 result += 'bisector.fkbr: %r\n\n' % self.fkbr 646 result += 'bisector.fkbr: %r\n\n' % self.fkbr
643 result += self._revision_value_table() 647 result += self._revision_value_table()
644 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values): 648 if (self.lkgr and self.lkgr.test_run_count and self.fkbr and
645 result += '\n' + self._t_test_results() 649 self.fkbr.test_run_count):
650 result += '\n' + '\n'.join([
651 'LKGR values: %r' % list(self.lkgr.debug_values),
652 'FKBR values: %r' % list(self.fkbr.debug_values),
653 ])
646 return result 654 return result
647 655
648 def _revision_value_table(self): 656 def _revision_value_table(self):
649 """Returns a string table showing revisions and their values.""" 657 """Returns a string table showing revisions and their values."""
650 header = [['Revision', 'Values']] 658 header = [['Revision', 'Values']]
651 rows = [[r.revision_string(), str(r.values)] for r in self.revisions] 659 rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions]
652 return self._pretty_table(header + rows) 660 return self._pretty_table(header + rows)
653 661
654 def _pretty_table(self, data): 662 def _pretty_table(self, data):
655 results = [] 663 results = []
656 for row in data: 664 for row in data:
657 results.append('%-15s' * len(row) % tuple(row)) 665 results.append('%-15s' * len(row) % tuple(row))
658 return '\n'.join(results) 666 return '\n'.join(results)
659 667
660 def _t_test_results(self):
661 """Returns a string showing t-test results for lkgr and fkbr."""
662 t, df, p = self.api.m.math_utils.welchs_t_test(
663 self.lkgr.values, self.fkbr.values)
664 lines = [
665 'LKGR values: %r' % self.lkgr.values,
666 'FKBR values: %r' % self.fkbr.values,
667 't-statistic: %r' % t,
668 'deg. of freedom: %r' % df,
669 'p-value: %r' % p,
670 'Confidence score: %r' % (100 * (1 - p))
671 ]
672 return '\n'.join(lines)
673
674 def print_result_debug_info(self): 668 def print_result_debug_info(self):
675 """Prints extra debug info at the end of the bisect process.""" 669 """Prints extra debug info at the end of the bisect process."""
676 lines = self._results_debug_message().splitlines() 670 lines = self._results_debug_message().splitlines()
677 # If we emit a null step then add a log to it, the log should be kept 671 # If we emit a null step then add a log to it, the log should be kept
678 # longer than 7 days (which is often needed to debug some issues). 672 # longer than 7 days (which is often needed to debug some issues).
679 self.api.m.step('Debug Info', []) 673 self.api.m.step('Debug Info', [])
680 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines 674 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines
681 675
682 def post_result(self, halt_on_failure=False): 676 def post_result(self, halt_on_failure=False):
683 """Posts bisect results to Perf Dashboard.""" 677 """Posts bisect results to Perf Dashboard."""
684 self.api.m.perf_dashboard.set_default_config() 678 self.api.m.perf_dashboard.set_default_config()
685 self.api.m.perf_dashboard.post_bisect_results( 679 self.api.m.perf_dashboard.post_bisect_results(
686 self.get_result(), halt_on_failure) 680 self.get_result(), halt_on_failure)
687 681
688 def get_revision_to_eval(self): 682 def get_revision_to_eval(self):
689 """Gets the next RevisionState object in the candidate range. 683 """Gets the next RevisionState object in the candidate range.
690 684
691 Returns: 685 Returns:
692 The next Revision object in a list. 686 The next Revision object in a list.
693 """ 687 """
694 self._update_candidate_range() 688 self._update_candidate_range()
695 candidate_range = [revision for revision in 689 candidate_range = [revision for revision in
696 self.revisions[self.lkgr.list_index + 1: 690 self.revisions[self.lkgr.list_index + 1:
697 self.fkbr.list_index] 691 self.fkbr.list_index]
698 if not revision.tested and not revision.failed] 692 if not revision.failed]
699 if len(candidate_range) == 1: 693 if len(candidate_range) == 1:
700 return candidate_range[0] 694 return candidate_range[0]
701 if len(candidate_range) == 0: 695 if len(candidate_range) == 0:
702 return None 696 return None
703 697
704 default_revision = candidate_range[len(candidate_range) / 2] 698 default_revision = candidate_range[len(candidate_range) / 2]
705 699
706 with self.api.m.step.nest( 700 with self.api.m.step.nest(
707 'Wiggling revision ' + default_revision.revision_string()): 701 'Wiggling revision ' + default_revision.revision_string()):
708 # We'll search up to 25% of the range (in either direction) to try and 702 # We'll search up to 25% of the range (in either direction) to try and
(...skipping 22 matching lines...) Expand all
731 return False 725 return False
732 726
733 def check_bisect_finished(self, revision): 727 def check_bisect_finished(self, revision):
734 """Checks if this revision completes the bisection process. 728 """Checks if this revision completes the bisection process.
735 729
736 In this case 'finished' refers to finding one revision considered 'good' 730 In this case 'finished' refers to finding one revision considered 'good'
737 immediately preceding a revision considered 'bad' where the 'bad' revision 731 immediately preceding a revision considered 'bad' where the 'bad' revision
738 does not contain a DEPS change. 732 does not contain a DEPS change.
739 """ 733 """
740 if (revision.bad and revision.previous_revision and 734 if (revision.bad and revision.previous_revision and
741 revision.previous_revision.good): # pragma: no cover 735 revision.previous_revision.good):
742 if revision.deps_change() and self._expand_deps_revisions(revision): 736 if revision.deps_change() and self._expand_deps_revisions(revision):
743 return False 737 return False
744 self.culprit = revision 738 self.culprit = revision
745 return True 739 return True
746 if (revision.good and revision.next_revision and 740 if (revision.good and revision.next_revision and
747 revision.next_revision.bad): 741 revision.next_revision.bad):
748 if (revision.next_revision.deps_change() 742 if (revision.next_revision.deps_change()
749 and self._expand_deps_revisions(revision.next_revision)): 743 and self._expand_deps_revisions(revision.next_revision)):
750 return False 744 return False
751 self.culprit = revision.next_revision 745 self.culprit = revision.next_revision
752 return True 746 return True
753 return False 747 # We'll never get here because revision adjacency is checked before this
754 748 # function is called.
755 def wait_for_all(self, revision_list): 749 assert False # pragma: no cover
756 """Waits for all revisions in list to finish."""
757 for r in revision_list:
758 self.wait_for(r)
759
760 def wait_for(self, revision, nest_check=True):
761 """Waits for the revision to finish its job."""
762 if nest_check and not self.flags.get(
763 'do_not_nest_wait_for_revision'): # pragma: no cover
764 with self.api.m.step.nest('Waiting for ' + revision.revision_string()):
765 return self.wait_for(revision, nest_check=False)
766 while True:
767 revision.update_status()
768 if revision.in_progress:
769 self.api.m.python.inline(
770 'sleeping',
771 """
772 import sys
773 import time
774 time.sleep(20*60)
775 sys.exit(0)
776 """)
777 else:
778 break
779 750
780 def _update_candidate_range(self): 751 def _update_candidate_range(self):
781 """Updates lkgr and fkbr (last known good/first known bad) revisions. 752 """Updates lkgr and fkbr (last known good/first known bad) revisions.
782 753
783 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in 754 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in
784 bisectors.revisions.""" 755 bisectors.revisions."""
785 for r in self.revisions: 756 for r in self.revisions:
786 if r.tested: 757 if r.test_run_count:
787 if r.good: 758 if r.good:
788 self.lkgr = r 759 self.lkgr = r
789 elif r.bad: 760 elif r.bad:
790 self.fkbr = r 761 self.fkbr = r
791 break 762 break
792 assert self.lkgr and self.fkbr 763 assert self.lkgr and self.fkbr
793 764
794 def get_perf_tester_name(self): 765 def get_perf_tester_name(self):
795 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. 766 """Gets the name of the tester bot (on tryserver.chromium.perf) to use.
796 767
(...skipping 17 matching lines...) Expand all
814 785
815 # TODO(prasadv): Refactor this code to remove hard coded values. 786 # TODO(prasadv): Refactor this code to remove hard coded values.
816 bot_name = self.get_perf_tester_name() 787 bot_name = self.get_perf_tester_name()
817 if 'win' in bot_name: 788 if 'win' in bot_name:
818 if any(b in bot_name for b in ['x64', 'gpu']): 789 if any(b in bot_name for b in ['x64', 'gpu']):
819 return 'winx64_bisect_builder' 790 return 'winx64_bisect_builder'
820 return 'win_perf_bisect_builder' 791 return 'win_perf_bisect_builder'
821 792
822 # TODO(prasadv): Refactor this code to remove hard coded values and use 793 # TODO(prasadv): Refactor this code to remove hard coded values and use
823 # target_bit from the bot config. crbug.com/640287 794 # target_bit from the bot config. crbug.com/640287
824 if 'android' in bot_name: 795 if 'android' in bot_name: # pragma: no cover
825 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']): 796 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']):
826 return 'android_arm64_perf_bisect_builder' 797 return 'android_arm64_perf_bisect_builder'
827 return 'android_perf_bisect_builder' 798 return 'android_perf_bisect_builder'
828 799
829 if 'mac' in bot_name: 800 if 'mac' in bot_name:
830 return 'mac_perf_bisect_builder' 801 return 'mac_perf_bisect_builder'
831 802
832 return 'linux_perf_bisect_builder' 803 return 'linux_perf_bisect_builder'
833 804
834 def get_platform_gs_prefix(self): 805 def get_platform_gs_prefix(self):
835 """Returns the prefix of a GS URL where a build can be found. 806 """Returns the prefix of a GS URL where a build can be found.
836 807
837 This prefix includes the schema, bucket, directory and beginning 808 This prefix includes the schema, bucket, directory and beginning
838 of filename. It is joined together with the part of the filename 809 of filename. It is joined together with the part of the filename
839 that includes the revision and the file extension to form the 810 that includes the revision and the file extension to form the
840 full GS URL. 811 full GS URL.
841 """ 812 """
842 if self.api.buildurl_gs_prefix: # pragma: no cover 813 if self.api.buildurl_gs_prefix: # pragma: no cover
843 return self.api.buildurl_gs_prefix 814 return self.api.buildurl_gs_prefix
844 815
845 # TODO(prasadv): Refactor this code to remove hard coded values. 816 # TODO(prasadv): Refactor this code to remove hard coded values.
846 bot_name = self.get_perf_tester_name() 817 bot_name = self.get_perf_tester_name()
847 if 'win' in bot_name: 818 if 'win' in bot_name:
848 if any(b in bot_name for b in ['x64', 'gpu']): 819 if any(b in bot_name for b in ['x64', 'gpu']):
849 return 'gs://chrome-perf/Win x64 Builder/full-build-win32_' 820 return 'gs://chrome-perf/Win x64 Builder/full-build-win32_'
850 return 'gs://chrome-perf/Win Builder/full-build-win32_' 821 return 'gs://chrome-perf/Win Builder/full-build-win32_'
851 822
852 # TODO(prasadv): Refactor this code to remove hard coded values and use 823 # TODO(prasadv): Refactor this code to remove hard coded values and use
853 # target_bit from the bot config. crbug.com/640287 824 # target_bit from the bot config. crbug.com/640287
854 if 'android' in bot_name: 825 if 'android' in bot_name: #pragma: no cover
855 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']): 826 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']):
856 return 'gs://chrome-perf/Android arm64 Builder/full-build-linux_' 827 return 'gs://chrome-perf/Android arm64 Builder/full-build-linux_'
857 return 'gs://chrome-perf/Android Builder/full-build-linux_' 828 return 'gs://chrome-perf/Android Builder/full-build-linux_'
858 829
859 if 'mac' in bot_name: 830 if 'mac' in bot_name:
860 return 'gs://chrome-perf/Mac Builder/full-build-mac_' 831 return 'gs://chrome-perf/Mac Builder/full-build-mac_'
861 832
862 return 'gs://chrome-perf/Linux Builder/full-build-linux_' 833 return 'gs://chrome-perf/Linux Builder/full-build-linux_'
863 834
864 def ensure_sync_master_branch(self): 835 def ensure_sync_master_branch(self):
865 """Make sure the local master is in sync with the fetched origin/master. 836 """Make sure the local master is in sync with the fetched origin/master.
866 837
867 We have seen on several occasions that the local master branch gets reset 838 We have seen on several occasions that the local master branch gets reset
868 to previous revisions and also detached head states. Running this should 839 to previous revisions and also detached head states. Running this should
869 take care of either situation. 840 take care of either situation.
870 """ 841 """
871 # TODO(robertocn): Investigate what causes the states mentioned in the 842 # TODO(robertocn): Investigate what causes the states mentioned in the
872 # docstring in the first place. 843 # docstring in the first place.
873 self.api.m.git('update-ref', 'refs/heads/master', 844 self.api.m.git('update-ref', 'refs/heads/master',
874 'refs/remotes/origin/master') 845 'refs/remotes/origin/master')
875 self.api.m.git('checkout', 'master', cwd=self.api.m.path['checkout']) 846 self.api.m.git('checkout', 'master', cwd=self.api.m.path['checkout'])
876 847
877 def is_return_code_mode(self): 848 def is_return_code_mode(self):
878 """Checks whether this is a bisect on the test's exit code.""" 849 """Checks whether this is a bisect on the test's exit code."""
879 return self.bisect_config.get('test_type') == 'return_code' 850 return self.test_type == 'return_code'
880 851
881 def surface_result(self, result_string): 852 def surface_result(self, result_string):
882 assert result_string in VALID_RESULT_CODES 853 assert result_string in VALID_RESULT_CODES
883 prefix = 'B4T_' # To avoid collision. Stands for bisect (abbr. `a la i18n). 854 prefix = 'B4T_' # To avoid collision. Stands for bisect (abbr. `a la i18n).
884 result_code = prefix + result_string 855 result_code = prefix + result_string
885 assert len(result_code) <= 20 856 assert len(result_code) <= 20
886 if result_code not in self.result_codes: 857 if result_code not in self.result_codes:
887 self.result_codes.add(result_code) 858 self.result_codes.add(result_code)
888 properties = self.api.m.step.active_result.presentation.properties 859 properties = self.api.m.step.active_result.presentation.properties
889 properties['extra_result_code'] = sorted(self.result_codes) 860 properties['extra_result_code'] = sorted(self.result_codes)
890 861
891 def get_result(self): 862 def get_result(self):
892 """Returns the results as a jsonable object.""" 863 """Returns the results as a jsonable object."""
893 config = self.bisect_config 864 config = self.bisect_config
894 results_confidence = 0
895 if self.culprit:
896 results_confidence = self.api.m.math_utils.confidence_score(
897 self.lkgr.values, self.fkbr.values)
898 865
899 if self.failed: 866 if self.failed:
900 status = 'failed' 867 status = 'failed'
901 elif self.bisect_over: 868 elif self.bisect_over:
902 status = 'completed' 869 status = 'completed'
903 else: 870 else:
904 status = 'started' 871 status = 'started'
905 872
906 aborted_reason = None 873 aborted_reason = None
907 if self.failed_initial_confidence: 874 if self.failed_initial_confidence:
908 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON 875 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON
909 elif self.failed_direction: 876 elif self.failed_direction:
910 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON 877 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON
911 return { 878 return {
912 'try_job_id': config.get('try_job_id'), 879 'try_job_id': config.get('try_job_id'),
913 'bug_id': config.get('bug_id'), 880 'bug_id': config.get('bug_id'),
914 'status': status, 881 'status': status,
915 'buildbot_log_url': self._get_build_url(), 882 'buildbot_log_url': self._get_build_url(),
916 'bisect_bot': self.get_perf_tester_name(), 883 'bisect_bot': self.get_perf_tester_name(),
917 'command': config['command'], 884 'command': config['command'],
918 'test_type': config['test_type'], 885 'test_type': config['test_type'],
919 'metric': config['metric'], 886 'metric': config.get('metric'),
920 'change': self.relative_change, 887 'change': self.relative_change,
921 'score': results_confidence,
922 'good_revision': self.good_rev.commit_hash, 888 'good_revision': self.good_rev.commit_hash,
923 'bad_revision': self.bad_rev.commit_hash, 889 'bad_revision': self.bad_rev.commit_hash,
924 'warnings': self.warnings, 890 'warnings': self.warnings,
925 'aborted_reason': aborted_reason, 891 'aborted_reason': aborted_reason,
926 'culprit_data': self._culprit_data(), 892 'culprit_data': self._culprit_data(),
927 'revision_data': self._revision_data() 893 'revision_data': self._revision_data()
928 } 894 }
929 895
930 def _culprit_data(self): 896 def _culprit_data(self):
931 culprit = self.culprit 897 culprit = self.culprit
(...skipping 11 matching lines...) Expand all
943 'email': culprit_info['email'], 909 'email': culprit_info['email'],
944 'cl_date': culprit_info['date'], 910 'cl_date': culprit_info['date'],
945 'commit_info': culprit_info['body'], 911 'commit_info': culprit_info['body'],
946 'revisions_links': [], 912 'revisions_links': [],
947 'cl': culprit.commit_hash 913 'cl': culprit.commit_hash
948 } 914 }
949 915
950 def _revision_data(self): 916 def _revision_data(self):
951 revision_rows = [] 917 revision_rows = []
952 for r in self.revisions: 918 for r in self.revisions:
953 if r.tested or r.aborted: 919 if r.test_run_count:
954 revision_rows.append({ 920 revision_rows.append({
955 'depot_name': r.depot_name, 921 'depot_name': r.depot_name,
956 'commit_hash': r.commit_hash, 922 'commit_hash': r.commit_hash,
957 'revision_string': r.revision_string(), 923 'revision_string': r.revision_string(),
958 'mean_value': r.mean_value, 924 'mean_value': r.mean,
959 'std_dev': r.std_dev, 925 'std_dev': r.std_dev,
960 'values': r.values, 926 'values': r.debug_values,
961 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', 927 'result': 'good' if r.good else 'bad' if r.bad else 'unknown',
962 }) 928 })
963 return revision_rows 929 return revision_rows
964 930
965 def _get_build_url(self): 931 def _get_build_url(self):
966 properties = self.api.m.properties 932 properties = self.api.m.properties
967 bot_url = properties.get('buildbotURL', 933 bot_url = properties.get('buildbotURL',
968 'http://build.chromium.org/p/chromium/') 934 'http://build.chromium.org/p/chromium/')
969 builder_name = urllib.quote(properties.get('buildername', '')) 935 builder_name = urllib.quote(properties.get('buildername', ''))
970 builder_number = str(properties.get('buildnumber', '')) 936 builder_number = str(properties.get('buildnumber', ''))
971 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) 937 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698