Chromium Code Reviews

Side by Side Diff: scripts/slave/recipe_modules/auto_bisect/bisector.py

Issue 2247373002: Refactor stages 1, 2 and test_api overhaul. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master
Patch Set: Created 4 years, 4 months ago
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import json 5 import json
6 import re 6 import re
7 import time 7 import time
8 import urllib 8 import urllib
9 9
10 from . import config_validation 10 from . import config_validation
(...skipping 29 matching lines...)
40 ) 40 )
41 41
42 # When we look for the next revision to build, we search nearby revisions 42 # When we look for the next revision to build, we search nearby revisions
43 # looking for a revision that's already been archived. Since we don't want 43 # looking for a revision that's already been archived. Since we don't want
44 # to move *too* far from the original revision, we'll cap the search at 25%. 44 # to move *too* far from the original revision, we'll cap the search at 25%.
45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25
46 46
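
The 25% cap above feeds the "wiggling" logic further down in this file, which searches near the midpoint of the candidate range for a revision that already has an archived build. Below is a minimal sketch of how such a cap could bound the search offsets; the helper name and the exact rounding are illustrative assumptions, not code from this CL:

    # Sketch: candidate offsets from a midpoint, capped at 25% of the range,
    # mirroring DEFAULT_SEARCH_RANGE_PERCENTAGE above.
    def capped_offsets(range_len, cap=0.25):
        max_offset = max(1, int(range_len * cap))
        for distance in range(1, max_offset + 1):
            yield distance    # one revision further toward the "bad" end
            yield -distance   # one revision further toward the "good" end

    # A 20-revision range stays within 5 revisions of the midpoint:
    print(list(capped_offsets(20)))   # [1, -1, 2, -2, 3, -3, 4, -4, 5, -5]
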
47 # How long to keep re-testing the initial good-bad range until a 47 # How long to keep re-testing the initial good-bad range until a
48 # significant difference is established. 48 # significant difference is established.
49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60
50 # If we reach this number of samples on the reference range and have not
51 # achieved statistical significance, bail.
52 MAX_REQUIRED_SAMPLES = 15
53
54 # Significance level to use for determining difference between revisions via
55 # hypothesis testing.
56 SIGNIFICANCE_LEVEL = 0.01
57 50
58 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( 51 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = (
59 'The metric values for the initial "good" and "bad" revisions ' 52 'The metric values for the initial "good" and "bad" revisions '
60 'do not represent a clear regression.') 53 'do not represent a clear regression.')
61 54
62 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( 55 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = (
63 'The metric values for the initial "good" and "bad" revisions match the ' 56 'The metric values for the initial "good" and "bad" revisions match the '
64 'expected direction of improvement. Thus, likely represent an improvement ' 57 'expected direction of improvement. Thus, likely represent an improvement '
65 'and not a regression.') 58 'and not a regression.')
66 59
67 60
68 class Bisector(object): 61 class Bisector(object):
69 """This class abstracts an ongoing bisect (or n-sect) job.""" 62 """This class abstracts an ongoing bisect (or n-sect) job."""
70 63
71 def __init__(self, api, bisect_config, revision_class, init_revisions=True, 64 def __init__(self, api, bisect_config, revision_class, init_revisions=True,
72 **flags): 65 **flags):
73 """Initializes the state of a new bisect job from a dictionary. 66 """Initializes the state of a new bisect job from a dictionary.
74 67
75 Note that the initial good_rev and bad_rev MUST resolve to a commit position 68 Note that the initial good_rev and bad_rev MUST resolve to a commit position
76 in the chromium repo. 69 in the chromium repo.
77 """ 70 """
78 super(Bisector, self).__init__() 71 super(Bisector, self).__init__()
72 self.loopCHECK = {}
79 self.flags = flags 73 self.flags = flags
80 self._api = api 74 self._api = api
81 self.result_codes = set() 75 self.result_codes = set()
82 self.ensure_sync_master_branch() 76 self.ensure_sync_master_branch()
83 self.bisect_config = bisect_config 77 self.bisect_config = bisect_config
84 self.config_step() 78 self.config_step()
85 self._validate_config() 79 self._validate_config()
86 self.revision_class = revision_class 80 self.revision_class = revision_class
87 self.last_tested_revision = None 81 self.last_tested_revision = None
88 82
(...skipping 59 matching lines...)
148 142
149 Returns: 143 Returns:
150 A 40-digit git commit hash string. 144 A 40-digit git commit hash string.
151 """ 145 """
152 if self._is_sha1(rev): # pragma: no cover 146 if self._is_sha1(rev): # pragma: no cover
153 return rev 147 return rev
154 if rev.isdigit(): 148 if rev.isdigit():
155 commit_position = self._api.m.commit_position.construct( 149 commit_position = self._api.m.commit_position.construct(
156 branch='refs/heads/master', value=rev) 150 branch='refs/heads/master', value=rev)
157 try: 151 try:
158 return self._api.m.crrev.to_commit_hash(commit_position) 152 return self._api.m.crrev.to_commit_hash(
153 commit_position,
154 step_test_data=lambda: self.api._test_data['hash_cp_map'][rev])
159 except self.api.m.step.StepFailure: # pragma: no cover 155 except self.api.m.step.StepFailure: # pragma: no cover
160 self.surface_result('BAD_REV') 156 self.surface_result('BAD_REV')
161 raise 157 raise
162 self.surface_result('BAD_REV') # pragma: no cover 158 self.surface_result('BAD_REV') # pragma: no cover
163 raise self.api.m.step.StepFailure( 159 raise self.api.m.step.StepFailure(
164 'Invalid input revision: %r' % (rev,)) # pragma: no cover 160 'Invalid input revision: %r' % (rev,)) # pragma: no cover
165 161
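
To make the resolution order above concrete: a 40-character hex string is accepted as-is, a purely numeric revision is wrapped into a commit-position string and looked up through crrev, and anything else fails. A standalone sketch with the crrev lookup stubbed out; the commit-position format and the fake lookup table are assumptions for illustration:

    import re

    def resolve_to_hash(rev, crrev_lookup):
        if re.match('^[0-9A-Fa-f]{40}$', rev):       # already a full SHA-1
            return rev
        if rev.isdigit():                             # numeric commit position
            commit_position = 'refs/heads/master@{#%s}' % rev
            return crrev_lookup(commit_position)      # may fail for unknown positions
        raise ValueError('Invalid input revision: %r' % (rev,))

    # Fake lookup standing in for the crrev service:
    fake_crrev = {'refs/heads/master@{#453623}': 'a' * 40}.get
    print(resolve_to_hash('453623', fake_crrev))
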
166 @staticmethod 162 @staticmethod
167 def _is_sha1(s): 163 def _is_sha1(s):
168 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) 164 return bool(re.match('^[0-9A-Fa-f]{40}$', s))
169 165
170 def significantly_different( 166 def compare_revisions(self, revision_a, revision_b):
171 self, list_a, list_b, 167 """
172 significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover 168 Returns:
173 """Uses an external script to run hypothesis testing with scipy. 169 True if the samples are significantly different.
170 None if there is not enough data to tell.
171 False if there's enough data but still can't tell the samples apart.
172 """
173 output_format = 'chartjson'
174 values_a = revision_a.chartjson_paths
175 values_b = revision_b.chartjson_paths
176 if revision_a.valueset_paths and revision_b.valueset_paths:
177 output_format = 'valueset'
174 178
175 The reason why we need an external script is that scipy is not available to 179 result = self.api.stat_compare(
176 the default python installed in all platforms. We instead rely on an 180 values_a,
177 anaconda environment to provide those packages. 181 values_b,
182 self.bisect_config['metric'],
183 output_format=output_format,
184 step_test_data=lambda: self.api.test_api.compare_samples_data(
185 self.api._test_data.get('revision_data'), revision_a, revision_b))
178 186
179 Args: 187 revision_a.debug_values = result['sample_a']['debug_values']
180 list_a, list_b: Two lists representing samples to be compared. 188 revision_b.debug_values = result['sample_b']['debug_values']
181 significance_level: Self-describing. As a decimal fraction. 189 revision_a.mean = result['sample_a']['mean']
190 revision_b.mean = result['sample_b']['mean']
191 revision_a.std_dev = result['sample_a']['std_dev']
192 revision_b.std_dev = result['sample_b']['std_dev']
182 193
183 Returns: 194 if result['result'] == 'needMoreData':
184 A boolean indicating whether the null hypothesis ~(that the lists are 195 key = tuple(values_a), tuple(values_b)
185 samples from the same population) can be rejected at the specified 196 self.loopCHECK.setdefault(key, 0)
186 significance level. 197 self.loopCHECK[key] += 1
RobertoCN 2016/08/23 00:26:23 Remove loop check and debug prints
RobertoCN 2016/09/07 00:33:24 Done.
187 """ 198 if self.loopCHECK[key] > 10:
188 step_result = self.api.m.python( 199 raise Exception('loopCHECK!@')
189 'Checking sample difference', 200 print result['result'], revision_a.debug_values, revision_b.debug_values
190 self.api.resource('significantly_different.py'), 201 print revision_a.bisector.revisions
191 [json.dumps(list_a), json.dumps(list_b), str(significance_level)], 202 return None
192 stdout=self.api.m.json.output()) 203 return bool(result['result'])
193 results = step_result.stdout
194 if results is None:
195 assert self.dummy_builds
196 return True
197 significantly_different = results['significantly_different']
198 step_result.presentation.logs[str(significantly_different)] = [
199 'See json.output for details']
200 return significantly_different
201 204
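
A note on the three-valued return of compare_revisions above: True means the samples differ significantly, None means more data is needed, and False means enough data was collected without finding a difference. Below is a self-contained sketch of how a caller might branch on it; the Fake class and the retest() behaviour are stand-ins, not the recipe API:

    class FakeRevision(object):
        def __init__(self, name, test_run_count):
            self.name = name
            self.test_run_count = test_run_count
        def retest(self):                 # assumed: schedules one more benchmark run
            print('retesting %s' % self.name)

    def act_on_comparison(result, rev_a, rev_b):
        if result is None:                # not enough data to tell yet
            min(rev_a, rev_b, key=lambda r: r.test_run_count).retest()
        elif result:                      # significantly different
            print('difference established')
        else:                             # enough data, still indistinguishable
            print('no significant difference; stop retesting this pair')

    act_on_comparison(None, FakeRevision('good', 5), FakeRevision('bad', 7))
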
202 def config_step(self): 205 def config_step(self):
203 """Yields a step that prints the bisect config.""" 206 """Yields a step that prints the bisect config."""
204 api = self.api 207 api = self.api
205 208
206 # bisect_config may come as a FrozenDict (which is not serializable). 209 # bisect_config may come as a FrozenDict (which is not serializable).
207 bisect_config = dict(self.bisect_config) 210 bisect_config = dict(self.bisect_config)
208 211
209 def fix_windows_backslashes(s): 212 def fix_windows_backslashes(s):
210 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') 213 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)')
(...skipping 17 matching lines...)
228 except config_validation.ValidationFail as error: 231 except config_validation.ValidationFail as error:
229 self.surface_result('BAD_CONFIG') 232 self.surface_result('BAD_CONFIG')
230 self.api.m.halt(error.message) 233 self.api.m.halt(error.message)
231 raise self.api.m.step.StepFailure(error.message) 234 raise self.api.m.step.StepFailure(error.message)
232 235
233 @property 236 @property
234 def api(self): 237 def api(self):
235 return self._api 238 return self._api
236 239
237 def compute_relative_change(self): 240 def compute_relative_change(self):
238 old_value = float(self.good_rev.mean_value) 241 old_value = float(self.good_rev.mean)
239 new_value = float(self.bad_rev.mean_value) 242 new_value = float(self.bad_rev.mean)
240 243
241 if new_value and not old_value: # pragma: no cover 244 if new_value and not old_value: # pragma: no cover
242 self.relative_change = ZERO_TO_NON_ZERO 245 self.relative_change = ZERO_TO_NON_ZERO
243 return 246 return
244 247
245 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) 248 rel_change = self.api.m.math_utils.relative_change(old_value, new_value)
246 self.relative_change = '%.2f%%' % (100 * rel_change) 249 self.relative_change = '%.2f%%' % (100 * rel_change)
247 250
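
For reference, the relative change reported above is the usual (new - old) / old expressed as a percentage with two decimals; the exact formula inside math_utils.relative_change is assumed here. A worked example:

    def relative_change(old_value, new_value):
        # Assumed formula, matching the '%.2f%%' formatting above.
        return (new_value - old_value) / old_value

    old, new = 120.0, 150.0
    print('%.2f%%' % (100 * relative_change(old, new)))   # -> 25.00%
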
248 def make_deps_sha_file(self, deps_sha): 251 def make_deps_sha_file(self, deps_sha):
249 """Make a diff patch that creates DEPS.sha. 252 """Make a diff patch that creates DEPS.sha.
(...skipping 16 matching lines...)
266 file is to be written to. 269 file is to be written to.
267 commit_hash (str): An identifier for the step. 270 commit_hash (str): An identifier for the step.
268 271
269 Returns: 272 Returns:
270 A string containing the hash of the interned object. 273 A string containing the hash of the interned object.
271 """ 274 """
272 cmd = 'hash-object -t blob -w --stdin'.split(' ') 275 cmd = 'hash-object -t blob -w --stdin'.split(' ')
273 stdin = self.api.m.raw_io.input(file_contents) 276 stdin = self.api.m.raw_io.input(file_contents)
274 stdout = self.api.m.raw_io.output() 277 stdout = self.api.m.raw_io.output()
275 step_name = 'Hashing modified DEPS file with revision ' + commit_hash 278 step_name = 'Hashing modified DEPS file with revision ' + commit_hash
276 step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout, 279 step_result = self.api.m.git(
277 name=step_name) 280 *cmd, cwd=cwd, stdin=stdin, stdout=stdout, name=step_name,
281 step_test_data=lambda:
282 self.api.m.raw_io.test_api.stream_output(commit_hash))
278 hash_string = step_result.stdout.splitlines()[0] 283 hash_string = step_result.stdout.splitlines()[0]
279 try: 284 try:
280 if hash_string: 285 if hash_string:
281 int(hash_string, 16) 286 int(hash_string, 16)
282 return hash_string 287 return hash_string
283 except ValueError: # pragma: no cover 288 except ValueError: # pragma: no cover
284 reason = 'Git did not output a valid hash for the interned file.' 289 reason = 'Git did not output a valid hash for the interned file.'
285 self.api.m.halt(reason) 290 self.api.m.halt(reason)
286 raise self.api.m.step.StepFailure(reason) 291 raise self.api.m.step.StepFailure(reason)
287 292
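
The hashing step above is plain `git hash-object -t blob -w --stdin`: it writes the modified DEPS contents into the repository's object database and prints the resulting blob hash, which the diff step below then references. A rough standalone equivalent (Python 2, like the recipe code); the checkout path is an assumption:

    import subprocess

    def intern_blob(file_contents, cwd):
        """Write file_contents into the git object DB and return its blob hash."""
        proc = subprocess.Popen(
            ['git', 'hash-object', '-t', 'blob', '-w', '--stdin'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, cwd=cwd)
        out, _ = proc.communicate(file_contents)
        hash_string = out.splitlines()[0].strip()
        int(hash_string, 16)   # same sanity check as above: must parse as hex
        return hash_string

    # Example (assumes /path/to/checkout is an existing git checkout):
    # print(intern_blob('vars = {}\n', '/path/to/checkout'))
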
(...skipping 14 matching lines...)
302 Returns: 307 Returns:
303 A string containing the diff patch as produced by the 'git diff' command. 308 A string containing the diff patch as produced by the 'git diff' command.
304 """ 309 """
305 # The prefixes used in the command below are used to find and replace the 310 # The prefixes used in the command below are used to find and replace the
306 # tree-ish git object id's on the diff output more easily. 311 # tree-ish git object id's on the diff output more easily.
307 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' 312 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:'
308 cmd %= (git_object_a, git_object_b) 313 cmd %= (git_object_a, git_object_b)
309 cmd = cmd.split(' ') 314 cmd = cmd.split(' ')
310 stdout = self.api.m.raw_io.output() 315 stdout = self.api.m.raw_io.output()
311 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) 316 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev)
312 step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name) 317 step_result = self.api.m.git(
318 *cmd, cwd=cwd, stdout=stdout, name=step_name,
319 step_test_data=lambda: self.api._test_data['diff_patch'])
313 patch_text = step_result.stdout 320 patch_text = step_result.stdout
314 src_string = 'IAMSRC:' + git_object_a 321 src_string = 'IAMSRC:' + git_object_a
315 dst_string = 'IAMDST:' + git_object_b 322 dst_string = 'IAMDST:' + git_object_b
316 patch_text = patch_text.replace(src_string, src_alias) 323 patch_text = patch_text.replace(src_string, src_alias)
317 patch_text = patch_text.replace(dst_string, dst_alias) 324 patch_text = patch_text.replace(dst_string, dst_alias)
318 return patch_text 325 return patch_text
319 326
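
On the IAMSRC:/IAMDST: prefixes above: when diffing two raw git objects the default a/ and b/ path prefixes are awkward to rewrite reliably, so the command tags each side with a unique marker plus the object id, making the later replace() calls unambiguous. A tiny illustration on a fabricated diff header; the object ids and aliases are made up:

    object_a = '1' * 40
    object_b = '2' * 40

    # Roughly what the patch header produced above looks like:
    patch_text = ('--- IAMSRC:%s\n'
                  '+++ IAMDST:%s\n') % (object_a, object_b)

    # Swap the tagged object ids for friendlier path aliases, as the code above does:
    patch_text = patch_text.replace('IAMSRC:' + object_a, 'DEPS')
    patch_text = patch_text.replace('IAMDST:' + object_b, 'DEPS.new')
    print(patch_text)
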
320 def make_deps_patch(self, base_revision, base_file_contents, 327 def make_deps_patch(self, base_revision, base_file_contents,
321 depot, new_commit_hash): 328 depot, new_commit_hash):
322 """Make a diff patch that updates a specific dependency revision. 329 """Make a diff patch that updates a specific dependency revision.
(...skipping 42 matching lines...)
365 interned_deps_hash, deps_file, deps_file, 372 interned_deps_hash, deps_file, deps_file,
366 cwd=cwd, 373 cwd=cwd,
367 deps_rev=new_commit_hash) 374 deps_rev=new_commit_hash)
368 return patch_text, patched_contents 375 return patch_text, patched_contents
369 376
370 def _expand_initial_revision_range(self): 377 def _expand_initial_revision_range(self):
371 """Sets the initial contents of |self.revisions|.""" 378 """Sets the initial contents of |self.revisions|."""
372 with self.api.m.step.nest('Expanding revision range'): 379 with self.api.m.step.nest('Expanding revision range'):
373 good_hash = self.good_rev.commit_hash 380 good_hash = self.good_rev.commit_hash
374 bad_hash = self.bad_rev.commit_hash 381 bad_hash = self.bad_rev.commit_hash
382 depot = self.good_rev.depot_name
375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) 383 step_name = 'for revisions %s:%s' % (good_hash, bad_hash)
376 revisions = self._revision_range( 384 revisions = self._revision_range(
377 start=good_hash, 385 start=good_hash,
378 end=bad_hash, 386 end=bad_hash,
379 depot_name=self.base_depot, 387 depot_name=self.base_depot,
380 step_name=step_name) 388 step_name=step_name,
389 step_test_data=lambda: self.api._test_data['revision_list'][depot]
390 )
381 self.revisions = [self.good_rev] + revisions + [self.bad_rev] 391 self.revisions = [self.good_rev] + revisions + [self.bad_rev]
382 self._update_revision_list_indexes() 392 self._update_revision_list_indexes()
383 393
384 def _revision_range(self, start, end, depot_name, base_revision=None, 394 def _revision_range(self, start, end, depot_name, base_revision=None,
385 step_name=None): 395 step_name=None, **kwargs):
386 """Returns a list of RevisionState objects between |start| and |end|. 396 """Returns a list of RevisionState objects between |start| and |end|.
387 397
388 Args: 398 Args:
389 start (str): Start commit hash. 399 start (str): Start commit hash.
390 end (str): End commit hash. 400 end (str): End commit hash.
391 depot_name (str): Short string name of repo, e.g. chromium or v8. 401 depot_name (str): Short string name of repo, e.g. chromium or v8.
392 base_revision (str): Base revision in the downstream repo (e.g. chromium). 402 base_revision (str): Base revision in the downstream repo (e.g. chromium).
393 step_name (str): Optional step name. 403 step_name (str): Optional step name.
394 404
395 Returns: 405 Returns:
396 A list of RevisionState objects, not including the given start or end. 406 A list of RevisionState objects, not including the given start or end.
397 """ 407 """
398 if self.internal_bisect: # pragma: no cover 408 if self.internal_bisect: # pragma: no cover
399 return self._revision_range_with_gitiles( 409 return self._revision_range_with_gitiles(
400 start, end, depot_name, base_revision, step_name) 410 start, end, depot_name, base_revision, step_name)
401 try: 411 try:
402 step_result = self.api.m.python( 412 step_result = self.api.m.python(
403 step_name, 413 step_name,
404 self.api.resource('fetch_intervening_revisions.py'), 414 self.api.resource('fetch_intervening_revisions.py'),
405 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], 415 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']],
406 stdout=self.api.m.json.output()) 416 stdout=self.api.m.json.output(), **kwargs)
407 except self.api.m.step.StepFailure: # pragma: no cover 417 except self.api.m.step.StepFailure: # pragma: no cover
408 self.surface_result('BAD_REV') 418 self.surface_result('BAD_REV')
409 raise 419 raise
410 revisions = [] 420 revisions = []
411 for commit_hash, _ in step_result.stdout: 421 for commit_hash, _ in step_result.stdout:
412 revisions.append(self.revision_class( 422 revisions.append(self.revision_class(
413 bisector=self, 423 bisector=self,
414 commit_hash=commit_hash, 424 commit_hash=commit_hash,
415 depot_name=depot_name, 425 depot_name=depot_name,
416 base_revision=base_revision)) 426 base_revision=base_revision))
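
The loop above assumes fetch_intervening_revisions.py emits a JSON list of two-element entries whose first element is a commit hash (the second element is ignored here, and its meaning is not shown in this diff). A minimal sketch of that parsing with a fabricated payload:

    import json

    # Fabricated stdout shaped like what the loop above expects.
    raw_stdout = json.dumps([
        ['a' * 40, '1'],
        ['b' * 40, '2'],
    ])

    revisions = []
    for commit_hash, _ in json.loads(raw_stdout):
        revisions.append(commit_hash)    # the real code wraps this in revision_class
    print(revisions)
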
(...skipping 85 matching lines...)
502 dep_revision_max = max_revision.deps[depot_name] 512 dep_revision_max = max_revision.deps[depot_name]
503 if (dep_revision_min and dep_revision_max and 513 if (dep_revision_min and dep_revision_max and
504 dep_revision_min != dep_revision_max): 514 dep_revision_min != dep_revision_max):
505 step_name = ('Expanding revision range for revision %s' 515 step_name = ('Expanding revision range for revision %s'
506 ' on depot %s' % (dep_revision_max, depot_name)) 516 ' on depot %s' % (dep_revision_max, depot_name))
507 rev_list = self._revision_range( 517 rev_list = self._revision_range(
508 start=dep_revision_min, 518 start=dep_revision_min,
509 end=dep_revision_max, 519 end=dep_revision_max,
510 depot_name=depot_name, 520 depot_name=depot_name,
511 base_revision=min_revision, 521 base_revision=min_revision,
512 step_name=step_name) 522 step_name=step_name,
523 step_test_data=lambda:
524 self.api._test_data['revision_list'][depot_name])
513 new_revisions = self.revisions[:max_revision.list_index] 525 new_revisions = self.revisions[:max_revision.list_index]
514 new_revisions += rev_list 526 new_revisions += rev_list
515 new_revisions += self.revisions[max_revision.list_index:] 527 new_revisions += self.revisions[max_revision.list_index:]
516 self.revisions = new_revisions 528 self.revisions = new_revisions
517 self._update_revision_list_indexes() 529 self._update_revision_list_indexes()
518 return True 530 return True
519 except RuntimeError: # pragma: no cover 531 except RuntimeError: # pragma: no cover
520 warning_text = ('Could not expand dependency revisions for ' + 532 warning_text = ('Could not expand dependency revisions for ' +
521 revision_to_expand.commit_hash) 533 revision_to_expand.commit_hash)
522 self.surface_result('BAD_REV') 534 self.surface_result('BAD_REV')
(...skipping 16 matching lines...)
539 551
540 The change between the test results obtained for the given 'good' and 552 The change between the test results obtained for the given 'good' and
541 'bad' revisions is expected to be considered a regression. The 553 'bad' revisions is expected to be considered a regression. The
542 `improvement_direction` attribute is positive if a larger number is 554 `improvement_direction` attribute is positive if a larger number is
543 considered better, and negative if a smaller number is considered better. 555 considered better, and negative if a smaller number is considered better.
544 556
545 Returns: 557 Returns:
546 True if the check passes (i.e. no problem), False if the change is not 558 True if the check passes (i.e. no problem), False if the change is not
547 a regression according to the improvement direction. 559 a regression according to the improvement direction.
548 """ 560 """
549 good = self.good_rev.mean_value 561 good = self.good_rev.mean
550 bad = self.bad_rev.mean_value 562 bad = self.bad_rev.mean
551 563
552 if self.is_return_code_mode(): 564 if self.is_return_code_mode():
553 return True 565 return True
554 566
555 direction = self.improvement_direction 567 direction = self.improvement_direction
556 if direction is None: 568 if direction is None:
557 return True 569 return True
558 if (bad > good and direction > 0) or (bad < good and direction < 0): 570 if (bad > good and direction > 0) or (bad < good and direction < 0):
559 self._set_failed_direction_results() 571 self._set_failed_direction_results()
560 return False 572 return False
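
Put differently: with a positive improvement direction (larger values are better) the "bad" revision must score lower than the "good" one for the job to proceed; if the metric moved in the improving direction, the bisect aborts as a non-regression. A small standalone truth table of that check:

    def is_possible_regression(good, bad, direction):
        """True unless the good->bad change moved in the improving direction."""
        if direction is None:
            return True
        if (bad > good and direction > 0) or (bad < good and direction < 0):
            return False          # the "regression" is actually an improvement
        return True

    # direction=+1: larger is better, so 100 -> 90 is a regression, 100 -> 110 is not.
    print(is_possible_regression(100, 90, +1))    # True
    print(is_possible_regression(100, 110, +1))   # False
    print(is_possible_regression(100, 110, -1))   # True (smaller is better, got worse)
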
(...skipping 19 matching lines...)
580 592
581 Returns: True if the revisions produced results that differ from each 593 Returns: True if the revisions produced results that differ from each
582 other in a statistically significant manner. False if such difference could 594 other in a statistically significant manner. False if such difference could
583 not be established in the time or sample size allowed. 595 not be established in the time or sample size allowed.
584 """ 596 """
585 if self.test_type == 'return_code': 597 if self.test_type == 'return_code':
586 return (self.good_rev.overall_return_code != 598 return (self.good_rev.overall_return_code !=
587 self.bad_rev.overall_return_code) 599 self.bad_rev.overall_return_code)
588 600
589 if self.bypass_stats_check: 601 if self.bypass_stats_check:
590 dummy_result = self.good_rev.values != self.bad_rev.values 602 self.compare_revisions(self.good_rev, self.bad_rev)
603 dummy_result = self.good_rev.mean != self.bad_rev.mean
591 if not dummy_result: 604 if not dummy_result:
592 self._set_insufficient_confidence_warning() 605 self._set_insufficient_confidence_warning()
593 return dummy_result 606 return dummy_result
594 607
608 # TODO(robertocn): This step should not be necessary in some cases.
595 with self.api.m.step.nest('Re-testing reference range'): 609 with self.api.m.step.nest('Re-testing reference range'):
596 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT 610 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT
597 while time.time() < expiration_time: 611 while time.time() < expiration_time:
598 if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5: 612 if (self.good_rev.test_run_count >= 5
599 if self.significantly_different(self.good_rev.values, 613 and self.bad_rev.test_run_count >= 5):
600 self.bad_rev.values): 614 if self.compare_revisions(self.good_rev, self.bad_rev):
601 return True 615 return True
602 if len(self.good_rev.values) == len(self.bad_rev.values): 616 if self.good_rev.test_run_count == self.bad_rev.test_run_count:
603 revision_to_retest = self.last_tested_revision 617 revision_to_retest = self.last_tested_revision
604 else: 618 else:
605 revision_to_retest = min(self.good_rev, self.bad_rev, 619 revision_to_retest = min(self.good_rev, self.bad_rev,
606 key=lambda x: len(x.values)) 620 key=lambda x: x.test_run_count)
607 if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES:
608 revision_to_retest.retest()
609 else:
610 break
611 self._set_insufficient_confidence_warning() 621 self._set_insufficient_confidence_warning()
612 return False 622 return False
613 623
614 624
615 def get_exception(self): 625 def get_exception(self):
616 raise NotImplementedError() # pragma: no cover 626 raise NotImplementedError() # pragma: no cover
617 # TODO: should return an exception with the details of the failure. 627 # TODO: should return an exception with the details of the failure.
618 628
619 def _set_insufficient_confidence_warning( 629 def _set_insufficient_confidence_warning(
620 self): # pragma: no cover 630 self): # pragma: no cover
621 """Adds a warning about the lack of initial regression confidence.""" 631 """Adds a warning about the lack of initial regression confidence."""
622 self.failed_initial_confidence = True 632 self.failed_initial_confidence = True
623 self.surface_result('LO_INIT_CONF') 633 self.surface_result('LO_INIT_CONF')
624 self.warnings.append( 634 self.warnings.append(
625 'Bisect failed to reproduce the regression with enough confidence.') 635 'Bisect failed to reproduce the regression with enough confidence.')
626 636
627 def _results_debug_message(self): 637 def _results_debug_message(self):
628 """Returns a string with values used to debug a bisect result.""" 638 """Returns a string with values used to debug a bisect result."""
629 result = 'bisector.lkgr: %r\n' % self.lkgr 639 result = 'bisector.lkgr: %r\n' % self.lkgr
630 result += 'bisector.fkbr: %r\n\n' % self.fkbr 640 result += 'bisector.fkbr: %r\n\n' % self.fkbr
631 result += self._revision_value_table() 641 result += self._revision_value_table()
632 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values): 642 if (self.lkgr and self.lkgr.test_run_count and self.fkbr and
633 result += '\n' + self._t_test_results() 643 self.fkbr.test_run_count):
644 result += '\n' + '\n'.join([
645 'LKGR values: %r' % list(self.lkgr.debug_values),
646 'FKBR values: %r' % list(self.fkbr.debug_values),
647 ])
634 return result 648 return result
635 649
636 def _revision_value_table(self): 650 def _revision_value_table(self):
637 """Returns a string table showing revisions and their values.""" 651 """Returns a string table showing revisions and their values."""
638 header = [['Revision', 'Values']] 652 header = [['Revision', 'Values']]
639 rows = [[r.revision_string(), str(r.values)] for r in self.revisions] 653 rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions]
640 return self._pretty_table(header + rows) 654 return self._pretty_table(header + rows)
641 655
642 def _pretty_table(self, data): 656 def _pretty_table(self, data):
643 results = [] 657 results = []
644 for row in data: 658 for row in data:
645 results.append('%-15s' * len(row) % tuple(row)) 659 results.append('%-15s' * len(row) % tuple(row))
646 return '\n'.join(results) 660 return '\n'.join(results)
647 661
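
A quick note on the formatting above: every cell is left-justified into a 15-character column via '%-15s', so rows stay aligned as long as no cell is wider than that. A standalone example of the same one-liner:

    def pretty_table(data):
        # Left-justify every cell in a 15-character column, one row per line.
        return '\n'.join('%-15s' * len(row) % tuple(row) for row in data)

    rows = [['Revision', 'Values'],
            ['chromium@1001', '[10.1, 10.3]'],
            ['chromium@1002', '[12.7, 12.9]']]
    print(pretty_table(rows))
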
648 def _t_test_results(self):
649 """Returns a string showing t-test results for lkgr and fkbr."""
650 t, df, p = self.api.m.math_utils.welchs_t_test(
651 self.lkgr.values, self.fkbr.values)
652 lines = [
653 'LKGR values: %r' % self.lkgr.values,
654 'FKBR values: %r' % self.fkbr.values,
655 't-statistic: %r' % t,
656 'deg. of freedom: %r' % df,
657 'p-value: %r' % p,
658 'Confidence score: %r' % (100 * (1 - p))
659 ]
660 return '\n'.join(lines)
661
662 def print_result_debug_info(self): 662 def print_result_debug_info(self):
663 """Prints extra debug info at the end of the bisect process.""" 663 """Prints extra debug info at the end of the bisect process."""
664 lines = self._results_debug_message().splitlines() 664 lines = self._results_debug_message().splitlines()
665 # If we emit a null step then add a log to it, the log should be kept 665 # If we emit a null step then add a log to it, the log should be kept
666 # longer than 7 days (which is often needed to debug some issues). 666 # longer than 7 days (which is often needed to debug some issues).
667 self.api.m.step('Debug Info', []) 667 self.api.m.step('Debug Info', [])
668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines 668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines
669 669
670 def post_result(self, halt_on_failure=False): 670 def post_result(self, halt_on_failure=False):
671 """Posts bisect results to Perf Dashboard.""" 671 """Posts bisect results to Perf Dashboard."""
672 self.api.m.perf_dashboard.set_default_config() 672 self.api.m.perf_dashboard.set_default_config()
673 self.api.m.perf_dashboard.post_bisect_results( 673 self.api.m.perf_dashboard.post_bisect_results(
674 self.get_result(), halt_on_failure) 674 self.get_result(), halt_on_failure)
675 675
676 def get_revision_to_eval(self): 676 def get_revision_to_eval(self):
677 """Gets the next RevisionState object in the candidate range. 677 """Gets the next RevisionState object in the candidate range.
678 678
679 Returns: 679 Returns:
680 The next Revision object in a list. 680 The next Revision object in a list.
681 """ 681 """
682 self._update_candidate_range() 682 self._update_candidate_range()
683 candidate_range = [revision for revision in 683 candidate_range = [revision for revision in
684 self.revisions[self.lkgr.list_index + 1: 684 self.revisions[self.lkgr.list_index + 1:
685 self.fkbr.list_index] 685 self.fkbr.list_index]
686 if not revision.tested and not revision.failed] 686 if not revision.failed]
687 if len(candidate_range) == 1: 687 if len(candidate_range) == 1:
688 return candidate_range[0] 688 return candidate_range[0]
689 if len(candidate_range) == 0: 689 if len(candidate_range) == 0:
690 return None 690 return None
691 691
692 default_revision = candidate_range[len(candidate_range) / 2] 692 default_revision = candidate_range[len(candidate_range) / 2]
693 693
694 with self.api.m.step.nest( 694 with self.api.m.step.nest(
695 'Wiggling revision ' + default_revision.revision_string()): 695 'Wiggling revision ' + default_revision.revision_string()):
696 # We'll search up to 25% of the range (in either direction) to try and 696 # We'll search up to 25% of the range (in either direction) to try and
(...skipping 36 matching lines...)
733 return True 733 return True
734 if (revision.good and revision.next_revision and 734 if (revision.good and revision.next_revision and
735 revision.next_revision.bad): 735 revision.next_revision.bad):
736 if (revision.next_revision.deps_change() 736 if (revision.next_revision.deps_change()
737 and self._expand_deps_revisions(revision.next_revision)): 737 and self._expand_deps_revisions(revision.next_revision)):
738 return False 738 return False
739 self.culprit = revision.next_revision 739 self.culprit = revision.next_revision
740 return True 740 return True
741 return False 741 return False
742 742
743 def wait_for_all(self, revision_list):
744 """Waits for all revisions in list to finish."""
745 for r in revision_list:
746 self.wait_for(r)
747
748 def wait_for(self, revision, nest_check=True):
749 """Waits for the revision to finish its job."""
750 if nest_check and not self.flags.get(
751 'do_not_nest_wait_for_revision'): # pragma: no cover
752 with self.api.m.step.nest('Waiting for ' + revision.revision_string()):
753 return self.wait_for(revision, nest_check=False)
754 while True:
755 revision.update_status()
756 if revision.in_progress:
757 self.api.m.python.inline(
758 'sleeping',
759 """
760 import sys
761 import time
762 time.sleep(20*60)
763 sys.exit(0)
764 """)
765 else:
766 break
767
768 def _update_candidate_range(self): 743 def _update_candidate_range(self):
769 """Updates lkgr and fkbr (last known good/first known bad) revisions. 744 """Updates lkgr and fkbr (last known good/first known bad) revisions.
770 745
771 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in 746 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in
772 bisectors.revisions.""" 747 bisectors.revisions."""
773 for r in self.revisions: 748 for r in self.revisions:
774 if r.tested: 749 if r.test_run_count:
775 if r.good: 750 if r.good:
776 self.lkgr = r 751 self.lkgr = r
777 elif r.bad: 752 elif r.bad:
778 self.fkbr = r 753 self.fkbr = r
779 break 754 break
780 assert self.lkgr and self.fkbr 755 assert self.lkgr and self.fkbr
781 756
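
The scan above walks the ordered revision list, remembering the last revision that tested good (lkgr) and stopping at the first one that tested bad (fkbr); revisions without test runs are skipped. A self-contained sketch of that walk, with tuples standing in for RevisionState objects:

    def candidate_range(revisions):
        """revisions: ordered (name, tested, good) tuples; returns (lkgr, fkbr)."""
        lkgr = fkbr = None
        for name, tested, good in revisions:
            if not tested:
                continue              # untested revisions do not move the pointers
            if good:
                lkgr = name
            else:
                fkbr = name
                break                 # first known bad ends the scan
        return lkgr, fkbr

    revs = [('r1', True, True), ('r2', False, None),
            ('r3', True, True), ('r4', False, None), ('r5', True, False)]
    print(candidate_range(revs))      # -> ('r3', 'r5')
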
782 def get_perf_tester_name(self): 757 def get_perf_tester_name(self):
783 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. 758 """Gets the name of the tester bot (on tryserver.chromium.perf) to use.
784 759
(...skipping 84 matching lines...)
869 assert len(result_code) <= 20 844 assert len(result_code) <= 20
870 if result_code not in self.result_codes: 845 if result_code not in self.result_codes:
871 self.result_codes.add(result_code) 846 self.result_codes.add(result_code)
872 properties = self.api.m.step.active_result.presentation.properties 847 properties = self.api.m.step.active_result.presentation.properties
873 properties['extra_result_code'] = sorted(self.result_codes) 848 properties['extra_result_code'] = sorted(self.result_codes)
874 849
875 def get_result(self): 850 def get_result(self):
876 """Returns the results as a jsonable object.""" 851 """Returns the results as a jsonable object."""
877 config = self.bisect_config 852 config = self.bisect_config
878 results_confidence = 0 853 results_confidence = 0
879 if self.culprit:
880 results_confidence = self.api.m.math_utils.confidence_score(
881 self.lkgr.values, self.fkbr.values)
882 854
883 if self.failed: 855 if self.failed:
884 status = 'failed' 856 status = 'failed'
885 elif self.bisect_over: 857 elif self.bisect_over:
886 status = 'completed' 858 status = 'completed'
887 else: 859 else:
888 status = 'started' 860 status = 'started'
889 861
890 aborted_reason = None 862 aborted_reason = None
891 if self.failed_initial_confidence: 863 if self.failed_initial_confidence:
892 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON 864 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON
893 elif self.failed_direction: 865 elif self.failed_direction:
894 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON 866 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON
895 return { 867 return {
896 'try_job_id': config.get('try_job_id'), 868 'try_job_id': config.get('try_job_id'),
897 'bug_id': config.get('bug_id'), 869 'bug_id': config.get('bug_id'),
898 'status': status, 870 'status': status,
899 'buildbot_log_url': self._get_build_url(), 871 'buildbot_log_url': self._get_build_url(),
900 'bisect_bot': self.get_perf_tester_name(), 872 'bisect_bot': self.get_perf_tester_name(),
901 'command': config['command'], 873 'command': config['command'],
902 'test_type': config['test_type'], 874 'test_type': config['test_type'],
903 'metric': config['metric'], 875 'metric': config['metric'],
904 'change': self.relative_change, 876 'change': self.relative_change,
905 'score': results_confidence,
906 'good_revision': self.good_rev.commit_hash, 877 'good_revision': self.good_rev.commit_hash,
907 'bad_revision': self.bad_rev.commit_hash, 878 'bad_revision': self.bad_rev.commit_hash,
908 'warnings': self.warnings, 879 'warnings': self.warnings,
909 'aborted_reason': aborted_reason, 880 'aborted_reason': aborted_reason,
910 'culprit_data': self._culprit_data(), 881 'culprit_data': self._culprit_data(),
911 'revision_data': self._revision_data() 882 'revision_data': self._revision_data()
912 } 883 }
913 884
914 def _culprit_data(self): 885 def _culprit_data(self):
915 culprit = self.culprit 886 culprit = self.culprit
(...skipping 11 matching lines...)
927 'email': culprit_info['email'], 898 'email': culprit_info['email'],
928 'cl_date': culprit_info['date'], 899 'cl_date': culprit_info['date'],
929 'commit_info': culprit_info['body'], 900 'commit_info': culprit_info['body'],
930 'revisions_links': [], 901 'revisions_links': [],
931 'cl': culprit.commit_hash 902 'cl': culprit.commit_hash
932 } 903 }
933 904
934 def _revision_data(self): 905 def _revision_data(self):
935 revision_rows = [] 906 revision_rows = []
936 for r in self.revisions: 907 for r in self.revisions:
937 if r.tested or r.aborted: 908 if r.test_run_count:
938 revision_rows.append({ 909 revision_rows.append({
939 'depot_name': r.depot_name, 910 'depot_name': r.depot_name,
940 'commit_hash': r.commit_hash, 911 'commit_hash': r.commit_hash,
941 'revision_string': r.revision_string(), 912 'revision_string': r.revision_string(),
942 'mean_value': r.mean_value, 913 'mean_value': r.mean,
943 'std_dev': r.std_dev, 914 'std_dev': r.std_dev,
944 'values': r.values, 915 'values': r.debug_values,
945 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', 916 'result': 'good' if r.good else 'bad' if r.bad else 'unknown',
946 }) 917 })
947 return revision_rows 918 return revision_rows
948 919
949 def _get_build_url(self): 920 def _get_build_url(self):
950 properties = self.api.m.properties 921 properties = self.api.m.properties
951 bot_url = properties.get('buildbotURL', 922 bot_url = properties.get('buildbotURL',
952 'http://build.chromium.org/p/chromium/') 923 'http://build.chromium.org/p/chromium/')
953 builder_name = urllib.quote(properties.get('buildername', '')) 924 builder_name = urllib.quote(properties.get('buildername', ''))
954 builder_number = str(properties.get('buildnumber', '')) 925 builder_number = str(properties.get('buildnumber', ''))
955 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) 926 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number)
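
With the default buildbotURL this resolves to a standard Buildbot build page; a worked example with made-up builder values (Python 2 urllib, as in the code above):

    import urllib

    bot_url = 'http://build.chromium.org/p/chromium/'
    builder = urllib.quote('linux_perf_bisect')
    build_number = str(307)
    print('%sbuilders/%s/builds/%s' % (bot_url, builder, build_number))
    # -> http://build.chromium.org/p/chromium/builders/linux_perf_bisect/builds/307
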