Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(495)

Side by Side Diff: scripts/slave/recipe_modules/auto_bisect/bisector.py

Issue 2247373002: Refactor stages 1, 2 and test_api overhaul. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master
Patch Set: Removing debug prints. Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import json 5 import json
6 import re 6 import re
7 import time 7 import time
8 import urllib 8 import urllib
9 9
10 from . import config_validation 10 from . import config_validation
(...skipping 29 matching lines...) Expand all
40 ) 40 )
41 41
42 # When we look for the next revision to build, we search nearby revisions 42 # When we look for the next revision to build, we search nearby revisions
43 # looking for a revision that's already been archived. Since we don't want 43 # looking for a revision that's already been archived. Since we don't want
44 # to move *too* far from the original revision, we'll cap the search at 25%. 44 # to move *too* far from the original revision, we'll cap the search at 25%.
45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25
46 46
47 # How long to re-test the initial good-bad range for until significant 47 # How long to re-test the initial good-bad range for until significant
48 # difference is established. 48 # difference is established.
49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60
50 # If we reach this number of samples on the reference range and have not
51 # achieved statistical significance, bail.
52 MAX_REQUIRED_SAMPLES = 15
53
54 # Significance level to use for determining difference between revisions via
55 # hypothesis testing.
56 SIGNIFICANCE_LEVEL = 0.01
57 50
58 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( 51 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = (
59 'The metric values for the initial "good" and "bad" revisions ' 52 'The metric values for the initial "good" and "bad" revisions '
60 'do not represent a clear regression.') 53 'do not represent a clear regression.')
61 54
62 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( 55 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = (
63 'The metric values for the initial "good" and "bad" revisions match the ' 56 'The metric values for the initial "good" and "bad" revisions match the '
64 'expected direction of improvement. Thus, likely represent an improvement ' 57 'expected direction of improvement. Thus, likely represent an improvement '
65 'and not a regression.') 58 'and not a regression.')
66 59
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
148 141
149 Returns: 142 Returns:
150 A 40-digit git commit hash string. 143 A 40-digit git commit hash string.
151 """ 144 """
152 if self._is_sha1(rev): # pragma: no cover 145 if self._is_sha1(rev): # pragma: no cover
153 return rev 146 return rev
154 if rev.isdigit(): 147 if rev.isdigit():
155 commit_position = self._api.m.commit_position.construct( 148 commit_position = self._api.m.commit_position.construct(
156 branch='refs/heads/master', value=rev) 149 branch='refs/heads/master', value=rev)
157 try: 150 try:
158 return self._api.m.crrev.to_commit_hash(commit_position) 151 return self._api.m.crrev.to_commit_hash(
152 commit_position,
153 step_test_data=lambda: self.api._test_data['hash_cp_map'][rev])
159 except self.api.m.step.StepFailure: # pragma: no cover 154 except self.api.m.step.StepFailure: # pragma: no cover
160 self.surface_result('BAD_REV') 155 self.surface_result('BAD_REV')
161 raise 156 raise
162 self.surface_result('BAD_REV') # pragma: no cover 157 self.surface_result('BAD_REV') # pragma: no cover
163 raise self.api.m.step.StepFailure( 158 raise self.api.m.step.StepFailure(
164 'Invalid input revision: %r' % (rev,)) # pragma: no cover 159 'Invalid input revision: %r' % (rev,)) # pragma: no cover
165 160
166 @staticmethod 161 @staticmethod
167 def _is_sha1(s): 162 def _is_sha1(s):
168 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) 163 return bool(re.match('^[0-9A-Fa-f]{40}$', s))
169 164
170 def significantly_different( 165 def compare_revisions(self, revision_a, revision_b):
171 self, list_a, list_b, 166 """
172 significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover 167 Returns:
173 """Uses an external script to run hypothesis testing with scipy. 168 True if the samples are significantly different.
169 None if there is not enough data to tell.
170 False if there's enough data but still can't tell the samples apart.
171 """
172 output_format = 'chartjson'
173 values_a = revision_a.chartjson_paths
174 values_b = revision_b.chartjson_paths
175 if revision_a.valueset_paths and revision_b.valueset_paths:
176 output_format = 'valueset'
174 177
175 The reason why we need an external script is that scipy is not available to 178 result = self.api.stat_compare(
176 the default python installed in all platforms. We instead rely on an 179 values_a,
177 anaconda environment to provide those packages. 180 values_b,
181 self.bisect_config['metric'],
182 output_format=output_format,
183 step_test_data=lambda: self.api.test_api.compare_samples_data(
184 self.api._test_data.get('revision_data'), revision_a, revision_b))
178 185
179 Args: 186 revision_a.debug_values = result['sample_a']['debug_values']
180 list_a, list_b: Two lists representing samples to be compared. 187 revision_b.debug_values = result['sample_b']['debug_values']
181 significance_level: Self-describing. As a decimal fraction. 188 revision_a.mean = result['sample_a']['mean']
RobertoCN 2016/08/23 00:26:24 Make mean and std_dev properties of revision_state
RobertoCN 2016/09/07 00:33:24 Done.
189 revision_b.mean = result['sample_b']['mean']
190 revision_a.std_dev = result['sample_a']['std_dev']
191 revision_b.std_dev = result['sample_b']['std_dev']
182 192
183 Returns: 193 if result['result'] == 'needMoreData':
RobertoCN 2016/08/23 00:26:24 Make 3 constants for true, false and needMore.
RobertoCN 2016/09/07 00:33:24 Done.
184 A boolean indicating whether the null hypothesis ~(that the lists are 194 return None
185 samples from the same population) can be rejected at the specified 195 return bool(result['result'])
186 significance level.
187 """
188 step_result = self.api.m.python(
189 'Checking sample difference',
190 self.api.resource('significantly_different.py'),
191 [json.dumps(list_a), json.dumps(list_b), str(significance_level)],
192 stdout=self.api.m.json.output())
193 results = step_result.stdout
194 if results is None:
195 assert self.dummy_builds
196 return True
197 significantly_different = results['significantly_different']
198 step_result.presentation.logs[str(significantly_different)] = [
199 'See json.output for details']
200 return significantly_different
201 196
202 def config_step(self): 197 def config_step(self):
203 """Yields a step that prints the bisect config.""" 198 """Yields a step that prints the bisect config."""
204 api = self.api 199 api = self.api
205 200
206 # bisect_config may come as a FrozenDict (which is not serializable). 201 # bisect_config may come as a FrozenDict (which is not serializable).
207 bisect_config = dict(self.bisect_config) 202 bisect_config = dict(self.bisect_config)
208 203
209 def fix_windows_backslashes(s): 204 def fix_windows_backslashes(s):
210 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') 205 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)')
(...skipping 17 matching lines...) Expand all
228 except config_validation.ValidationFail as error: 223 except config_validation.ValidationFail as error:
229 self.surface_result('BAD_CONFIG') 224 self.surface_result('BAD_CONFIG')
230 self.api.m.halt(error.message) 225 self.api.m.halt(error.message)
231 raise self.api.m.step.StepFailure(error.message) 226 raise self.api.m.step.StepFailure(error.message)
232 227
233 @property 228 @property
234 def api(self): 229 def api(self):
235 return self._api 230 return self._api
236 231
237 def compute_relative_change(self): 232 def compute_relative_change(self):
238 old_value = float(self.good_rev.mean_value) 233 old_value = float(self.good_rev.mean)
239 new_value = float(self.bad_rev.mean_value) 234 new_value = float(self.bad_rev.mean)
240 235
241 if new_value and not old_value: # pragma: no cover 236 if new_value and not old_value: # pragma: no cover
242 self.relative_change = ZERO_TO_NON_ZERO 237 self.relative_change = ZERO_TO_NON_ZERO
243 return 238 return
244 239
245 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) 240 rel_change = self.api.m.math_utils.relative_change(old_value, new_value)
246 self.relative_change = '%.2f%%' % (100 * rel_change) 241 self.relative_change = '%.2f%%' % (100 * rel_change)
247 242
248 def make_deps_sha_file(self, deps_sha): 243 def make_deps_sha_file(self, deps_sha):
249 """Make a diff patch that creates DEPS.sha. 244 """Make a diff patch that creates DEPS.sha.
(...skipping 16 matching lines...) Expand all
266 file is to be written to. 261 file is to be written to.
267 commit_hash (str): An identifier for the step. 262 commit_hash (str): An identifier for the step.
268 263
269 Returns: 264 Returns:
270 A string containing the hash of the interned object. 265 A string containing the hash of the interned object.
271 """ 266 """
272 cmd = 'hash-object -t blob -w --stdin'.split(' ') 267 cmd = 'hash-object -t blob -w --stdin'.split(' ')
273 stdin = self.api.m.raw_io.input(file_contents) 268 stdin = self.api.m.raw_io.input(file_contents)
274 stdout = self.api.m.raw_io.output() 269 stdout = self.api.m.raw_io.output()
275 step_name = 'Hashing modified DEPS file with revision ' + commit_hash 270 step_name = 'Hashing modified DEPS file with revision ' + commit_hash
276 step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout, 271 step_result = self.api.m.git(
277 name=step_name) 272 *cmd, cwd=cwd, stdin=stdin, stdout=stdout, name=step_name,
273 step_test_data=lambda:
274 self.api.m.raw_io.test_api.stream_output(commit_hash))
278 hash_string = step_result.stdout.splitlines()[0] 275 hash_string = step_result.stdout.splitlines()[0]
279 try: 276 try:
280 if hash_string: 277 if hash_string:
281 int(hash_string, 16) 278 int(hash_string, 16)
282 return hash_string 279 return hash_string
283 except ValueError: # pragma: no cover 280 except ValueError: # pragma: no cover
284 reason = 'Git did not output a valid hash for the interned file.' 281 reason = 'Git did not output a valid hash for the interned file.'
285 self.api.m.halt(reason) 282 self.api.m.halt(reason)
286 raise self.api.m.step.StepFailure(reason) 283 raise self.api.m.step.StepFailure(reason)
287 284
(...skipping 14 matching lines...) Expand all
302 Returns: 299 Returns:
303 A string containing the diff patch as produced by the 'git diff' command. 300 A string containing the diff patch as produced by the 'git diff' command.
304 """ 301 """
305 # The prefixes used in the command below are used to find and replace the 302 # The prefixes used in the command below are used to find and replace the
306 # tree-ish git object id's on the diff output more easily. 303 # tree-ish git object id's on the diff output more easily.
307 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' 304 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:'
308 cmd %= (git_object_a, git_object_b) 305 cmd %= (git_object_a, git_object_b)
309 cmd = cmd.split(' ') 306 cmd = cmd.split(' ')
310 stdout = self.api.m.raw_io.output() 307 stdout = self.api.m.raw_io.output()
311 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) 308 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev)
312 step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name) 309 step_result = self.api.m.git(
310 *cmd, cwd=cwd, stdout=stdout, name=step_name,
311 step_test_data=lambda: self.api._test_data['diff_patch'])
313 patch_text = step_result.stdout 312 patch_text = step_result.stdout
314 src_string = 'IAMSRC:' + git_object_a 313 src_string = 'IAMSRC:' + git_object_a
315 dst_string = 'IAMDST:' + git_object_b 314 dst_string = 'IAMDST:' + git_object_b
316 patch_text = patch_text.replace(src_string, src_alias) 315 patch_text = patch_text.replace(src_string, src_alias)
317 patch_text = patch_text.replace(dst_string, dst_alias) 316 patch_text = patch_text.replace(dst_string, dst_alias)
318 return patch_text 317 return patch_text
319 318
320 def make_deps_patch(self, base_revision, base_file_contents, 319 def make_deps_patch(self, base_revision, base_file_contents,
321 depot, new_commit_hash): 320 depot, new_commit_hash):
322 """Make a diff patch that updates a specific dependency revision. 321 """Make a diff patch that updates a specific dependency revision.
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
365 interned_deps_hash, deps_file, deps_file, 364 interned_deps_hash, deps_file, deps_file,
366 cwd=cwd, 365 cwd=cwd,
367 deps_rev=new_commit_hash) 366 deps_rev=new_commit_hash)
368 return patch_text, patched_contents 367 return patch_text, patched_contents
369 368
370 def _expand_initial_revision_range(self): 369 def _expand_initial_revision_range(self):
371 """Sets the initial contents of |self.revisions|.""" 370 """Sets the initial contents of |self.revisions|."""
372 with self.api.m.step.nest('Expanding revision range'): 371 with self.api.m.step.nest('Expanding revision range'):
373 good_hash = self.good_rev.commit_hash 372 good_hash = self.good_rev.commit_hash
374 bad_hash = self.bad_rev.commit_hash 373 bad_hash = self.bad_rev.commit_hash
374 depot = self.good_rev.depot_name
375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) 375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash)
376 revisions = self._revision_range( 376 revisions = self._revision_range(
377 start=good_hash, 377 start=good_hash,
378 end=bad_hash, 378 end=bad_hash,
379 depot_name=self.base_depot, 379 depot_name=self.base_depot,
380 step_name=step_name) 380 step_name=step_name,
381 step_test_data=lambda: self.api._test_data['revision_list'][depot]
382 )
381 self.revisions = [self.good_rev] + revisions + [self.bad_rev] 383 self.revisions = [self.good_rev] + revisions + [self.bad_rev]
382 self._update_revision_list_indexes() 384 self._update_revision_list_indexes()
383 385
384 def _revision_range(self, start, end, depot_name, base_revision=None, 386 def _revision_range(self, start, end, depot_name, base_revision=None,
385 step_name=None): 387 step_name=None, **kwargs):
386 """Returns a list of RevisionState objects between |start| and |end|. 388 """Returns a list of RevisionState objects between |start| and |end|.
387 389
388 Args: 390 Args:
389 start (str): Start commit hash. 391 start (str): Start commit hash.
390 end (str): End commit hash. 392 end (str): End commit hash.
391 depot_name (str): Short string name of repo, e.g. chromium or v8. 393 depot_name (str): Short string name of repo, e.g. chromium or v8.
392 base_revision (str): Base revision in the downstream repo (e.g. chromium). 394 base_revision (str): Base revision in the downstream repo (e.g. chromium).
393 step_name (str): Optional step name. 395 step_name (str): Optional step name.
394 396
395 Returns: 397 Returns:
396 A list of RevisionState objects, not including the given start or end. 398 A list of RevisionState objects, not including the given start or end.
397 """ 399 """
398 if self.internal_bisect: # pragma: no cover 400 if self.internal_bisect: # pragma: no cover
399 return self._revision_range_with_gitiles( 401 return self._revision_range_with_gitiles(
400 start, end, depot_name, base_revision, step_name) 402 start, end, depot_name, base_revision, step_name)
401 try: 403 try:
402 step_result = self.api.m.python( 404 step_result = self.api.m.python(
403 step_name, 405 step_name,
404 self.api.resource('fetch_intervening_revisions.py'), 406 self.api.resource('fetch_intervening_revisions.py'),
405 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], 407 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']],
406 stdout=self.api.m.json.output()) 408 stdout=self.api.m.json.output(), **kwargs)
407 except self.api.m.step.StepFailure: # pragma: no cover 409 except self.api.m.step.StepFailure: # pragma: no cover
408 self.surface_result('BAD_REV') 410 self.surface_result('BAD_REV')
409 raise 411 raise
410 revisions = [] 412 revisions = []
411 for commit_hash, _ in step_result.stdout: 413 for commit_hash, _ in step_result.stdout:
412 revisions.append(self.revision_class( 414 revisions.append(self.revision_class(
413 bisector=self, 415 bisector=self,
414 commit_hash=commit_hash, 416 commit_hash=commit_hash,
415 depot_name=depot_name, 417 depot_name=depot_name,
416 base_revision=base_revision)) 418 base_revision=base_revision))
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
502 dep_revision_max = max_revision.deps[depot_name] 504 dep_revision_max = max_revision.deps[depot_name]
503 if (dep_revision_min and dep_revision_max and 505 if (dep_revision_min and dep_revision_max and
504 dep_revision_min != dep_revision_max): 506 dep_revision_min != dep_revision_max):
505 step_name = ('Expanding revision range for revision %s' 507 step_name = ('Expanding revision range for revision %s'
506 ' on depot %s' % (dep_revision_max, depot_name)) 508 ' on depot %s' % (dep_revision_max, depot_name))
507 rev_list = self._revision_range( 509 rev_list = self._revision_range(
508 start=dep_revision_min, 510 start=dep_revision_min,
509 end=dep_revision_max, 511 end=dep_revision_max,
510 depot_name=depot_name, 512 depot_name=depot_name,
511 base_revision=min_revision, 513 base_revision=min_revision,
512 step_name=step_name) 514 step_name=step_name,
515 step_test_data=lambda:
516 self.api._test_data['revision_list'][depot_name])
513 new_revisions = self.revisions[:max_revision.list_index] 517 new_revisions = self.revisions[:max_revision.list_index]
514 new_revisions += rev_list 518 new_revisions += rev_list
515 new_revisions += self.revisions[max_revision.list_index:] 519 new_revisions += self.revisions[max_revision.list_index:]
516 self.revisions = new_revisions 520 self.revisions = new_revisions
517 self._update_revision_list_indexes() 521 self._update_revision_list_indexes()
518 return True 522 return True
519 except RuntimeError: # pragma: no cover 523 except RuntimeError: # pragma: no cover
520 warning_text = ('Could not expand dependency revisions for ' + 524 warning_text = ('Could not expand dependency revisions for ' +
521 revision_to_expand.commit_hash) 525 revision_to_expand.commit_hash)
522 self.surface_result('BAD_REV') 526 self.surface_result('BAD_REV')
(...skipping 16 matching lines...) Expand all
539 543
540 The change between the test results obtained for the given 'good' and 544 The change between the test results obtained for the given 'good' and
541 'bad' revisions is expected to be considered a regression. The 545 'bad' revisions is expected to be considered a regression. The
542 `improvement_direction` attribute is positive if a larger number is 546 `improvement_direction` attribute is positive if a larger number is
543 considered better, and negative if a smaller number is considered better. 547 considered better, and negative if a smaller number is considered better.
544 548
545 Returns: 549 Returns:
546 True if the check passes (i.e. no problem), False if the change is not 550 True if the check passes (i.e. no problem), False if the change is not
547 a regression according to the improvement direction. 551 a regression according to the improvement direction.
548 """ 552 """
549 good = self.good_rev.mean_value 553 good = self.good_rev.mean
550 bad = self.bad_rev.mean_value 554 bad = self.bad_rev.mean
551 555
552 if self.is_return_code_mode(): 556 if self.is_return_code_mode():
553 return True 557 return True
554 558
555 direction = self.improvement_direction 559 direction = self.improvement_direction
556 if direction is None: 560 if direction is None:
557 return True 561 return True
558 if (bad > good and direction > 0) or (bad < good and direction < 0): 562 if (bad > good and direction > 0) or (bad < good and direction < 0):
559 self._set_failed_direction_results() 563 self._set_failed_direction_results()
560 return False 564 return False
(...skipping 18 matching lines...) Expand all
579 or REGRESSION_CHECK_TIMEOUT seconds have elapsed. 583 or REGRESSION_CHECK_TIMEOUT seconds have elapsed.
580 584
581 Returns: True if the revisions produced results that differ from each 585 Returns: True if the revisions produced results that differ from each
582 other in a statistically significant manner. False if such difference could 586 other in a statistically significant manner. False if such difference could
583 not be established in the time or sample size allowed. 587 not be established in the time or sample size allowed.
584 """ 588 """
585 if self.test_type == 'return_code': 589 if self.test_type == 'return_code':
586 return (self.good_rev.overall_return_code != 590 return (self.good_rev.overall_return_code !=
587 self.bad_rev.overall_return_code) 591 self.bad_rev.overall_return_code)
588 592
589 if self.bypass_stats_check: 593 if self.bypass_stats_check:
RobertoCN 2016/08/23 00:26:24 Remove this flag
RobertoCN 2016/09/07 00:33:24 Acknowledged.
590 dummy_result = self.good_rev.values != self.bad_rev.values 594 self.compare_revisions(self.good_rev, self.bad_rev)
595 dummy_result = self.good_rev.mean != self.bad_rev.mean
591 if not dummy_result: 596 if not dummy_result:
592 self._set_insufficient_confidence_warning() 597 self._set_insufficient_confidence_warning()
593 return dummy_result 598 return dummy_result
594 599
600 # TODO(robertocn): This step should not be necessary in some cases.
595 with self.api.m.step.nest('Re-testing reference range'): 601 with self.api.m.step.nest('Re-testing reference range'):
596 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT 602 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT
597 while time.time() < expiration_time: 603 while time.time() < expiration_time:
598 if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5: 604 if (self.good_rev.test_run_count >= 5
599 if self.significantly_different(self.good_rev.values, 605 and self.bad_rev.test_run_count >= 5):
600 self.bad_rev.values): 606 if self.compare_revisions(self.good_rev, self.bad_rev):
601 return True 607 return True
602 if len(self.good_rev.values) == len(self.bad_rev.values): 608 if self.good_rev.test_run_count == self.bad_rev.test_run_count:
603 revision_to_retest = self.last_tested_revision 609 revision_to_retest = self.last_tested_revision
604 else: 610 else:
605 revision_to_retest = min(self.good_rev, self.bad_rev, 611 revision_to_retest = min(self.good_rev, self.bad_rev,
606 key=lambda x: len(x.values)) 612 key=lambda x: x.test_run_count)
607 if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES:
608 revision_to_retest.retest()
609 else:
610 break
611 self._set_insufficient_confidence_warning() 613 self._set_insufficient_confidence_warning()
612 return False 614 return False
613 615
614 616
615 def get_exception(self): 617 def get_exception(self):
616 raise NotImplementedError() # pragma: no cover 618 raise NotImplementedError() # pragma: no cover
617 # TODO: should return an exception with the details of the failure. 619 # TODO: should return an exception with the details of the failure.
618 620
619 def _set_insufficient_confidence_warning( 621 def _set_insufficient_confidence_warning(
620 self): # pragma: no cover 622 self): # pragma: no cover
621 """Adds a warning about the lack of initial regression confidence.""" 623 """Adds a warning about the lack of initial regression confidence."""
622 self.failed_initial_confidence = True 624 self.failed_initial_confidence = True
623 self.surface_result('LO_INIT_CONF') 625 self.surface_result('LO_INIT_CONF')
624 self.warnings.append( 626 self.warnings.append(
625 'Bisect failed to reproduce the regression with enough confidence.') 627 'Bisect failed to reproduce the regression with enough confidence.')
626 628
627 def _results_debug_message(self): 629 def _results_debug_message(self):
628 """Returns a string with values used to debug a bisect result.""" 630 """Returns a string with values used to debug a bisect result."""
629 result = 'bisector.lkgr: %r\n' % self.lkgr 631 result = 'bisector.lkgr: %r\n' % self.lkgr
630 result += 'bisector.fkbr: %r\n\n' % self.fkbr 632 result += 'bisector.fkbr: %r\n\n' % self.fkbr
631 result += self._revision_value_table() 633 result += self._revision_value_table()
632 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values): 634 if (self.lkgr and self.lkgr.test_run_count and self.fkbr and
633 result += '\n' + self._t_test_results() 635 self.fkbr.test_run_count):
636 result += '\n' + '\n'.join([
637 'LKGR values: %r' % list(self.lkgr.debug_values),
638 'FKBR values: %r' % list(self.fkbr.debug_values),
639 ])
634 return result 640 return result
635 641
636 def _revision_value_table(self): 642 def _revision_value_table(self):
637 """Returns a string table showing revisions and their values.""" 643 """Returns a string table showing revisions and their values."""
638 header = [['Revision', 'Values']] 644 header = [['Revision', 'Values']]
639 rows = [[r.revision_string(), str(r.values)] for r in self.revisions] 645 rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions]
640 return self._pretty_table(header + rows) 646 return self._pretty_table(header + rows)
641 647
642 def _pretty_table(self, data): 648 def _pretty_table(self, data):
643 results = [] 649 results = []
644 for row in data: 650 for row in data:
645 results.append('%-15s' * len(row) % tuple(row)) 651 results.append('%-15s' * len(row) % tuple(row))
646 return '\n'.join(results) 652 return '\n'.join(results)
647 653
648 def _t_test_results(self):
649 """Returns a string showing t-test results for lkgr and fkbr."""
650 t, df, p = self.api.m.math_utils.welchs_t_test(
651 self.lkgr.values, self.fkbr.values)
652 lines = [
653 'LKGR values: %r' % self.lkgr.values,
654 'FKBR values: %r' % self.fkbr.values,
655 't-statistic: %r' % t,
656 'deg. of freedom: %r' % df,
657 'p-value: %r' % p,
658 'Confidence score: %r' % (100 * (1 - p))
659 ]
660 return '\n'.join(lines)
661
662 def print_result_debug_info(self): 654 def print_result_debug_info(self):
663 """Prints extra debug info at the end of the bisect process.""" 655 """Prints extra debug info at the end of the bisect process."""
664 lines = self._results_debug_message().splitlines() 656 lines = self._results_debug_message().splitlines()
665 # If we emit a null step then add a log to it, the log should be kept 657 # If we emit a null step then add a log to it, the log should be kept
666 # longer than 7 days (which is often needed to debug some issues). 658 # longer than 7 days (which is often needed to debug some issues).
667 self.api.m.step('Debug Info', []) 659 self.api.m.step('Debug Info', [])
668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines 660 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines
669 661
670 def post_result(self, halt_on_failure=False): 662 def post_result(self, halt_on_failure=False):
671 """Posts bisect results to Perf Dashboard.""" 663 """Posts bisect results to Perf Dashboard."""
672 self.api.m.perf_dashboard.set_default_config() 664 self.api.m.perf_dashboard.set_default_config()
673 self.api.m.perf_dashboard.post_bisect_results( 665 self.api.m.perf_dashboard.post_bisect_results(
674 self.get_result(), halt_on_failure) 666 self.get_result(), halt_on_failure)
675 667
676 def get_revision_to_eval(self): 668 def get_revision_to_eval(self):
677 """Gets the next RevisionState object in the candidate range. 669 """Gets the next RevisionState object in the candidate range.
678 670
679 Returns: 671 Returns:
680 The next Revision object in a list. 672 The next Revision object in a list.
681 """ 673 """
682 self._update_candidate_range() 674 self._update_candidate_range()
683 candidate_range = [revision for revision in 675 candidate_range = [revision for revision in
684 self.revisions[self.lkgr.list_index + 1: 676 self.revisions[self.lkgr.list_index + 1:
685 self.fkbr.list_index] 677 self.fkbr.list_index]
686 if not revision.tested and not revision.failed] 678 if not revision.failed]
687 if len(candidate_range) == 1: 679 if len(candidate_range) == 1:
688 return candidate_range[0] 680 return candidate_range[0]
689 if len(candidate_range) == 0: 681 if len(candidate_range) == 0:
690 return None 682 return None
691 683
692 default_revision = candidate_range[len(candidate_range) / 2] 684 default_revision = candidate_range[len(candidate_range) / 2]
693 685
694 with self.api.m.step.nest( 686 with self.api.m.step.nest(
695 'Wiggling revision ' + default_revision.revision_string()): 687 'Wiggling revision ' + default_revision.revision_string()):
696 # We'll search up to 25% of the range (in either direction) to try and 688 # We'll search up to 25% of the range (in either direction) to try and
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
733 return True 725 return True
734 if (revision.good and revision.next_revision and 726 if (revision.good and revision.next_revision and
735 revision.next_revision.bad): 727 revision.next_revision.bad):
736 if (revision.next_revision.deps_change() 728 if (revision.next_revision.deps_change()
737 and self._expand_deps_revisions(revision.next_revision)): 729 and self._expand_deps_revisions(revision.next_revision)):
738 return False 730 return False
739 self.culprit = revision.next_revision 731 self.culprit = revision.next_revision
740 return True 732 return True
741 return False 733 return False
742 734
743 def wait_for_all(self, revision_list):
744 """Waits for all revisions in list to finish."""
745 for r in revision_list:
746 self.wait_for(r)
747
748 def wait_for(self, revision, nest_check=True):
749 """Waits for the revision to finish its job."""
750 if nest_check and not self.flags.get(
751 'do_not_nest_wait_for_revision'): # pragma: no cover
752 with self.api.m.step.nest('Waiting for ' + revision.revision_string()):
753 return self.wait_for(revision, nest_check=False)
754 while True:
755 revision.update_status()
756 if revision.in_progress:
757 self.api.m.python.inline(
758 'sleeping',
759 """
760 import sys
761 import time
762 time.sleep(20*60)
763 sys.exit(0)
764 """)
765 else:
766 break
767
768 def _update_candidate_range(self): 735 def _update_candidate_range(self):
769 """Updates lkgr and fkbr (last known good/first known bad) revisions. 736 """Updates lkgr and fkbr (last known good/first known bad) revisions.
770 737
771 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in 738 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in
772 bisectors.revisions.""" 739 bisectors.revisions."""
773 for r in self.revisions: 740 for r in self.revisions:
774 if r.tested: 741 if r.test_run_count:
775 if r.good: 742 if r.good:
776 self.lkgr = r 743 self.lkgr = r
777 elif r.bad: 744 elif r.bad:
778 self.fkbr = r 745 self.fkbr = r
779 break 746 break
780 assert self.lkgr and self.fkbr 747 assert self.lkgr and self.fkbr
781 748
782 def get_perf_tester_name(self): 749 def get_perf_tester_name(self):
783 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. 750 """Gets the name of the tester bot (on tryserver.chromium.perf) to use.
784 751
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
869 assert len(result_code) <= 20 836 assert len(result_code) <= 20
870 if result_code not in self.result_codes: 837 if result_code not in self.result_codes:
871 self.result_codes.add(result_code) 838 self.result_codes.add(result_code)
872 properties = self.api.m.step.active_result.presentation.properties 839 properties = self.api.m.step.active_result.presentation.properties
873 properties['extra_result_code'] = sorted(self.result_codes) 840 properties['extra_result_code'] = sorted(self.result_codes)
874 841
875 def get_result(self): 842 def get_result(self):
876 """Returns the results as a jsonable object.""" 843 """Returns the results as a jsonable object."""
877 config = self.bisect_config 844 config = self.bisect_config
878 results_confidence = 0 845 results_confidence = 0
879 if self.culprit:
880 results_confidence = self.api.m.math_utils.confidence_score(
881 self.lkgr.values, self.fkbr.values)
882 846
883 if self.failed: 847 if self.failed:
884 status = 'failed' 848 status = 'failed'
885 elif self.bisect_over: 849 elif self.bisect_over:
886 status = 'completed' 850 status = 'completed'
887 else: 851 else:
888 status = 'started' 852 status = 'started'
889 853
890 aborted_reason = None 854 aborted_reason = None
891 if self.failed_initial_confidence: 855 if self.failed_initial_confidence:
892 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON 856 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON
893 elif self.failed_direction: 857 elif self.failed_direction:
894 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON 858 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON
895 return { 859 return {
896 'try_job_id': config.get('try_job_id'), 860 'try_job_id': config.get('try_job_id'),
897 'bug_id': config.get('bug_id'), 861 'bug_id': config.get('bug_id'),
898 'status': status, 862 'status': status,
899 'buildbot_log_url': self._get_build_url(), 863 'buildbot_log_url': self._get_build_url(),
900 'bisect_bot': self.get_perf_tester_name(), 864 'bisect_bot': self.get_perf_tester_name(),
901 'command': config['command'], 865 'command': config['command'],
902 'test_type': config['test_type'], 866 'test_type': config['test_type'],
903 'metric': config['metric'], 867 'metric': config['metric'],
904 'change': self.relative_change, 868 'change': self.relative_change,
905 'score': results_confidence,
906 'good_revision': self.good_rev.commit_hash, 869 'good_revision': self.good_rev.commit_hash,
907 'bad_revision': self.bad_rev.commit_hash, 870 'bad_revision': self.bad_rev.commit_hash,
908 'warnings': self.warnings, 871 'warnings': self.warnings,
909 'aborted_reason': aborted_reason, 872 'aborted_reason': aborted_reason,
910 'culprit_data': self._culprit_data(), 873 'culprit_data': self._culprit_data(),
911 'revision_data': self._revision_data() 874 'revision_data': self._revision_data()
912 } 875 }
913 876
914 def _culprit_data(self): 877 def _culprit_data(self):
915 culprit = self.culprit 878 culprit = self.culprit
(...skipping 11 matching lines...) Expand all
927 'email': culprit_info['email'], 890 'email': culprit_info['email'],
928 'cl_date': culprit_info['date'], 891 'cl_date': culprit_info['date'],
929 'commit_info': culprit_info['body'], 892 'commit_info': culprit_info['body'],
930 'revisions_links': [], 893 'revisions_links': [],
931 'cl': culprit.commit_hash 894 'cl': culprit.commit_hash
932 } 895 }
933 896
934 def _revision_data(self): 897 def _revision_data(self):
935 revision_rows = [] 898 revision_rows = []
936 for r in self.revisions: 899 for r in self.revisions:
937 if r.tested or r.aborted: 900 if r.test_run_count:
938 revision_rows.append({ 901 revision_rows.append({
939 'depot_name': r.depot_name, 902 'depot_name': r.depot_name,
940 'commit_hash': r.commit_hash, 903 'commit_hash': r.commit_hash,
941 'revision_string': r.revision_string(), 904 'revision_string': r.revision_string(),
942 'mean_value': r.mean_value, 905 'mean_value': r.mean,
943 'std_dev': r.std_dev, 906 'std_dev': r.std_dev,
944 'values': r.values, 907 'values': r.debug_values,
945 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', 908 'result': 'good' if r.good else 'bad' if r.bad else 'unknown',
946 }) 909 })
947 return revision_rows 910 return revision_rows
948 911
949 def _get_build_url(self): 912 def _get_build_url(self):
950 properties = self.api.m.properties 913 properties = self.api.m.properties
951 bot_url = properties.get('buildbotURL', 914 bot_url = properties.get('buildbotURL',
952 'http://build.chromium.org/p/chromium/') 915 'http://build.chromium.org/p/chromium/')
953 builder_name = urllib.quote(properties.get('buildername', '')) 916 builder_name = urllib.quote(properties.get('buildername', ''))
954 builder_number = str(properties.get('buildnumber', '')) 917 builder_number = str(properties.get('buildnumber', ''))
955 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) 918 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number)
OLDNEW
« no previous file with comments | « scripts/slave/recipe_modules/auto_bisect/api.py ('k') | scripts/slave/recipe_modules/auto_bisect/bisector_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698