Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(809)

Side by Side Diff: scripts/slave/recipe_modules/auto_bisect/bisector.py

Issue 2247373002: Refactor stages 1, 2 and test_api overhaul. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master
Patch Set: Addressing all early feedback. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import json 5 import json
6 import re 6 import re
7 import time 7 import time
8 import urllib 8 import urllib
9 9
10 from . import config_validation 10 from auto_bisect import config_validation
11 from . import depot_config 11 from auto_bisect import depot_config
12 from . import revision_state 12 from auto_bisect import revision_state
13 13
14 _DEPS_SHA_PATCH = """ 14 _DEPS_SHA_PATCH = """
15 diff --git DEPS.sha DEPS.sha 15 diff --git DEPS.sha DEPS.sha
16 new file mode 100644 16 new file mode 100644
17 --- /dev/null 17 --- /dev/null
18 +++ DEPS.sha 18 +++ DEPS.sha
19 @@ -0,0 +1 @@ 19 @@ -0,0 +1 @@
20 +%(deps_sha)s 20 +%(deps_sha)s
21 """ 21 """
22 22
(...skipping 17 matching lines...) Expand all
40 ) 40 )
41 41
42 # When we look for the next revision to build, we search nearby revisions 42 # When we look for the next revision to build, we search nearby revisions
43 # looking for a revision that's already been archived. Since we don't want 43 # looking for a revision that's already been archived. Since we don't want
44 # to move *too* far from the original revision, we'll cap the search at 25%. 44 # to move *too* far from the original revision, we'll cap the search at 25%.
45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25
46 46
47 # How long to re-test the initial good-bad range for until significant 47 # How long to re-test the initial good-bad range for until significant
48 # difference is established. 48 # difference is established.
49 REGRESSION_CHECK_TIMEOUT = 20 * 60 * 60 # 20 hours. A build times out after 24. 49 REGRESSION_CHECK_TIMEOUT = 20 * 60 * 60 # 20 hours. A build times out after 24.
50 # If we reach this number of samples on the reference range and have not
51 # achieved statistical significance, bail.
52 MAX_REQUIRED_SAMPLES = 15
53
54 # Significance level to use for determining difference between revisions via
55 # hypothesis testing.
56 SIGNIFICANCE_LEVEL = 0.01
57 50
58 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( 51 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = (
59 'The metric values for the initial "good" and "bad" revisions ' 52 'The metric values for the initial "good" and "bad" revisions '
60 'do not represent a clear regression.') 53 'do not represent a clear regression.')
61 54
62 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( 55 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = (
63 'The metric values for the initial "good" and "bad" revisions match the ' 56 'The metric values for the initial "good" and "bad" revisions match the '
64 'expected direction of improvement. Thus, likely represent an improvement ' 57 'expected direction of improvement. Thus, likely represent an improvement '
65 'and not a regression.') 58 'and not a regression.')
66 59
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
148 141
149 Returns: 142 Returns:
150 A 40-digit git commit hash string. 143 A 40-digit git commit hash string.
151 """ 144 """
152 if self._is_sha1(rev): # pragma: no cover 145 if self._is_sha1(rev): # pragma: no cover
153 return rev 146 return rev
154 if rev.isdigit(): 147 if rev.isdigit():
155 commit_position = self._api.m.commit_position.construct( 148 commit_position = self._api.m.commit_position.construct(
156 branch='refs/heads/master', value=rev) 149 branch='refs/heads/master', value=rev)
157 try: 150 try:
158 return self._api.m.crrev.to_commit_hash(commit_position) 151 return self._api.m.crrev.to_commit_hash(
152 commit_position,
153 step_test_data=lambda: self.api._test_data['hash_cp_map'][rev])
159 except self.api.m.step.StepFailure: # pragma: no cover 154 except self.api.m.step.StepFailure: # pragma: no cover
160 self.surface_result('BAD_REV') 155 self.surface_result('BAD_REV')
161 raise 156 raise
162 self.surface_result('BAD_REV') # pragma: no cover 157 self.surface_result('BAD_REV') # pragma: no cover
163 raise self.api.m.step.StepFailure( 158 raise self.api.m.step.StepFailure(
164 'Invalid input revision: %r' % (rev,)) # pragma: no cover 159 'Invalid input revision: %r' % (rev,)) # pragma: no cover
165 160
166 @staticmethod 161 @staticmethod
167 def _is_sha1(s): 162 def _is_sha1(s):
168 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) 163 return bool(re.match('^[0-9A-Fa-f]{40}$', s))
169 164
170 def significantly_different( 165 def compare_revisions(self, revision_a, revision_b):
171 self, list_a, list_b, 166 """
172 significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover 167 Returns:
173 """Uses an external script to run hypothesis testing with scipy. 168 SIGNIFICANTLY_DIFFERENT if the samples are significantly different.
169 NEED_MORE_DATA if there is not enough data to tell.
170 NOT_SIGNIFICANTLY_DIFFERENT if if there's enough data but still can't
171 tell the samples apart.
172 """
173 output_format = 'chartjson'
174 values_a = revision_a.chartjson_paths
175 values_b = revision_b.chartjson_paths
176 if revision_a.valueset_paths and revision_b.valueset_paths:
177 output_format = 'valueset'
178 values_a = revision_a.valueset_paths
179 values_b = revision_b.valueset_paths
174 180
175 The reason why we need an external script is that scipy is not available to 181 result = self.api.stat_compare(
176 the default python installed in all platforms. We instead rely on an 182 values_a,
177 anaconda environment to provide those packages. 183 values_b,
184 self.bisect_config['metric'],
185 output_format=output_format,
186 step_test_data=lambda: self.api.test_api.compare_samples_data(
187 self.api._test_data.get('revision_data'), revision_a, revision_b))
178 188
179 Args: 189 revision_a.debug_values = result['sample_a']['debug_values']
180 list_a, list_b: Two lists representing samples to be compared. 190 revision_b.debug_values = result['sample_b']['debug_values']
181 significance_level: Self-describing. As a decimal fraction.
182 191
183 Returns: 192 if result['result'] == revision_state.NEED_MORE_DATA:
184 A boolean indicating whether the null hypothesis ~(that the lists are 193 return revision_state.NEED_MORE_DATA
185 samples from the same population) can be rejected at the specified 194 elif result['result']:
186 significance level. 195 return revision_state.SIGNIFICANTLY_DIFFERENT
187 """ 196 else: # pragma: no cover
188 step_result = self.api.m.python( 197 return revision_state.NOT_SIGNIFICANTLY_DIFFERENT
189 'Checking sample difference',
190 self.api.resource('significantly_different.py'),
191 [json.dumps(list_a), json.dumps(list_b), str(significance_level)],
192 stdout=self.api.m.json.output())
193 results = step_result.stdout
194 if results is None:
195 assert self.dummy_builds
196 return True
197 significantly_different = results['significantly_different']
198 step_result.presentation.logs[str(significantly_different)] = [
199 'See json.output for details']
200 return significantly_different
201 198
202 def config_step(self): 199 def config_step(self):
203 """Yields a step that prints the bisect config.""" 200 """Yields a step that prints the bisect config."""
204 api = self.api 201 api = self.api
205 202
206 # bisect_config may come as a FrozenDict (which is not serializable). 203 # bisect_config may come as a FrozenDict (which is not serializable).
207 bisect_config = dict(self.bisect_config) 204 bisect_config = dict(self.bisect_config)
208 205
209 def fix_windows_backslashes(s): 206 def fix_windows_backslashes(s):
210 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') 207 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)')
(...skipping 17 matching lines...) Expand all
228 except config_validation.ValidationFail as error: 225 except config_validation.ValidationFail as error:
229 self.surface_result('BAD_CONFIG') 226 self.surface_result('BAD_CONFIG')
230 self.api.m.halt(error.message) 227 self.api.m.halt(error.message)
231 raise self.api.m.step.StepFailure(error.message) 228 raise self.api.m.step.StepFailure(error.message)
232 229
233 @property 230 @property
234 def api(self): 231 def api(self):
235 return self._api 232 return self._api
236 233
237 def compute_relative_change(self): 234 def compute_relative_change(self):
238 old_value = float(self.good_rev.mean_value) 235 old_value = float(self.good_rev.mean or 0)
239 new_value = float(self.bad_rev.mean_value) 236 new_value = float(self.bad_rev.mean or 0)
240 237
241 if new_value and not old_value: # pragma: no cover 238 if new_value and not old_value: # pragma: no cover
242 self.relative_change = ZERO_TO_NON_ZERO 239 self.relative_change = ZERO_TO_NON_ZERO
243 return 240 return
244 241
245 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) 242 rel_change = self.api.m.math_utils.relative_change(old_value, new_value)
246 self.relative_change = '%.2f%%' % (100 * rel_change) 243 self.relative_change = '%.2f%%' % (100 * rel_change)
247 244
248 def make_deps_sha_file(self, deps_sha):
249 """Make a diff patch that creates DEPS.sha.
250
251 Args:
252 deps_sha (str): The hex digest of a SHA1 hash of the diff that patches
253 DEPS.
254
255 Returns:
256 A string containing a git diff.
257 """
258 return _DEPS_SHA_PATCH % {'deps_sha': deps_sha}
259
260 def _git_intern_file(self, file_contents, cwd, commit_hash):
261 """Writes a file to the git database and produces its git hash.
262
263 Args:
264 file_contents (str): The contents of the file to be hashed and interned.
265 cwd (recipe_config_types.Path): Path to the checkout whose repository the
266 file is to be written to.
267 commit_hash (str): An identifier for the step.
268
269 Returns:
270 A string containing the hash of the interned object.
271 """
272 cmd = 'hash-object -t blob -w --stdin'.split(' ')
273 stdin = self.api.m.raw_io.input(file_contents)
274 stdout = self.api.m.raw_io.output()
275 step_name = 'Hashing modified DEPS file with revision ' + commit_hash
276 step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout,
277 name=step_name)
278 hash_string = step_result.stdout.splitlines()[0]
279 try:
280 if hash_string:
281 int(hash_string, 16)
282 return hash_string
283 except ValueError: # pragma: no cover
284 reason = 'Git did not output a valid hash for the interned file.'
285 self.api.m.halt(reason)
286 raise self.api.m.step.StepFailure(reason)
287
288 def _gen_diff_patch(self, git_object_a, git_object_b, src_alias, dst_alias,
289 cwd, deps_rev):
290 """Produces a git diff patch.
291
292 Args:
293 git_object_a (str): Tree-ish git object identifier.
294 git_object_b (str): Another tree-ish git object identifier.
295 src_alias (str): Label to replace the tree-ish identifier on
296 the resulting diff patch. (git_object_a)
297 dst_alias (str): Same as above for (git_object_b)
298 cwd (recipe_config_types.Path): Path to the checkout whose repo contains
299 the objects to be compared.
300 deps_rev (str): Deps revision to identify the patch generating step.
301
302 Returns:
303 A string containing the diff patch as produced by the 'git diff' command.
304 """
305 # The prefixes used in the command below are used to find and replace the
306 # tree-ish git object id's on the diff output more easily.
307 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:'
308 cmd %= (git_object_a, git_object_b)
309 cmd = cmd.split(' ')
310 stdout = self.api.m.raw_io.output()
311 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev)
312 step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name)
313 patch_text = step_result.stdout
314 src_string = 'IAMSRC:' + git_object_a
315 dst_string = 'IAMDST:' + git_object_b
316 patch_text = patch_text.replace(src_string, src_alias)
317 patch_text = patch_text.replace(dst_string, dst_alias)
318 return patch_text
319
320 def make_deps_patch(self, base_revision, base_file_contents,
321 depot, new_commit_hash):
322 """Make a diff patch that updates a specific dependency revision.
323
324 Args:
325 base_revision (RevisionState): The revision for which the DEPS file is to
326 be patched.
327 base_file_contents (str): The contents of the original DEPS file.
328 depot (str): The dependency to modify.
329 new_commit_hash (str): The revision to put in place of the old one.
330
331 Returns:
332 A pair containing the git diff patch that updates DEPS, and the
333 full text of the modified DEPS file, both as strings.
334 """
335 original_contents = str(base_file_contents)
336 patched_contents = str(original_contents)
337
338 # Modify DEPS.
339 deps_var = depot['deps_var']
340 deps_item_regexp = re.compile(
341 r'(?<=["\']%s["\']: ["\'])([a-fA-F0-9]+)(?=["\'])' % deps_var,
342 re.MULTILINE)
343 if not re.search(deps_item_regexp, original_contents): # pragma: no cover
344 reason = 'DEPS file does not contain entry for ' + deps_var
345 self.api.m.halt(reason)
346 raise self.api.m.step.StepFailure(reason)
347
348 patched_contents = re.sub(deps_item_regexp, new_commit_hash,
349 original_contents)
350
351 cwd = self.api.working_dir.join(
352 depot_config.DEPOT_DEPS_NAME[base_revision.depot_name]['src'])
353 deps_file = 'DEPS'
354 # This is to support backward compatibility on android-chrome because
355 # .DEPS.git got migrated to DEPS on April 5, 2016
356 if (base_revision.depot_name == 'android-chrome' and
357 self.api.m.path.exists(self.api.m.path.join(cwd, '.DEPS.git'))):
358 deps_file = '.DEPS.git' # pragma: no cover
359
360 interned_deps_hash = self._git_intern_file(
361 patched_contents, cwd, new_commit_hash)
362
363 patch_text = self._gen_diff_patch(
364 '%s:%s' % (base_revision.commit_hash, deps_file),
365 interned_deps_hash, deps_file, deps_file,
366 cwd=cwd,
367 deps_rev=new_commit_hash)
368 return patch_text, patched_contents
369
370 def _expand_initial_revision_range(self): 245 def _expand_initial_revision_range(self):
371 """Sets the initial contents of |self.revisions|.""" 246 """Sets the initial contents of |self.revisions|."""
372 with self.api.m.step.nest('Expanding revision range'): 247 with self.api.m.step.nest('Expanding revision range'):
373 good_hash = self.good_rev.commit_hash 248 good_hash = self.good_rev.commit_hash
374 bad_hash = self.bad_rev.commit_hash 249 bad_hash = self.bad_rev.commit_hash
250 depot = self.good_rev.depot_name
375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) 251 step_name = 'for revisions %s:%s' % (good_hash, bad_hash)
376 revisions = self._revision_range( 252 revisions = self._revision_range(
377 start=good_hash, 253 start=good_hash,
378 end=bad_hash, 254 end=bad_hash,
379 depot_name=self.base_depot, 255 depot_name=self.base_depot,
380 step_name=step_name, 256 step_name=step_name,
381 exclude_end=True) 257 exclude_end=True,
258 step_test_data=lambda: self.api._test_data['revision_list'][depot]
dtu 2016/09/13 23:57:27 How do you decide whether to pass step_test_data h
RobertoCN 2016/09/21 22:22:48 I also do not like this. Ideally gsutil (for examp
259 )
382 self.revisions = [self.good_rev] + revisions + [self.bad_rev] 260 self.revisions = [self.good_rev] + revisions + [self.bad_rev]
383 self._update_revision_list_indexes() 261 self._update_revision_list_indexes()
384 262
385 def _revision_range(self, start, end, depot_name, base_revision=None, 263 def _revision_range(self, start, end, depot_name, base_revision=None,
386 step_name=None, exclude_end=False): 264 step_name=None, exclude_end=False, **kwargs):
387 """Returns a list of RevisionState objects between |start| and |end|. 265 """Returns a list of RevisionState objects between |start| and |end|.
388 266
389 When expanding the initial revision range we want to exclude the last 267 When expanding the initial revision range we want to exclude the last
390 revision, since both good and bad have already been created and tested. 268 revision, since both good and bad have already been created and tested.
391 When bisecting into a roll on the other hand, we want to include the last 269 When bisecting into a roll on the other hand, we want to include the last
392 revision in the roll, because although the code should be equivalent to 270 revision in the roll, because although the code should be equivalent to
393 the roll, we want to blame the right culprit and not the roll. 271 the roll, we want to blame the right culprit and not the roll.
394 272
395 Args: 273 Args:
396 start (str): Start commit hash. 274 start (str): Start commit hash.
397 end (str): End commit hash. 275 end (str): End commit hash.
398 depot_name (str): Short string name of repo, e.g. chromium or v8. 276 depot_name (str): Short string name of repo, e.g. chromium or v8.
399 base_revision (str): Base revision in the downstream repo (e.g. chromium). 277 base_revision (str): Base revision in the downstream repo (e.g. chromium).
400 step_name (str): Optional step name. 278 step_name (str): Optional step name.
401 exclude_end (bool): Whether to exclude the last revision in the range, 279 exclude_end (bool): Whether to exclude the last revision in the range,
402 i.e. the revision given as end. 280 i.e. the revision given as end.
403 281
404 Returns: 282 Returns:
405 A list of RevisionState objects. 283 A list of RevisionState objects.
406 """ 284 """
407 if self.internal_bisect: # pragma: no cover 285 if self.internal_bisect: # pragma: no cover
408 return self._revision_range_with_gitiles( 286 return self._revision_range_with_gitiles(
409 start, end, depot_name, base_revision, step_name) 287 start, end, depot_name, base_revision, step_name)
410 try: 288 try:
411 step_result = self.api.m.python( 289 step_result = self.api.m.python(
412 step_name, 290 step_name,
413 self.api.resource('fetch_intervening_revisions.py'), 291 self.api.resource('fetch_intervening_revisions.py'),
414 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], 292 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']],
415 stdout=self.api.m.json.output()) 293 stdout=self.api.m.json.output(), **kwargs)
416 except self.api.m.step.StepFailure: # pragma: no cover 294 except self.api.m.step.StepFailure: # pragma: no cover
417 self.surface_result('BAD_REV') 295 self.surface_result('BAD_REV')
418 raise 296 raise
419 revisions = [] 297 revisions = []
420 revision_hashes = step_result.stdout 298 revision_hashes = step_result.stdout
421 if exclude_end: 299 if exclude_end:
422 revision_hashes = revision_hashes[:-1] 300 revision_hashes = revision_hashes[:-1]
423 for commit_hash, _ in revision_hashes: 301 for commit_hash, _ in revision_hashes:
424 revisions.append(self.revision_class( 302 revisions.append(self.revision_class(
425 bisector=self, 303 bisector=self,
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
514 dep_revision_max = max_revision.deps[depot_name] 392 dep_revision_max = max_revision.deps[depot_name]
515 if (dep_revision_min and dep_revision_max and 393 if (dep_revision_min and dep_revision_max and
516 dep_revision_min != dep_revision_max): 394 dep_revision_min != dep_revision_max):
517 step_name = ('Expanding revision range for revision %s' 395 step_name = ('Expanding revision range for revision %s'
518 ' on depot %s' % (dep_revision_max, depot_name)) 396 ' on depot %s' % (dep_revision_max, depot_name))
519 rev_list = self._revision_range( 397 rev_list = self._revision_range(
520 start=dep_revision_min, 398 start=dep_revision_min,
521 end=dep_revision_max, 399 end=dep_revision_max,
522 depot_name=depot_name, 400 depot_name=depot_name,
523 base_revision=min_revision, 401 base_revision=min_revision,
524 step_name=step_name) 402 step_name=step_name,
403 step_test_data=lambda:
404 self.api._test_data['revision_list'][depot_name])
525 new_revisions = self.revisions[:max_revision.list_index] 405 new_revisions = self.revisions[:max_revision.list_index]
526 new_revisions += rev_list 406 new_revisions += rev_list
527 new_revisions += self.revisions[max_revision.list_index:] 407 new_revisions += self.revisions[max_revision.list_index:]
528 self.revisions = new_revisions 408 self.revisions = new_revisions
529 self._update_revision_list_indexes() 409 self._update_revision_list_indexes()
530 return True 410 return True
531 except RuntimeError: # pragma: no cover 411 except RuntimeError: # pragma: no cover
532 warning_text = ('Could not expand dependency revisions for ' + 412 warning_text = ('Could not expand dependency revisions for ' +
533 revision_to_expand.commit_hash) 413 revision_to_expand.commit_hash)
534 self.surface_result('BAD_REV') 414 self.surface_result('BAD_REV')
(...skipping 16 matching lines...) Expand all
551 431
552 The change between the test results obtained for the given 'good' and 432 The change between the test results obtained for the given 'good' and
553 'bad' revisions is expected to be considered a regression. The 433 'bad' revisions is expected to be considered a regression. The
554 `improvement_direction` attribute is positive if a larger number is 434 `improvement_direction` attribute is positive if a larger number is
555 considered better, and negative if a smaller number is considered better. 435 considered better, and negative if a smaller number is considered better.
556 436
557 Returns: 437 Returns:
558 True if the check passes (i.e. no problem), False if the change is not 438 True if the check passes (i.e. no problem), False if the change is not
559 a regression according to the improvement direction. 439 a regression according to the improvement direction.
560 """ 440 """
561 good = self.good_rev.mean_value 441 good = self.good_rev.mean
562 bad = self.bad_rev.mean_value 442 bad = self.bad_rev.mean
563 443
564 if self.is_return_code_mode(): 444 if self.is_return_code_mode():
565 return True 445 return True
566 446
567 direction = self.improvement_direction 447 direction = self.improvement_direction
568 if direction is None: 448 if direction is None:
569 return True 449 return True
570 if (bad > good and direction > 0) or (bad < good and direction < 0): 450 if (bad > good and direction > 0) or (bad < good and direction < 0):
571 self._set_failed_direction_results() 451 self._set_failed_direction_results()
572 return False 452 return False
(...skipping 14 matching lines...) Expand all
587 """Checks that the initial range presents a clear enough regression. 467 """Checks that the initial range presents a clear enough regression.
588 468
589 We ensure that the good and bad revisions produce significantly different 469 We ensure that the good and bad revisions produce significantly different
590 results, increasing the sample size until MAX_REQUIRED_SAMPLES is reached 470 results, increasing the sample size until MAX_REQUIRED_SAMPLES is reached
591 or REGRESSION_CHECK_TIMEOUT seconds have elapsed. 471 or REGRESSION_CHECK_TIMEOUT seconds have elapsed.
592 472
593 Returns: True if the revisions produced results that differ from each 473 Returns: True if the revisions produced results that differ from each
594 other in a statistically significant manner. False if such difference could 474 other in a statistically significant manner. False if such difference could
595 not be established in the time or sample size allowed. 475 not be established in the time or sample size allowed.
596 """ 476 """
597 if self.test_type == 'return_code': 477 if self.is_return_code_mode():
598 return (self.good_rev.overall_return_code != 478 return (self.good_rev.overall_return_code !=
599 self.bad_rev.overall_return_code) 479 self.bad_rev.overall_return_code)
600 480
601 if self.bypass_stats_check: 481 if self.bypass_stats_check:
602 dummy_result = self.good_rev.values != self.bad_rev.values 482 self.compare_revisions(self.good_rev, self.bad_rev)
483 dummy_result = self.good_rev.mean != self.bad_rev.mean
603 if not dummy_result: 484 if not dummy_result:
604 self._set_insufficient_confidence_warning() 485 self._set_insufficient_confidence_warning()
605 return dummy_result 486 return dummy_result
606 487
488 # TODO(robertocn): This step should not be necessary in some cases.
607 with self.api.m.step.nest('Re-testing reference range'): 489 with self.api.m.step.nest('Re-testing reference range'):
608 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT 490 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT
609 while time.time() < expiration_time: 491 while time.time() < expiration_time:
610 if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5: 492 if (self.good_rev.test_run_count >= 5
611 if self.significantly_different(self.good_rev.values, 493 and self.bad_rev.test_run_count >= 5):
612 self.bad_rev.values): 494 if self.compare_revisions(self.good_rev, self.bad_rev):
613 return True 495 return True
614 if len(self.good_rev.values) == len(self.bad_rev.values): 496 if self.good_rev.test_run_count == self.bad_rev.test_run_count:
615 revision_to_retest = self.last_tested_revision 497 revision_to_retest = self.last_tested_revision
616 else: 498 else:
617 revision_to_retest = min(self.good_rev, self.bad_rev, 499 revision_to_retest = min(self.good_rev, self.bad_rev,
618 key=lambda x: len(x.values)) 500 key=lambda x: x.test_run_count)
619 if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES: 501 revision_to_retest._do_test()
620 revision_to_retest.retest() 502
621 else:
622 break
623 self._set_insufficient_confidence_warning() 503 self._set_insufficient_confidence_warning()
624 return False 504 return False
625 505
626 506
627 def get_exception(self): 507 def get_exception(self):
628 raise NotImplementedError() # pragma: no cover 508 raise NotImplementedError() # pragma: no cover
629 # TODO: should return an exception with the details of the failure. 509 # TODO: should return an exception with the details of the failure.
630 510
631 def _set_insufficient_confidence_warning( 511 def _set_insufficient_confidence_warning(
632 self): # pragma: no cover 512 self): # pragma: no cover
633 """Adds a warning about the lack of initial regression confidence.""" 513 """Adds a warning about the lack of initial regression confidence."""
634 self.failed_initial_confidence = True 514 self.failed_initial_confidence = True
635 self.surface_result('LO_INIT_CONF') 515 self.surface_result('LO_INIT_CONF')
636 self.warnings.append( 516 self.warnings.append(
637 'Bisect failed to reproduce the regression with enough confidence.') 517 'Bisect failed to reproduce the regression with enough confidence.')
638 518
639 def _results_debug_message(self): 519 def _results_debug_message(self):
640 """Returns a string with values used to debug a bisect result.""" 520 """Returns a string with values used to debug a bisect result."""
641 result = 'bisector.lkgr: %r\n' % self.lkgr 521 result = 'bisector.lkgr: %r\n' % self.lkgr
642 result += 'bisector.fkbr: %r\n\n' % self.fkbr 522 result += 'bisector.fkbr: %r\n\n' % self.fkbr
643 result += self._revision_value_table() 523 result += self._revision_value_table()
644 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values): 524 if (self.lkgr and self.lkgr.test_run_count and self.fkbr and
645 result += '\n' + self._t_test_results() 525 self.fkbr.test_run_count):
526 result += '\n' + '\n'.join([
527 'LKGR values: %r' % list(self.lkgr.debug_values),
528 'FKBR values: %r' % list(self.fkbr.debug_values),
529 ])
646 return result 530 return result
647 531
648 def _revision_value_table(self): 532 def _revision_value_table(self):
649 """Returns a string table showing revisions and their values.""" 533 """Returns a string table showing revisions and their values."""
650 header = [['Revision', 'Values']] 534 header = [['Revision', 'Values']]
651 rows = [[r.revision_string(), str(r.values)] for r in self.revisions] 535 rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions]
652 return self._pretty_table(header + rows) 536 return self._pretty_table(header + rows)
653 537
654 def _pretty_table(self, data): 538 def _pretty_table(self, data):
655 results = [] 539 results = []
656 for row in data: 540 for row in data:
657 results.append('%-15s' * len(row) % tuple(row)) 541 results.append('%-15s' * len(row) % tuple(row))
658 return '\n'.join(results) 542 return '\n'.join(results)
659 543
660 def _t_test_results(self):
661 """Returns a string showing t-test results for lkgr and fkbr."""
662 t, df, p = self.api.m.math_utils.welchs_t_test(
663 self.lkgr.values, self.fkbr.values)
664 lines = [
665 'LKGR values: %r' % self.lkgr.values,
666 'FKBR values: %r' % self.fkbr.values,
667 't-statistic: %r' % t,
668 'deg. of freedom: %r' % df,
669 'p-value: %r' % p,
670 'Confidence score: %r' % (100 * (1 - p))
671 ]
672 return '\n'.join(lines)
673
674 def print_result_debug_info(self): 544 def print_result_debug_info(self):
675 """Prints extra debug info at the end of the bisect process.""" 545 """Prints extra debug info at the end of the bisect process."""
676 lines = self._results_debug_message().splitlines() 546 lines = self._results_debug_message().splitlines()
677 # If we emit a null step then add a log to it, the log should be kept 547 # If we emit a null step then add a log to it, the log should be kept
678 # longer than 7 days (which is often needed to debug some issues). 548 # longer than 7 days (which is often needed to debug some issues).
679 self.api.m.step('Debug Info', []) 549 self.api.m.step('Debug Info', [])
680 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines 550 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines
681 551
682 def post_result(self, halt_on_failure=False): 552 def post_result(self, halt_on_failure=False):
683 """Posts bisect results to Perf Dashboard.""" 553 """Posts bisect results to Perf Dashboard."""
684 self.api.m.perf_dashboard.set_default_config() 554 self.api.m.perf_dashboard.set_default_config()
685 self.api.m.perf_dashboard.post_bisect_results( 555 self.api.m.perf_dashboard.post_bisect_results(
686 self.get_result(), halt_on_failure) 556 self.get_result(), halt_on_failure)
687 557
688 def get_revision_to_eval(self): 558 def get_revision_to_eval(self):
689 """Gets the next RevisionState object in the candidate range. 559 """Gets the next RevisionState object in the candidate range.
690 560
691 Returns: 561 Returns:
692 The next Revision object in a list. 562 The next Revision object in a list.
693 """ 563 """
694 self._update_candidate_range() 564 self._update_candidate_range()
695 candidate_range = [revision for revision in 565 candidate_range = [revision for revision in
696 self.revisions[self.lkgr.list_index + 1: 566 self.revisions[self.lkgr.list_index + 1:
697 self.fkbr.list_index] 567 self.fkbr.list_index]
698 if not revision.tested and not revision.failed] 568 if not revision.failed]
699 if len(candidate_range) == 1: 569 if len(candidate_range) == 1:
700 return candidate_range[0] 570 return candidate_range[0]
701 if len(candidate_range) == 0: 571 if len(candidate_range) == 0:
702 return None 572 return None
703 573
704 default_revision = candidate_range[len(candidate_range) / 2] 574 default_revision = candidate_range[len(candidate_range) / 2]
705 575
706 with self.api.m.step.nest( 576 with self.api.m.step.nest(
707 'Wiggling revision ' + default_revision.revision_string()): 577 'Wiggling revision ' + default_revision.revision_string()):
708 # We'll search up to 25% of the range (in either direction) to try and 578 # We'll search up to 25% of the range (in either direction) to try and
(...skipping 22 matching lines...) Expand all
731 return False 601 return False
732 602
733 def check_bisect_finished(self, revision): 603 def check_bisect_finished(self, revision):
734 """Checks if this revision completes the bisection process. 604 """Checks if this revision completes the bisection process.
735 605
736 In this case 'finished' refers to finding one revision considered 'good' 606 In this case 'finished' refers to finding one revision considered 'good'
737 immediately preceding a revision considered 'bad' where the 'bad' revision 607 immediately preceding a revision considered 'bad' where the 'bad' revision
738 does not contain a DEPS change. 608 does not contain a DEPS change.
739 """ 609 """
740 if (revision.bad and revision.previous_revision and 610 if (revision.bad and revision.previous_revision and
741 revision.previous_revision.good): # pragma: no cover 611 revision.previous_revision.good):
742 if revision.deps_change() and self._expand_deps_revisions(revision): 612 if revision.deps_change() and self._expand_deps_revisions(revision):
743 return False 613 return False
744 self.culprit = revision 614 self.culprit = revision
745 return True 615 return True
746 if (revision.good and revision.next_revision and 616 if (revision.good and revision.next_revision and
747 revision.next_revision.bad): 617 revision.next_revision.bad):
748 if (revision.next_revision.deps_change() 618 if (revision.next_revision.deps_change()
749 and self._expand_deps_revisions(revision.next_revision)): 619 and self._expand_deps_revisions(revision.next_revision)):
750 return False 620 return False
751 self.culprit = revision.next_revision 621 self.culprit = revision.next_revision
752 return True 622 return True
753 return False 623 # We'll never get here because revision adjacency is checked before this
754 624 # function is called.
755 def wait_for_all(self, revision_list): 625 assert False # pragma: no cover
756 """Waits for all revisions in list to finish."""
757 for r in revision_list:
758 self.wait_for(r)
759
760 def wait_for(self, revision, nest_check=True):
761 """Waits for the revision to finish its job."""
762 if nest_check and not self.flags.get(
763 'do_not_nest_wait_for_revision'): # pragma: no cover
764 with self.api.m.step.nest('Waiting for ' + revision.revision_string()):
765 return self.wait_for(revision, nest_check=False)
766 while True:
767 revision.update_status()
768 if revision.in_progress:
769 self.api.m.python.inline(
770 'sleeping',
771 """
772 import sys
773 import time
774 time.sleep(20*60)
775 sys.exit(0)
776 """)
777 else:
778 break
779 626
780 def _update_candidate_range(self): 627 def _update_candidate_range(self):
781 """Updates lkgr and fkbr (last known good/first known bad) revisions. 628 """Updates lkgr and fkbr (last known good/first known bad) revisions.
782 629
783 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in 630 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in
784 bisectors.revisions.""" 631 bisectors.revisions."""
785 for r in self.revisions: 632 for r in self.revisions:
786 if r.tested: 633 if r.test_run_count:
787 if r.good: 634 if r.good:
788 self.lkgr = r 635 self.lkgr = r
789 elif r.bad: 636 elif r.bad:
790 self.fkbr = r 637 self.fkbr = r
791 break 638 break
792 assert self.lkgr and self.fkbr 639 assert self.lkgr and self.fkbr
793 640
794 def get_perf_tester_name(self): 641 def get_perf_tester_name(self):
795 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. 642 """Gets the name of the tester bot (on tryserver.chromium.perf) to use.
796 643
(...skipping 17 matching lines...) Expand all
814 661
815 # TODO(prasadv): Refactor this code to remove hard coded values. 662 # TODO(prasadv): Refactor this code to remove hard coded values.
816 bot_name = self.get_perf_tester_name() 663 bot_name = self.get_perf_tester_name()
817 if 'win' in bot_name: 664 if 'win' in bot_name:
818 if any(b in bot_name for b in ['x64', 'gpu']): 665 if any(b in bot_name for b in ['x64', 'gpu']):
819 return 'winx64_bisect_builder' 666 return 'winx64_bisect_builder'
820 return 'win_perf_bisect_builder' 667 return 'win_perf_bisect_builder'
821 668
822 # TODO(prasadv): Refactor this code to remove hard coded values and use 669 # TODO(prasadv): Refactor this code to remove hard coded values and use
823 # target_bit from the bot config. crbug.com/640287 670 # target_bit from the bot config. crbug.com/640287
824 if 'android' in bot_name: 671 if 'android' in bot_name: # pragma: no cover
825 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']): 672 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']):
826 return 'android_arm64_perf_bisect_builder' 673 return 'android_arm64_perf_bisect_builder'
827 return 'android_perf_bisect_builder' 674 return 'android_perf_bisect_builder'
828 675
829 if 'mac' in bot_name: 676 if 'mac' in bot_name:
830 return 'mac_perf_bisect_builder' 677 return 'mac_perf_bisect_builder'
831 678
832 return 'linux_perf_bisect_builder' 679 return 'linux_perf_bisect_builder'
833 680
834 def get_platform_gs_prefix(self): 681 def get_platform_gs_prefix(self):
835 """Returns the prefix of a GS URL where a build can be found. 682 """Returns the prefix of a GS URL where a build can be found.
836 683
837 This prefix includes the schema, bucket, directory and beginning 684 This prefix includes the schema, bucket, directory and beginning
838 of filename. It is joined together with the part of the filename 685 of filename. It is joined together with the part of the filename
839 that includes the revision and the file extension to form the 686 that includes the revision and the file extension to form the
840 full GS URL. 687 full GS URL.
841 """ 688 """
842 if self.api.buildurl_gs_prefix: # pragma: no cover 689 if self.api.buildurl_gs_prefix: # pragma: no cover
843 return self.api.buildurl_gs_prefix 690 return self.api.buildurl_gs_prefix
844 691
845 # TODO(prasadv): Refactor this code to remove hard coded values. 692 # TODO(prasadv): Refactor this code to remove hard coded values.
846 bot_name = self.get_perf_tester_name() 693 bot_name = self.get_perf_tester_name()
847 if 'win' in bot_name: 694 if 'win' in bot_name:
848 if any(b in bot_name for b in ['x64', 'gpu']): 695 if any(b in bot_name for b in ['x64', 'gpu']):
849 return 'gs://chrome-perf/Win x64 Builder/full-build-win32_' 696 return 'gs://chrome-perf/Win x64 Builder/full-build-win32_'
850 return 'gs://chrome-perf/Win Builder/full-build-win32_' 697 return 'gs://chrome-perf/Win Builder/full-build-win32_'
851 698
852 # TODO(prasadv): Refactor this code to remove hard coded values and use 699 # TODO(prasadv): Refactor this code to remove hard coded values and use
853 # target_bit from the bot config. crbug.com/640287 700 # target_bit from the bot config. crbug.com/640287
854 if 'android' in bot_name: 701 if 'android' in bot_name: #pragma: no cover
855 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']): 702 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']):
856 return 'gs://chrome-perf/Android arm64 Builder/full-build-linux_' 703 return 'gs://chrome-perf/Android arm64 Builder/full-build-linux_'
857 return 'gs://chrome-perf/Android Builder/full-build-linux_' 704 return 'gs://chrome-perf/Android Builder/full-build-linux_'
858 705
859 if 'mac' in bot_name: 706 if 'mac' in bot_name:
860 return 'gs://chrome-perf/Mac Builder/full-build-mac_' 707 return 'gs://chrome-perf/Mac Builder/full-build-mac_'
861 708
862 return 'gs://chrome-perf/Linux Builder/full-build-linux_' 709 return 'gs://chrome-perf/Linux Builder/full-build-linux_'
863 710
864 def ensure_sync_master_branch(self): 711 def ensure_sync_master_branch(self):
865 """Make sure the local master is in sync with the fetched origin/master. 712 """Make sure the local master is in sync with the fetched origin/master.
866 713
867 We have seen on several occasions that the local master branch gets reset 714 We have seen on several occasions that the local master branch gets reset
868 to previous revisions and also detached head states. Running this should 715 to previous revisions and also detached head states. Running this should
869 take care of either situation. 716 take care of either situation.
870 """ 717 """
871 # TODO(robertocn): Investigate what causes the states mentioned in the 718 # TODO(robertocn): Investigate what causes the states mentioned in the
872 # docstring in the first place. 719 # docstring in the first place.
873 self.api.m.git('update-ref', 'refs/heads/master', 720 self.api.m.git('update-ref', 'refs/heads/master',
874 'refs/remotes/origin/master') 721 'refs/remotes/origin/master')
875 self.api.m.git('checkout', 'master', cwd=self.api.m.path['checkout']) 722 self.api.m.git('checkout', 'master', cwd=self.api.m.path['checkout'])
876 723
877 def is_return_code_mode(self): 724 def is_return_code_mode(self):
878 """Checks whether this is a bisect on the test's exit code.""" 725 """Checks whether this is a bisect on the test's exit code."""
879 return self.bisect_config.get('test_type') == 'return_code' 726 return self.test_type == 'return_code'
880 727
881 def surface_result(self, result_string): 728 def surface_result(self, result_string):
882 assert result_string in VALID_RESULT_CODES 729 assert result_string in VALID_RESULT_CODES
883 prefix = 'B4T_' # To avoid collision. Stands for bisect (abbr. `a la i18n). 730 prefix = 'B4T_' # To avoid collision. Stands for bisect (abbr. `a la i18n).
884 result_code = prefix + result_string 731 result_code = prefix + result_string
885 assert len(result_code) <= 20 732 assert len(result_code) <= 20
886 if result_code not in self.result_codes: 733 if result_code not in self.result_codes:
887 self.result_codes.add(result_code) 734 self.result_codes.add(result_code)
888 properties = self.api.m.step.active_result.presentation.properties 735 properties = self.api.m.step.active_result.presentation.properties
889 properties['extra_result_code'] = sorted(self.result_codes) 736 properties['extra_result_code'] = sorted(self.result_codes)
890 737
891 def get_result(self): 738 def get_result(self):
892 """Returns the results as a jsonable object.""" 739 """Returns the results as a jsonable object."""
893 config = self.bisect_config 740 config = self.bisect_config
894 results_confidence = 0
895 if self.culprit:
896 results_confidence = self.api.m.math_utils.confidence_score(
897 self.lkgr.values, self.fkbr.values)
898 741
899 if self.failed: 742 if self.failed:
900 status = 'failed' 743 status = 'failed'
901 elif self.bisect_over: 744 elif self.bisect_over:
902 status = 'completed' 745 status = 'completed'
903 else: 746 else:
904 status = 'started' 747 status = 'started'
905 748
906 aborted_reason = None 749 aborted_reason = None
907 if self.failed_initial_confidence: 750 if self.failed_initial_confidence:
908 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON 751 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON
909 elif self.failed_direction: 752 elif self.failed_direction:
910 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON 753 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON
911 return { 754 return {
912 'try_job_id': config.get('try_job_id'), 755 'try_job_id': config.get('try_job_id'),
913 'bug_id': config.get('bug_id'), 756 'bug_id': config.get('bug_id'),
914 'status': status, 757 'status': status,
915 'buildbot_log_url': self._get_build_url(), 758 'buildbot_log_url': self._get_build_url(),
916 'bisect_bot': self.get_perf_tester_name(), 759 'bisect_bot': self.get_perf_tester_name(),
917 'command': config['command'], 760 'command': config['command'],
918 'test_type': config['test_type'], 761 'test_type': config['test_type'],
919 'metric': config['metric'], 762 'metric': config.get('metric'),
920 'change': self.relative_change, 763 'change': self.relative_change,
921 'score': results_confidence,
922 'good_revision': self.good_rev.commit_hash, 764 'good_revision': self.good_rev.commit_hash,
923 'bad_revision': self.bad_rev.commit_hash, 765 'bad_revision': self.bad_rev.commit_hash,
924 'warnings': self.warnings, 766 'warnings': self.warnings,
925 'aborted_reason': aborted_reason, 767 'aborted_reason': aborted_reason,
926 'culprit_data': self._culprit_data(), 768 'culprit_data': self._culprit_data(),
927 'revision_data': self._revision_data() 769 'revision_data': self._revision_data()
928 } 770 }
929 771
930 def _culprit_data(self): 772 def _culprit_data(self):
931 culprit = self.culprit 773 culprit = self.culprit
(...skipping 11 matching lines...) Expand all
943 'email': culprit_info['email'], 785 'email': culprit_info['email'],
944 'cl_date': culprit_info['date'], 786 'cl_date': culprit_info['date'],
945 'commit_info': culprit_info['body'], 787 'commit_info': culprit_info['body'],
946 'revisions_links': [], 788 'revisions_links': [],
947 'cl': culprit.commit_hash 789 'cl': culprit.commit_hash
948 } 790 }
949 791
950 def _revision_data(self): 792 def _revision_data(self):
951 revision_rows = [] 793 revision_rows = []
952 for r in self.revisions: 794 for r in self.revisions:
953 if r.tested or r.aborted: 795 if r.test_run_count:
954 revision_rows.append({ 796 revision_rows.append({
955 'depot_name': r.depot_name, 797 'depot_name': r.depot_name,
956 'commit_hash': r.commit_hash, 798 'commit_hash': r.commit_hash,
957 'revision_string': r.revision_string(), 799 'revision_string': r.revision_string(),
958 'mean_value': r.mean_value, 800 'mean_value': r.mean,
959 'std_dev': r.std_dev, 801 'std_dev': r.std_dev,
960 'values': r.values, 802 'values': r.debug_values,
961 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', 803 'result': 'good' if r.good else 'bad' if r.bad else 'unknown',
962 }) 804 })
963 return revision_rows 805 return revision_rows
964 806
965 def _get_build_url(self): 807 def _get_build_url(self):
966 properties = self.api.m.properties 808 properties = self.api.m.properties
967 bot_url = properties.get('buildbotURL', 809 bot_url = properties.get('buildbotURL',
968 'http://build.chromium.org/p/chromium/') 810 'http://build.chromium.org/p/chromium/')
969 builder_name = urllib.quote(properties.get('buildername', '')) 811 builder_name = urllib.quote(properties.get('buildername', ''))
970 builder_number = str(properties.get('buildnumber', '')) 812 builder_number = str(properties.get('buildnumber', ''))
971 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) 813 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698