Chromium Code Reviews

Side by Side Diff: scripts/slave/recipe_modules/auto_bisect/bisector.py

Issue 2247373002: Refactor stages 1, 2 and test_api overhaul. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master
Patch Set: Created 4 years, 4 months ago
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import json 5 import json
6 import re 6 import re
7 import time 7 import time
8 import urllib 8 import urllib
9 9
10 from . import config_validation 10 from . import config_validation
(...skipping 29 matching lines...)
40 ) 40 )
41 41
42 # When we look for the next revision to build, we search nearby revisions 42 # When we look for the next revision to build, we search nearby revisions
43 # looking for a revision that's already been archived. Since we don't want 43 # looking for a revision that's already been archived. Since we don't want
44 # to move *too* far from the original revision, we'll cap the search at 25%. 44 # to move *too* far from the original revision, we'll cap the search at 25%.
45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25
46 46
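
The 25% cap above feeds the "wiggling" logic further down in this file, which searches near the midpoint of the candidate range for a revision that already has an archived build. Below is a minimal sketch of how such a cap could bound the search offsets; the helper name and the exact rounding are illustrative assumptions, not code from this CL:

    # Sketch: candidate offsets from a midpoint, capped at 25% of the range,
    # mirroring DEFAULT_SEARCH_RANGE_PERCENTAGE above.
    def capped_offsets(range_len, cap=0.25):
        max_offset = max(1, int(range_len * cap))
        for distance in range(1, max_offset + 1):
            yield distance    # one revision further toward the "bad" end
            yield -distance   # one revision further toward the "good" end

    # A 20-revision range stays within 5 revisions of the midpoint:
    print(list(capped_offsets(20)))   # [1, -1, 2, -2, 3, -3, 4, -4, 5, -5]
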
47 # How long to keep re-testing the initial good-bad range until a 47 # How long to keep re-testing the initial good-bad range until a
48 # significant difference is established. 48 # significant difference is established.
49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60
50 # If we reach this number of samples on the reference range and have not
51 # achieved statistical significance, bail.
52 MAX_REQUIRED_SAMPLES = 15
53
54 # Significance level to use for determining difference between revisions via
55 # hypothesis testing.
56 SIGNIFICANCE_LEVEL = 0.01
57 50
58 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( 51 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = (
59 'The metric values for the initial "good" and "bad" revisions ' 52 'The metric values for the initial "good" and "bad" revisions '
60 'do not represent a clear regression.') 53 'do not represent a clear regression.')
61 54
62 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( 55 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = (
63 'The metric values for the initial "good" and "bad" revisions match the ' 56 'The metric values for the initial "good" and "bad" revisions match the '
64 'expected direction of improvement. Thus, likely represent an improvement ' 57 'expected direction of improvement. Thus, likely represent an improvement '
65 'and not a regression.') 58 'and not a regression.')
66 59
67 60
68 class Bisector(object): 61 class Bisector(object):
69 """This class abstracts an ongoing bisect (or n-sect) job.""" 62 """This class abstracts an ongoing bisect (or n-sect) job."""
70 63
71 def __init__(self, api, bisect_config, revision_class, init_revisions=True, 64 def __init__(self, api, bisect_config, revision_class, init_revisions=True,
72 **flags): 65 **flags):
73 """Initializes the state of a new bisect job from a dictionary. 66 """Initializes the state of a new bisect job from a dictionary.
74 67
75 Note that the initial good_rev and bad_rev MUST resolve to a commit position 68 Note that the initial good_rev and bad_rev MUST resolve to a commit position
76 in the chromium repo. 69 in the chromium repo.
77 """ 70 """
78 super(Bisector, self).__init__() 71 super(Bisector, self).__init__()
72 self.loopCHECK = {}
79 self.flags = flags 73 self.flags = flags
80 self._api = api 74 self._api = api
81 self.result_codes = set() 75 self.result_codes = set()
82 self.ensure_sync_master_branch() 76 self.ensure_sync_master_branch()
83 self.bisect_config = bisect_config 77 self.bisect_config = bisect_config
84 self.config_step() 78 self.config_step()
85 self._validate_config() 79 self._validate_config()
86 self.revision_class = revision_class 80 self.revision_class = revision_class
87 self.last_tested_revision = None 81 self.last_tested_revision = None
88 82
(...skipping 59 matching lines...)
148 142
149 Returns: 143 Returns:
150 A 40-digit git commit hash string. 144 A 40-digit git commit hash string.
151 """ 145 """
152 if self._is_sha1(rev): # pragma: no cover 146 if self._is_sha1(rev): # pragma: no cover
153 return rev 147 return rev
154 if rev.isdigit(): 148 if rev.isdigit():
155 commit_position = self._api.m.commit_position.construct( 149 commit_position = self._api.m.commit_position.construct(
156 branch='refs/heads/master', value=rev) 150 branch='refs/heads/master', value=rev)
157 try: 151 try:
158 return self._api.m.crrev.to_commit_hash(commit_position) 152 return self._api.m.crrev.to_commit_hash(
153 commit_position,
154 step_test_data=lambda: self.api._test_data['hash_cp_map'][rev])
159 except self.api.m.step.StepFailure: # pragma: no cover 155 except self.api.m.step.StepFailure: # pragma: no cover
160 self.surface_result('BAD_REV') 156 self.surface_result('BAD_REV')
161 raise 157 raise
162 self.surface_result('BAD_REV') # pragma: no cover 158 self.surface_result('BAD_REV') # pragma: no cover
163 raise self.api.m.step.StepFailure( 159 raise self.api.m.step.StepFailure(
164 'Invalid input revision: %r' % (rev,)) # pragma: no cover 160 'Invalid input revision: %r' % (rev,)) # pragma: no cover
165 161
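
To make the resolution order above concrete: a 40-character hex string is accepted as-is, a purely numeric revision is wrapped into a commit-position string and looked up through crrev, and anything else fails. A standalone sketch with the crrev lookup stubbed out; the commit-position format and the fake lookup table are assumptions for illustration:

    import re

    def resolve_to_hash(rev, crrev_lookup):
        if re.match('^[0-9A-Fa-f]{40}$', rev):       # already a full SHA-1
            return rev
        if rev.isdigit():                             # numeric commit position
            commit_position = 'refs/heads/master@{#%s}' % rev
            return crrev_lookup(commit_position)      # may fail for unknown positions
        raise ValueError('Invalid input revision: %r' % (rev,))

    # Fake lookup standing in for the crrev service:
    fake_crrev = {'refs/heads/master@{#453623}': 'a' * 40}.get
    print(resolve_to_hash('453623', fake_crrev))
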
166 @staticmethod 162 @staticmethod
167 def _is_sha1(s): 163 def _is_sha1(s):
168 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) 164 return bool(re.match('^[0-9A-Fa-f]{40}$', s))
169 165
170 def significantly_different( 166 def compare_revisions(self, revision_a, revision_b):
171 self, list_a, list_b, 167 """
172 significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover 168 Returns:
173 """Uses an external script to run hypothesis testing with scipy. 169 True if the samples are significantly different.
170 None if there is not enough data to tell.
171 False if there's enough data but still can't tell the samples apart.
172 """
173 output_format = 'chartjson'
174 values_a = revision_a.chartjson_paths
175 values_b = revision_b.chartjson_paths
176 if revision_a.valueset_paths and revision_b.valueset_paths:
177 output_format = 'valueset'
174 178
175 The reason why we need an external script is that scipy is not available to 179 result = self.api.stat_compare(
176 the default python installed in all platforms. We instead rely on an 180 values_a,
177 anaconda environment to provide those packages. 181 values_b,
182 self.bisect_config['metric'],
183 output_format=output_format,
184 step_test_data=lambda: self.api.test_api.compare_samples_data(
185 self.api._test_data.get('revision_data'), revision_a, revision_b))
178 186
179 Args: 187 revision_a.debug_values = result['sample_a']['debug_values']
180 list_a, list_b: Two lists representing samples to be compared. 188 revision_b.debug_values = result['sample_b']['debug_values']
181 significance_level: Self-describing. As a decimal fraction. 189 revision_a.mean = result['sample_a']['mean']
190 revision_b.mean = result['sample_b']['mean']
191 revision_a.std_dev = result['sample_a']['std_dev']
192 revision_b.std_dev = result['sample_b']['std_dev']
182 193
183 Returns: 194 if result['result'] == 'needMoreData':
184 A boolean indicating whether the null hypothesis ~(that the lists are 195 key = tuple(values_a), tuple(values_b)
185 samples from the same population) can be rejected at the specified 196 self.loopCHECK.setdefault(key, 0)
186 significance level. 197 self.loopCHECK[key] += 1
RobertoCN 2016/08/23 00:26:23 Remove loop check and debug prints
RobertoCN 2016/09/07 00:33:24 Done.
187 """ 198 if self.loopCHECK[key] > 10:
188 step_result = self.api.m.python( 199 raise Exception('loopCHECK!@')
189 'Checking sample difference', 200 print result['result'], revision_a.debug_values, revision_b.debug_values
190 self.api.resource('significantly_different.py'), 201 print revision_a.bisector.revisions
191 [json.dumps(list_a), json.dumps(list_b), str(significance_level)], 202 return None
192 stdout=self.api.m.json.output()) 203 return bool(result['result'])
193 results = step_result.stdout
194 if results is None:
195 assert self.dummy_builds
196 return True
197 significantly_different = results['significantly_different']
198 step_result.presentation.logs[str(significantly_different)] = [
199 'See json.output for details']
200 return significantly_different
201 204
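
A note on the three-valued return of compare_revisions above: True means the samples differ significantly, None means more data is needed, and False means enough data was collected without finding a difference. Below is a self-contained sketch of how a caller might branch on it; the Fake class and the retest() behaviour are stand-ins, not the recipe API:

    class FakeRevision(object):
        def __init__(self, name, test_run_count):
            self.name = name
            self.test_run_count = test_run_count
        def retest(self):                 # assumed: schedules one more benchmark run
            print('retesting %s' % self.name)

    def act_on_comparison(result, rev_a, rev_b):
        if result is None:                # not enough data to tell yet
            min(rev_a, rev_b, key=lambda r: r.test_run_count).retest()
        elif result:                      # significantly different
            print('difference established')
        else:                             # enough data, still indistinguishable
            print('no significant difference; stop retesting this pair')

    act_on_comparison(None, FakeRevision('good', 5), FakeRevision('bad', 7))
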
202 def config_step(self): 205 def config_step(self):
203 """Yields a step that prints the bisect config.""" 206 """Yields a step that prints the bisect config."""
204 api = self.api 207 api = self.api
205 208
206 # bisect_config may come as a FrozenDict (which is not serializable). 209 # bisect_config may come as a FrozenDict (which is not serializable).
207 bisect_config = dict(self.bisect_config) 210 bisect_config = dict(self.bisect_config)
208 211
209 def fix_windows_backslashes(s): 212 def fix_windows_backslashes(s):
210 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') 213 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)')
(...skipping 17 matching lines...)
228 except config_validation.ValidationFail as error: 231 except config_validation.ValidationFail as error:
229 self.surface_result('BAD_CONFIG') 232 self.surface_result('BAD_CONFIG')
230 self.api.m.halt(error.message) 233 self.api.m.halt(error.message)
231 raise self.api.m.step.StepFailure(error.message) 234 raise self.api.m.step.StepFailure(error.message)
232 235
233 @property 236 @property
234 def api(self): 237 def api(self):
235 return self._api 238 return self._api
236 239
237 def compute_relative_change(self): 240 def compute_relative_change(self):
238 old_value = float(self.good_rev.mean_value) 241 old_value = float(self.good_rev.mean)
239 new_value = float(self.bad_rev.mean_value) 242 new_value = float(self.bad_rev.mean)
240 243
241 if new_value and not old_value: # pragma: no cover 244 if new_value and not old_value: # pragma: no cover
242 self.relative_change = ZERO_TO_NON_ZERO 245 self.relative_change = ZERO_TO_NON_ZERO
243 return 246 return
244 247
245 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) 248 rel_change = self.api.m.math_utils.relative_change(old_value, new_value)
246 self.relative_change = '%.2f%%' % (100 * rel_change) 249 self.relative_change = '%.2f%%' % (100 * rel_change)
247 250
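
For reference, the relative change reported above is the usual (new - old) / old expressed as a percentage with two decimals; the exact formula inside math_utils.relative_change is assumed here. A worked example:

    def relative_change(old_value, new_value):
        # Assumed formula, matching the '%.2f%%' formatting above.
        return (new_value - old_value) / old_value

    old, new = 120.0, 150.0
    print('%.2f%%' % (100 * relative_change(old, new)))   # -> 25.00%
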
248 def make_deps_sha_file(self, deps_sha): 251 def make_deps_sha_file(self, deps_sha):
249 """Make a diff patch that creates DEPS.sha. 252 """Make a diff patch that creates DEPS.sha.
(...skipping 16 matching lines...)
266 file is to be written to. 269 file is to be written to.
267 commit_hash (str): An identifier for the step. 270 commit_hash (str): An identifier for the step.
268 271
269 Returns: 272 Returns:
270 A string containing the hash of the interned object. 273 A string containing the hash of the interned object.
271 """ 274 """
272 cmd = 'hash-object -t blob -w --stdin'.split(' ') 275 cmd = 'hash-object -t blob -w --stdin'.split(' ')
273 stdin = self.api.m.raw_io.input(file_contents) 276 stdin = self.api.m.raw_io.input(file_contents)
274 stdout = self.api.m.raw_io.output() 277 stdout = self.api.m.raw_io.output()
275 step_name = 'Hashing modified DEPS file with revision ' + commit_hash 278 step_name = 'Hashing modified DEPS file with revision ' + commit_hash
276 step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout, 279 step_result = self.api.m.git(
277 name=step_name) 280 *cmd, cwd=cwd, stdin=stdin, stdout=stdout, name=step_name,
281 step_test_data=lambda:
282 self.api.m.raw_io.test_api.stream_output(commit_hash))
278 hash_string = step_result.stdout.splitlines()[0] 283 hash_string = step_result.stdout.splitlines()[0]
279 try: 284 try:
280 if hash_string: 285 if hash_string:
281 int(hash_string, 16) 286 int(hash_string, 16)
282 return hash_string 287 return hash_string
283 except ValueError: # pragma: no cover 288 except ValueError: # pragma: no cover
284 reason = 'Git did not output a valid hash for the interned file.' 289 reason = 'Git did not output a valid hash for the interned file.'
285 self.api.m.halt(reason) 290 self.api.m.halt(reason)
286 raise self.api.m.step.StepFailure(reason) 291 raise self.api.m.step.StepFailure(reason)
287 292
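
The hashing step above is plain `git hash-object -t blob -w --stdin`: it writes the modified DEPS contents into the repository's object database and prints the resulting blob hash, which the diff step below then references. A rough standalone equivalent (Python 2, like the recipe code); the checkout path is an assumption:

    import subprocess

    def intern_blob(file_contents, cwd):
        """Write file_contents into the git object DB and return its blob hash."""
        proc = subprocess.Popen(
            ['git', 'hash-object', '-t', 'blob', '-w', '--stdin'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, cwd=cwd)
        out, _ = proc.communicate(file_contents)
        hash_string = out.splitlines()[0].strip()
        int(hash_string, 16)   # same sanity check as above: must parse as hex
        return hash_string

    # Example (assumes /path/to/checkout is an existing git checkout):
    # print(intern_blob('vars = {}\n', '/path/to/checkout'))
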
(...skipping 14 matching lines...)
302 Returns: 307 Returns:
303 A string containing the diff patch as produced by the 'git diff' command. 308 A string containing the diff patch as produced by the 'git diff' command.
304 """ 309 """
305 # The prefixes used in the command below are used to find and replace the 310 # The prefixes used in the command below are used to find and replace the
306 # tree-ish git object id's on the diff output more easily. 311 # tree-ish git object id's on the diff output more easily.
307 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' 312 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:'
308 cmd %= (git_object_a, git_object_b) 313 cmd %= (git_object_a, git_object_b)
309 cmd = cmd.split(' ') 314 cmd = cmd.split(' ')
310 stdout = self.api.m.raw_io.output() 315 stdout = self.api.m.raw_io.output()
311 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) 316 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev)
312 step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name) 317 step_result = self.api.m.git(
318 *cmd, cwd=cwd, stdout=stdout, name=step_name,
319 step_test_data=lambda: self.api._test_data['diff_patch'])
313 patch_text = step_result.stdout 320 patch_text = step_result.stdout
314 src_string = 'IAMSRC:' + git_object_a 321 src_string = 'IAMSRC:' + git_object_a
315 dst_string = 'IAMDST:' + git_object_b 322 dst_string = 'IAMDST:' + git_object_b
316 patch_text = patch_text.replace(src_string, src_alias) 323 patch_text = patch_text.replace(src_string, src_alias)
317 patch_text = patch_text.replace(dst_string, dst_alias) 324 patch_text = patch_text.replace(dst_string, dst_alias)
318 return patch_text 325 return patch_text
319 326
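
On the IAMSRC:/IAMDST: prefixes above: when diffing two raw git objects the default a/ and b/ path prefixes are awkward to rewrite reliably, so the command tags each side with a unique marker plus the object id, making the later replace() calls unambiguous. A tiny illustration on a fabricated diff header; the object ids and aliases are made up:

    object_a = '1' * 40
    object_b = '2' * 40

    # Roughly what the patch header produced above looks like:
    patch_text = ('--- IAMSRC:%s\n'
                  '+++ IAMDST:%s\n') % (object_a, object_b)

    # Swap the tagged object ids for friendlier path aliases, as the code above does:
    patch_text = patch_text.replace('IAMSRC:' + object_a, 'DEPS')
    patch_text = patch_text.replace('IAMDST:' + object_b, 'DEPS.new')
    print(patch_text)
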
320 def make_deps_patch(self, base_revision, base_file_contents, 327 def make_deps_patch(self, base_revision, base_file_contents,
321 depot, new_commit_hash): 328 depot, new_commit_hash):
322 """Make a diff patch that updates a specific dependency revision. 329 """Make a diff patch that updates a specific dependency revision.
(...skipping 42 matching lines...)
365 interned_deps_hash, deps_file, deps_file, 372 interned_deps_hash, deps_file, deps_file,
366 cwd=cwd, 373 cwd=cwd,
367 deps_rev=new_commit_hash) 374 deps_rev=new_commit_hash)
368 return patch_text, patched_contents 375 return patch_text, patched_contents
369 376
370 def _expand_initial_revision_range(self): 377 def _expand_initial_revision_range(self):
371 """Sets the initial contents of |self.revisions|.""" 378 """Sets the initial contents of |self.revisions|."""
372 with self.api.m.step.nest('Expanding revision range'): 379 with self.api.m.step.nest('Expanding revision range'):
373 good_hash = self.good_rev.commit_hash 380 good_hash = self.good_rev.commit_hash
374 bad_hash = self.bad_rev.commit_hash 381 bad_hash = self.bad_rev.commit_hash
382 depot = self.good_rev.depot_name
375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) 383 step_name = 'for revisions %s:%s' % (good_hash, bad_hash)
376 revisions = self._revision_range( 384 revisions = self._revision_range(
377 start=good_hash, 385 start=good_hash,
378 end=bad_hash, 386 end=bad_hash,
379 depot_name=self.base_depot, 387 depot_name=self.base_depot,
380 step_name=step_name) 388 step_name=step_name,
389 step_test_data=lambda: self.api._test_data['revision_list'][depot]
390 )
381 self.revisions = [self.good_rev] + revisions + [self.bad_rev] 391 self.revisions = [self.good_rev] + revisions + [self.bad_rev]
382 self._update_revision_list_indexes() 392 self._update_revision_list_indexes()
383 393
384 def _revision_range(self, start, end, depot_name, base_revision=None, 394 def _revision_range(self, start, end, depot_name, base_revision=None,
385 step_name=None): 395 step_name=None, **kwargs):
386 """Returns a list of RevisionState objects between |start| and |end|. 396 """Returns a list of RevisionState objects between |start| and |end|.
387 397
388 Args: 398 Args:
389 start (str): Start commit hash. 399 start (str): Start commit hash.
390 end (str): End commit hash. 400 end (str): End commit hash.
391 depot_name (str): Short string name of repo, e.g. chromium or v8. 401 depot_name (str): Short string name of repo, e.g. chromium or v8.
392 base_revision (str): Base revision in the downstream repo (e.g. chromium). 402 base_revision (str): Base revision in the downstream repo (e.g. chromium).
393 step_name (str): Optional step name. 403 step_name (str): Optional step name.
394 404
395 Returns: 405 Returns:
396 A list of RevisionState objects, not including the given start or end. 406 A list of RevisionState objects, not including the given start or end.
397 """ 407 """
398 if self.internal_bisect: # pragma: no cover 408 if self.internal_bisect: # pragma: no cover
399 return self._revision_range_with_gitiles( 409 return self._revision_range_with_gitiles(
400 start, end, depot_name, base_revision, step_name) 410 start, end, depot_name, base_revision, step_name)
401 try: 411 try:
402 step_result = self.api.m.python( 412 step_result = self.api.m.python(
403 step_name, 413 step_name,
404 self.api.resource('fetch_intervening_revisions.py'), 414 self.api.resource('fetch_intervening_revisions.py'),
405 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], 415 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']],
406 stdout=self.api.m.json.output()) 416 stdout=self.api.m.json.output(), **kwargs)
407 except self.api.m.step.StepFailure: # pragma: no cover 417 except self.api.m.step.StepFailure: # pragma: no cover
408 self.surface_result('BAD_REV') 418 self.surface_result('BAD_REV')
409 raise 419 raise
410 revisions = [] 420 revisions = []
411 for commit_hash, _ in step_result.stdout: 421 for commit_hash, _ in step_result.stdout:
412 revisions.append(self.revision_class( 422 revisions.append(self.revision_class(
413 bisector=self, 423 bisector=self,
414 commit_hash=commit_hash, 424 commit_hash=commit_hash,
415 depot_name=depot_name, 425 depot_name=depot_name,
416 base_revision=base_revision)) 426 base_revision=base_revision))
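
The loop above assumes fetch_intervening_revisions.py emits a JSON list of two-element entries whose first element is a commit hash (the second element is ignored here, and its meaning is not shown in this diff). A minimal sketch of that parsing with a fabricated payload:

    import json

    # Fabricated stdout shaped like what the loop above expects.
    raw_stdout = json.dumps([
        ['a' * 40, '1'],
        ['b' * 40, '2'],
    ])

    revisions = []
    for commit_hash, _ in json.loads(raw_stdout):
        revisions.append(commit_hash)    # the real code wraps this in revision_class
    print(revisions)
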
(...skipping 85 matching lines...)
502 dep_revision_max = max_revision.deps[depot_name] 512 dep_revision_max = max_revision.deps[depot_name]
503 if (dep_revision_min and dep_revision_max and 513 if (dep_revision_min and dep_revision_max and
504 dep_revision_min != dep_revision_max): 514 dep_revision_min != dep_revision_max):
505 step_name = ('Expanding revision range for revision %s' 515 step_name = ('Expanding revision range for revision %s'
506 ' on depot %s' % (dep_revision_max, depot_name)) 516 ' on depot %s' % (dep_revision_max, depot_name))
507 rev_list = self._revision_range( 517 rev_list = self._revision_range(
508 start=dep_revision_min, 518 start=dep_revision_min,
509 end=dep_revision_max, 519 end=dep_revision_max,
510 depot_name=depot_name, 520 depot_name=depot_name,
511 base_revision=min_revision, 521 base_revision=min_revision,
512 step_name=step_name) 522 step_name=step_name,
523 step_test_data=lambda:
524 self.api._test_data['revision_list'][depot_name])
513 new_revisions = self.revisions[:max_revision.list_index] 525 new_revisions = self.revisions[:max_revision.list_index]
514 new_revisions += rev_list 526 new_revisions += rev_list
515 new_revisions += self.revisions[max_revision.list_index:] 527 new_revisions += self.revisions[max_revision.list_index:]
516 self.revisions = new_revisions 528 self.revisions = new_revisions
517 self._update_revision_list_indexes() 529 self._update_revision_list_indexes()
518 return True 530 return True
519 except RuntimeError: # pragma: no cover 531 except RuntimeError: # pragma: no cover
520 warning_text = ('Could not expand dependency revisions for ' + 532 warning_text = ('Could not expand dependency revisions for ' +
521 revision_to_expand.commit_hash) 533 revision_to_expand.commit_hash)
522 self.surface_result('BAD_REV') 534 self.surface_result('BAD_REV')
(...skipping 16 matching lines...)
539 551
540 The change between the test results obtained for the given 'good' and 552 The change between the test results obtained for the given 'good' and
541 'bad' revisions is expected to be considered a regression. The 553 'bad' revisions is expected to be considered a regression. The
542 `improvement_direction` attribute is positive if a larger number is 554 `improvement_direction` attribute is positive if a larger number is
543 considered better, and negative if a smaller number is considered better. 555 considered better, and negative if a smaller number is considered better.
544 556
545 Returns: 557 Returns:
546 True if the check passes (i.e. no problem), False if the change is not 558 True if the check passes (i.e. no problem), False if the change is not
547 a regression according to the improvement direction. 559 a regression according to the improvement direction.
548 """ 560 """
549 good = self.good_rev.mean_value 561 good = self.good_rev.mean
550 bad = self.bad_rev.mean_value 562 bad = self.bad_rev.mean
551 563
552 if self.is_return_code_mode(): 564 if self.is_return_code_mode():
553 return True 565 return True
554 566
555 direction = self.improvement_direction 567 direction = self.improvement_direction
556 if direction is None: 568 if direction is None:
557 return True 569 return True
558 if (bad > good and direction > 0) or (bad < good and direction < 0): 570 if (bad > good and direction > 0) or (bad < good and direction < 0):
559 self._set_failed_direction_results() 571 self._set_failed_direction_results()
560 return False 572 return False
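
Put differently: with a positive improvement direction (larger values are better) the "bad" revision must score lower than the "good" one for the job to proceed; if the metric moved in the improving direction, the bisect aborts as a non-regression. A small standalone truth table of that check:

    def is_possible_regression(good, bad, direction):
        """True unless the good->bad change moved in the improving direction."""
        if direction is None:
            return True
        if (bad > good and direction > 0) or (bad < good and direction < 0):
            return False          # the "regression" is actually an improvement
        return True

    # direction=+1: larger is better, so 100 -> 90 is a regression, 100 -> 110 is not.
    print(is_possible_regression(100, 90, +1))    # True
    print(is_possible_regression(100, 110, +1))   # False
    print(is_possible_regression(100, 110, -1))   # True (smaller is better, got worse)
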
(...skipping 19 matching lines...)
580 592
581 Returns: True if the revisions produced results that differ from each 593 Returns: True if the revisions produced results that differ from each
582 other in a statistically significant manner. False if such difference could 594 other in a statistically significant manner. False if such difference could
583 not be established in the time or sample size allowed. 595 not be established in the time or sample size allowed.
584 """ 596 """
585 if self.test_type == 'return_code': 597 if self.test_type == 'return_code':
586 return (self.good_rev.overall_return_code != 598 return (self.good_rev.overall_return_code !=
587 self.bad_rev.overall_return_code) 599 self.bad_rev.overall_return_code)
588 600
589 if self.bypass_stats_check: 601 if self.bypass_stats_check:
590 dummy_result = self.good_rev.values != self.bad_rev.values 602 self.compare_revisions(self.good_rev, self.bad_rev)
603 dummy_result = self.good_rev.mean != self.bad_rev.mean
591 if not dummy_result: 604 if not dummy_result:
592 self._set_insufficient_confidence_warning() 605 self._set_insufficient_confidence_warning()
593 return dummy_result 606 return dummy_result
594 607
608 # TODO(robertocn): This step should not be necessary in some cases.
595 with self.api.m.step.nest('Re-testing reference range'): 609 with self.api.m.step.nest('Re-testing reference range'):
596 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT 610 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT
597 while time.time() < expiration_time: 611 while time.time() < expiration_time:
598 if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5: 612 if (self.good_rev.test_run_count >= 5
599 if self.significantly_different(self.good_rev.values, 613 and self.bad_rev.test_run_count >= 5):
600 self.bad_rev.values): 614 if self.compare_revisions(self.good_rev, self.bad_rev):
601 return True 615 return True
602 if len(self.good_rev.values) == len(self.bad_rev.values): 616 if self.good_rev.test_run_count == self.bad_rev.test_run_count:
603 revision_to_retest = self.last_tested_revision 617 revision_to_retest = self.last_tested_revision
604 else: 618 else:
605 revision_to_retest = min(self.good_rev, self.bad_rev, 619 revision_to_retest = min(self.good_rev, self.bad_rev,
606 key=lambda x: len(x.values)) 620 key=lambda x: x.test_run_count)
607 if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES:
608 revision_to_retest.retest()
609 else:
610 break
611 self._set_insufficient_confidence_warning() 621 self._set_insufficient_confidence_warning()
612 return False 622 return False
613 623
614 624
615 def get_exception(self): 625 def get_exception(self):
616 raise NotImplementedError() # pragma: no cover 626 raise NotImplementedError() # pragma: no cover
617 # TODO: should return an exception with the details of the failure. 627 # TODO: should return an exception with the details of the failure.
618 628
619 def _set_insufficient_confidence_warning( 629 def _set_insufficient_confidence_warning(
620 self): # pragma: no cover 630 self): # pragma: no cover
621 """Adds a warning about the lack of initial regression confidence.""" 631 """Adds a warning about the lack of initial regression confidence."""
622 self.failed_initial_confidence = True 632 self.failed_initial_confidence = True
623 self.surface_result('LO_INIT_CONF') 633 self.surface_result('LO_INIT_CONF')
624 self.warnings.append( 634 self.warnings.append(
625 'Bisect failed to reproduce the regression with enough confidence.') 635 'Bisect failed to reproduce the regression with enough confidence.')
626 636
627 def _results_debug_message(self): 637 def _results_debug_message(self):
628 """Returns a string with values used to debug a bisect result.""" 638 """Returns a string with values used to debug a bisect result."""
629 result = 'bisector.lkgr: %r\n' % self.lkgr 639 result = 'bisector.lkgr: %r\n' % self.lkgr
630 result += 'bisector.fkbr: %r\n\n' % self.fkbr 640 result += 'bisector.fkbr: %r\n\n' % self.fkbr
631 result += self._revision_value_table() 641 result += self._revision_value_table()
632 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values): 642 if (self.lkgr and self.lkgr.test_run_count and self.fkbr and
633 result += '\n' + self._t_test_results() 643 self.fkbr.test_run_count):
644 result += '\n' + '\n'.join([
645 'LKGR values: %r' % list(self.lkgr.debug_values),
646 'FKBR values: %r' % list(self.fkbr.debug_values),
647 ])
634 return result 648 return result
635 649
636 def _revision_value_table(self): 650 def _revision_value_table(self):
637 """Returns a string table showing revisions and their values.""" 651 """Returns a string table showing revisions and their values."""
638 header = [['Revision', 'Values']] 652 header = [['Revision', 'Values']]
639 rows = [[r.revision_string(), str(r.values)] for r in self.revisions] 653 rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions]
640 return self._pretty_table(header + rows) 654 return self._pretty_table(header + rows)
641 655
642 def _pretty_table(self, data): 656 def _pretty_table(self, data):
643 results = [] 657 results = []
644 for row in data: 658 for row in data:
645 results.append('%-15s' * len(row) % tuple(row)) 659 results.append('%-15s' * len(row) % tuple(row))
646 return '\n'.join(results) 660 return '\n'.join(results)
647 661
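
A quick note on the formatting above: every cell is left-justified into a 15-character column via '%-15s', so rows stay aligned as long as no cell is wider than that. A standalone example of the same one-liner:

    def pretty_table(data):
        # Left-justify every cell in a 15-character column, one row per line.
        return '\n'.join('%-15s' * len(row) % tuple(row) for row in data)

    rows = [['Revision', 'Values'],
            ['chromium@1001', '[10.1, 10.3]'],
            ['chromium@1002', '[12.7, 12.9]']]
    print(pretty_table(rows))
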
648 def _t_test_results(self):
649 """Returns a string showing t-test results for lkgr and fkbr."""
650 t, df, p = self.api.m.math_utils.welchs_t_test(
651 self.lkgr.values, self.fkbr.values)
652 lines = [
653 'LKGR values: %r' % self.lkgr.values,
654 'FKBR values: %r' % self.fkbr.values,
655 't-statistic: %r' % t,
656 'deg. of freedom: %r' % df,
657 'p-value: %r' % p,
658 'Confidence score: %r' % (100 * (1 - p))
659 ]
660 return '\n'.join(lines)
661
662 def print_result_debug_info(self): 662 def print_result_debug_info(self):
663 """Prints extra debug info at the end of the bisect process.""" 663 """Prints extra debug info at the end of the bisect process."""
664 lines = self._results_debug_message().splitlines() 664 lines = self._results_debug_message().splitlines()
665 # If we emit a null step then add a log to it, the log should be kept 665 # If we emit a null step then add a log to it, the log should be kept
666 # longer than 7 days (which is often needed to debug some issues). 666 # longer than 7 days (which is often needed to debug some issues).
667 self.api.m.step('Debug Info', []) 667 self.api.m.step('Debug Info', [])
668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines 668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines
669 669
670 def post_result(self, halt_on_failure=False): 670 def post_result(self, halt_on_failure=False):
671 """Posts bisect results to Perf Dashboard.""" 671 """Posts bisect results to Perf Dashboard."""
672 self.api.m.perf_dashboard.set_default_config() 672 self.api.m.perf_dashboard.set_default_config()
673 self.api.m.perf_dashboard.post_bisect_results( 673 self.api.m.perf_dashboard.post_bisect_results(
674 self.get_result(), halt_on_failure) 674 self.get_result(), halt_on_failure)
675 675
676 def get_revision_to_eval(self): 676 def get_revision_to_eval(self):
677 """Gets the next RevisionState object in the candidate range. 677 """Gets the next RevisionState object in the candidate range.
678 678
679 Returns: 679 Returns:
680 The next Revision object in a list. 680 The next Revision object in a list.
681 """ 681 """
682 self._update_candidate_range() 682 self._update_candidate_range()
683 candidate_range = [revision for revision in 683 candidate_range = [revision for revision in
684 self.revisions[self.lkgr.list_index + 1: 684 self.revisions[self.lkgr.list_index + 1:
685 self.fkbr.list_index] 685 self.fkbr.list_index]
686 if not revision.tested and not revision.failed] 686 if not revision.failed]
687 if len(candidate_range) == 1: 687 if len(candidate_range) == 1:
688 return candidate_range[0] 688 return candidate_range[0]
689 if len(candidate_range) == 0: 689 if len(candidate_range) == 0:
690 return None 690 return None
691 691
692 default_revision = candidate_range[len(candidate_range) / 2] 692 default_revision = candidate_range[len(candidate_range) / 2]
693 693
694 with self.api.m.step.nest( 694 with self.api.m.step.nest(
695 'Wiggling revision ' + default_revision.revision_string()): 695 'Wiggling revision ' + default_revision.revision_string()):
696 # We'll search up to 25% of the range (in either direction) to try and 696 # We'll search up to 25% of the range (in either direction) to try and
(...skipping 36 matching lines...)
733 return True 733 return True
734 if (revision.good and revision.next_revision and 734 if (revision.good and revision.next_revision and
735 revision.next_revision.bad): 735 revision.next_revision.bad):
736 if (revision.next_revision.deps_change() 736 if (revision.next_revision.deps_change()
737 and self._expand_deps_revisions(revision.next_revision)): 737 and self._expand_deps_revisions(revision.next_revision)):
738 return False 738 return False
739 self.culprit = revision.next_revision 739 self.culprit = revision.next_revision
740 return True 740 return True
741 return False 741 return False
742 742
743 def wait_for_all(self, revision_list):
744 """Waits for all revisions in list to finish."""
745 for r in revision_list:
746 self.wait_for(r)
747
748 def wait_for(self, revision, nest_check=True):
749 """Waits for the revision to finish its job."""
750 if nest_check and not self.flags.get(
751 'do_not_nest_wait_for_revision'): # pragma: no cover
752 with self.api.m.step.nest('Waiting for ' + revision.revision_string()):
753 return self.wait_for(revision, nest_check=False)
754 while True:
755 revision.update_status()
756 if revision.in_progress:
757 self.api.m.python.inline(
758 'sleeping',
759 """
760 import sys
761 import time
762 time.sleep(20*60)
763 sys.exit(0)
764 """)
765 else:
766 break
767
768 def _update_candidate_range(self): 743 def _update_candidate_range(self):
769 """Updates lkgr and fkbr (last known good/first known bad) revisions. 744 """Updates lkgr and fkbr (last known good/first known bad) revisions.
770 745
771 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in 746 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in
772 bisectors.revisions.""" 747 bisectors.revisions."""
773 for r in self.revisions: 748 for r in self.revisions:
774 if r.tested: 749 if r.test_run_count:
775 if r.good: 750 if r.good:
776 self.lkgr = r 751 self.lkgr = r
777 elif r.bad: 752 elif r.bad:
778 self.fkbr = r 753 self.fkbr = r
779 break 754 break
780 assert self.lkgr and self.fkbr 755 assert self.lkgr and self.fkbr
781 756
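
The scan above walks the ordered revision list, remembering the last revision that tested good (lkgr) and stopping at the first one that tested bad (fkbr); revisions without test runs are skipped. A self-contained sketch of that walk, with tuples standing in for RevisionState objects:

    def candidate_range(revisions):
        """revisions: ordered (name, tested, good) tuples; returns (lkgr, fkbr)."""
        lkgr = fkbr = None
        for name, tested, good in revisions:
            if not tested:
                continue              # untested revisions do not move the pointers
            if good:
                lkgr = name
            else:
                fkbr = name
                break                 # first known bad ends the scan
        return lkgr, fkbr

    revs = [('r1', True, True), ('r2', False, None),
            ('r3', True, True), ('r4', False, None), ('r5', True, False)]
    print(candidate_range(revs))      # -> ('r3', 'r5')
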
782 def get_perf_tester_name(self): 757 def get_perf_tester_name(self):
783 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. 758 """Gets the name of the tester bot (on tryserver.chromium.perf) to use.
784 759
(...skipping 84 matching lines...)
869 assert len(result_code) <= 20 844 assert len(result_code) <= 20
870 if result_code not in self.result_codes: 845 if result_code not in self.result_codes:
871 self.result_codes.add(result_code) 846 self.result_codes.add(result_code)
872 properties = self.api.m.step.active_result.presentation.properties 847 properties = self.api.m.step.active_result.presentation.properties
873 properties['extra_result_code'] = sorted(self.result_codes) 848 properties['extra_result_code'] = sorted(self.result_codes)
874 849
875 def get_result(self): 850 def get_result(self):
876 """Returns the results as a jsonable object.""" 851 """Returns the results as a jsonable object."""
877 config = self.bisect_config 852 config = self.bisect_config
878 results_confidence = 0 853 results_confidence = 0
879 if self.culprit:
880 results_confidence = self.api.m.math_utils.confidence_score(
881 self.lkgr.values, self.fkbr.values)
882 854
883 if self.failed: 855 if self.failed:
884 status = 'failed' 856 status = 'failed'
885 elif self.bisect_over: 857 elif self.bisect_over:
886 status = 'completed' 858 status = 'completed'
887 else: 859 else:
888 status = 'started' 860 status = 'started'
889 861
890 aborted_reason = None 862 aborted_reason = None
891 if self.failed_initial_confidence: 863 if self.failed_initial_confidence:
892 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON 864 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON
893 elif self.failed_direction: 865 elif self.failed_direction:
894 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON 866 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON
895 return { 867 return {
896 'try_job_id': config.get('try_job_id'), 868 'try_job_id': config.get('try_job_id'),
897 'bug_id': config.get('bug_id'), 869 'bug_id': config.get('bug_id'),
898 'status': status, 870 'status': status,
899 'buildbot_log_url': self._get_build_url(), 871 'buildbot_log_url': self._get_build_url(),
900 'bisect_bot': self.get_perf_tester_name(), 872 'bisect_bot': self.get_perf_tester_name(),
901 'command': config['command'], 873 'command': config['command'],
902 'test_type': config['test_type'], 874 'test_type': config['test_type'],
903 'metric': config['metric'], 875 'metric': config['metric'],
904 'change': self.relative_change, 876 'change': self.relative_change,
905 'score': results_confidence,
906 'good_revision': self.good_rev.commit_hash, 877 'good_revision': self.good_rev.commit_hash,
907 'bad_revision': self.bad_rev.commit_hash, 878 'bad_revision': self.bad_rev.commit_hash,
908 'warnings': self.warnings, 879 'warnings': self.warnings,
909 'aborted_reason': aborted_reason, 880 'aborted_reason': aborted_reason,
910 'culprit_data': self._culprit_data(), 881 'culprit_data': self._culprit_data(),
911 'revision_data': self._revision_data() 882 'revision_data': self._revision_data()
912 } 883 }
913 884
914 def _culprit_data(self): 885 def _culprit_data(self):
915 culprit = self.culprit 886 culprit = self.culprit
(...skipping 11 matching lines...)
927 'email': culprit_info['email'], 898 'email': culprit_info['email'],
928 'cl_date': culprit_info['date'], 899 'cl_date': culprit_info['date'],
929 'commit_info': culprit_info['body'], 900 'commit_info': culprit_info['body'],
930 'revisions_links': [], 901 'revisions_links': [],
931 'cl': culprit.commit_hash 902 'cl': culprit.commit_hash
932 } 903 }
933 904
934 def _revision_data(self): 905 def _revision_data(self):
935 revision_rows = [] 906 revision_rows = []
936 for r in self.revisions: 907 for r in self.revisions:
937 if r.tested or r.aborted: 908 if r.test_run_count:
938 revision_rows.append({ 909 revision_rows.append({
939 'depot_name': r.depot_name, 910 'depot_name': r.depot_name,
940 'commit_hash': r.commit_hash, 911 'commit_hash': r.commit_hash,
941 'revision_string': r.revision_string(), 912 'revision_string': r.revision_string(),
942 'mean_value': r.mean_value, 913 'mean_value': r.mean,
943 'std_dev': r.std_dev, 914 'std_dev': r.std_dev,
944 'values': r.values, 915 'values': r.debug_values,
945 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', 916 'result': 'good' if r.good else 'bad' if r.bad else 'unknown',
946 }) 917 })
947 return revision_rows 918 return revision_rows
948 919
949 def _get_build_url(self): 920 def _get_build_url(self):
950 properties = self.api.m.properties 921 properties = self.api.m.properties
951 bot_url = properties.get('buildbotURL', 922 bot_url = properties.get('buildbotURL',
952 'http://build.chromium.org/p/chromium/') 923 'http://build.chromium.org/p/chromium/')
953 builder_name = urllib.quote(properties.get('buildername', '')) 924 builder_name = urllib.quote(properties.get('buildername', ''))
954 builder_number = str(properties.get('buildnumber', '')) 925 builder_number = str(properties.get('buildnumber', ''))
955 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) 926 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number)
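
With the default buildbotURL this resolves to a standard Buildbot build page; a worked example with made-up builder values (Python 2 urllib, as in the code above):

    import urllib

    bot_url = 'http://build.chromium.org/p/chromium/'
    builder = urllib.quote('linux_perf_bisect')
    build_number = str(307)
    print('%sbuilders/%s/builds/%s' % (bot_url, builder, build_number))
    # -> http://build.chromium.org/p/chromium/builders/linux_perf_bisect/builds/307
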