scripts/slave/recipe_modules/auto_bisect/bisector.py - Issue 2247373002: Refactor stages 1, 2 and test_api overhaul.

Side by Side Diff: scripts/slave/recipe_modules/auto_bisect/bisector.py

Issue 2247373002: Refactor stages 1, 2 and test_api overhaul. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master

Patch Set: Got full coverage for new and changed code. Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« scripts/slave/recipe_modules/auto_bisect/api.py ('K') | « scripts/slave/recipe_modules/auto_bisect/api.py ('k') | scripts/slave/recipe_modules/auto_bisect/bisector_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import json	5 import json

6 import re	6 import re

7 import time	7 import time

8 import urllib	8 import urllib

9	9

10 from . import config_validation	10 from . import config_validation

(...skipping 28 matching lines...) Expand all Loading...
39 'LO_FINAL_CONF', # The bisect completed without a culprit.	39 'LO_FINAL_CONF', # The bisect completed without a culprit.

40 )	40 )

41	41

42 # When we look for the next revision to build, we search nearby revisions	42 # When we look for the next revision to build, we search nearby revisions

43 # looking for a revision that's already been archived. Since we don't want	43 # looking for a revision that's already been archived. Since we don't want

44 # to move too far from the original revision, we'll cap the search at 25%.	44 # to move too far from the original revision, we'll cap the search at 25%.

45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25	45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25

46	46

47 # How long to re-test the initial good-bad range for until significant	47 # How long to re-test the initial good-bad range for until significant

48 # difference is established.	48 # difference is established.

49 REGRESSION_CHECK_TIMEOUT = 20 * 60 * 60 # 20 hours. A build times out after 24.	49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60

50 # If we reach this number of samples on the reference range and have not

51 # achieved statistical significance, bail.

52 MAX_REQUIRED_SAMPLES = 15

53

54 # Significance level to use for determining difference between revisions via

55 # hypothesis testing.

56 SIGNIFICANCE_LEVEL = 0.01

57	50

58 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = (	51 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = (

59 'The metric values for the initial "good" and "bad" revisions '	52 'The metric values for the initial "good" and "bad" revisions '

60 'do not represent a clear regression.')	53 'do not represent a clear regression.')

61	54

62 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = (	55 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = (

63 'The metric values for the initial "good" and "bad" revisions match the '	56 'The metric values for the initial "good" and "bad" revisions match the '

64 'expected direction of improvement. Thus, likely represent an improvement '	57 'expected direction of improvement. Thus, likely represent an improvement '

65 'and not a regression.')	58 'and not a regression.')

66	59

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
148	141

149 Returns:	142 Returns:

150 A 40-digit git commit hash string.	143 A 40-digit git commit hash string.

151 """	144 """

152 if self._is_sha1(rev): # pragma: no cover	145 if self._is_sha1(rev): # pragma: no cover

153 return rev	146 return rev

154 if rev.isdigit():	147 if rev.isdigit():

155 commit_position = self._api.m.commit_position.construct(	148 commit_position = self._api.m.commit_position.construct(

156 branch='refs/heads/master', value=rev)	149 branch='refs/heads/master', value=rev)

157 try:	150 try:

158 return self._api.m.crrev.to_commit_hash(commit_position)	151 return self._api.m.crrev.to_commit_hash(

	152 commit_position,

	153 step_test_data=lambda: self.api._test_data['hash_cp_map'][rev])

159 except self.api.m.step.StepFailure: # pragma: no cover	154 except self.api.m.step.StepFailure: # pragma: no cover

160 self.surface_result('BAD_REV')	155 self.surface_result('BAD_REV')

161 raise	156 raise

162 self.surface_result('BAD_REV') # pragma: no cover	157 self.surface_result('BAD_REV') # pragma: no cover

163 raise self.api.m.step.StepFailure(	158 raise self.api.m.step.StepFailure(

164 'Invalid input revision: %r' % (rev,)) # pragma: no cover	159 'Invalid input revision: %r' % (rev,)) # pragma: no cover

165	160

166 @staticmethod	161 @staticmethod

167 def _is_sha1(s):	162 def _is_sha1(s):

168 return bool(re.match('^[0-9A-Fa-f]{40}$', s))	163 return bool(re.match('^[0-9A-Fa-f]{40}$', s))

169	164

170 def significantly_different(	165 def compare_revisions(self, revision_a, revision_b):

171 self, list_a, list_b,	166 """

172 significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover	167 Returns:
	prasadv 2016/09/09 18:52:30 I think it is better to return a value of a consistent datatype. In this case there might be confusion when evaluating False versus None. Maybe we should use some kind of enum here. Same for the "needMoreData" case. RobertoCN 2016/09/13 22:11:40 Done. Show quoted text On 2016/09/09 18:52:30, prasadv wrote: > I think it is better to have a consistent datatype value to return. > In this case if there might be a confusion to evaluate False and None. > May be we should use some kind of enum here. > > Same for the "needMoreData" Done.
173 """Uses an external script to run hypothesis testing with scipy.	168 True if the samples are significantly different.

	169 None if there is not enough data to tell.

	170 False if there's enough data but still can't tell the samples apart.

	171 """

	172 output_format = 'chartjson'

	173 values_a = revision_a.chartjson_paths

	174 values_b = revision_b.chartjson_paths

	175 if revision_a.valueset_paths and revision_b.valueset_paths:

	176 output_format = 'valueset'

174	177

175 The reason why we need an external script is that scipy is not available to	178 result = self.api.stat_compare(

176 the default python installed in all platforms. We instead rely on an	179 values_a,

177 anaconda environment to provide those packages.	180 values_b,

	181 self.bisect_config['metric'],

	182 output_format=output_format,

	183 step_test_data=lambda: self.api.test_api.compare_samples_data(

	184 self.api._test_data.get('revision_data'), revision_a, revision_b))

178	185

179 Args:	186 revision_a.debug_values = result['sample_a']['debug_values']

180 list_a, list_b: Two lists representing samples to be compared.	187 revision_b.debug_values = result['sample_b']['debug_values']

181 significance_level: Self-describing. As a decimal fraction.	188 revision_a.mean = result['sample_a']['mean']

	189 revision_b.mean = result['sample_b']['mean']

	190 revision_a.std_dev = result['sample_a']['std_dev']

	191 revision_b.std_dev = result['sample_b']['std_dev']

182	192

183 Returns:	193 if result['result'] == 'needMoreData':

184 A boolean indicating whether the null hypothesis ~(that the lists are	194 return None

185 samples from the same population) can be rejected at the specified	195 return bool(result['result'])

186 significance level.

187 """

188 step_result = self.api.m.python(

189 'Checking sample difference',

190 self.api.resource('significantly_different.py'),

191 [json.dumps(list_a), json.dumps(list_b), str(significance_level)],

192 stdout=self.api.m.json.output())

193 results = step_result.stdout

194 if results is None:

195 assert self.dummy_builds

196 return True

197 significantly_different = results['significantly_different']

198 step_result.presentation.logs[str(significantly_different)] = [

199 'See json.output for details']

200 return significantly_different

201	196

202 def config_step(self):	197 def config_step(self):

203 """Yields a step that prints the bisect config."""	198 """Yields a step that prints the bisect config."""

204 api = self.api	199 api = self.api

205	200

206 # bisect_config may come as a FrozenDict (which is not serializable).	201 # bisect_config may come as a FrozenDict (which is not serializable).

207 bisect_config = dict(self.bisect_config)	202 bisect_config = dict(self.bisect_config)

208	203

209 def fix_windows_backslashes(s):	204 def fix_windows_backslashes(s):

210 backslash_regex = re.compile(r'(?<!\\)\$?!\$')	205 backslash_regex = re.compile(r'(?<!\\)\$?!\$')

(...skipping 17 matching lines...) Expand all Loading...
228 except config_validation.ValidationFail as error:	223 except config_validation.ValidationFail as error:

229 self.surface_result('BAD_CONFIG')	224 self.surface_result('BAD_CONFIG')

230 self.api.m.halt(error.message)	225 self.api.m.halt(error.message)

231 raise self.api.m.step.StepFailure(error.message)	226 raise self.api.m.step.StepFailure(error.message)

232	227

233 @property	228 @property

234 def api(self):	229 def api(self):

235 return self._api	230 return self._api

236	231

237 def compute_relative_change(self):	232 def compute_relative_change(self):

238 old_value = float(self.good_rev.mean_value)	233 old_value = float(self.good_rev.mean or 0)

239 new_value = float(self.bad_rev.mean_value)	234 new_value = float(self.bad_rev.mean or 0)

240	235

241 if new_value and not old_value: # pragma: no cover	236 if new_value and not old_value: # pragma: no cover

242 self.relative_change = ZERO_TO_NON_ZERO	237 self.relative_change = ZERO_TO_NON_ZERO

243 return	238 return

244	239

245 rel_change = self.api.m.math_utils.relative_change(old_value, new_value)	240 rel_change = self.api.m.math_utils.relative_change(old_value, new_value)

246 self.relative_change = '%.2f%%' % (100 * rel_change)	241 self.relative_change = '%.2f%%' % (100 * rel_change)

247	242

248 def make_deps_sha_file(self, deps_sha):	243 def make_deps_sha_file(self, deps_sha):

249 """Make a diff patch that creates DEPS.sha.	244 """Make a diff patch that creates DEPS.sha.

(...skipping 16 matching lines...) Expand all Loading...
266 file is to be written to.	261 file is to be written to.

267 commit_hash (str): An identifier for the step.	262 commit_hash (str): An identifier for the step.

268	263

269 Returns:	264 Returns:

270 A string containing the hash of the interned object.	265 A string containing the hash of the interned object.

271 """	266 """

272 cmd = 'hash-object -t blob -w --stdin'.split(' ')	267 cmd = 'hash-object -t blob -w --stdin'.split(' ')

273 stdin = self.api.m.raw_io.input(file_contents)	268 stdin = self.api.m.raw_io.input(file_contents)

274 stdout = self.api.m.raw_io.output()	269 stdout = self.api.m.raw_io.output()

275 step_name = 'Hashing modified DEPS file with revision ' + commit_hash	270 step_name = 'Hashing modified DEPS file with revision ' + commit_hash

276 step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout,	271 step_result = self.api.m.git(

277 name=step_name)	272 *cmd, cwd=cwd, stdin=stdin, stdout=stdout, name=step_name,

	273 step_test_data=lambda:

	274 self.api.m.raw_io.test_api.stream_output(commit_hash))

278 hash_string = step_result.stdout.splitlines()[0]	275 hash_string = step_result.stdout.splitlines()[0]

279 try:	276 try:

280 if hash_string:	277 if hash_string:

281 int(hash_string, 16)	278 int(hash_string, 16)

282 return hash_string	279 return hash_string

283 except ValueError: # pragma: no cover	280 except ValueError: # pragma: no cover

284 reason = 'Git did not output a valid hash for the interned file.'	281 reason = 'Git did not output a valid hash for the interned file.'

285 self.api.m.halt(reason)	282 self.api.m.halt(reason)

286 raise self.api.m.step.StepFailure(reason)	283 raise self.api.m.step.StepFailure(reason)

287	284

(...skipping 14 matching lines...) Expand all Loading...
302 Returns:	299 Returns:

303 A string containing the diff patch as produced by the 'git diff' command.	300 A string containing the diff patch as produced by the 'git diff' command.

304 """	301 """

305 # The prefixes used in the command below are used to find and replace the	302 # The prefixes used in the command below are used to find and replace the

306 # tree-ish git object id's on the diff output more easily.	303 # tree-ish git object id's on the diff output more easily.

307 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:'	304 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:'

308 cmd %= (git_object_a, git_object_b)	305 cmd %= (git_object_a, git_object_b)

309 cmd = cmd.split(' ')	306 cmd = cmd.split(' ')

310 stdout = self.api.m.raw_io.output()	307 stdout = self.api.m.raw_io.output()

311 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev)	308 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev)

312 step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name)	309 step_result = self.api.m.git(

	310 *cmd, cwd=cwd, stdout=stdout, name=step_name,

	311 step_test_data=lambda: self.api._test_data['diff_patch'])

313 patch_text = step_result.stdout	312 patch_text = step_result.stdout

314 src_string = 'IAMSRC:' + git_object_a	313 src_string = 'IAMSRC:' + git_object_a

315 dst_string = 'IAMDST:' + git_object_b	314 dst_string = 'IAMDST:' + git_object_b

316 patch_text = patch_text.replace(src_string, src_alias)	315 patch_text = patch_text.replace(src_string, src_alias)

317 patch_text = patch_text.replace(dst_string, dst_alias)	316 patch_text = patch_text.replace(dst_string, dst_alias)

318 return patch_text	317 return patch_text

319	318

320 def make_deps_patch(self, base_revision, base_file_contents,	319 def make_deps_patch(self, base_revision, base_file_contents,

321 depot, new_commit_hash):	320 depot, new_commit_hash):

322 """Make a diff patch that updates a specific dependency revision.	321 """Make a diff patch that updates a specific dependency revision.

(...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
365 interned_deps_hash, deps_file, deps_file,	364 interned_deps_hash, deps_file, deps_file,

366 cwd=cwd,	365 cwd=cwd,

367 deps_rev=new_commit_hash)	366 deps_rev=new_commit_hash)

368 return patch_text, patched_contents	367 return patch_text, patched_contents

369	368

370 def _expand_initial_revision_range(self):	369 def _expand_initial_revision_range(self):

371 """Sets the initial contents of \|self.revisions\|."""	370 """Sets the initial contents of \|self.revisions\|."""

372 with self.api.m.step.nest('Expanding revision range'):	371 with self.api.m.step.nest('Expanding revision range'):

373 good_hash = self.good_rev.commit_hash	372 good_hash = self.good_rev.commit_hash

374 bad_hash = self.bad_rev.commit_hash	373 bad_hash = self.bad_rev.commit_hash

	374 depot = self.good_rev.depot_name

375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash)	375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash)

376 revisions = self._revision_range(	376 revisions = self._revision_range(

377 start=good_hash,	377 start=good_hash,

378 end=bad_hash,	378 end=bad_hash,

379 depot_name=self.base_depot,	379 depot_name=self.base_depot,

380 step_name=step_name,	380 step_name=step_name,

381 exclude_end=True)	381 exclude_end=True,

	382 step_test_data=lambda: self.api._test_data['revision_list'][depot]

	383 )

382 self.revisions = [self.good_rev] + revisions + [self.bad_rev]	384 self.revisions = [self.good_rev] + revisions + [self.bad_rev]

383 self._update_revision_list_indexes()	385 self._update_revision_list_indexes()

384	386

385 def _revision_range(self, start, end, depot_name, base_revision=None,	387 def _revision_range(self, start, end, depot_name, base_revision=None,

386 step_name=None, exclude_end=False):	388 step_name=None, exclude_end=False, **kwargs):

387 """Returns a list of RevisionState objects between \|start\| and \|end\|.	389 """Returns a list of RevisionState objects between \|start\| and \|end\|.

388	390

389 When expanding the initial revision range we want to exclude the last	391 When expanding the initial revision range we want to exclude the last

390 revision, since both good and bad have already been created and tested.	392 revision, since both good and bad have already been created and tested.

391 When bisecting into a roll on the other hand, we want to include the last	393 When bisecting into a roll on the other hand, we want to include the last

392 revision in the roll, because although the code should be equivalent to	394 revision in the roll, because although the code should be equivalent to

393 the roll, we want to blame the right culprit and not the roll.	395 the roll, we want to blame the right culprit and not the roll.

394	396

395 Args:	397 Args:

396 start (str): Start commit hash.	398 start (str): Start commit hash.

397 end (str): End commit hash.	399 end (str): End commit hash.

398 depot_name (str): Short string name of repo, e.g. chromium or v8.	400 depot_name (str): Short string name of repo, e.g. chromium or v8.

399 base_revision (str): Base revision in the downstream repo (e.g. chromium).	401 base_revision (str): Base revision in the downstream repo (e.g. chromium).

400 step_name (str): Optional step name.	402 step_name (str): Optional step name.

401 exclude_end (bool): Whether to exclude the last revision in the range,	403 exclude_end (bool): Whether to exclude the last revision in the range,

402 i.e. the revision given as end.	404 i.e. the revision given as end.

403	405

404 Returns:	406 Returns:

405 A list of RevisionState objects.	407 A list of RevisionState objects.

406 """	408 """

407 if self.internal_bisect: # pragma: no cover	409 if self.internal_bisect: # pragma: no cover

408 return self._revision_range_with_gitiles(	410 return self._revision_range_with_gitiles(

409 start, end, depot_name, base_revision, step_name)	411 start, end, depot_name, base_revision, step_name)

410 try:	412 try:

411 step_result = self.api.m.python(	413 step_result = self.api.m.python(

412 step_name,	414 step_name,

413 self.api.resource('fetch_intervening_revisions.py'),	415 self.api.resource('fetch_intervening_revisions.py'),

414 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']],	416 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']],

415 stdout=self.api.m.json.output())	417 stdout=self.api.m.json.output(), **kwargs)

416 except self.api.m.step.StepFailure: # pragma: no cover	418 except self.api.m.step.StepFailure: # pragma: no cover

417 self.surface_result('BAD_REV')	419 self.surface_result('BAD_REV')

418 raise	420 raise

419 revisions = []	421 revisions = []

420 revision_hashes = step_result.stdout	422 revision_hashes = step_result.stdout

421 if exclude_end:	423 if exclude_end:

422 revision_hashes = revision_hashes[:-1]	424 revision_hashes = revision_hashes[:-1]

423 for commit_hash, _ in revision_hashes:	425 for commit_hash, _ in revision_hashes:

424 revisions.append(self.revision_class(	426 revisions.append(self.revision_class(

425 bisector=self,	427 bisector=self,

(...skipping 88 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
514 dep_revision_max = max_revision.deps[depot_name]	516 dep_revision_max = max_revision.deps[depot_name]

515 if (dep_revision_min and dep_revision_max and	517 if (dep_revision_min and dep_revision_max and

516 dep_revision_min != dep_revision_max):	518 dep_revision_min != dep_revision_max):

517 step_name = ('Expanding revision range for revision %s'	519 step_name = ('Expanding revision range for revision %s'

518 ' on depot %s' % (dep_revision_max, depot_name))	520 ' on depot %s' % (dep_revision_max, depot_name))

519 rev_list = self._revision_range(	521 rev_list = self._revision_range(

520 start=dep_revision_min,	522 start=dep_revision_min,

521 end=dep_revision_max,	523 end=dep_revision_max,

522 depot_name=depot_name,	524 depot_name=depot_name,

523 base_revision=min_revision,	525 base_revision=min_revision,

524 step_name=step_name)	526 step_name=step_name,

	527 step_test_data=lambda:

	528 self.api._test_data['revision_list'][depot_name])

525 new_revisions = self.revisions[:max_revision.list_index]	529 new_revisions = self.revisions[:max_revision.list_index]

526 new_revisions += rev_list	530 new_revisions += rev_list

527 new_revisions += self.revisions[max_revision.list_index:]	531 new_revisions += self.revisions[max_revision.list_index:]

528 self.revisions = new_revisions	532 self.revisions = new_revisions

529 self._update_revision_list_indexes()	533 self._update_revision_list_indexes()

530 return True	534 return True

531 except RuntimeError: # pragma: no cover	535 except RuntimeError: # pragma: no cover

532 warning_text = ('Could not expand dependency revisions for ' +	536 warning_text = ('Could not expand dependency revisions for ' +

533 revision_to_expand.commit_hash)	537 revision_to_expand.commit_hash)

534 self.surface_result('BAD_REV')	538 self.surface_result('BAD_REV')

(...skipping 16 matching lines...) Expand all Loading...
551	555

552 The change between the test results obtained for the given 'good' and	556 The change between the test results obtained for the given 'good' and

553 'bad' revisions is expected to be considered a regression. The	557 'bad' revisions is expected to be considered a regression. The

554 `improvement_direction` attribute is positive if a larger number is	558 `improvement_direction` attribute is positive if a larger number is

555 considered better, and negative if a smaller number is considered better.	559 considered better, and negative if a smaller number is considered better.

556	560

557 Returns:	561 Returns:

558 True if the check passes (i.e. no problem), False if the change is not	562 True if the check passes (i.e. no problem), False if the change is not

559 a regression according to the improvement direction.	563 a regression according to the improvement direction.

560 """	564 """

561 good = self.good_rev.mean_value	565 good = self.good_rev.mean

562 bad = self.bad_rev.mean_value	566 bad = self.bad_rev.mean

563	567

564 if self.is_return_code_mode():	568 if self.is_return_code_mode():

565 return True	569 return True

566	570

567 direction = self.improvement_direction	571 direction = self.improvement_direction

568 if direction is None:	572 if direction is None:

569 return True	573 return True

570 if (bad > good and direction > 0) or (bad < good and direction < 0):	574 if (bad > good and direction > 0) or (bad < good and direction < 0):

571 self._set_failed_direction_results()	575 self._set_failed_direction_results()

572 return False	576 return False

(...skipping 14 matching lines...) Expand all Loading...
587 """Checks that the initial range presents a clear enough regression.	591 """Checks that the initial range presents a clear enough regression.

588	592

589 We ensure that the good and bad revisions produce significantly different	593 We ensure that the good and bad revisions produce significantly different

590 results, increasing the sample size until MAX_REQUIRED_SAMPLES is reached	594 results, increasing the sample size until MAX_REQUIRED_SAMPLES is reached

591 or REGRESSION_CHECK_TIMEOUT seconds have elapsed.	595 or REGRESSION_CHECK_TIMEOUT seconds have elapsed.

592	596

593 Returns: True if the revisions produced results that differ from each	597 Returns: True if the revisions produced results that differ from each

594 other in a statistically significant manner. False if such difference could	598 other in a statistically significant manner. False if such difference could

595 not be established in the time or sample size allowed.	599 not be established in the time or sample size allowed.

596 """	600 """

597 if self.test_type == 'return_code':	601 if self.is_return_code_mode():

598 return (self.good_rev.overall_return_code !=	602 return (self.good_rev.overall_return_code !=

599 self.bad_rev.overall_return_code)	603 self.bad_rev.overall_return_code)

600	604

601 if self.bypass_stats_check:	605 if self.bypass_stats_check:

602 dummy_result = self.good_rev.values != self.bad_rev.values	606 self.compare_revisions(self.good_rev, self.bad_rev)

	607 dummy_result = self.good_rev.mean != self.bad_rev.mean

603 if not dummy_result:	608 if not dummy_result:

604 self._set_insufficient_confidence_warning()	609 self._set_insufficient_confidence_warning()

605 return dummy_result	610 return dummy_result

606	611

	612 # TODO(robertocn): This step should not be necessary in some cases.

607 with self.api.m.step.nest('Re-testing reference range'):	613 with self.api.m.step.nest('Re-testing reference range'):

608 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT	614 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT

609 while time.time() < expiration_time:	615 while time.time() < expiration_time:

610 if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5:	616 if (self.good_rev.test_run_count >= 5

611 if self.significantly_different(self.good_rev.values,	617 and self.bad_rev.test_run_count >= 5):

612 self.bad_rev.values):	618 if self.compare_revisions(self.good_rev, self.bad_rev):

613 return True	619 return True

614 if len(self.good_rev.values) == len(self.bad_rev.values):	620 if self.good_rev.test_run_count == self.bad_rev.test_run_count:

615 revision_to_retest = self.last_tested_revision	621 revision_to_retest = self.last_tested_revision

616 else:	622 else:

617 revision_to_retest = min(self.good_rev, self.bad_rev,	623 revision_to_retest = min(self.good_rev, self.bad_rev,

618 key=lambda x: len(x.values))	624 key=lambda x: x.test_run_count)

619 if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES:	625 revision_to_retest._do_test()

620 revision_to_retest.retest()	626

621 else:

622 break

623 self._set_insufficient_confidence_warning()	627 self._set_insufficient_confidence_warning()

624 return False	628 return False

625	629

626	630

627 def get_exception(self):	631 def get_exception(self):

628 raise NotImplementedError() # pragma: no cover	632 raise NotImplementedError() # pragma: no cover

629 # TODO: should return an exception with the details of the failure.	633 # TODO: should return an exception with the details of the failure.

630	634

631 def _set_insufficient_confidence_warning(	635 def _set_insufficient_confidence_warning(

632 self): # pragma: no cover	636 self): # pragma: no cover

633 """Adds a warning about the lack of initial regression confidence."""	637 """Adds a warning about the lack of initial regression confidence."""

634 self.failed_initial_confidence = True	638 self.failed_initial_confidence = True

635 self.surface_result('LO_INIT_CONF')	639 self.surface_result('LO_INIT_CONF')

636 self.warnings.append(	640 self.warnings.append(

637 'Bisect failed to reproduce the regression with enough confidence.')	641 'Bisect failed to reproduce the regression with enough confidence.')

638	642

639 def _results_debug_message(self):	643 def _results_debug_message(self):

640 """Returns a string with values used to debug a bisect result."""	644 """Returns a string with values used to debug a bisect result."""

641 result = 'bisector.lkgr: %r\n' % self.lkgr	645 result = 'bisector.lkgr: %r\n' % self.lkgr

642 result += 'bisector.fkbr: %r\n\n' % self.fkbr	646 result += 'bisector.fkbr: %r\n\n' % self.fkbr

643 result += self._revision_value_table()	647 result += self._revision_value_table()

644 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values):	648 if (self.lkgr and self.lkgr.test_run_count and self.fkbr and

645 result += '\n' + self._t_test_results()	649 self.fkbr.test_run_count):

	650 result += '\n' + '\n'.join([

	651 'LKGR values: %r' % list(self.lkgr.debug_values),

	652 'FKBR values: %r' % list(self.fkbr.debug_values),

	653 ])

646 return result	654 return result

647	655

648 def _revision_value_table(self):	656 def _revision_value_table(self):

649 """Returns a string table showing revisions and their values."""	657 """Returns a string table showing revisions and their values."""

650 header = [['Revision', 'Values']]	658 header = [['Revision', 'Values']]

651 rows = [[r.revision_string(), str(r.values)] for r in self.revisions]	659 rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions]

652 return self._pretty_table(header + rows)	660 return self._pretty_table(header + rows)

653	661

654 def _pretty_table(self, data):	662 def _pretty_table(self, data):

655 results = []	663 results = []

656 for row in data:	664 for row in data:

657 results.append('%-15s' * len(row) % tuple(row))	665 results.append('%-15s' * len(row) % tuple(row))

658 return '\n'.join(results)	666 return '\n'.join(results)

659	667

660 def _t_test_results(self):

661 """Returns a string showing t-test results for lkgr and fkbr."""

662 t, df, p = self.api.m.math_utils.welchs_t_test(

663 self.lkgr.values, self.fkbr.values)

664 lines = [

665 'LKGR values: %r' % self.lkgr.values,

666 'FKBR values: %r' % self.fkbr.values,

667 't-statistic: %r' % t,

668 'deg. of freedom: %r' % df,

669 'p-value: %r' % p,

670 'Confidence score: %r' % (100 * (1 - p))

671 ]

672 return '\n'.join(lines)

673

674 def print_result_debug_info(self):	668 def print_result_debug_info(self):

675 """Prints extra debug info at the end of the bisect process."""	669 """Prints extra debug info at the end of the bisect process."""

676 lines = self._results_debug_message().splitlines()	670 lines = self._results_debug_message().splitlines()

677 # If we emit a null step then add a log to it, the log should be kept	671 # If we emit a null step then add a log to it, the log should be kept

678 # longer than 7 days (which is often needed to debug some issues).	672 # longer than 7 days (which is often needed to debug some issues).

679 self.api.m.step('Debug Info', [])	673 self.api.m.step('Debug Info', [])

680 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines	674 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines

681	675

682 def post_result(self, halt_on_failure=False):	676 def post_result(self, halt_on_failure=False):

683 """Posts bisect results to Perf Dashboard."""	677 """Posts bisect results to Perf Dashboard."""

684 self.api.m.perf_dashboard.set_default_config()	678 self.api.m.perf_dashboard.set_default_config()

685 self.api.m.perf_dashboard.post_bisect_results(	679 self.api.m.perf_dashboard.post_bisect_results(

686 self.get_result(), halt_on_failure)	680 self.get_result(), halt_on_failure)

687	681

688 def get_revision_to_eval(self):	682 def get_revision_to_eval(self):

689 """Gets the next RevisionState object in the candidate range.	683 """Gets the next RevisionState object in the candidate range.

690	684

691 Returns:	685 Returns:

692 The next Revision object in a list.	686 The next Revision object in a list.

693 """	687 """

694 self._update_candidate_range()	688 self._update_candidate_range()

695 candidate_range = [revision for revision in	689 candidate_range = [revision for revision in

696 self.revisions[self.lkgr.list_index + 1:	690 self.revisions[self.lkgr.list_index + 1:

697 self.fkbr.list_index]	691 self.fkbr.list_index]

698 if not revision.tested and not revision.failed]	692 if not revision.failed]

699 if len(candidate_range) == 1:	693 if len(candidate_range) == 1:

700 return candidate_range[0]	694 return candidate_range[0]

701 if len(candidate_range) == 0:	695 if len(candidate_range) == 0:

702 return None	696 return None

703	697

704 default_revision = candidate_range[len(candidate_range) / 2]	698 default_revision = candidate_range[len(candidate_range) / 2]

705	699

706 with self.api.m.step.nest(	700 with self.api.m.step.nest(

707 'Wiggling revision ' + default_revision.revision_string()):	701 'Wiggling revision ' + default_revision.revision_string()):

708 # We'll search up to 25% of the range (in either direction) to try and	702 # We'll search up to 25% of the range (in either direction) to try and

(...skipping 22 matching lines...) Expand all Loading...
731 return False	725 return False

732	726

733 def check_bisect_finished(self, revision):	727 def check_bisect_finished(self, revision):

734 """Checks if this revision completes the bisection process.	728 """Checks if this revision completes the bisection process.

735	729

736 In this case 'finished' refers to finding one revision considered 'good'	730 In this case 'finished' refers to finding one revision considered 'good'

737 immediately preceding a revision considered 'bad' where the 'bad' revision	731 immediately preceding a revision considered 'bad' where the 'bad' revision

738 does not contain a DEPS change.	732 does not contain a DEPS change.

739 """	733 """

740 if (revision.bad and revision.previous_revision and	734 if (revision.bad and revision.previous_revision and

741 revision.previous_revision.good): # pragma: no cover	735 revision.previous_revision.good):

742 if revision.deps_change() and self._expand_deps_revisions(revision):	736 if revision.deps_change() and self._expand_deps_revisions(revision):

743 return False	737 return False

744 self.culprit = revision	738 self.culprit = revision

745 return True	739 return True

746 if (revision.good and revision.next_revision and	740 if (revision.good and revision.next_revision and

747 revision.next_revision.bad):	741 revision.next_revision.bad):

748 if (revision.next_revision.deps_change()	742 if (revision.next_revision.deps_change()

749 and self._expand_deps_revisions(revision.next_revision)):	743 and self._expand_deps_revisions(revision.next_revision)):

750 return False	744 return False

751 self.culprit = revision.next_revision	745 self.culprit = revision.next_revision

752 return True	746 return True

753 return False	747 # We'll never get here because revision adjacency is checked before this

754	748 # function is called.

755 def wait_for_all(self, revision_list):	749 assert False # pragma: no cover

756 """Waits for all revisions in list to finish."""

757 for r in revision_list:

758 self.wait_for(r)

759

760 def wait_for(self, revision, nest_check=True):

761 """Waits for the revision to finish its job."""

762 if nest_check and not self.flags.get(

763 'do_not_nest_wait_for_revision'): # pragma: no cover

764 with self.api.m.step.nest('Waiting for ' + revision.revision_string()):

765 return self.wait_for(revision, nest_check=False)

766 while True:

767 revision.update_status()

768 if revision.in_progress:

769 self.api.m.python.inline(

770 'sleeping',

771 """

772 import sys

773 import time

774 time.sleep(20*60)

775 sys.exit(0)

776 """)

777 else:

778 break

779	750

780 def _update_candidate_range(self):	751 def _update_candidate_range(self):

781 """Updates lkgr and fkbr (last known good/first known bad) revisions.	752 """Updates lkgr and fkbr (last known good/first known bad) revisions.

782	753

783 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in	754 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in

784 bisectors.revisions."""	755 bisectors.revisions."""

785 for r in self.revisions:	756 for r in self.revisions:

786 if r.tested:	757 if r.test_run_count:

787 if r.good:	758 if r.good:

788 self.lkgr = r	759 self.lkgr = r

789 elif r.bad:	760 elif r.bad:

790 self.fkbr = r	761 self.fkbr = r

791 break	762 break

792 assert self.lkgr and self.fkbr	763 assert self.lkgr and self.fkbr

793	764

794 def get_perf_tester_name(self):	765 def get_perf_tester_name(self):

795 """Gets the name of the tester bot (on tryserver.chromium.perf) to use.	766 """Gets the name of the tester bot (on tryserver.chromium.perf) to use.

796	767

(...skipping 17 matching lines...) Expand all Loading...
814	785

815 # TODO(prasadv): Refactor this code to remove hard coded values.	786 # TODO(prasadv): Refactor this code to remove hard coded values.

816 bot_name = self.get_perf_tester_name()	787 bot_name = self.get_perf_tester_name()

817 if 'win' in bot_name:	788 if 'win' in bot_name:

818 if any(b in bot_name for b in ['x64', 'gpu']):	789 if any(b in bot_name for b in ['x64', 'gpu']):

819 return 'winx64_bisect_builder'	790 return 'winx64_bisect_builder'

820 return 'win_perf_bisect_builder'	791 return 'win_perf_bisect_builder'

821	792

822 # TODO(prasadv): Refactor this code to remove hard coded values and use	793 # TODO(prasadv): Refactor this code to remove hard coded values and use

823 # target_bit from the bot config. crbug.com/640287	794 # target_bit from the bot config. crbug.com/640287

824 if 'android' in bot_name:	795 if 'android' in bot_name: # pragma: no cover

825 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']):	796 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']):

826 return 'android_arm64_perf_bisect_builder'	797 return 'android_arm64_perf_bisect_builder'

827 return 'android_perf_bisect_builder'	798 return 'android_perf_bisect_builder'

828	799

829 if 'mac' in bot_name:	800 if 'mac' in bot_name:

830 return 'mac_perf_bisect_builder'	801 return 'mac_perf_bisect_builder'

831	802

832 return 'linux_perf_bisect_builder'	803 return 'linux_perf_bisect_builder'

833	804

834 def get_platform_gs_prefix(self):	805 def get_platform_gs_prefix(self):

835 """Returns the prefix of a GS URL where a build can be found.	806 """Returns the prefix of a GS URL where a build can be found.

836	807

837 This prefix includes the schema, bucket, directory and beginning	808 This prefix includes the schema, bucket, directory and beginning

838 of filename. It is joined together with the part of the filename	809 of filename. It is joined together with the part of the filename

839 that includes the revision and the file extension to form the	810 that includes the revision and the file extension to form the

840 full GS URL.	811 full GS URL.

841 """	812 """

842 if self.api.buildurl_gs_prefix: # pragma: no cover	813 if self.api.buildurl_gs_prefix: # pragma: no cover

843 return self.api.buildurl_gs_prefix	814 return self.api.buildurl_gs_prefix

844	815

845 # TODO(prasadv): Refactor this code to remove hard coded values.	816 # TODO(prasadv): Refactor this code to remove hard coded values.

846 bot_name = self.get_perf_tester_name()	817 bot_name = self.get_perf_tester_name()

847 if 'win' in bot_name:	818 if 'win' in bot_name:

848 if any(b in bot_name for b in ['x64', 'gpu']):	819 if any(b in bot_name for b in ['x64', 'gpu']):

849 return 'gs://chrome-perf/Win x64 Builder/full-build-win32_'	820 return 'gs://chrome-perf/Win x64 Builder/full-build-win32_'

850 return 'gs://chrome-perf/Win Builder/full-build-win32_'	821 return 'gs://chrome-perf/Win Builder/full-build-win32_'

851	822

852 # TODO(prasadv): Refactor this code to remove hard coded values and use	823 # TODO(prasadv): Refactor this code to remove hard coded values and use

853 # target_bit from the bot config. crbug.com/640287	824 # target_bit from the bot config. crbug.com/640287

854 if 'android' in bot_name:	825 if 'android' in bot_name: #pragma: no cover

855 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']):	826 if any(b in bot_name for b in ['arm64', 'nexus9', 'nexus5X']):

856 return 'gs://chrome-perf/Android arm64 Builder/full-build-linux_'	827 return 'gs://chrome-perf/Android arm64 Builder/full-build-linux_'

857 return 'gs://chrome-perf/Android Builder/full-build-linux_'	828 return 'gs://chrome-perf/Android Builder/full-build-linux_'

858	829

859 if 'mac' in bot_name:	830 if 'mac' in bot_name:

860 return 'gs://chrome-perf/Mac Builder/full-build-mac_'	831 return 'gs://chrome-perf/Mac Builder/full-build-mac_'

861	832

862 return 'gs://chrome-perf/Linux Builder/full-build-linux_'	833 return 'gs://chrome-perf/Linux Builder/full-build-linux_'

863	834

864 def ensure_sync_master_branch(self):	835 def ensure_sync_master_branch(self):

865 """Make sure the local master is in sync with the fetched origin/master.	836 """Make sure the local master is in sync with the fetched origin/master.

866	837

867 We have seen on several occasions that the local master branch gets reset	838 We have seen on several occasions that the local master branch gets reset

868 to previous revisions and also detached head states. Running this should	839 to previous revisions and also detached head states. Running this should

869 take care of either situation.	840 take care of either situation.

870 """	841 """

871 # TODO(robertocn): Investigate what causes the states mentioned in the	842 # TODO(robertocn): Investigate what causes the states mentioned in the

872 # docstring in the first place.	843 # docstring in the first place.

873 self.api.m.git('update-ref', 'refs/heads/master',	844 self.api.m.git('update-ref', 'refs/heads/master',

874 'refs/remotes/origin/master')	845 'refs/remotes/origin/master')

875 self.api.m.git('checkout', 'master', cwd=self.api.m.path['checkout'])	846 self.api.m.git('checkout', 'master', cwd=self.api.m.path['checkout'])

876	847

877 def is_return_code_mode(self):	848 def is_return_code_mode(self):

878 """Checks whether this is a bisect on the test's exit code."""	849 """Checks whether this is a bisect on the test's exit code."""

879 return self.bisect_config.get('test_type') == 'return_code'	850 return self.test_type == 'return_code'

880	851

881 def surface_result(self, result_string):	852 def surface_result(self, result_string):

882 assert result_string in VALID_RESULT_CODES	853 assert result_string in VALID_RESULT_CODES

883 prefix = 'B4T_' # To avoid collision. Stands for bisect (abbr. `a la i18n).	854 prefix = 'B4T_' # To avoid collision. Stands for bisect (abbr. `a la i18n).

884 result_code = prefix + result_string	855 result_code = prefix + result_string

885 assert len(result_code) <= 20	856 assert len(result_code) <= 20

886 if result_code not in self.result_codes:	857 if result_code not in self.result_codes:

887 self.result_codes.add(result_code)	858 self.result_codes.add(result_code)

888 properties = self.api.m.step.active_result.presentation.properties	859 properties = self.api.m.step.active_result.presentation.properties

889 properties['extra_result_code'] = sorted(self.result_codes)	860 properties['extra_result_code'] = sorted(self.result_codes)

890	861

891 def get_result(self):	862 def get_result(self):

892 """Returns the results as a jsonable object."""	863 """Returns the results as a jsonable object."""

893 config = self.bisect_config	864 config = self.bisect_config

894 results_confidence = 0

895 if self.culprit:

896 results_confidence = self.api.m.math_utils.confidence_score(

897 self.lkgr.values, self.fkbr.values)

898	865

899 if self.failed:	866 if self.failed:

900 status = 'failed'	867 status = 'failed'

901 elif self.bisect_over:	868 elif self.bisect_over:

902 status = 'completed'	869 status = 'completed'

903 else:	870 else:

904 status = 'started'	871 status = 'started'

905	872

906 aborted_reason = None	873 aborted_reason = None

907 if self.failed_initial_confidence:	874 if self.failed_initial_confidence:

908 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON	875 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON

909 elif self.failed_direction:	876 elif self.failed_direction:

910 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON	877 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON

911 return {	878 return {

912 'try_job_id': config.get('try_job_id'),	879 'try_job_id': config.get('try_job_id'),

913 'bug_id': config.get('bug_id'),	880 'bug_id': config.get('bug_id'),

914 'status': status,	881 'status': status,

915 'buildbot_log_url': self._get_build_url(),	882 'buildbot_log_url': self._get_build_url(),

916 'bisect_bot': self.get_perf_tester_name(),	883 'bisect_bot': self.get_perf_tester_name(),

917 'command': config['command'],	884 'command': config['command'],

918 'test_type': config['test_type'],	885 'test_type': config['test_type'],

919 'metric': config['metric'],	886 'metric': config.get('metric'),

920 'change': self.relative_change,	887 'change': self.relative_change,

921 'score': results_confidence,

922 'good_revision': self.good_rev.commit_hash,	888 'good_revision': self.good_rev.commit_hash,

923 'bad_revision': self.bad_rev.commit_hash,	889 'bad_revision': self.bad_rev.commit_hash,

924 'warnings': self.warnings,	890 'warnings': self.warnings,

925 'aborted_reason': aborted_reason,	891 'aborted_reason': aborted_reason,

926 'culprit_data': self._culprit_data(),	892 'culprit_data': self._culprit_data(),

927 'revision_data': self._revision_data()	893 'revision_data': self._revision_data()

928 }	894 }

929	895

930 def _culprit_data(self):	896 def _culprit_data(self):

931 culprit = self.culprit	897 culprit = self.culprit

(...skipping 11 matching lines...) Expand all Loading...
943 'email': culprit_info['email'],	909 'email': culprit_info['email'],

944 'cl_date': culprit_info['date'],	910 'cl_date': culprit_info['date'],

945 'commit_info': culprit_info['body'],	911 'commit_info': culprit_info['body'],

946 'revisions_links': [],	912 'revisions_links': [],

947 'cl': culprit.commit_hash	913 'cl': culprit.commit_hash

948 }	914 }

949	915

950 def _revision_data(self):	916 def _revision_data(self):

951 revision_rows = []	917 revision_rows = []

952 for r in self.revisions:	918 for r in self.revisions:

953 if r.tested or r.aborted:	919 if r.test_run_count:

954 revision_rows.append({	920 revision_rows.append({

955 'depot_name': r.depot_name,	921 'depot_name': r.depot_name,

956 'commit_hash': r.commit_hash,	922 'commit_hash': r.commit_hash,

957 'revision_string': r.revision_string(),	923 'revision_string': r.revision_string(),

958 'mean_value': r.mean_value,	924 'mean_value': r.mean,

959 'std_dev': r.std_dev,	925 'std_dev': r.std_dev,

960 'values': r.values,	926 'values': r.debug_values,

961 'result': 'good' if r.good else 'bad' if r.bad else 'unknown',	927 'result': 'good' if r.good else 'bad' if r.bad else 'unknown',

962 })	928 })

963 return revision_rows	929 return revision_rows

964	930

965 def _get_build_url(self):	931 def _get_build_url(self):

966 properties = self.api.m.properties	932 properties = self.api.m.properties

967 bot_url = properties.get('buildbotURL',	933 bot_url = properties.get('buildbotURL',

968 'http://build.chromium.org/p/chromium/')	934 'http://build.chromium.org/p/chromium/')

969 builder_name = urllib.quote(properties.get('buildername', ''))	935 builder_name = urllib.quote(properties.get('buildername', ''))

970 builder_number = str(properties.get('buildnumber', ''))	936 builder_number = str(properties.get('buildnumber', ''))

971 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number)	937 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number)

OLD	NEW