OLD | NEW |
---|---|
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import json | 5 import json |
6 import re | 6 import re |
7 import time | 7 import time |
8 import urllib | 8 import urllib |
9 | 9 |
10 from . import config_validation | 10 from . import config_validation |
(...skipping 29 matching lines...) | |
40 ) | 40 ) |
41 | 41 |
42 # When we look for the next revision to build, we search nearby revisions | 42 # When we look for the next revision to build, we search nearby revisions |
43 # looking for a revision that's already been archived. Since we don't want | 43 # looking for a revision that's already been archived. Since we don't want |
44 # to move *too* far from the original revision, we'll cap the search at 25%. | 44 # to move *too* far from the original revision, we'll cap the search at 25%. |
45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 | 45 DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25 |
46 | 46 |
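To make the 25% cap concrete, here is an illustrative standalone sketch of turning the percentage into a bounded set of offsets to probe (nearby_offsets is a hypothetical helper, not part of this module):

def nearby_offsets(range_length, cap_fraction=0.25):
    # Never wander further than cap_fraction of the whole range away from
    # the starting revision.
    max_offset = max(1, int(range_length * cap_fraction))
    for offset in range(1, max_offset + 1):
        # Alternate between looking ahead of and behind the starting point.
        yield offset
        yield -offset

print(list(nearby_offsets(20)))  # [1, -1, 2, -2, 3, -3, 4, -4, 5, -5]
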
47 # How long to keep re-testing the initial good-bad range until a significant | 47 # How long to keep re-testing the initial good-bad range until a significant |
48 # difference is established. | 48 # difference is established. |
49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 | 49 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60 |
50 # If we reach this number of samples on the reference range and have not | |
51 # achieved statistical significance, bail. | |
52 MAX_REQUIRED_SAMPLES = 15 | |
53 | |
54 # Significance level to use for determining difference between revisions via | |
55 # hypothesis testing. | |
56 SIGNIFICANCE_LEVEL = 0.01 | |
57 | 50 |
58 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( | 51 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = ( |
59 'The metric values for the initial "good" and "bad" revisions ' | 52 'The metric values for the initial "good" and "bad" revisions ' |
60 'do not represent a clear regression.') | 53 'do not represent a clear regression.') |
61 | 54 |
62 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( | 55 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = ( |
63 'The metric values for the initial "good" and "bad" revisions match the ' | 56 'The metric values for the initial "good" and "bad" revisions match the ' |
64 'expected direction of improvement. Thus, they likely represent an improvement ' | 57 'expected direction of improvement. Thus, they likely represent an improvement ' |
65 'and not a regression.') | 58 'and not a regression.') |
66 | 59 |
(...skipping 81 matching lines...) | |
148 | 141 |
149 Returns: | 142 Returns: |
150 A 40-digit git commit hash string. | 143 A 40-digit git commit hash string. |
151 """ | 144 """ |
152 if self._is_sha1(rev): # pragma: no cover | 145 if self._is_sha1(rev): # pragma: no cover |
153 return rev | 146 return rev |
154 if rev.isdigit(): | 147 if rev.isdigit(): |
155 commit_position = self._api.m.commit_position.construct( | 148 commit_position = self._api.m.commit_position.construct( |
156 branch='refs/heads/master', value=rev) | 149 branch='refs/heads/master', value=rev) |
157 try: | 150 try: |
158 return self._api.m.crrev.to_commit_hash(commit_position) | 151 return self._api.m.crrev.to_commit_hash( |
152 commit_position, | |
153 step_test_data=lambda: self.api._test_data['hash_cp_map'][rev]) | |
159 except self.api.m.step.StepFailure: # pragma: no cover | 154 except self.api.m.step.StepFailure: # pragma: no cover |
160 self.surface_result('BAD_REV') | 155 self.surface_result('BAD_REV') |
161 raise | 156 raise |
162 self.surface_result('BAD_REV') # pragma: no cover | 157 self.surface_result('BAD_REV') # pragma: no cover |
163 raise self.api.m.step.StepFailure( | 158 raise self.api.m.step.StepFailure( |
164 'Invalid input revision: %r' % (rev,)) # pragma: no cover | 159 'Invalid input revision: %r' % (rev,)) # pragma: no cover |
165 | 160 |
166 @staticmethod | 161 @staticmethod |
167 def _is_sha1(s): | 162 def _is_sha1(s): |
168 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) | 163 return bool(re.match('^[0-9A-Fa-f]{40}$', s)) |
169 | 164 |
170 def significantly_different( | 165 def compare_revisions(self, revision_a, revision_b): |
171 self, list_a, list_b, | 166 """ |
172 significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover | 167 Returns: |
173 """Uses an external script to run hypothesis testing with scipy. | 168 True if the samples are significantly different. |
169 None if there is not enough data to tell. | |
170 False if there's enough data but still can't tell the samples apart. | |
171 """ | |
172 output_format = 'chartjson' | |
173 values_a = revision_a.chartjson_paths | |
174 values_b = revision_b.chartjson_paths | |
175 if revision_a.valueset_paths and revision_b.valueset_paths: | |
176 output_format = 'valueset' | |
174 | 177 |
175 The reason why we need an external script is that scipy is not available to | 178 result = self.api.stat_compare( |
176 the default python installed in all platforms. We instead rely on an | 179 values_a, |
177 anaconda environment to provide those packages. | 180 values_b, |
181 self.bisect_config['metric'], | |
182 output_format=output_format, | |
183 step_test_data=lambda: self.api.test_api.compare_samples_data( | |
184 self.api._test_data.get('revision_data'), revision_a, revision_b)) | |
178 | 185 |
179 Args: | 186 revision_a.debug_values = result['sample_a']['debug_values'] |
180 list_a, list_b: Two lists representing samples to be compared. | 187 revision_b.debug_values = result['sample_b']['debug_values'] |
181 significance_level: Self-describing. As a decimal fraction. | 188 revision_a.mean = result['sample_a']['mean'] |
RobertoCN 2016/08/23 00:26:24: Make mean and std_dev properties of revision_state
RobertoCN 2016/09/07 00:33:24: Done.
189 revision_b.mean = result['sample_b']['mean'] | |
190 revision_a.std_dev = result['sample_a']['std_dev'] | |
191 revision_b.std_dev = result['sample_b']['std_dev'] | |
182 | 192 |
183 Returns: | 193 if result['result'] == 'needMoreData': |
RobertoCN 2016/08/23 00:26:24: Make 3 constants for true, false and needMore.
RobertoCN 2016/09/07 00:33:24: Done.
184 A boolean indicating whether the null hypothesis ~(that the lists are | 194 return None |
185 samples from the same population) can be rejected at the specified | 195 return bool(result['result']) |
186 significance level. | |
187 """ | |
188 step_result = self.api.m.python( | |
189 'Checking sample difference', | |
190 self.api.resource('significantly_different.py'), | |
191 [json.dumps(list_a), json.dumps(list_b), str(significance_level)], | |
192 stdout=self.api.m.json.output()) | |
193 results = step_result.stdout | |
194 if results is None: | |
195 assert self.dummy_builds | |
196 return True | |
197 significantly_different = results['significantly_different'] | |
198 step_result.presentation.logs[str(significantly_different)] = [ | |
199 'See json.output for details'] | |
200 return significantly_different | |
201 | 196 |
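For context on the removed step: the hypothesis test that significantly_different.py delegates to scipy for can be sketched standalone roughly as follows (assuming Welch's t-test, as the _t_test_results helper below uses; the actual resource script may differ):

from scipy import stats

def significantly_different(list_a, list_b, significance_level=0.01):
    # Reject the null hypothesis (both lists sampled from the same
    # population) when the p-value falls below the significance level.
    # equal_var=False selects Welch's t-test.
    _, p_value = stats.ttest_ind(list_a, list_b, equal_var=False)
    return p_value < significance_level

print(significantly_different([10.1, 10.3, 10.2, 10.4, 10.2],
                              [12.0, 12.2, 12.1, 12.3, 12.2]))  # True
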
202 def config_step(self): | 197 def config_step(self): |
203 """Yields a step that prints the bisect config.""" | 198 """Yields a step that prints the bisect config.""" |
204 api = self.api | 199 api = self.api |
205 | 200 |
206 # bisect_config may come as a FrozenDict (which is not serializable). | 201 # bisect_config may come as a FrozenDict (which is not serializable). |
207 bisect_config = dict(self.bisect_config) | 202 bisect_config = dict(self.bisect_config) |
208 | 203 |
209 def fix_windows_backslashes(s): | 204 def fix_windows_backslashes(s): |
210 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') | 205 backslash_regex = re.compile(r'(?<!\\)\\(?!\\)') |
(...skipping 17 matching lines...) | |
228 except config_validation.ValidationFail as error: | 223 except config_validation.ValidationFail as error: |
229 self.surface_result('BAD_CONFIG') | 224 self.surface_result('BAD_CONFIG') |
230 self.api.m.halt(error.message) | 225 self.api.m.halt(error.message) |
231 raise self.api.m.step.StepFailure(error.message) | 226 raise self.api.m.step.StepFailure(error.message) |
232 | 227 |
233 @property | 228 @property |
234 def api(self): | 229 def api(self): |
235 return self._api | 230 return self._api |
236 | 231 |
237 def compute_relative_change(self): | 232 def compute_relative_change(self): |
238 old_value = float(self.good_rev.mean_value) | 233 old_value = float(self.good_rev.mean) |
239 new_value = float(self.bad_rev.mean_value) | 234 new_value = float(self.bad_rev.mean) |
240 | 235 |
241 if new_value and not old_value: # pragma: no cover | 236 if new_value and not old_value: # pragma: no cover |
242 self.relative_change = ZERO_TO_NON_ZERO | 237 self.relative_change = ZERO_TO_NON_ZERO |
243 return | 238 return |
244 | 239 |
245 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) | 240 rel_change = self.api.m.math_utils.relative_change(old_value, new_value) |
246 self.relative_change = '%.2f%%' % (100 * rel_change) | 241 self.relative_change = '%.2f%%' % (100 * rel_change) |
247 | 242 |
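A worked example of the formatting in compute_relative_change, assuming math_utils.relative_change computes (new - old) / old (an assumption; the real helper lives in the math_utils recipe module):

def relative_change(old_value, new_value):
    # Hypothetical stand-in for api.m.math_utils.relative_change.
    return (new_value - old_value) / old_value

old_value, new_value = 200.0, 230.0
print('%.2f%%' % (100 * relative_change(old_value, new_value)))  # 15.00%
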
248 def make_deps_sha_file(self, deps_sha): | 243 def make_deps_sha_file(self, deps_sha): |
249 """Make a diff patch that creates DEPS.sha. | 244 """Make a diff patch that creates DEPS.sha. |
(...skipping 16 matching lines...) | |
266 file is to be written to. | 261 file is to be written to. |
267 commit_hash (str): An identifier for the step. | 262 commit_hash (str): An identifier for the step. |
268 | 263 |
269 Returns: | 264 Returns: |
270 A string containing the hash of the interned object. | 265 A string containing the hash of the interned object. |
271 """ | 266 """ |
272 cmd = 'hash-object -t blob -w --stdin'.split(' ') | 267 cmd = 'hash-object -t blob -w --stdin'.split(' ') |
273 stdin = self.api.m.raw_io.input(file_contents) | 268 stdin = self.api.m.raw_io.input(file_contents) |
274 stdout = self.api.m.raw_io.output() | 269 stdout = self.api.m.raw_io.output() |
275 step_name = 'Hashing modified DEPS file with revision ' + commit_hash | 270 step_name = 'Hashing modified DEPS file with revision ' + commit_hash |
276 step_result = self.api.m.git(*cmd, cwd=cwd, stdin=stdin, stdout=stdout, | 271 step_result = self.api.m.git( |
277 name=step_name) | 272 *cmd, cwd=cwd, stdin=stdin, stdout=stdout, name=step_name, |
273 step_test_data=lambda: | |
274 self.api.m.raw_io.test_api.stream_output(commit_hash)) | |
278 hash_string = step_result.stdout.splitlines()[0] | 275 hash_string = step_result.stdout.splitlines()[0] |
279 try: | 276 try: |
280 if hash_string: | 277 if hash_string: |
281 int(hash_string, 16) | 278 int(hash_string, 16) |
282 return hash_string | 279 return hash_string |
283 except ValueError: # pragma: no cover | 280 except ValueError: # pragma: no cover |
284 reason = 'Git did not output a valid hash for the interned file.' | 281 reason = 'Git did not output a valid hash for the interned file.' |
285 self.api.m.halt(reason) | 282 self.api.m.halt(reason) |
286 raise self.api.m.step.StepFailure(reason) | 283 raise self.api.m.step.StepFailure(reason) |
287 | 284 |
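Stripped of the recipe API, the interning step in _git_intern_file amounts to something like this (a sketch that assumes it runs inside a git checkout and uses plain subprocess in place of the recipe's git wrapper):

import subprocess

def git_intern_file(file_contents, cwd):
    # Mirrors 'git hash-object -t blob -w --stdin': write the contents into
    # git's object database and return the resulting blob hash.
    proc = subprocess.Popen(
        ['git', 'hash-object', '-t', 'blob', '-w', '--stdin'],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE, cwd=cwd)
    stdout, _ = proc.communicate(file_contents)
    blob_hash = stdout.splitlines()[0].strip()
    int(blob_hash, 16)  # Sanity-check that git printed a hex object id.
    return blob_hash
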
(...skipping 14 matching lines...) | |
302 Returns: | 299 Returns: |
303 A string containing the diff patch as produced by the 'git diff' command. | 300 A string containing the diff patch as produced by the 'git diff' command. |
304 """ | 301 """ |
305 # The prefixes in the command below make it easier to find and replace the | 302 # The prefixes in the command below make it easier to find and replace the |
306 # tree-ish git object ids in the diff output. | 303 # tree-ish git object ids in the diff output. |
307 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' | 304 cmd = 'diff %s %s --src-prefix=IAMSRC: --dst-prefix=IAMDST:' |
308 cmd %= (git_object_a, git_object_b) | 305 cmd %= (git_object_a, git_object_b) |
309 cmd = cmd.split(' ') | 306 cmd = cmd.split(' ') |
310 stdout = self.api.m.raw_io.output() | 307 stdout = self.api.m.raw_io.output() |
311 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) | 308 step_name = 'Generating patch for %s to %s' % (git_object_a, deps_rev) |
312 step_result = self.api.m.git(*cmd, cwd=cwd, stdout=stdout, name=step_name) | 309 step_result = self.api.m.git( |
310 *cmd, cwd=cwd, stdout=stdout, name=step_name, | |
311 step_test_data=lambda: self.api._test_data['diff_patch']) | |
313 patch_text = step_result.stdout | 312 patch_text = step_result.stdout |
314 src_string = 'IAMSRC:' + git_object_a | 313 src_string = 'IAMSRC:' + git_object_a |
315 dst_string = 'IAMDST:' + git_object_b | 314 dst_string = 'IAMDST:' + git_object_b |
316 patch_text = patch_text.replace(src_string, src_alias) | 315 patch_text = patch_text.replace(src_string, src_alias) |
317 patch_text = patch_text.replace(dst_string, dst_alias) | 316 patch_text = patch_text.replace(dst_string, dst_alias) |
318 return patch_text | 317 return patch_text |
319 | 318 |
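The IAMSRC/IAMDST prefix trick is easiest to see with the git plumbing stripped away (a simplified sketch; the diff header below is fabricated and 'DEPS' stands in for the src/dst aliases):

# Fabricated sample of a blob-to-blob diff header with the custom prefixes;
# real 'git diff' output would also contain the patch body.
git_object_a = 'a' * 40
git_object_b = 'b' * 40
patch_text = ('--- IAMSRC:%s\n'
              '+++ IAMDST:%s\n') % (git_object_a, git_object_b)

# Swap the tree-ish object ids for readable path aliases, as done above.
patch_text = patch_text.replace('IAMSRC:' + git_object_a, 'DEPS')
patch_text = patch_text.replace('IAMDST:' + git_object_b, 'DEPS')
print(patch_text)  # --- DEPS
                   # +++ DEPS
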
320 def make_deps_patch(self, base_revision, base_file_contents, | 319 def make_deps_patch(self, base_revision, base_file_contents, |
321 depot, new_commit_hash): | 320 depot, new_commit_hash): |
322 """Make a diff patch that updates a specific dependency revision. | 321 """Make a diff patch that updates a specific dependency revision. |
(...skipping 42 matching lines...) | |
365 interned_deps_hash, deps_file, deps_file, | 364 interned_deps_hash, deps_file, deps_file, |
366 cwd=cwd, | 365 cwd=cwd, |
367 deps_rev=new_commit_hash) | 366 deps_rev=new_commit_hash) |
368 return patch_text, patched_contents | 367 return patch_text, patched_contents |
369 | 368 |
370 def _expand_initial_revision_range(self): | 369 def _expand_initial_revision_range(self): |
371 """Sets the initial contents of |self.revisions|.""" | 370 """Sets the initial contents of |self.revisions|.""" |
372 with self.api.m.step.nest('Expanding revision range'): | 371 with self.api.m.step.nest('Expanding revision range'): |
373 good_hash = self.good_rev.commit_hash | 372 good_hash = self.good_rev.commit_hash |
374 bad_hash = self.bad_rev.commit_hash | 373 bad_hash = self.bad_rev.commit_hash |
374 depot = self.good_rev.depot_name | |
375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) | 375 step_name = 'for revisions %s:%s' % (good_hash, bad_hash) |
376 revisions = self._revision_range( | 376 revisions = self._revision_range( |
377 start=good_hash, | 377 start=good_hash, |
378 end=bad_hash, | 378 end=bad_hash, |
379 depot_name=self.base_depot, | 379 depot_name=self.base_depot, |
380 step_name=step_name) | 380 step_name=step_name, |
381 step_test_data=lambda: self.api._test_data['revision_list'][depot] | |
382 ) | |
381 self.revisions = [self.good_rev] + revisions + [self.bad_rev] | 383 self.revisions = [self.good_rev] + revisions + [self.bad_rev] |
382 self._update_revision_list_indexes() | 384 self._update_revision_list_indexes() |
383 | 385 |
384 def _revision_range(self, start, end, depot_name, base_revision=None, | 386 def _revision_range(self, start, end, depot_name, base_revision=None, |
385 step_name=None): | 387 step_name=None, **kwargs): |
386 """Returns a list of RevisionState objects between |start| and |end|. | 388 """Returns a list of RevisionState objects between |start| and |end|. |
387 | 389 |
388 Args: | 390 Args: |
389 start (str): Start commit hash. | 391 start (str): Start commit hash. |
390 end (str): End commit hash. | 392 end (str): End commit hash. |
391 depot_name (str): Short string name of repo, e.g. chromium or v8. | 393 depot_name (str): Short string name of repo, e.g. chromium or v8. |
392 base_revision (str): Base revision in the downstream repo (e.g. chromium). | 394 base_revision (str): Base revision in the downstream repo (e.g. chromium). |
393 step_name (str): Optional step name. | 395 step_name (str): Optional step name. |
394 | 396 |
395 Returns: | 397 Returns: |
396 A list of RevisionState objects, not including the given start or end. | 398 A list of RevisionState objects, not including the given start or end. |
397 """ | 399 """ |
398 if self.internal_bisect: # pragma: no cover | 400 if self.internal_bisect: # pragma: no cover |
399 return self._revision_range_with_gitiles( | 401 return self._revision_range_with_gitiles( |
400 start, end, depot_name, base_revision, step_name) | 402 start, end, depot_name, base_revision, step_name) |
401 try: | 403 try: |
402 step_result = self.api.m.python( | 404 step_result = self.api.m.python( |
403 step_name, | 405 step_name, |
404 self.api.resource('fetch_intervening_revisions.py'), | 406 self.api.resource('fetch_intervening_revisions.py'), |
405 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], | 407 [start, end, depot_config.DEPOT_DEPS_NAME[depot_name]['url']], |
406 stdout=self.api.m.json.output()) | 408 stdout=self.api.m.json.output(), **kwargs) |
407 except self.api.m.step.StepFailure: # pragma: no cover | 409 except self.api.m.step.StepFailure: # pragma: no cover |
408 self.surface_result('BAD_REV') | 410 self.surface_result('BAD_REV') |
409 raise | 411 raise |
410 revisions = [] | 412 revisions = [] |
411 for commit_hash, _ in step_result.stdout: | 413 for commit_hash, _ in step_result.stdout: |
412 revisions.append(self.revision_class( | 414 revisions.append(self.revision_class( |
413 bisector=self, | 415 bisector=self, |
414 commit_hash=commit_hash, | 416 commit_hash=commit_hash, |
415 depot_name=depot_name, | 417 depot_name=depot_name, |
416 base_revision=base_revision)) | 418 base_revision=base_revision)) |
(...skipping 85 matching lines...) | |
502 dep_revision_max = max_revision.deps[depot_name] | 504 dep_revision_max = max_revision.deps[depot_name] |
503 if (dep_revision_min and dep_revision_max and | 505 if (dep_revision_min and dep_revision_max and |
504 dep_revision_min != dep_revision_max): | 506 dep_revision_min != dep_revision_max): |
505 step_name = ('Expanding revision range for revision %s' | 507 step_name = ('Expanding revision range for revision %s' |
506 ' on depot %s' % (dep_revision_max, depot_name)) | 508 ' on depot %s' % (dep_revision_max, depot_name)) |
507 rev_list = self._revision_range( | 509 rev_list = self._revision_range( |
508 start=dep_revision_min, | 510 start=dep_revision_min, |
509 end=dep_revision_max, | 511 end=dep_revision_max, |
510 depot_name=depot_name, | 512 depot_name=depot_name, |
511 base_revision=min_revision, | 513 base_revision=min_revision, |
512 step_name=step_name) | 514 step_name=step_name, |
515 step_test_data=lambda: | |
516 self.api._test_data['revision_list'][depot_name]) | |
513 new_revisions = self.revisions[:max_revision.list_index] | 517 new_revisions = self.revisions[:max_revision.list_index] |
514 new_revisions += rev_list | 518 new_revisions += rev_list |
515 new_revisions += self.revisions[max_revision.list_index:] | 519 new_revisions += self.revisions[max_revision.list_index:] |
516 self.revisions = new_revisions | 520 self.revisions = new_revisions |
517 self._update_revision_list_indexes() | 521 self._update_revision_list_indexes() |
518 return True | 522 return True |
519 except RuntimeError: # pragma: no cover | 523 except RuntimeError: # pragma: no cover |
520 warning_text = ('Could not expand dependency revisions for ' + | 524 warning_text = ('Could not expand dependency revisions for ' + |
521 revision_to_expand.commit_hash) | 525 revision_to_expand.commit_hash) |
522 self.surface_result('BAD_REV') | 526 self.surface_result('BAD_REV') |
(...skipping 16 matching lines...) | |
539 | 543 |
540 The change between the test results obtained for the given 'good' and | 544 The change between the test results obtained for the given 'good' and |
541 'bad' revisions is expected to be considered a regression. The | 545 'bad' revisions is expected to be considered a regression. The |
542 `improvement_direction` attribute is positive if a larger number is | 546 `improvement_direction` attribute is positive if a larger number is |
543 considered better, and negative if a smaller number is considered better. | 547 considered better, and negative if a smaller number is considered better. |
544 | 548 |
545 Returns: | 549 Returns: |
546 True if the check passes (i.e. no problem), False if the change is not | 550 True if the check passes (i.e. no problem), False if the change is not |
547 a regression according to the improvement direction. | 551 a regression according to the improvement direction. |
548 """ | 552 """ |
549 good = self.good_rev.mean_value | 553 good = self.good_rev.mean |
550 bad = self.bad_rev.mean_value | 554 bad = self.bad_rev.mean |
551 | 555 |
552 if self.is_return_code_mode(): | 556 if self.is_return_code_mode(): |
553 return True | 557 return True |
554 | 558 |
555 direction = self.improvement_direction | 559 direction = self.improvement_direction |
556 if direction is None: | 560 if direction is None: |
557 return True | 561 return True |
558 if (bad > good and direction > 0) or (bad < good and direction < 0): | 562 if (bad > good and direction > 0) or (bad < good and direction < 0): |
559 self._set_failed_direction_results() | 563 self._set_failed_direction_results() |
560 return False | 564 return False |
(...skipping 18 matching lines...) | |
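Read in isolation, the direction check above reduces to the following standalone sketch (the real method also short-circuits for return-code mode and records the failure):

def passes_direction_check(good_mean, bad_mean, direction):
    # The change only counts as a regression when it moves against the
    # configured direction of improvement.
    if direction is None:
        return True
    if (bad_mean > good_mean and direction > 0) or (
        bad_mean < good_mean and direction < 0):
        return False  # Looks like an improvement, not a regression.
    return True

print(passes_direction_check(100.0, 90.0, direction=1))   # True: metric got worse.
print(passes_direction_check(100.0, 120.0, direction=1))  # False: metric improved.
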
579 or REGRESSION_CHECK_TIMEOUT seconds have elapsed. | 583 or REGRESSION_CHECK_TIMEOUT seconds have elapsed. |
580 | 584 |
581 Returns: True if the revisions produced results that differ from each | 585 Returns: True if the revisions produced results that differ from each |
582 other in a statistically significant manner. False if such a difference could | 586 other in a statistically significant manner. False if such a difference could |
583 not be established in the time or sample size allowed. | 587 not be established in the time or sample size allowed. |
584 """ | 588 """ |
585 if self.test_type == 'return_code': | 589 if self.test_type == 'return_code': |
586 return (self.good_rev.overall_return_code != | 590 return (self.good_rev.overall_return_code != |
587 self.bad_rev.overall_return_code) | 591 self.bad_rev.overall_return_code) |
588 | 592 |
589 if self.bypass_stats_check: | 593 if self.bypass_stats_check: |
RobertoCN 2016/08/23 00:26:24: Remove this flag
RobertoCN 2016/09/07 00:33:24: Acknowledged.
590 dummy_result = self.good_rev.values != self.bad_rev.values | 594 self.compare_revisions(self.good_rev, self.bad_rev) |
595 dummy_result = self.good_rev.mean != self.bad_rev.mean | |
591 if not dummy_result: | 596 if not dummy_result: |
592 self._set_insufficient_confidence_warning() | 597 self._set_insufficient_confidence_warning() |
593 return dummy_result | 598 return dummy_result |
594 | 599 |
600 # TODO(robertocn): This step should not be necessary in some cases. | |
595 with self.api.m.step.nest('Re-testing reference range'): | 601 with self.api.m.step.nest('Re-testing reference range'): |
596 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT | 602 expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT |
597 while time.time() < expiration_time: | 603 while time.time() < expiration_time: |
598 if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5: | 604 if (self.good_rev.test_run_count >= 5 |
599 if self.significantly_different(self.good_rev.values, | 605 and self.bad_rev.test_run_count >= 5): |
600 self.bad_rev.values): | 606 if self.compare_revisions(self.good_rev, self.bad_rev): |
601 return True | 607 return True |
602 if len(self.good_rev.values) == len(self.bad_rev.values): | 608 if self.good_rev.test_run_count == self.bad_rev.test_run_count: |
603 revision_to_retest = self.last_tested_revision | 609 revision_to_retest = self.last_tested_revision |
604 else: | 610 else: |
605 revision_to_retest = min(self.good_rev, self.bad_rev, | 611 revision_to_retest = min(self.good_rev, self.bad_rev, |
606 key=lambda x: len(x.values)) | 612 key=lambda x: x.test_run_count) |
607 if len(revision_to_retest.values) < MAX_REQUIRED_SAMPLES: | |
608 revision_to_retest.retest() | |
609 else: | |
610 break | |
611 self._set_insufficient_confidence_warning() | 613 self._set_insufficient_confidence_warning() |
612 return False | 614 return False |
613 | 615 |
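Stripped of the recipe plumbing, the re-testing loop above has roughly this shape (a simplified sketch; compare, retest and the revision objects stand in for the real API, and the equal-sample-count tie-break is omitted):

import time

def reproduce_regression(good, bad, compare, timeout_s=2 * 60 * 60):
    # Keep collecting samples until the comparison is conclusive or the
    # timeout expires; compare() returns True, False or None ("need more").
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        if good.test_run_count >= 5 and bad.test_run_count >= 5:
            if compare(good, bad):
                return True
        # Re-test whichever side has fewer samples so both grow evenly.
        min(good, bad, key=lambda r: r.test_run_count).retest()
    return False
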
614 | 616 |
615 def get_exception(self): | 617 def get_exception(self): |
616 raise NotImplementedError() # pragma: no cover | 618 raise NotImplementedError() # pragma: no cover |
617 # TODO: should return an exception with the details of the failure. | 619 # TODO: should return an exception with the details of the failure. |
618 | 620 |
619 def _set_insufficient_confidence_warning( | 621 def _set_insufficient_confidence_warning( |
620 self): # pragma: no cover | 622 self): # pragma: no cover |
621 """Adds a warning about the lack of initial regression confidence.""" | 623 """Adds a warning about the lack of initial regression confidence.""" |
622 self.failed_initial_confidence = True | 624 self.failed_initial_confidence = True |
623 self.surface_result('LO_INIT_CONF') | 625 self.surface_result('LO_INIT_CONF') |
624 self.warnings.append( | 626 self.warnings.append( |
625 'Bisect failed to reproduce the regression with enough confidence.') | 627 'Bisect failed to reproduce the regression with enough confidence.') |
626 | 628 |
627 def _results_debug_message(self): | 629 def _results_debug_message(self): |
628 """Returns a string with values used to debug a bisect result.""" | 630 """Returns a string with values used to debug a bisect result.""" |
629 result = 'bisector.lkgr: %r\n' % self.lkgr | 631 result = 'bisector.lkgr: %r\n' % self.lkgr |
630 result += 'bisector.fkbr: %r\n\n' % self.fkbr | 632 result += 'bisector.fkbr: %r\n\n' % self.fkbr |
631 result += self._revision_value_table() | 633 result += self._revision_value_table() |
632 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values): | 634 if (self.lkgr and self.lkgr.test_run_count and self.fkbr and |
633 result += '\n' + self._t_test_results() | 635 self.fkbr.test_run_count): |
636 result += '\n' + '\n'.join([ | |
637 'LKGR values: %r' % list(self.lkgr.debug_values), | |
638 'FKBR values: %r' % list(self.fkbr.debug_values), | |
639 ]) | |
634 return result | 640 return result |
635 | 641 |
636 def _revision_value_table(self): | 642 def _revision_value_table(self): |
637 """Returns a string table showing revisions and their values.""" | 643 """Returns a string table showing revisions and their values.""" |
638 header = [['Revision', 'Values']] | 644 header = [['Revision', 'Values']] |
639 rows = [[r.revision_string(), str(r.values)] for r in self.revisions] | 645 rows = [[r.revision_string(), str(r.debug_values)] for r in self.revisions] |
640 return self._pretty_table(header + rows) | 646 return self._pretty_table(header + rows) |
641 | 647 |
642 def _pretty_table(self, data): | 648 def _pretty_table(self, data): |
643 results = [] | 649 results = [] |
644 for row in data: | 650 for row in data: |
645 results.append('%-15s' * len(row) % tuple(row)) | 651 results.append('%-15s' * len(row) % tuple(row)) |
646 return '\n'.join(results) | 652 return '\n'.join(results) |
647 | 653 |
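For instance, the fixed-width formatting in _pretty_table renders a small table like this (illustrative values; output shown in the trailing comments):

def pretty_table(data):
    # Each cell is left-aligned and padded to 15 characters.
    return '\n'.join('%-15s' * len(row) % tuple(row) for row in data)

print(pretty_table([['Revision', 'Values'],
                    ['chromium@3000', '[10.1, 10.2]'],
                    ['chromium@3001', '[12.0, 12.3]']]))
# Revision       Values
# chromium@3000  [10.1, 10.2]
# chromium@3001  [12.0, 12.3]
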
648 def _t_test_results(self): | |
649 """Returns a string showing t-test results for lkgr and fkbr.""" | |
650 t, df, p = self.api.m.math_utils.welchs_t_test( | |
651 self.lkgr.values, self.fkbr.values) | |
652 lines = [ | |
653 'LKGR values: %r' % self.lkgr.values, | |
654 'FKBR values: %r' % self.fkbr.values, | |
655 't-statistic: %r' % t, | |
656 'deg. of freedom: %r' % df, | |
657 'p-value: %r' % p, | |
658 'Confidence score: %r' % (100 * (1 - p)) | |
659 ] | |
660 return '\n'.join(lines) | |
661 | |
662 def print_result_debug_info(self): | 654 def print_result_debug_info(self): |
663 """Prints extra debug info at the end of the bisect process.""" | 655 """Prints extra debug info at the end of the bisect process.""" |
664 lines = self._results_debug_message().splitlines() | 656 lines = self._results_debug_message().splitlines() |
665 # If we emit a null step and then add a log to it, the log should be kept | 657 # If we emit a null step and then add a log to it, the log should be kept |
666 # longer than 7 days (which is often needed to debug some issues). | 658 # longer than 7 days (which is often needed to debug some issues). |
667 self.api.m.step('Debug Info', []) | 659 self.api.m.step('Debug Info', []) |
668 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines | 660 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines |
669 | 661 |
670 def post_result(self, halt_on_failure=False): | 662 def post_result(self, halt_on_failure=False): |
671 """Posts bisect results to Perf Dashboard.""" | 663 """Posts bisect results to Perf Dashboard.""" |
672 self.api.m.perf_dashboard.set_default_config() | 664 self.api.m.perf_dashboard.set_default_config() |
673 self.api.m.perf_dashboard.post_bisect_results( | 665 self.api.m.perf_dashboard.post_bisect_results( |
674 self.get_result(), halt_on_failure) | 666 self.get_result(), halt_on_failure) |
675 | 667 |
676 def get_revision_to_eval(self): | 668 def get_revision_to_eval(self): |
677 """Gets the next RevisionState object in the candidate range. | 669 """Gets the next RevisionState object in the candidate range. |
678 | 670 |
679 Returns: | 671 Returns: |
680 The next Revision object in a list. | 672 The next Revision object in a list. |
681 """ | 673 """ |
682 self._update_candidate_range() | 674 self._update_candidate_range() |
683 candidate_range = [revision for revision in | 675 candidate_range = [revision for revision in |
684 self.revisions[self.lkgr.list_index + 1: | 676 self.revisions[self.lkgr.list_index + 1: |
685 self.fkbr.list_index] | 677 self.fkbr.list_index] |
686 if not revision.tested and not revision.failed] | 678 if not revision.failed] |
687 if len(candidate_range) == 1: | 679 if len(candidate_range) == 1: |
688 return candidate_range[0] | 680 return candidate_range[0] |
689 if len(candidate_range) == 0: | 681 if len(candidate_range) == 0: |
690 return None | 682 return None |
691 | 683 |
692 default_revision = candidate_range[len(candidate_range) / 2] | 684 default_revision = candidate_range[len(candidate_range) / 2] |
693 | 685 |
694 with self.api.m.step.nest( | 686 with self.api.m.step.nest( |
695 'Wiggling revision ' + default_revision.revision_string()): | 687 'Wiggling revision ' + default_revision.revision_string()): |
696 # We'll search up to 25% of the range (in either direction) to try and | 688 # We'll search up to 25% of the range (in either direction) to try and |
(...skipping 36 matching lines...) | |
733 return True | 725 return True |
734 if (revision.good and revision.next_revision and | 726 if (revision.good and revision.next_revision and |
735 revision.next_revision.bad): | 727 revision.next_revision.bad): |
736 if (revision.next_revision.deps_change() | 728 if (revision.next_revision.deps_change() |
737 and self._expand_deps_revisions(revision.next_revision)): | 729 and self._expand_deps_revisions(revision.next_revision)): |
738 return False | 730 return False |
739 self.culprit = revision.next_revision | 731 self.culprit = revision.next_revision |
740 return True | 732 return True |
741 return False | 733 return False |
742 | 734 |
743 def wait_for_all(self, revision_list): | |
744 """Waits for all revisions in list to finish.""" | |
745 for r in revision_list: | |
746 self.wait_for(r) | |
747 | |
748 def wait_for(self, revision, nest_check=True): | |
749 """Waits for the revision to finish its job.""" | |
750 if nest_check and not self.flags.get( | |
751 'do_not_nest_wait_for_revision'): # pragma: no cover | |
752 with self.api.m.step.nest('Waiting for ' + revision.revision_string()): | |
753 return self.wait_for(revision, nest_check=False) | |
754 while True: | |
755 revision.update_status() | |
756 if revision.in_progress: | |
757 self.api.m.python.inline( | |
758 'sleeping', | |
759 """ | |
760 import sys | |
761 import time | |
762 time.sleep(20*60) | |
763 sys.exit(0) | |
764 """) | |
765 else: | |
766 break | |
767 | |
768 def _update_candidate_range(self): | 735 def _update_candidate_range(self): |
769 """Updates lkgr and fkbr (last known good/first known bad) revisions. | 736 """Updates lkgr and fkbr (last known good/first known bad) revisions. |
770 | 737 |
771 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in | 738 lkgr and fkbr are 'pointers' to the appropriate RevisionState objects in |
772 bisector.revisions.""" | 739 bisector.revisions.""" |
773 for r in self.revisions: | 740 for r in self.revisions: |
774 if r.tested: | 741 if r.test_run_count: |
775 if r.good: | 742 if r.good: |
776 self.lkgr = r | 743 self.lkgr = r |
777 elif r.bad: | 744 elif r.bad: |
778 self.fkbr = r | 745 self.fkbr = r |
779 break | 746 break |
780 assert self.lkgr and self.fkbr | 747 assert self.lkgr and self.fkbr |
781 | 748 |
782 def get_perf_tester_name(self): | 749 def get_perf_tester_name(self): |
783 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. | 750 """Gets the name of the tester bot (on tryserver.chromium.perf) to use. |
784 | 751 |
(...skipping 84 matching lines...) | |
869 assert len(result_code) <= 20 | 836 assert len(result_code) <= 20 |
870 if result_code not in self.result_codes: | 837 if result_code not in self.result_codes: |
871 self.result_codes.add(result_code) | 838 self.result_codes.add(result_code) |
872 properties = self.api.m.step.active_result.presentation.properties | 839 properties = self.api.m.step.active_result.presentation.properties |
873 properties['extra_result_code'] = sorted(self.result_codes) | 840 properties['extra_result_code'] = sorted(self.result_codes) |
874 | 841 |
875 def get_result(self): | 842 def get_result(self): |
876 """Returns the results as a jsonable object.""" | 843 """Returns the results as a jsonable object.""" |
877 config = self.bisect_config | 844 config = self.bisect_config |
878 results_confidence = 0 | 845 results_confidence = 0 |
879 if self.culprit: | |
880 results_confidence = self.api.m.math_utils.confidence_score( | |
881 self.lkgr.values, self.fkbr.values) | |
882 | 846 |
883 if self.failed: | 847 if self.failed: |
884 status = 'failed' | 848 status = 'failed' |
885 elif self.bisect_over: | 849 elif self.bisect_over: |
886 status = 'completed' | 850 status = 'completed' |
887 else: | 851 else: |
888 status = 'started' | 852 status = 'started' |
889 | 853 |
890 aborted_reason = None | 854 aborted_reason = None |
891 if self.failed_initial_confidence: | 855 if self.failed_initial_confidence: |
892 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON | 856 aborted_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON |
893 elif self.failed_direction: | 857 elif self.failed_direction: |
894 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON | 858 aborted_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON |
895 return { | 859 return { |
896 'try_job_id': config.get('try_job_id'), | 860 'try_job_id': config.get('try_job_id'), |
897 'bug_id': config.get('bug_id'), | 861 'bug_id': config.get('bug_id'), |
898 'status': status, | 862 'status': status, |
899 'buildbot_log_url': self._get_build_url(), | 863 'buildbot_log_url': self._get_build_url(), |
900 'bisect_bot': self.get_perf_tester_name(), | 864 'bisect_bot': self.get_perf_tester_name(), |
901 'command': config['command'], | 865 'command': config['command'], |
902 'test_type': config['test_type'], | 866 'test_type': config['test_type'], |
903 'metric': config['metric'], | 867 'metric': config['metric'], |
904 'change': self.relative_change, | 868 'change': self.relative_change, |
905 'score': results_confidence, | |
906 'good_revision': self.good_rev.commit_hash, | 869 'good_revision': self.good_rev.commit_hash, |
907 'bad_revision': self.bad_rev.commit_hash, | 870 'bad_revision': self.bad_rev.commit_hash, |
908 'warnings': self.warnings, | 871 'warnings': self.warnings, |
909 'aborted_reason': aborted_reason, | 872 'aborted_reason': aborted_reason, |
910 'culprit_data': self._culprit_data(), | 873 'culprit_data': self._culprit_data(), |
911 'revision_data': self._revision_data() | 874 'revision_data': self._revision_data() |
912 } | 875 } |
913 | 876 |
914 def _culprit_data(self): | 877 def _culprit_data(self): |
915 culprit = self.culprit | 878 culprit = self.culprit |
(...skipping 11 matching lines...) | |
927 'email': culprit_info['email'], | 890 'email': culprit_info['email'], |
928 'cl_date': culprit_info['date'], | 891 'cl_date': culprit_info['date'], |
929 'commit_info': culprit_info['body'], | 892 'commit_info': culprit_info['body'], |
930 'revisions_links': [], | 893 'revisions_links': [], |
931 'cl': culprit.commit_hash | 894 'cl': culprit.commit_hash |
932 } | 895 } |
933 | 896 |
934 def _revision_data(self): | 897 def _revision_data(self): |
935 revision_rows = [] | 898 revision_rows = [] |
936 for r in self.revisions: | 899 for r in self.revisions: |
937 if r.tested or r.aborted: | 900 if r.test_run_count: |
938 revision_rows.append({ | 901 revision_rows.append({ |
939 'depot_name': r.depot_name, | 902 'depot_name': r.depot_name, |
940 'commit_hash': r.commit_hash, | 903 'commit_hash': r.commit_hash, |
941 'revision_string': r.revision_string(), | 904 'revision_string': r.revision_string(), |
942 'mean_value': r.mean_value, | 905 'mean_value': r.mean, |
943 'std_dev': r.std_dev, | 906 'std_dev': r.std_dev, |
944 'values': r.values, | 907 'values': r.debug_values, |
945 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', | 908 'result': 'good' if r.good else 'bad' if r.bad else 'unknown', |
946 }) | 909 }) |
947 return revision_rows | 910 return revision_rows |
948 | 911 |
949 def _get_build_url(self): | 912 def _get_build_url(self): |
950 properties = self.api.m.properties | 913 properties = self.api.m.properties |
951 bot_url = properties.get('buildbotURL', | 914 bot_url = properties.get('buildbotURL', |
952 'http://build.chromium.org/p/chromium/') | 915 'http://build.chromium.org/p/chromium/') |
953 builder_name = urllib.quote(properties.get('buildername', '')) | 916 builder_name = urllib.quote(properties.get('buildername', '')) |
954 builder_number = str(properties.get('buildnumber', '')) | 917 builder_number = str(properties.get('buildnumber', '')) |
955 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) | 918 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number) |