tools/auto_bisect/bisect_perf_regression.py - Issue 665893003: Re-applying reverted changes for regression confidence check + fix: ConfidenceScore takes flat lists

Side by Side Diff: tools/auto_bisect/bisect_perf_regression.py

Issue 665893003: Re-applying reverted changes for regression confidence check + fix: ConfidenceScore takes flat lists (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Addressing comments Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.	2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """Performance Test Bisect Tool	6 """Performance Test Bisect Tool

7	7

8 This script bisects a series of changelists using binary search. It starts at	8 This script bisects a series of changelists using binary search. It starts at

9 a bad revision where a performance metric has regressed, and asks for a last	9 a bad revision where a performance metric has regressed, and asks for a last

10 known-good revision. It will then binary search across this revision range by	10 known-good revision. It will then binary search across this revision range by

(...skipping 57 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
68 BUILD_RESULT_FAIL = 1	68 BUILD_RESULT_FAIL = 1

69 BUILD_RESULT_SKIPPED = 2	69 BUILD_RESULT_SKIPPED = 2

70	70

71 # Maximum time in seconds to wait after posting build request to the try server.	71 # Maximum time in seconds to wait after posting build request to the try server.

72 # TODO: Change these values based on the actual time taken by buildbots on	72 # TODO: Change these values based on the actual time taken by buildbots on

73 # the try server.	73 # the try server.

74 MAX_MAC_BUILD_TIME = 14400	74 MAX_MAC_BUILD_TIME = 14400

75 MAX_WIN_BUILD_TIME = 14400	75 MAX_WIN_BUILD_TIME = 14400

76 MAX_LINUX_BUILD_TIME = 14400	76 MAX_LINUX_BUILD_TIME = 14400

77	77

	78 # The confidence percentage we require to consider the initial range a

	79 # regression based on the test results of the initial good and bad revisions.

	80 REGRESSION_CONFIDENCE = 95

	81

78 # Patch template to add a new file, DEPS.sha under src folder.	82 # Patch template to add a new file, DEPS.sha under src folder.

79 # This file contains SHA1 value of the DEPS changes made while bisecting	83 # This file contains SHA1 value of the DEPS changes made while bisecting

80 # dependency repositories. This patch send along with DEPS patch to try server.	84 # dependency repositories. This patch send along with DEPS patch to try server.

81 # When a build requested is posted with a patch, bisect builders on try server,	85 # When a build requested is posted with a patch, bisect builders on try server,

82 # once build is produced, it reads SHA value from this file and appends it	86 # once build is produced, it reads SHA value from this file and appends it

83 # to build archive filename.	87 # to build archive filename.

84 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha	88 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha

85 new file mode 100644	89 new file mode 100644

86 --- /dev/null	90 --- /dev/null

87 +++ DEPS.sha	91 +++ DEPS.sha

88 @@ -0,0 +1 @@	92 @@ -0,0 +1 @@

89 +%(deps_sha)s	93 +%(deps_sha)s

90 """	94 """

91	95

	96 REGRESSION_CONFIDENCE_ERROR_TEMPLATE = """

	97 We could not reproduce the regression with this test/metric/platform combination

	98 with enough confidence.

	99

	100 Here are the results for the initial revision range:

	101 'Good' revision: {good_rev}

	102 \tmean: {good_mean}

	103 \tstd.err.:{good_std_err}

	104 \tsample size:{good_sample_size}

	105 'Bad' revision: {bad_rev}

	106 \tmean: {bad_mean}

	107 \tstd.err.:{bad_std_err}

	108 \tsample size:{bad_sample_size}

	109

	110 NOTE: There's still a chance that this is actually a regression, but you may

	111 need to bisect a different platform."""

	112

92 # Git branch name used to run bisect try jobs.	113 # Git branch name used to run bisect try jobs.

93 BISECT_TRYJOB_BRANCH = 'bisect-tryjob'	114 BISECT_TRYJOB_BRANCH = 'bisect-tryjob'

94 # Git master branch name.	115 # Git master branch name.

95 BISECT_MASTER_BRANCH = 'master'	116 BISECT_MASTER_BRANCH = 'master'

96 # File to store 'git diff' content.	117 # File to store 'git diff' content.

97 BISECT_PATCH_FILE = 'deps_patch.txt'	118 BISECT_PATCH_FILE = 'deps_patch.txt'

98 # SVN repo where the bisect try jobs are submitted.	119 # SVN repo where the bisect try jobs are submitted.

99 SVN_REPO_URL = 'svn://svn.chromium.org/chrome-try/try-perf'	120 SVN_REPO_URL = 'svn://svn.chromium.org/chrome-try/try-perf'

100	121

101 class RunGitError(Exception):	122 class RunGitError(Exception):

(...skipping 480 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
582	603

583 if arg_dict.has_key('--profile-dir') and arg_dict.has_key('--browser'):	604 if arg_dict.has_key('--profile-dir') and arg_dict.has_key('--browser'):

584 profile_path, profile_type = os.path.split(arg_dict['--profile-dir'])	605 profile_path, profile_type = os.path.split(arg_dict['--profile-dir'])

585 return not bisect_utils.RunProcess(['python', path_to_generate,	606 return not bisect_utils.RunProcess(['python', path_to_generate,

586 '--profile-type-to-generate', profile_type,	607 '--profile-type-to-generate', profile_type,

587 '--browser', arg_dict['--browser'], '--output-dir', profile_path])	608 '--browser', arg_dict['--browser'], '--output-dir', profile_path])

588 return False	609 return False

589 return True	610 return True

590	611

591	612

	613 def _CheckRegressionConfidenceError(

	614 good_revision,

	615 bad_revision,

	616 known_good_value,

	617 known_bad_value):

	618 """Checks whether we can be confident beyond a certain degree that the given

	619 metrics represent a regression.

	620

	621 Args:

	622 good_revision: string representing the commit considered 'good'

	623 bad_revision: Same as above for 'bad'.

	624 known_good_value: A dict with at least: 'values', 'mean' and 'std_err'

	625 known_bad_value: Same as above.

	626

	627 Returns:

	628 False if there is no error (i.e. we can be confident there's a regression),

	629 a string containing the details of the lack of confidence otherwise.

	630 """

	631 error = False

	632 # Adding good and bad values to a parameter list.

	633 confidenceParams = []

	634 for l in [known_bad_value['values'], known_good_value['values']]:

	635 # Flatten if needed

	636 if isinstance(l, list) and all([isinstance(x, list) for x in l]):

	637 confidenceParams.append(sum(l, []))

	638 else:

	639 confidenceParams.append(l)

	640 regression_confidence = BisectResults.ConfidenceScore(*confidenceParams)

	641 if regression_confidence < REGRESSION_CONFIDENCE:

	642 error = REGRESSION_CONFIDENCE_ERROR_TEMPLATE.format(

	643 good_rev=good_revision,

	644 good_mean=known_good_value['mean'],

	645 good_std_err=known_good_value['std_err'],

	646 good_sample_size=len(known_good_value['values']),

	647 bad_rev=bad_revision,

	648 bad_mean=known_bad_value['mean'],

	649 bad_std_err=known_bad_value['std_err'],

	650 bad_sample_size=len(known_bad_value['values']))

	651 return error

	652

592 class DepotDirectoryRegistry(object):	653 class DepotDirectoryRegistry(object):

593	654

594 def __init__(self, src_cwd):	655 def __init__(self, src_cwd):

595 self.depot_cwd = {}	656 self.depot_cwd = {}

596 for depot in bisect_utils.DEPOT_NAMES:	657 for depot in bisect_utils.DEPOT_NAMES:

597 # The working directory of each depot is just the path to the depot, but	658 # The working directory of each depot is just the path to the depot, but

598 # since we're already in 'src', we can skip that part.	659 # since we're already in 'src', we can skip that part.

599 path_in_src = bisect_utils.DEPOT_DEPS_NAME[depot]['src'][4:]	660 path_in_src = bisect_utils.DEPOT_DEPS_NAME[depot]['src'][4:]

600 self.AddDepot(depot, os.path.join(src_cwd, path_in_src))	661 self.AddDepot(depot, os.path.join(src_cwd, path_in_src))

601	662

(...skipping 1608 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2210 '\'good\' - \'bad\' range of revisions represent an '	2271 '\'good\' - \'bad\' range of revisions represent an '

2211 'improvement (and not a regression).')	2272 'improvement (and not a regression).')

2212 return BisectResults(error=error)	2273 return BisectResults(error=error)

2213 print message, "Therefore we continue to bisect."	2274 print message, "Therefore we continue to bisect."

2214	2275

2215 bisect_state = BisectState(target_depot, revision_list)	2276 bisect_state = BisectState(target_depot, revision_list)

2216 revision_states = bisect_state.GetRevisionStates()	2277 revision_states = bisect_state.GetRevisionStates()

2217	2278

2218 min_revision = 0	2279 min_revision = 0

2219 max_revision = len(revision_states) - 1	2280 max_revision = len(revision_states) - 1

	2281 # Check how likely it is that the good and bad results are different

	2282 # beyond chance-induced variation.

	2283 if not self.opts.debug_ignore_regression_confidence:

	2284 error = _CheckRegressionConfidenceError(good_revision,

	2285 bad_revision,

	2286 known_good_value,

	2287 known_bad_value)

	2288 if error:

	2289 return BisectResults(error=error)

2220	2290

2221 # Can just mark the good and bad revisions explicitly here since we	2291 # Can just mark the good and bad revisions explicitly here since we

2222 # already know the results.	2292 # already know the results.

2223 bad_revision_state = revision_states[min_revision]	2293 bad_revision_state = revision_states[min_revision]

2224 bad_revision_state.external = bad_results[2]	2294 bad_revision_state.external = bad_results[2]

2225 bad_revision_state.perf_time = bad_results[3]	2295 bad_revision_state.perf_time = bad_results[3]

2226 bad_revision_state.build_time = bad_results[4]	2296 bad_revision_state.build_time = bad_results[4]

2227 bad_revision_state.passed = False	2297 bad_revision_state.passed = False

2228 bad_revision_state.value = known_bad_value	2298 bad_revision_state.value = known_bad_value

2229	2299

(...skipping 188 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2418 self.max_time_minutes = 20	2488 self.max_time_minutes = 20

2419 self.metric = None	2489 self.metric = None

2420 self.command = None	2490 self.command = None

2421 self.output_buildbot_annotations = None	2491 self.output_buildbot_annotations = None

2422 self.no_custom_deps = False	2492 self.no_custom_deps = False

2423 self.working_directory = None	2493 self.working_directory = None

2424 self.extra_src = None	2494 self.extra_src = None

2425 self.debug_ignore_build = None	2495 self.debug_ignore_build = None

2426 self.debug_ignore_sync = None	2496 self.debug_ignore_sync = None

2427 self.debug_ignore_perf_test = None	2497 self.debug_ignore_perf_test = None

	2498 self.debug_ignore_regression_confidence = None

2428 self.debug_fake_first_test_mean = 0	2499 self.debug_fake_first_test_mean = 0

2429 self.gs_bucket = None	2500 self.gs_bucket = None

2430 self.target_arch = 'ia32'	2501 self.target_arch = 'ia32'

2431 self.target_build_type = 'Release'	2502 self.target_build_type = 'Release'

2432 self.builder_host = None	2503 self.builder_host = None

2433 self.builder_port = None	2504 self.builder_port = None

2434 self.bisect_mode = bisect_utils.BISECT_MODE_MEAN	2505 self.bisect_mode = bisect_utils.BISECT_MODE_MEAN

2435 self.improvement_direction = 0	2506 self.improvement_direction = 0

2436	2507

2437 @staticmethod	2508 @staticmethod

(...skipping 148 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2586 group = optparse.OptionGroup(parser, 'Debug options')	2657 group = optparse.OptionGroup(parser, 'Debug options')

2587 group.add_option('--debug_ignore_build',	2658 group.add_option('--debug_ignore_build',

2588 action='store_true',	2659 action='store_true',

2589 help='DEBUG: Don\'t perform builds.')	2660 help='DEBUG: Don\'t perform builds.')

2590 group.add_option('--debug_ignore_sync',	2661 group.add_option('--debug_ignore_sync',

2591 action='store_true',	2662 action='store_true',

2592 help='DEBUG: Don\'t perform syncs.')	2663 help='DEBUG: Don\'t perform syncs.')

2593 group.add_option('--debug_ignore_perf_test',	2664 group.add_option('--debug_ignore_perf_test',

2594 action='store_true',	2665 action='store_true',

2595 help='DEBUG: Don\'t perform performance tests.')	2666 help='DEBUG: Don\'t perform performance tests.')

	2667 group.add_option('--debug_ignore_regression_confidence',

	2668 action='store_true',

	2669 help='DEBUG: Don\'t score the confidence of the initial '

	2670 'good and bad revisions\' test results.')

2596 group.add_option('--debug_fake_first_test_mean',	2671 group.add_option('--debug_fake_first_test_mean',

2597 type='int',	2672 type='int',

2598 default='0',	2673 default='0',

2599 help=('DEBUG: When faking performance tests, return this '	2674 help=('DEBUG: When faking performance tests, return this '

2600 'value as the mean of the first performance test, '	2675 'value as the mean of the first performance test, '

2601 'and return a mean of 0.0 for further tests.'))	2676 'and return a mean of 0.0 for further tests.'))

2602 parser.add_option_group(group)	2677 parser.add_option_group(group)

2603 return parser	2678 return parser

2604	2679

2605 def ParseCommandLine(self):	2680 def ParseCommandLine(self):

(...skipping 141 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2747 # bugs. If you change this, please update the perf dashboard as well.	2822 # bugs. If you change this, please update the perf dashboard as well.

2748 bisect_utils.OutputAnnotationStepStart('Results')	2823 bisect_utils.OutputAnnotationStepStart('Results')

2749 print 'Error: %s' % e.message	2824 print 'Error: %s' % e.message

2750 if opts.output_buildbot_annotations:	2825 if opts.output_buildbot_annotations:

2751 bisect_utils.OutputAnnotationStepClosed()	2826 bisect_utils.OutputAnnotationStepClosed()

2752 return 1	2827 return 1

2753	2828

2754	2829

2755 if __name__ == '__main__':	2830 if __name__ == '__main__':

2756 sys.exit(main())	2831 sys.exit(main())

OLD	NEW

« no previous file with comments | « no previous file | tools/auto_bisect/bisect_perf_regression_test.py » ('j') | no next file with comments »