OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Performance Test Bisect Tool | 6 """Performance Test Bisect Tool |
7 | 7 |
8 This script bisects a series of changelists using binary search. It starts at | 8 This script bisects a series of changelists using binary search. It starts at |
9 a bad revision where a performance metric has regressed, and asks for a last | 9 a bad revision where a performance metric has regressed, and asks for a last |
10 known-good revision. It will then binary search across this revision range by | 10 known-good revision. It will then binary search across this revision range by |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
68 BUILD_RESULT_FAIL = 1 | 68 BUILD_RESULT_FAIL = 1 |
69 BUILD_RESULT_SKIPPED = 2 | 69 BUILD_RESULT_SKIPPED = 2 |
70 | 70 |
71 # Maximum time in seconds to wait after posting build request to the try server. | 71 # Maximum time in seconds to wait after posting build request to the try server. |
72 # TODO: Change these values based on the actual time taken by buildbots on | 72 # TODO: Change these values based on the actual time taken by buildbots on |
73 # the try server. | 73 # the try server. |
74 MAX_MAC_BUILD_TIME = 14400 | 74 MAX_MAC_BUILD_TIME = 14400 |
75 MAX_WIN_BUILD_TIME = 14400 | 75 MAX_WIN_BUILD_TIME = 14400 |
76 MAX_LINUX_BUILD_TIME = 14400 | 76 MAX_LINUX_BUILD_TIME = 14400 |
77 | 77 |
78 # The confidence percentage we require to consider the initial range a | |
79 # regression based on the test results of the initial good and bad revisions. | |
80 REGRESSION_CONFIDENCE = 95 | |
81 | |
78 # Patch template to add a new file, DEPS.sha under src folder. | 82 # Patch template to add a new file, DEPS.sha under src folder. |
79 # This file contains SHA1 value of the DEPS changes made while bisecting | 83 # This file contains SHA1 value of the DEPS changes made while bisecting |
80 # dependency repositories. This patch is sent with DEPS patch to try server. | 84 # dependency repositories. This patch is sent with DEPS patch to try server. |
81 # When a build requested is posted with a patch, bisect builders on try server, | 85 # When a build requested is posted with a patch, bisect builders on try server, |
82 # once build is produced, it reads SHA value from this file and appends it | 86 # once build is produced, it reads SHA value from this file and appends it |
83 # to build archive filename. | 87 # to build archive filename. |
84 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha | 88 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha |
85 new file mode 100644 | 89 new file mode 100644 |
86 --- /dev/null | 90 --- /dev/null |
87 +++ DEPS.sha | 91 +++ DEPS.sha |
88 @@ -0,0 +1 @@ | 92 @@ -0,0 +1 @@ |
89 +%(deps_sha)s | 93 +%(deps_sha)s |
90 """ | 94 """ |
91 | 95 |
96 REGRESSION_CONFIDENCE_ERROR_TEMPLATE = """ | |
97 We could not reproduce the regression with this test/metric/platform combination | |
98 with enough confidence. | |
99 | |
100 Here are the results for the initial revision range: | |
101 \'Good\' revision: {} | |
qyearsley
2014/10/23 00:38:39
Giving names to each of these template fields woul
RobertoCN
2014/10/23 19:51:45
Done.
| |
102 \tmean: {} | |
103 \tstd.err.:{} | |
104 \tsample size:{} | |
105 \'Bad\' revision: {} | |
106 \tmean: {} | |
107 \tstd.err.:{} | |
108 \tsample size:{} | |
109 | |
110 NOTE: There\'s still a chance that this is actually a regression, but you may | |
qyearsley
2014/10/23 00:38:39
Escaping single quotes isn't necessary inside a "
RobertoCN
2014/10/23 19:51:45
Done.
| |
111 need to bisect a different platform.""" | |
112 | |
92 # Git branch name used to run bisect try jobs. | 113 # Git branch name used to run bisect try jobs. |
93 BISECT_TRYJOB_BRANCH = 'bisect-tryjob' | 114 BISECT_TRYJOB_BRANCH = 'bisect-tryjob' |
94 # Git master branch name. | 115 # Git master branch name. |
95 BISECT_MASTER_BRANCH = 'master' | 116 BISECT_MASTER_BRANCH = 'master' |
96 # File to store 'git diff' content. | 117 # File to store 'git diff' content. |
97 BISECT_PATCH_FILE = 'deps_patch.txt' | 118 BISECT_PATCH_FILE = 'deps_patch.txt' |
98 # SVN repo where the bisect try jobs are submitted. | 119 # SVN repo where the bisect try jobs are submitted. |
99 SVN_REPO_URL = 'svn://svn.chromium.org/chrome-try/try-perf' | 120 SVN_REPO_URL = 'svn://svn.chromium.org/chrome-try/try-perf' |
100 | 121 |
101 class RunGitError(Exception): | 122 class RunGitError(Exception): |
(...skipping 2108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2210 '\'good\' - \'bad\' range of revisions represent an ' | 2231 '\'good\' - \'bad\' range of revisions represent an ' |
2211 'improvement (and not a regression).') | 2232 'improvement (and not a regression).') |
2212 return BisectResults(error=error) | 2233 return BisectResults(error=error) |
2213 print message, "Therefore we continue to bisect." | 2234 print message, "Therefore we continue to bisect." |
2214 | 2235 |
2215 bisect_state = BisectState(target_depot, revision_list) | 2236 bisect_state = BisectState(target_depot, revision_list) |
2216 revision_states = bisect_state.GetRevisionStates() | 2237 revision_states = bisect_state.GetRevisionStates() |
2217 | 2238 |
2218 min_revision = 0 | 2239 min_revision = 0 |
2219 max_revision = len(revision_states) - 1 | 2240 max_revision = len(revision_states) - 1 |
2241 # Check how likely it is that the good and bad results are different | |
2242 # beyond chance-induced variation. | |
2243 if not self.opts.debug_ignore_regression_confidence: | |
2244 # Adding good and bad values to a parameter list. | |
2245 confidenceParams = [] | |
2246 for l in [known_bad_value['values'], known_good_value['values']]: | |
2247 # Flatten if needed | |
2248 if isinstance(l, list) and all([isinstance(x, list) for x in l]): | |
2249 confidenceParams.append(sum(l, [])) | |
2250 else: | |
2251 confidenceParams.append(l) | |
2252 regression_confidence = BisectResults.ConfidenceScore(*confidenceParams) | |
2253 if regression_confidence < REGRESSION_CONFIDENCE: | |
2254 error = REGRESSION_CONFIDENCE_ERROR_TEMPLATE.format( | |
2255 good_revision, known_good_value['mean'], | |
2256 known_good_value['std_err'], len(known_good_value['values']), | |
2257 bad_revision, known_bad_value['mean'], | |
2258 known_bad_value['std_err'], len(known_bad_value['values'])) | |
2259 return BisectResults(error=error) | |
qyearsley
2014/10/23 00:38:38
Might be a good idea to extract everything under t
RobertoCN
2014/10/23 19:51:45
Done.
| |
2220 | 2260 |
2221 # Can just mark the good and bad revisions explicitly here since we | 2261 # Can just mark the good and bad revisions explicitly here since we |
2222 # already know the results. | 2262 # already know the results. |
2223 bad_revision_state = revision_states[min_revision] | 2263 bad_revision_state = revision_states[min_revision] |
2224 bad_revision_state.external = bad_results[2] | 2264 bad_revision_state.external = bad_results[2] |
2225 bad_revision_state.perf_time = bad_results[3] | 2265 bad_revision_state.perf_time = bad_results[3] |
2226 bad_revision_state.build_time = bad_results[4] | 2266 bad_revision_state.build_time = bad_results[4] |
2227 bad_revision_state.passed = False | 2267 bad_revision_state.passed = False |
2228 bad_revision_state.value = known_bad_value | 2268 bad_revision_state.value = known_bad_value |
2229 | 2269 |
(...skipping 188 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2418 self.max_time_minutes = 20 | 2458 self.max_time_minutes = 20 |
2419 self.metric = None | 2459 self.metric = None |
2420 self.command = None | 2460 self.command = None |
2421 self.output_buildbot_annotations = None | 2461 self.output_buildbot_annotations = None |
2422 self.no_custom_deps = False | 2462 self.no_custom_deps = False |
2423 self.working_directory = None | 2463 self.working_directory = None |
2424 self.extra_src = None | 2464 self.extra_src = None |
2425 self.debug_ignore_build = None | 2465 self.debug_ignore_build = None |
2426 self.debug_ignore_sync = None | 2466 self.debug_ignore_sync = None |
2427 self.debug_ignore_perf_test = None | 2467 self.debug_ignore_perf_test = None |
2468 self.debug_ignore_regression_confidence = None | |
2428 self.debug_fake_first_test_mean = 0 | 2469 self.debug_fake_first_test_mean = 0 |
2429 self.gs_bucket = None | 2470 self.gs_bucket = None |
2430 self.target_arch = 'ia32' | 2471 self.target_arch = 'ia32' |
2431 self.target_build_type = 'Release' | 2472 self.target_build_type = 'Release' |
2432 self.builder_host = None | 2473 self.builder_host = None |
2433 self.builder_port = None | 2474 self.builder_port = None |
2434 self.bisect_mode = bisect_utils.BISECT_MODE_MEAN | 2475 self.bisect_mode = bisect_utils.BISECT_MODE_MEAN |
2435 self.improvement_direction = 0 | 2476 self.improvement_direction = 0 |
2436 | 2477 |
2437 @staticmethod | 2478 @staticmethod |
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2586 group = optparse.OptionGroup(parser, 'Debug options') | 2627 group = optparse.OptionGroup(parser, 'Debug options') |
2587 group.add_option('--debug_ignore_build', | 2628 group.add_option('--debug_ignore_build', |
2588 action='store_true', | 2629 action='store_true', |
2589 help='DEBUG: Don\'t perform builds.') | 2630 help='DEBUG: Don\'t perform builds.') |
2590 group.add_option('--debug_ignore_sync', | 2631 group.add_option('--debug_ignore_sync', |
2591 action='store_true', | 2632 action='store_true', |
2592 help='DEBUG: Don\'t perform syncs.') | 2633 help='DEBUG: Don\'t perform syncs.') |
2593 group.add_option('--debug_ignore_perf_test', | 2634 group.add_option('--debug_ignore_perf_test', |
2594 action='store_true', | 2635 action='store_true', |
2595 help='DEBUG: Don\'t perform performance tests.') | 2636 help='DEBUG: Don\'t perform performance tests.') |
2637 group.add_option('--debug_ignore_regression_confidence', | |
2638 action='store_true', | |
2639 help='DEBUG: Don\'t score the confidence of the initial ' | |
2640 'good and bad revisions\' test results.') | |
2596 group.add_option('--debug_fake_first_test_mean', | 2641 group.add_option('--debug_fake_first_test_mean', |
2597 type='int', | 2642 type='int', |
2598 default='0', | 2643 default='0', |
2599 help=('DEBUG: When faking performance tests, return this ' | 2644 help=('DEBUG: When faking performance tests, return this ' |
2600 'value as the mean of the first performance test, ' | 2645 'value as the mean of the first performance test, ' |
2601 'and return a mean of 0.0 for further tests.')) | 2646 'and return a mean of 0.0 for further tests.')) |
2602 parser.add_option_group(group) | 2647 parser.add_option_group(group) |
2603 return parser | 2648 return parser |
2604 | 2649 |
2605 def ParseCommandLine(self): | 2650 def ParseCommandLine(self): |
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2747 # bugs. If you change this, please update the perf dashboard as well. | 2792 # bugs. If you change this, please update the perf dashboard as well. |
2748 bisect_utils.OutputAnnotationStepStart('Results') | 2793 bisect_utils.OutputAnnotationStepStart('Results') |
2749 print 'Error: %s' % e.message | 2794 print 'Error: %s' % e.message |
2750 if opts.output_buildbot_annotations: | 2795 if opts.output_buildbot_annotations: |
2751 bisect_utils.OutputAnnotationStepClosed() | 2796 bisect_utils.OutputAnnotationStepClosed() |
2752 return 1 | 2797 return 1 |
2753 | 2798 |
2754 | 2799 |
2755 if __name__ == '__main__': | 2800 if __name__ == '__main__': |
2756 sys.exit(main()) | 2801 sys.exit(main()) |
OLD | NEW |