OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Performance Test Bisect Tool | 6 """Performance Test Bisect Tool |
7 | 7 |
8 This script bisects a series of changelists using binary search. It starts at | 8 This script bisects a series of changelists using binary search. It starts at |
9 a bad revision where a performance metric has regressed, and asks for a last | 9 a bad revision where a performance metric has regressed, and asks for a last |
10 known-good revision. It will then binary search across this revision range by | 10 known-good revision. It will then binary search across this revision range by |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
68 BUILD_RESULT_FAIL = 1 | 68 BUILD_RESULT_FAIL = 1 |
69 BUILD_RESULT_SKIPPED = 2 | 69 BUILD_RESULT_SKIPPED = 2 |
70 | 70 |
71 # Maximum time in seconds to wait after posting build request to the try server. | 71 # Maximum time in seconds to wait after posting build request to the try server. |
72 # TODO: Change these values based on the actual time taken by buildbots on | 72 # TODO: Change these values based on the actual time taken by buildbots on |
73 # the try server. | 73 # the try server. |
74 MAX_MAC_BUILD_TIME = 14400 | 74 MAX_MAC_BUILD_TIME = 14400 |
75 MAX_WIN_BUILD_TIME = 14400 | 75 MAX_WIN_BUILD_TIME = 14400 |
76 MAX_LINUX_BUILD_TIME = 14400 | 76 MAX_LINUX_BUILD_TIME = 14400 |
77 | 77 |
| 78 # The confidence percentage we require to consider the initial range a |
| 79 # regression based on the test results of the initial good and bad revisions. |
| 80 REGRESSION_CONFIDENCE = 95 |
| 81 |
78 # Patch template to add a new file, DEPS.sha under src folder. | 82 # Patch template to add a new file, DEPS.sha under src folder. |
79 # This file contains SHA1 value of the DEPS changes made while bisecting | 83 # This file contains SHA1 value of the DEPS changes made while bisecting |
80 # dependency repositories. This patch send along with DEPS patch to try server. | 84 # dependency repositories. This patch send along with DEPS patch to try server. |
81 # When a build requested is posted with a patch, bisect builders on try server, | 85 # When a build requested is posted with a patch, bisect builders on try server, |
82 # once build is produced, it reads SHA value from this file and appends it | 86 # once build is produced, it reads SHA value from this file and appends it |
83 # to build archive filename. | 87 # to build archive filename. |
84 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha | 88 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha |
85 new file mode 100644 | 89 new file mode 100644 |
86 --- /dev/null | 90 --- /dev/null |
87 +++ DEPS.sha | 91 +++ DEPS.sha |
88 @@ -0,0 +1 @@ | 92 @@ -0,0 +1 @@ |
89 +%(deps_sha)s | 93 +%(deps_sha)s |
90 """ | 94 """ |
91 | 95 |
| 96 REGRESSION_CONFIDENCE_ERROR_TEMPLATE = """ |
| 97 We could not reproduce the regression with this test/metric/platform combination |
| 98 with enough confidence. |
| 99 |
| 100 Here are the results for the initial revision range: |
| 101 'Good' revision: {good_rev} |
| 102 \tmean: {good_mean} |
| 103 \tstd.err.:{good_std_err} |
| 104 \tsample size:{good_sample_size} |
| 105 'Bad' revision: {bad_rev} |
| 106 \tmean: {bad_mean} |
| 107 \tstd.err.:{bad_std_err} |
| 108 \tsample size:{bad_sample_size} |
| 109 |
| 110 NOTE: There's still a chance that this is actually a regression, but you may |
| 111 need to bisect a different platform.""" |
| 112 |
92 # Git branch name used to run bisect try jobs. | 113 # Git branch name used to run bisect try jobs. |
93 BISECT_TRYJOB_BRANCH = 'bisect-tryjob' | 114 BISECT_TRYJOB_BRANCH = 'bisect-tryjob' |
94 # Git master branch name. | 115 # Git master branch name. |
95 BISECT_MASTER_BRANCH = 'master' | 116 BISECT_MASTER_BRANCH = 'master' |
96 # File to store 'git diff' content. | 117 # File to store 'git diff' content. |
97 BISECT_PATCH_FILE = 'deps_patch.txt' | 118 BISECT_PATCH_FILE = 'deps_patch.txt' |
98 # SVN repo where the bisect try jobs are submitted. | 119 # SVN repo where the bisect try jobs are submitted. |
99 SVN_REPO_URL = 'svn://svn.chromium.org/chrome-try/try-perf' | 120 SVN_REPO_URL = 'svn://svn.chromium.org/chrome-try/try-perf' |
100 | 121 |
101 class RunGitError(Exception): | 122 class RunGitError(Exception): |
(...skipping 480 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
582 | 603 |
583 if arg_dict.has_key('--profile-dir') and arg_dict.has_key('--browser'): | 604 if arg_dict.has_key('--profile-dir') and arg_dict.has_key('--browser'): |
584 profile_path, profile_type = os.path.split(arg_dict['--profile-dir']) | 605 profile_path, profile_type = os.path.split(arg_dict['--profile-dir']) |
585 return not bisect_utils.RunProcess(['python', path_to_generate, | 606 return not bisect_utils.RunProcess(['python', path_to_generate, |
586 '--profile-type-to-generate', profile_type, | 607 '--profile-type-to-generate', profile_type, |
587 '--browser', arg_dict['--browser'], '--output-dir', profile_path]) | 608 '--browser', arg_dict['--browser'], '--output-dir', profile_path]) |
588 return False | 609 return False |
589 return True | 610 return True |
590 | 611 |
591 | 612 |
| 613 def _CheckRegressionConfidenceError( |
| 614 good_revision, |
| 615 bad_revision, |
| 616 known_good_value, |
| 617 known_bad_value): |
| 618 """Checks whether we can be confident beyond a certain degree that the given |
| 619 metrics represent a regression. |
| 620 |
| 621 Args: |
| 622 good_revision: string representing the commit considered 'good' |
| 623 bad_revision: Same as above for 'bad'. |
| 624 known_good_value: A dict with at least: 'values', 'mean' and 'std_err' |
| 625 known_bad_value: Same as above. |
| 626 |
| 627 Returns: |
| 628 False if there is no error (i.e. we can be confident there's a regression), |
| 629 a string containing the details of the lack of confidence otherwise. |
| 630 """ |
| 631 error = False |
| 632 # Adding good and bad values to a parameter list. |
| 633 confidenceParams = [] |
| 634 for l in [known_bad_value['values'], known_good_value['values']]: |
| 635 # Flatten if needed |
| 636 if isinstance(l, list) and all([isinstance(x, list) for x in l]): |
| 637 confidenceParams.append(sum(l, [])) |
| 638 else: |
| 639 confidenceParams.append(l) |
| 640 regression_confidence = BisectResults.ConfidenceScore(*confidenceParams) |
| 641 if regression_confidence < REGRESSION_CONFIDENCE: |
| 642 error = REGRESSION_CONFIDENCE_ERROR_TEMPLATE.format( |
| 643 good_rev=good_revision, |
| 644 good_mean=known_good_value['mean'], |
| 645 good_std_err=known_good_value['std_err'], |
| 646 good_sample_size=len(known_good_value['values']), |
| 647 bad_rev=bad_revision, |
| 648 bad_mean=known_bad_value['mean'], |
| 649 bad_std_err=known_bad_value['std_err'], |
| 650 bad_sample_size=len(known_bad_value['values'])) |
| 651 return error |
| 652 |
592 class DepotDirectoryRegistry(object): | 653 class DepotDirectoryRegistry(object): |
593 | 654 |
594 def __init__(self, src_cwd): | 655 def __init__(self, src_cwd): |
595 self.depot_cwd = {} | 656 self.depot_cwd = {} |
596 for depot in bisect_utils.DEPOT_NAMES: | 657 for depot in bisect_utils.DEPOT_NAMES: |
597 # The working directory of each depot is just the path to the depot, but | 658 # The working directory of each depot is just the path to the depot, but |
598 # since we're already in 'src', we can skip that part. | 659 # since we're already in 'src', we can skip that part. |
599 path_in_src = bisect_utils.DEPOT_DEPS_NAME[depot]['src'][4:] | 660 path_in_src = bisect_utils.DEPOT_DEPS_NAME[depot]['src'][4:] |
600 self.AddDepot(depot, os.path.join(src_cwd, path_in_src)) | 661 self.AddDepot(depot, os.path.join(src_cwd, path_in_src)) |
601 | 662 |
(...skipping 1608 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2210 '\'good\' - \'bad\' range of revisions represent an ' | 2271 '\'good\' - \'bad\' range of revisions represent an ' |
2211 'improvement (and not a regression).') | 2272 'improvement (and not a regression).') |
2212 return BisectResults(error=error) | 2273 return BisectResults(error=error) |
2213 print message, "Therefore we continue to bisect." | 2274 print message, "Therefore we continue to bisect." |
2214 | 2275 |
2215 bisect_state = BisectState(target_depot, revision_list) | 2276 bisect_state = BisectState(target_depot, revision_list) |
2216 revision_states = bisect_state.GetRevisionStates() | 2277 revision_states = bisect_state.GetRevisionStates() |
2217 | 2278 |
2218 min_revision = 0 | 2279 min_revision = 0 |
2219 max_revision = len(revision_states) - 1 | 2280 max_revision = len(revision_states) - 1 |
| 2281 # Check how likely it is that the good and bad results are different |
| 2282 # beyond chance-induced variation. |
| 2283 if not self.opts.debug_ignore_regression_confidence: |
| 2284 error = _CheckRegressionConfidenceError(good_revision, |
| 2285 bad_revision, |
| 2286 known_good_value, |
| 2287 known_bad_value) |
| 2288 if error: |
| 2289 return BisectResults(error=error) |
2220 | 2290 |
2221 # Can just mark the good and bad revisions explicitly here since we | 2291 # Can just mark the good and bad revisions explicitly here since we |
2222 # already know the results. | 2292 # already know the results. |
2223 bad_revision_state = revision_states[min_revision] | 2293 bad_revision_state = revision_states[min_revision] |
2224 bad_revision_state.external = bad_results[2] | 2294 bad_revision_state.external = bad_results[2] |
2225 bad_revision_state.perf_time = bad_results[3] | 2295 bad_revision_state.perf_time = bad_results[3] |
2226 bad_revision_state.build_time = bad_results[4] | 2296 bad_revision_state.build_time = bad_results[4] |
2227 bad_revision_state.passed = False | 2297 bad_revision_state.passed = False |
2228 bad_revision_state.value = known_bad_value | 2298 bad_revision_state.value = known_bad_value |
2229 | 2299 |
(...skipping 188 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2418 self.max_time_minutes = 20 | 2488 self.max_time_minutes = 20 |
2419 self.metric = None | 2489 self.metric = None |
2420 self.command = None | 2490 self.command = None |
2421 self.output_buildbot_annotations = None | 2491 self.output_buildbot_annotations = None |
2422 self.no_custom_deps = False | 2492 self.no_custom_deps = False |
2423 self.working_directory = None | 2493 self.working_directory = None |
2424 self.extra_src = None | 2494 self.extra_src = None |
2425 self.debug_ignore_build = None | 2495 self.debug_ignore_build = None |
2426 self.debug_ignore_sync = None | 2496 self.debug_ignore_sync = None |
2427 self.debug_ignore_perf_test = None | 2497 self.debug_ignore_perf_test = None |
| 2498 self.debug_ignore_regression_confidence = None |
2428 self.debug_fake_first_test_mean = 0 | 2499 self.debug_fake_first_test_mean = 0 |
2429 self.gs_bucket = None | 2500 self.gs_bucket = None |
2430 self.target_arch = 'ia32' | 2501 self.target_arch = 'ia32' |
2431 self.target_build_type = 'Release' | 2502 self.target_build_type = 'Release' |
2432 self.builder_host = None | 2503 self.builder_host = None |
2433 self.builder_port = None | 2504 self.builder_port = None |
2434 self.bisect_mode = bisect_utils.BISECT_MODE_MEAN | 2505 self.bisect_mode = bisect_utils.BISECT_MODE_MEAN |
2435 self.improvement_direction = 0 | 2506 self.improvement_direction = 0 |
2436 | 2507 |
2437 @staticmethod | 2508 @staticmethod |
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2586 group = optparse.OptionGroup(parser, 'Debug options') | 2657 group = optparse.OptionGroup(parser, 'Debug options') |
2587 group.add_option('--debug_ignore_build', | 2658 group.add_option('--debug_ignore_build', |
2588 action='store_true', | 2659 action='store_true', |
2589 help='DEBUG: Don\'t perform builds.') | 2660 help='DEBUG: Don\'t perform builds.') |
2590 group.add_option('--debug_ignore_sync', | 2661 group.add_option('--debug_ignore_sync', |
2591 action='store_true', | 2662 action='store_true', |
2592 help='DEBUG: Don\'t perform syncs.') | 2663 help='DEBUG: Don\'t perform syncs.') |
2593 group.add_option('--debug_ignore_perf_test', | 2664 group.add_option('--debug_ignore_perf_test', |
2594 action='store_true', | 2665 action='store_true', |
2595 help='DEBUG: Don\'t perform performance tests.') | 2666 help='DEBUG: Don\'t perform performance tests.') |
| 2667 group.add_option('--debug_ignore_regression_confidence', |
| 2668 action='store_true', |
| 2669 help='DEBUG: Don\'t score the confidence of the initial ' |
| 2670 'good and bad revisions\' test results.') |
2596 group.add_option('--debug_fake_first_test_mean', | 2671 group.add_option('--debug_fake_first_test_mean', |
2597 type='int', | 2672 type='int', |
2598 default='0', | 2673 default='0', |
2599 help=('DEBUG: When faking performance tests, return this ' | 2674 help=('DEBUG: When faking performance tests, return this ' |
2600 'value as the mean of the first performance test, ' | 2675 'value as the mean of the first performance test, ' |
2601 'and return a mean of 0.0 for further tests.')) | 2676 'and return a mean of 0.0 for further tests.')) |
2602 parser.add_option_group(group) | 2677 parser.add_option_group(group) |
2603 return parser | 2678 return parser |
2604 | 2679 |
2605 def ParseCommandLine(self): | 2680 def ParseCommandLine(self): |
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2747 # bugs. If you change this, please update the perf dashboard as well. | 2822 # bugs. If you change this, please update the perf dashboard as well. |
2748 bisect_utils.OutputAnnotationStepStart('Results') | 2823 bisect_utils.OutputAnnotationStepStart('Results') |
2749 print 'Error: %s' % e.message | 2824 print 'Error: %s' % e.message |
2750 if opts.output_buildbot_annotations: | 2825 if opts.output_buildbot_annotations: |
2751 bisect_utils.OutputAnnotationStepClosed() | 2826 bisect_utils.OutputAnnotationStepClosed() |
2752 return 1 | 2827 return 1 |
2753 | 2828 |
2754 | 2829 |
2755 if __name__ == '__main__': | 2830 if __name__ == '__main__': |
2756 sys.exit(main()) | 2831 sys.exit(main()) |
OLD | NEW |