OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Performance Test Bisect Tool | 6 """Performance Test Bisect Tool |
7 | 7 |
8 This script bisects a series of changelists using binary search. It starts at | 8 This script bisects a series of changelists using binary search. It starts at |
9 a bad revision where a performance metric has regressed, and asks for a last | 9 a bad revision where a performance metric has regressed, and asks for a last |
10 known-good revision. It will then binary search across this revision range by | 10 known-good revision. It will then binary search across this revision range by |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
43 import shutil | 43 import shutil |
44 import StringIO | 44 import StringIO |
45 import sys | 45 import sys |
46 import time | 46 import time |
47 import zipfile | 47 import zipfile |
48 | 48 |
49 sys.path.append(os.path.join( | 49 sys.path.append(os.path.join( |
50 os.path.dirname(__file__), os.path.pardir, 'telemetry')) | 50 os.path.dirname(__file__), os.path.pardir, 'telemetry')) |
51 | 51 |
52 from bisect_results import BisectResults | 52 from bisect_results import BisectResults |
53 from bisect_results import ConfidenceScore | |
54 import bisect_utils | 53 import bisect_utils |
55 import builder | 54 import builder |
56 import math_utils | 55 import math_utils |
57 import request_build | 56 import request_build |
58 import source_control | 57 import source_control |
59 from telemetry.util import cloud_storage | 58 from telemetry.util import cloud_storage |
60 | 59 |
61 # Below is the map of "depot" names to information about each depot. Each depot | 60 # Below is the map of "depot" names to information about each depot. Each depot |
62 # is a repository, and in the process of bisecting, revision ranges in these | 61 # is a repository, and in the process of bisecting, revision ranges in these |
63 # repositories may also be bisected. | 62 # repositories may also be bisected. |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
163 | 162 |
164 # Maximum time in seconds to wait after posting build request to the try server. | 163 # Maximum time in seconds to wait after posting build request to the try server. |
165 # TODO: Change these values based on the actual time taken by buildbots on | 164 # TODO: Change these values based on the actual time taken by buildbots on |
166 # the try server. | 165 # the try server. |
167 MAX_MAC_BUILD_TIME = 14400 | 166 MAX_MAC_BUILD_TIME = 14400 |
168 MAX_WIN_BUILD_TIME = 14400 | 167 MAX_WIN_BUILD_TIME = 14400 |
169 MAX_LINUX_BUILD_TIME = 14400 | 168 MAX_LINUX_BUILD_TIME = 14400 |
170 | 169 |
171 # The percentage at which confidence is considered high. | 170 # The percentage at which confidence is considered high. |
172 HIGH_CONFIDENCE = 95 | 171 HIGH_CONFIDENCE = 95 |
173 # The confidence percentage we require to consider the initial range a | |
174 # regression based on the test results of the initial good and bad revisions. | |
175 REGRESSION_CONFIDENCE = 95 | |
176 | 172 |
177 # Patch template to add a new file, DEPS.sha, under the src folder. | 173 # Patch template to add a new file, DEPS.sha, under the src folder. |
178 # This file contains the SHA1 value of the DEPS changes made while bisecting | 174 # This file contains the SHA1 value of the DEPS changes made while bisecting |
179 # dependency repositories. This patch is sent along with the DEPS patch to the | 175 # dependency repositories. This patch is sent along with the DEPS patch to the |
180 # try server. When a build request is posted with a patch, the bisect builders | 176 # try server. When a build request is posted with a patch, the bisect builders |
181 # on the try server read the SHA value from this file once the build is | 177 # on the try server read the SHA value from this file once the build is |
182 # produced and append it to the build archive filename. | 178 # produced and append it to the build archive filename. |
183 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha | 179 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha |
184 new file mode 100644 | 180 new file mode 100644 |
185 --- /dev/null | 181 --- /dev/null |
(...skipping 2282 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2468 else: | 2464 else: |
2469 message += "and the metric appears to have decreased. " | 2465 message += "and the metric appears to have decreased. " |
2470 if ((higher_is_better and metric_increased) or | 2466 if ((higher_is_better and metric_increased) or |
2471 (not higher_is_better and not metric_increased)): | 2467 (not higher_is_better and not metric_increased)): |
2472 results.error = (message + 'Then, the test results for the ends of ' | 2468 results.error = (message + 'Then, the test results for the ends of ' |
2473 'the given \'good\' - \'bad\' range of revisions ' | 2469 'the given \'good\' - \'bad\' range of revisions ' |
2474 'represent an improvement (and not a regression).') | 2470 'represent an improvement (and not a regression).') |
2475 return results | 2471 return results |
2476 print message, "Therefore we continue to bisect." | 2472 print message, "Therefore we continue to bisect." |
2477 | 2473 |
2478 # Check how likely it is that the good and bad results are different | |
2479 # beyond chance-induced variation. | |
2480 if not self.opts.debug_ignore_regression_confidence: | |
2481 regression_confidence = ConfidenceScore(known_bad_value['values'], | |
2482 known_good_value['values']) | |
2483 if regression_confidence < REGRESSION_CONFIDENCE: | |
2484 results.error = ('We could not reproduce the regression with this ' | |
2485 'test/metric/platform combination with enough ' | |
2486 'confidence. There\'s still a chance that this is ' | |
2487 'actually a regression, but you may need to bisect ' | |
2488 'a different platform.') | |
2489 return results | |
2490 | |
2491 # Can just mark the good and bad revisions explicitly here since we | 2474 # Can just mark the good and bad revisions explicitly here since we |
2492 # already know the results. | 2475 # already know the results. |
2493 bad_revision_data = revision_data[revision_list[0]] | 2476 bad_revision_data = revision_data[revision_list[0]] |
2494 bad_revision_data['external'] = bad_results[2] | 2477 bad_revision_data['external'] = bad_results[2] |
2495 bad_revision_data['perf_time'] = bad_results[3] | 2478 bad_revision_data['perf_time'] = bad_results[3] |
2496 bad_revision_data['build_time'] = bad_results[4] | 2479 bad_revision_data['build_time'] = bad_results[4] |
2497 bad_revision_data['passed'] = False | 2480 bad_revision_data['passed'] = False |
2498 bad_revision_data['value'] = known_bad_value | 2481 bad_revision_data['value'] = known_bad_value |
2499 | 2482 |
2500 good_revision_data = revision_data[revision_list[max_revision]] | 2483 good_revision_data = revision_data[revision_list[max_revision]] |
(...skipping 477 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2978 self.max_time_minutes = 20 | 2961 self.max_time_minutes = 20 |
2979 self.metric = None | 2962 self.metric = None |
2980 self.command = None | 2963 self.command = None |
2981 self.output_buildbot_annotations = None | 2964 self.output_buildbot_annotations = None |
2982 self.no_custom_deps = False | 2965 self.no_custom_deps = False |
2983 self.working_directory = None | 2966 self.working_directory = None |
2984 self.extra_src = None | 2967 self.extra_src = None |
2985 self.debug_ignore_build = None | 2968 self.debug_ignore_build = None |
2986 self.debug_ignore_sync = None | 2969 self.debug_ignore_sync = None |
2987 self.debug_ignore_perf_test = None | 2970 self.debug_ignore_perf_test = None |
2988 self.debug_ignore_regression_confidence = None | |
2989 self.debug_fake_first_test_mean = 0 | 2971 self.debug_fake_first_test_mean = 0 |
2990 self.gs_bucket = None | 2972 self.gs_bucket = None |
2991 self.target_arch = 'ia32' | 2973 self.target_arch = 'ia32' |
2992 self.target_build_type = 'Release' | 2974 self.target_build_type = 'Release' |
2993 self.builder_host = None | 2975 self.builder_host = None |
2994 self.builder_port = None | 2976 self.builder_port = None |
2995 self.bisect_mode = BISECT_MODE_MEAN | 2977 self.bisect_mode = BISECT_MODE_MEAN |
2996 self.improvement_direction = 0 | 2978 self.improvement_direction = 0 |
2997 | 2979 |
2998 @staticmethod | 2980 @staticmethod |
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3146 group = optparse.OptionGroup(parser, 'Debug options') | 3128 group = optparse.OptionGroup(parser, 'Debug options') |
3147 group.add_option('--debug_ignore_build', | 3129 group.add_option('--debug_ignore_build', |
3148 action='store_true', | 3130 action='store_true', |
3149 help='DEBUG: Don\'t perform builds.') | 3131 help='DEBUG: Don\'t perform builds.') |
3150 group.add_option('--debug_ignore_sync', | 3132 group.add_option('--debug_ignore_sync', |
3151 action='store_true', | 3133 action='store_true', |
3152 help='DEBUG: Don\'t perform syncs.') | 3134 help='DEBUG: Don\'t perform syncs.') |
3153 group.add_option('--debug_ignore_perf_test', | 3135 group.add_option('--debug_ignore_perf_test', |
3154 action='store_true', | 3136 action='store_true', |
3155 help='DEBUG: Don\'t perform performance tests.') | 3137 help='DEBUG: Don\'t perform performance tests.') |
3156 group.add_option('--debug_ignore_regression_confidence', | |
3157 action='store_true', | |
3158 help='DEBUG: Don\'t score the confidence of the initial ' | |
3159 'good and bad revisions\' test results.') | |
3160 group.add_option('--debug_fake_first_test_mean', | 3138 group.add_option('--debug_fake_first_test_mean', |
3161 type='int', | 3139 type='int', |
3162 default='0', | 3140 default='0', |
3163 help=('DEBUG: When faking performance tests, return this ' | 3141 help=('DEBUG: When faking performance tests, return this ' |
3164 'value as the mean of the first performance test, ' | 3142 'value as the mean of the first performance test, ' |
3165 'and return a mean of 0.0 for further tests.')) | 3143 'and return a mean of 0.0 for further tests.')) |
3166 parser.add_option_group(group) | 3144 parser.add_option_group(group) |
3167 return parser | 3145 return parser |
3168 | 3146 |
3169 def ParseCommandLine(self): | 3147 def ParseCommandLine(self): |
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3311 # bugs. If you change this, please update the perf dashboard as well. | 3289 # bugs. If you change this, please update the perf dashboard as well. |
3312 bisect_utils.OutputAnnotationStepStart('Results') | 3290 bisect_utils.OutputAnnotationStepStart('Results') |
3313 print 'Error: %s' % e.message | 3291 print 'Error: %s' % e.message |
3314 if opts.output_buildbot_annotations: | 3292 if opts.output_buildbot_annotations: |
3315 bisect_utils.OutputAnnotationStepClosed() | 3293 bisect_utils.OutputAnnotationStepClosed() |
3316 return 1 | 3294 return 1 |
3317 | 3295 |
3318 | 3296 |
3319 if __name__ == '__main__': | 3297 if __name__ == '__main__': |
3320 sys.exit(main()) | 3298 sys.exit(main()) |
OLD | NEW |