| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Performance Test Bisect Tool | 6 """Performance Test Bisect Tool |
| 7 | 7 |
| 8 This script bisects a series of changelists using binary search. It starts at | 8 This script bisects a series of changelists using binary search. It starts at |
| 9 a bad revision where a performance metric has regressed, and asks for a last | 9 a bad revision where a performance metric has regressed, and asks for a last |
| 10 known-good revision. It will then binary search across this revision range by | 10 known-good revision. It will then binary search across this revision range by |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 43 import shutil | 43 import shutil |
| 44 import StringIO | 44 import StringIO |
| 45 import sys | 45 import sys |
| 46 import time | 46 import time |
| 47 import zipfile | 47 import zipfile |
| 48 | 48 |
| 49 sys.path.append(os.path.join( | 49 sys.path.append(os.path.join( |
| 50 os.path.dirname(__file__), os.path.pardir, 'telemetry')) | 50 os.path.dirname(__file__), os.path.pardir, 'telemetry')) |
| 51 | 51 |
| 52 from bisect_results import BisectResults | 52 from bisect_results import BisectResults |
| 53 from bisect_results import ConfidenceScore | |
| 54 import bisect_utils | 53 import bisect_utils |
| 55 import builder | 54 import builder |
| 56 import math_utils | 55 import math_utils |
| 57 import request_build | 56 import request_build |
| 58 import source_control | 57 import source_control |
| 59 from telemetry.util import cloud_storage | 58 from telemetry.util import cloud_storage |
| 60 | 59 |
| 61 # Below is the map of "depot" names to information about each depot. Each depot | 60 # Below is the map of "depot" names to information about each depot. Each depot |
| 62 # is a repository, and in the process of bisecting, revision ranges in these | 61 # is a repository, and in the process of bisecting, revision ranges in these |
| 63 # repositories may also be bisected. | 62 # repositories may also be bisected. |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 163 | 162 |
| 164 # Maximum time in seconds to wait after posting build request to the try server. | 163 # Maximum time in seconds to wait after posting build request to the try server. |
| 165 # TODO: Change these values based on the actual time taken by buildbots on | 164 # TODO: Change these values based on the actual time taken by buildbots on |
| 166 # the try server. | 165 # the try server. |
| 167 MAX_MAC_BUILD_TIME = 14400 | 166 MAX_MAC_BUILD_TIME = 14400 |
| 168 MAX_WIN_BUILD_TIME = 14400 | 167 MAX_WIN_BUILD_TIME = 14400 |
| 169 MAX_LINUX_BUILD_TIME = 14400 | 168 MAX_LINUX_BUILD_TIME = 14400 |
| 170 | 169 |
| 171 # The percentage at which confidence is considered high. | 170 # The percentage at which confidence is considered high. |
| 172 HIGH_CONFIDENCE = 95 | 171 HIGH_CONFIDENCE = 95 |
| 173 # The confidence percentage we require to consider the initial range a | |
| 174 # regression based on the test results of the inital good and bad revisions. | |
| 175 REGRESSION_CONFIDENCE = 95 | |
| 176 | 172 |
| 177 # Patch template to add a new file, DEPS.sha under src folder. | 173 # Patch template to add a new file, DEPS.sha under src folder. |
| 178 # This file contains the SHA1 value of the DEPS changes made while bisecting | 174 # This file contains the SHA1 value of the DEPS changes made while bisecting |
| 179 # dependency repositories. This patch is sent along with the DEPS patch to the | 175 # dependency repositories. This patch is sent along with the DEPS patch to the |
| 180 # try server. When a build request is posted with a patch, the bisect builders | 176 # try server. When a build request is posted with a patch, the bisect builders |
| 181 # on the try server read the SHA value from this file once the build is | 177 # on the try server read the SHA value from this file once the build is |
| 182 # produced, and append it to the build archive filename. | 178 # produced, and append it to the build archive filename. |
| 183 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha | 179 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha |
| 184 new file mode 100644 | 180 new file mode 100644 |
| 185 --- /dev/null | 181 --- /dev/null |
| (...skipping 2282 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2468 else: | 2464 else: |
| 2469 message += "and the metric appears to have decreased. " | 2465 message += "and the metric appears to have decreased. " |
| 2470 if ((higher_is_better and metric_increased) or | 2466 if ((higher_is_better and metric_increased) or |
| 2471 (not higher_is_better and not metric_increased)): | 2467 (not higher_is_better and not metric_increased)): |
| 2472 results.error = (message + 'Then, the test results for the ends of ' | 2468 results.error = (message + 'Then, the test results for the ends of ' |
| 2473 'the given \'good\' - \'bad\' range of revisions ' | 2469 'the given \'good\' - \'bad\' range of revisions ' |
| 2474 'represent an improvement (and not a regression).') | 2470 'represent an improvement (and not a regression).') |
| 2475 return results | 2471 return results |
| 2476 print message, "Therefore we continue to bisect." | 2472 print message, "Therefore we continue to bisect." |
| 2477 | 2473 |
| 2478 # Check how likely it is that the good and bad results are different | |
| 2479 # beyond chance-induced variation. | |
| 2480 if not self.opts.debug_ignore_regression_confidence: | |
| 2481 regression_confidence = ConfidenceScore(known_bad_value['values'], | |
| 2482 known_good_value['values']) | |
| 2483 if regression_confidence < REGRESSION_CONFIDENCE: | |
| 2484 results.error = ('We could not reproduce the regression with this ' | |
| 2485 'test/metric/platform combination with enough ' | |
| 2486 'confidence. There\'s still a chance that this is ' | |
| 2487 'actually a regression, but you may need to bisect ' | |
| 2488 'a different platform.') | |
| 2489 return results | |
| 2490 | |
| 2491 # Can just mark the good and bad revisions explicitly here since we | 2474 # Can just mark the good and bad revisions explicitly here since we |
| 2492 # already know the results. | 2475 # already know the results. |
| 2493 bad_revision_data = revision_data[revision_list[0]] | 2476 bad_revision_data = revision_data[revision_list[0]] |
| 2494 bad_revision_data['external'] = bad_results[2] | 2477 bad_revision_data['external'] = bad_results[2] |
| 2495 bad_revision_data['perf_time'] = bad_results[3] | 2478 bad_revision_data['perf_time'] = bad_results[3] |
| 2496 bad_revision_data['build_time'] = bad_results[4] | 2479 bad_revision_data['build_time'] = bad_results[4] |
| 2497 bad_revision_data['passed'] = False | 2480 bad_revision_data['passed'] = False |
| 2498 bad_revision_data['value'] = known_bad_value | 2481 bad_revision_data['value'] = known_bad_value |
| 2499 | 2482 |
| 2500 good_revision_data = revision_data[revision_list[max_revision]] | 2483 good_revision_data = revision_data[revision_list[max_revision]] |
| (...skipping 477 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2978 self.max_time_minutes = 20 | 2961 self.max_time_minutes = 20 |
| 2979 self.metric = None | 2962 self.metric = None |
| 2980 self.command = None | 2963 self.command = None |
| 2981 self.output_buildbot_annotations = None | 2964 self.output_buildbot_annotations = None |
| 2982 self.no_custom_deps = False | 2965 self.no_custom_deps = False |
| 2983 self.working_directory = None | 2966 self.working_directory = None |
| 2984 self.extra_src = None | 2967 self.extra_src = None |
| 2985 self.debug_ignore_build = None | 2968 self.debug_ignore_build = None |
| 2986 self.debug_ignore_sync = None | 2969 self.debug_ignore_sync = None |
| 2987 self.debug_ignore_perf_test = None | 2970 self.debug_ignore_perf_test = None |
| 2988 self.debug_ignore_regression_confidence = None | |
| 2989 self.debug_fake_first_test_mean = 0 | 2971 self.debug_fake_first_test_mean = 0 |
| 2990 self.gs_bucket = None | 2972 self.gs_bucket = None |
| 2991 self.target_arch = 'ia32' | 2973 self.target_arch = 'ia32' |
| 2992 self.target_build_type = 'Release' | 2974 self.target_build_type = 'Release' |
| 2993 self.builder_host = None | 2975 self.builder_host = None |
| 2994 self.builder_port = None | 2976 self.builder_port = None |
| 2995 self.bisect_mode = BISECT_MODE_MEAN | 2977 self.bisect_mode = BISECT_MODE_MEAN |
| 2996 self.improvement_direction = 0 | 2978 self.improvement_direction = 0 |
| 2997 | 2979 |
| 2998 @staticmethod | 2980 @staticmethod |
| (...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3146 group = optparse.OptionGroup(parser, 'Debug options') | 3128 group = optparse.OptionGroup(parser, 'Debug options') |
| 3147 group.add_option('--debug_ignore_build', | 3129 group.add_option('--debug_ignore_build', |
| 3148 action='store_true', | 3130 action='store_true', |
| 3149 help='DEBUG: Don\'t perform builds.') | 3131 help='DEBUG: Don\'t perform builds.') |
| 3150 group.add_option('--debug_ignore_sync', | 3132 group.add_option('--debug_ignore_sync', |
| 3151 action='store_true', | 3133 action='store_true', |
| 3152 help='DEBUG: Don\'t perform syncs.') | 3134 help='DEBUG: Don\'t perform syncs.') |
| 3153 group.add_option('--debug_ignore_perf_test', | 3135 group.add_option('--debug_ignore_perf_test', |
| 3154 action='store_true', | 3136 action='store_true', |
| 3155 help='DEBUG: Don\'t perform performance tests.') | 3137 help='DEBUG: Don\'t perform performance tests.') |
| 3156 group.add_option('--debug_ignore_regression_confidence', | |
| 3157 action='store_true', | |
| 3158 help='DEBUG: Don\'t score the confidence of the initial ' | |
| 3159 'good and bad revisions\' test results.') | |
| 3160 group.add_option('--debug_fake_first_test_mean', | 3138 group.add_option('--debug_fake_first_test_mean', |
| 3161 type='int', | 3139 type='int', |
| 3162 default='0', | 3140 default='0', |
| 3163 help=('DEBUG: When faking performance tests, return this ' | 3141 help=('DEBUG: When faking performance tests, return this ' |
| 3164 'value as the mean of the first performance test, ' | 3142 'value as the mean of the first performance test, ' |
| 3165 'and return a mean of 0.0 for further tests.')) | 3143 'and return a mean of 0.0 for further tests.')) |
| 3166 parser.add_option_group(group) | 3144 parser.add_option_group(group) |
| 3167 return parser | 3145 return parser |
| 3168 | 3146 |
| 3169 def ParseCommandLine(self): | 3147 def ParseCommandLine(self): |
| (...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3311 # bugs. If you change this, please update the perf dashboard as well. | 3289 # bugs. If you change this, please update the perf dashboard as well. |
| 3312 bisect_utils.OutputAnnotationStepStart('Results') | 3290 bisect_utils.OutputAnnotationStepStart('Results') |
| 3313 print 'Error: %s' % e.message | 3291 print 'Error: %s' % e.message |
| 3314 if opts.output_buildbot_annotations: | 3292 if opts.output_buildbot_annotations: |
| 3315 bisect_utils.OutputAnnotationStepClosed() | 3293 bisect_utils.OutputAnnotationStepClosed() |
| 3316 return 1 | 3294 return 1 |
| 3317 | 3295 |
| 3318 | 3296 |
| 3319 if __name__ == '__main__': | 3297 if __name__ == '__main__': |
| 3320 sys.exit(main()) | 3298 sys.exit(main()) |
| OLD | NEW |