Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(296)

Side by Side Diff: tools/auto_bisect/bisect_perf_regression.py

Issue 661393003: Revert of Requiring confidence in initial regression range before bisecting. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tools/auto_bisect/bisect_perf_regression_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Performance Test Bisect Tool 6 """Performance Test Bisect Tool
7 7
8 This script bisects a series of changelists using binary search. It starts at 8 This script bisects a series of changelists using binary search. It starts at
9 a bad revision where a performance metric has regressed, and asks for a last 9 a bad revision where a performance metric has regressed, and asks for a last
10 known-good revision. It will then binary search across this revision range by 10 known-good revision. It will then binary search across this revision range by
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
43 import shutil 43 import shutil
44 import StringIO 44 import StringIO
45 import sys 45 import sys
46 import time 46 import time
47 import zipfile 47 import zipfile
48 48
49 sys.path.append(os.path.join( 49 sys.path.append(os.path.join(
50 os.path.dirname(__file__), os.path.pardir, 'telemetry')) 50 os.path.dirname(__file__), os.path.pardir, 'telemetry'))
51 51
52 from bisect_results import BisectResults 52 from bisect_results import BisectResults
53 from bisect_results import ConfidenceScore
54 import bisect_utils 53 import bisect_utils
55 import builder 54 import builder
56 import math_utils 55 import math_utils
57 import request_build 56 import request_build
58 import source_control 57 import source_control
59 from telemetry.util import cloud_storage 58 from telemetry.util import cloud_storage
60 59
61 # Below is the map of "depot" names to information about each depot. Each depot 60 # Below is the map of "depot" names to information about each depot. Each depot
62 # is a repository, and in the process of bisecting, revision ranges in these 61 # is a repository, and in the process of bisecting, revision ranges in these
63 # repositories may also be bisected. 62 # repositories may also be bisected.
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
163 162
164 # Maximum time in seconds to wait after posting build request to the try server. 163 # Maximum time in seconds to wait after posting build request to the try server.
165 # TODO: Change these values based on the actual time taken by buildbots on 164 # TODO: Change these values based on the actual time taken by buildbots on
166 # the try server. 165 # the try server.
167 MAX_MAC_BUILD_TIME = 14400 166 MAX_MAC_BUILD_TIME = 14400
168 MAX_WIN_BUILD_TIME = 14400 167 MAX_WIN_BUILD_TIME = 14400
169 MAX_LINUX_BUILD_TIME = 14400 168 MAX_LINUX_BUILD_TIME = 14400
170 169
171 # The percentage at which confidence is considered high. 170 # The percentage at which confidence is considered high.
172 HIGH_CONFIDENCE = 95 171 HIGH_CONFIDENCE = 95
173 # The confidence percentage we require to consider the initial range a
174 # regression based on the test results of the initial good and bad revisions.
175 REGRESSION_CONFIDENCE = 95
176 172
177 # Patch template to add a new file, DEPS.sha under src folder. 173 # Patch template to add a new file, DEPS.sha under src folder.
178 # This file contains SHA1 value of the DEPS changes made while bisecting 174 # This file contains SHA1 value of the DEPS changes made while bisecting
179 # dependency repositories. This patch is sent along with the DEPS patch to the try server. 175 # dependency repositories. This patch is sent along with the DEPS patch to the try server.
180 # When a build request is posted with a patch, the bisect builders on the try server, 176 # When a build request is posted with a patch, the bisect builders on the try server,
181 # once the build is produced, read the SHA value from this file and append it 177 # once the build is produced, read the SHA value from this file and append it
182 # to the build archive filename. 178 # to the build archive filename.
183 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha 179 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha
184 new file mode 100644 180 new file mode 100644
185 --- /dev/null 181 --- /dev/null
(...skipping 2282 matching lines...) Expand 10 before | Expand all | Expand 10 after
2468 else: 2464 else:
2469 message += "and the metric appears to have decreased. " 2465 message += "and the metric appears to have decreased. "
2470 if ((higher_is_better and metric_increased) or 2466 if ((higher_is_better and metric_increased) or
2471 (not higher_is_better and not metric_increased)): 2467 (not higher_is_better and not metric_increased)):
2472 results.error = (message + 'Then, the test results for the ends of ' 2468 results.error = (message + 'Then, the test results for the ends of '
2473 'the given \'good\' - \'bad\' range of revisions ' 2469 'the given \'good\' - \'bad\' range of revisions '
2474 'represent an improvement (and not a regression).') 2470 'represent an improvement (and not a regression).')
2475 return results 2471 return results
2476 print message, "Therefore we continue to bisect." 2472 print message, "Therefore we continue to bisect."
2477 2473
2478 # Check how likely it is that the good and bad results are different
2479 # beyond chance-induced variation.
2480 if not self.opts.debug_ignore_regression_confidence:
2481 regression_confidence = ConfidenceScore(known_bad_value['values'],
2482 known_good_value['values'])
2483 if regression_confidence < REGRESSION_CONFIDENCE:
2484 results.error = ('We could not reproduce the regression with this '
2485 'test/metric/platform combination with enough '
2486 'confidence. There\'s still a chance that this is '
2487 'actually a regression, but you may need to bisect '
2488 'a different platform.')
2489 return results
2490
2491 # Can just mark the good and bad revisions explicitly here since we 2474 # Can just mark the good and bad revisions explicitly here since we
2492 # already know the results. 2475 # already know the results.
2493 bad_revision_data = revision_data[revision_list[0]] 2476 bad_revision_data = revision_data[revision_list[0]]
2494 bad_revision_data['external'] = bad_results[2] 2477 bad_revision_data['external'] = bad_results[2]
2495 bad_revision_data['perf_time'] = bad_results[3] 2478 bad_revision_data['perf_time'] = bad_results[3]
2496 bad_revision_data['build_time'] = bad_results[4] 2479 bad_revision_data['build_time'] = bad_results[4]
2497 bad_revision_data['passed'] = False 2480 bad_revision_data['passed'] = False
2498 bad_revision_data['value'] = known_bad_value 2481 bad_revision_data['value'] = known_bad_value
2499 2482
2500 good_revision_data = revision_data[revision_list[max_revision]] 2483 good_revision_data = revision_data[revision_list[max_revision]]
(...skipping 477 matching lines...) Expand 10 before | Expand all | Expand 10 after
2978 self.max_time_minutes = 20 2961 self.max_time_minutes = 20
2979 self.metric = None 2962 self.metric = None
2980 self.command = None 2963 self.command = None
2981 self.output_buildbot_annotations = None 2964 self.output_buildbot_annotations = None
2982 self.no_custom_deps = False 2965 self.no_custom_deps = False
2983 self.working_directory = None 2966 self.working_directory = None
2984 self.extra_src = None 2967 self.extra_src = None
2985 self.debug_ignore_build = None 2968 self.debug_ignore_build = None
2986 self.debug_ignore_sync = None 2969 self.debug_ignore_sync = None
2987 self.debug_ignore_perf_test = None 2970 self.debug_ignore_perf_test = None
2988 self.debug_ignore_regression_confidence = None
2989 self.debug_fake_first_test_mean = 0 2971 self.debug_fake_first_test_mean = 0
2990 self.gs_bucket = None 2972 self.gs_bucket = None
2991 self.target_arch = 'ia32' 2973 self.target_arch = 'ia32'
2992 self.target_build_type = 'Release' 2974 self.target_build_type = 'Release'
2993 self.builder_host = None 2975 self.builder_host = None
2994 self.builder_port = None 2976 self.builder_port = None
2995 self.bisect_mode = BISECT_MODE_MEAN 2977 self.bisect_mode = BISECT_MODE_MEAN
2996 self.improvement_direction = 0 2978 self.improvement_direction = 0
2997 2979
2998 @staticmethod 2980 @staticmethod
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
3146 group = optparse.OptionGroup(parser, 'Debug options') 3128 group = optparse.OptionGroup(parser, 'Debug options')
3147 group.add_option('--debug_ignore_build', 3129 group.add_option('--debug_ignore_build',
3148 action='store_true', 3130 action='store_true',
3149 help='DEBUG: Don\'t perform builds.') 3131 help='DEBUG: Don\'t perform builds.')
3150 group.add_option('--debug_ignore_sync', 3132 group.add_option('--debug_ignore_sync',
3151 action='store_true', 3133 action='store_true',
3152 help='DEBUG: Don\'t perform syncs.') 3134 help='DEBUG: Don\'t perform syncs.')
3153 group.add_option('--debug_ignore_perf_test', 3135 group.add_option('--debug_ignore_perf_test',
3154 action='store_true', 3136 action='store_true',
3155 help='DEBUG: Don\'t perform performance tests.') 3137 help='DEBUG: Don\'t perform performance tests.')
3156 group.add_option('--debug_ignore_regression_confidence',
3157 action='store_true',
3158 help='DEBUG: Don\'t score the confidence of the initial '
3159 'good and bad revisions\' test results.')
3160 group.add_option('--debug_fake_first_test_mean', 3138 group.add_option('--debug_fake_first_test_mean',
3161 type='int', 3139 type='int',
3162 default='0', 3140 default='0',
3163 help=('DEBUG: When faking performance tests, return this ' 3141 help=('DEBUG: When faking performance tests, return this '
3164 'value as the mean of the first performance test, ' 3142 'value as the mean of the first performance test, '
3165 'and return a mean of 0.0 for further tests.')) 3143 'and return a mean of 0.0 for further tests.'))
3166 parser.add_option_group(group) 3144 parser.add_option_group(group)
3167 return parser 3145 return parser
3168 3146
3169 def ParseCommandLine(self): 3147 def ParseCommandLine(self):
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
3311 # bugs. If you change this, please update the perf dashboard as well. 3289 # bugs. If you change this, please update the perf dashboard as well.
3312 bisect_utils.OutputAnnotationStepStart('Results') 3290 bisect_utils.OutputAnnotationStepStart('Results')
3313 print 'Error: %s' % e.message 3291 print 'Error: %s' % e.message
3314 if opts.output_buildbot_annotations: 3292 if opts.output_buildbot_annotations:
3315 bisect_utils.OutputAnnotationStepClosed() 3293 bisect_utils.OutputAnnotationStepClosed()
3316 return 1 3294 return 1
3317 3295
3318 3296
3319 if __name__ == '__main__': 3297 if __name__ == '__main__':
3320 sys.exit(main()) 3298 sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | tools/auto_bisect/bisect_perf_regression_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698