tools/auto_bisect/bisect_perf_regression.py - Issue 661393003: Revert of Requiring confidence in initial regression range before bisecting.

Side by Side Diff: tools/auto_bisect/bisect_perf_regression.py

Issue 661393003: Revert of Requiring confidence in initial regression range before bisecting. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.	2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """Performance Test Bisect Tool	6 """Performance Test Bisect Tool

7	7

8 This script bisects a series of changelists using binary search. It starts at	8 This script bisects a series of changelists using binary search. It starts at

9 a bad revision where a performance metric has regressed, and asks for a last	9 a bad revision where a performance metric has regressed, and asks for a last

10 known-good revision. It will then binary search across this revision range by	10 known-good revision. It will then binary search across this revision range by

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
43 import shutil	43 import shutil

44 import StringIO	44 import StringIO

45 import sys	45 import sys

46 import time	46 import time

47 import zipfile	47 import zipfile

48	48

49 sys.path.append(os.path.join(	49 sys.path.append(os.path.join(

50 os.path.dirname(__file__), os.path.pardir, 'telemetry'))	50 os.path.dirname(__file__), os.path.pardir, 'telemetry'))

51	51

52 from bisect_results import BisectResults	52 from bisect_results import BisectResults

53 from bisect_results import ConfidenceScore

54 import bisect_utils	53 import bisect_utils

55 import builder	54 import builder

56 import math_utils	55 import math_utils

57 import request_build	56 import request_build

58 import source_control	57 import source_control

59 from telemetry.util import cloud_storage	58 from telemetry.util import cloud_storage

60	59

61 # Below is the map of "depot" names to information about each depot. Each depot	60 # Below is the map of "depot" names to information about each depot. Each depot

62 # is a repository, and in the process of bisecting, revision ranges in these	61 # is a repository, and in the process of bisecting, revision ranges in these

63 # repositories may also be bisected.	62 # repositories may also be bisected.

(...skipping 99 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
163	162

164 # Maximum time in seconds to wait after posting build request to the try server.	163 # Maximum time in seconds to wait after posting build request to the try server.

165 # TODO: Change these values based on the actual time taken by buildbots on	164 # TODO: Change these values based on the actual time taken by buildbots on

166 # the try server.	165 # the try server.

167 MAX_MAC_BUILD_TIME = 14400	166 MAX_MAC_BUILD_TIME = 14400

168 MAX_WIN_BUILD_TIME = 14400	167 MAX_WIN_BUILD_TIME = 14400

169 MAX_LINUX_BUILD_TIME = 14400	168 MAX_LINUX_BUILD_TIME = 14400

170	169

171 # The percentage at which confidence is considered high.	170 # The percentage at which confidence is considered high.

172 HIGH_CONFIDENCE = 95	171 HIGH_CONFIDENCE = 95

173 # The confidence percentage we require to consider the initial range a

174 # regression based on the test results of the initial good and bad revisions.

175 REGRESSION_CONFIDENCE = 95

176	172

177 # Patch template to add a new file, DEPS.sha under src folder.	173 # Patch template to add a new file, DEPS.sha under src folder.

178 # This file contains SHA1 value of the DEPS changes made while bisecting	174 # This file contains SHA1 value of the DEPS changes made while bisecting

179 # dependency repositories. This patch is sent along with the DEPS patch to the try server.	175 # dependency repositories. This patch is sent along with the DEPS patch to the try server.

180 # When a build request is posted with a patch, bisect builders on try server,	176 # When a build request is posted with a patch, bisect builders on try server,

181 # once build is produced, it reads SHA value from this file and appends it	177 # once build is produced, it reads SHA value from this file and appends it

182 # to build archive filename.	178 # to build archive filename.

183 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha	179 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha

184 new file mode 100644	180 new file mode 100644

185 --- /dev/null	181 --- /dev/null

(...skipping 2282 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2468 else:	2464 else:

2469 message += "and the metric appears to have decreased. "	2465 message += "and the metric appears to have decreased. "

2470 if ((higher_is_better and metric_increased) or	2466 if ((higher_is_better and metric_increased) or

2471 (not higher_is_better and not metric_increased)):	2467 (not higher_is_better and not metric_increased)):

2472 results.error = (message + 'Then, the test results for the ends of '	2468 results.error = (message + 'Then, the test results for the ends of '

2473 'the given \'good\' - \'bad\' range of revisions '	2469 'the given \'good\' - \'bad\' range of revisions '

2474 'represent an improvement (and not a regression).')	2470 'represent an improvement (and not a regression).')

2475 return results	2471 return results

2476 print message, "Therefore we continue to bisect."	2472 print message, "Therefore we continue to bisect."

2477	2473

2478 # Check how likely it is that the good and bad results are different

2479 # beyond chance-induced variation.

2480 if not self.opts.debug_ignore_regression_confidence:

2481 regression_confidence = ConfidenceScore(known_bad_value['values'],

2482 known_good_value['values'])

2483 if regression_confidence < REGRESSION_CONFIDENCE:

2484 results.error = ('We could not reproduce the regression with this '

2485 'test/metric/platform combination with enough '

2486 'confidence. There\'s still a chance that this is '

2487 'actually a regression, but you may need to bisect '

2488 'a different platform.')

2489 return results

2490

2491 # Can just mark the good and bad revisions explicitly here since we	2474 # Can just mark the good and bad revisions explicitly here since we

2492 # already know the results.	2475 # already know the results.

2493 bad_revision_data = revision_data[revision_list[0]]	2476 bad_revision_data = revision_data[revision_list[0]]

2494 bad_revision_data['external'] = bad_results[2]	2477 bad_revision_data['external'] = bad_results[2]

2495 bad_revision_data['perf_time'] = bad_results[3]	2478 bad_revision_data['perf_time'] = bad_results[3]

2496 bad_revision_data['build_time'] = bad_results[4]	2479 bad_revision_data['build_time'] = bad_results[4]

2497 bad_revision_data['passed'] = False	2480 bad_revision_data['passed'] = False

2498 bad_revision_data['value'] = known_bad_value	2481 bad_revision_data['value'] = known_bad_value

2499	2482

2500 good_revision_data = revision_data[revision_list[max_revision]]	2483 good_revision_data = revision_data[revision_list[max_revision]]

(...skipping 477 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2978 self.max_time_minutes = 20	2961 self.max_time_minutes = 20

2979 self.metric = None	2962 self.metric = None

2980 self.command = None	2963 self.command = None

2981 self.output_buildbot_annotations = None	2964 self.output_buildbot_annotations = None

2982 self.no_custom_deps = False	2965 self.no_custom_deps = False

2983 self.working_directory = None	2966 self.working_directory = None

2984 self.extra_src = None	2967 self.extra_src = None

2985 self.debug_ignore_build = None	2968 self.debug_ignore_build = None

2986 self.debug_ignore_sync = None	2969 self.debug_ignore_sync = None

2987 self.debug_ignore_perf_test = None	2970 self.debug_ignore_perf_test = None

2988 self.debug_ignore_regression_confidence = None

2989 self.debug_fake_first_test_mean = 0	2971 self.debug_fake_first_test_mean = 0

2990 self.gs_bucket = None	2972 self.gs_bucket = None

2991 self.target_arch = 'ia32'	2973 self.target_arch = 'ia32'

2992 self.target_build_type = 'Release'	2974 self.target_build_type = 'Release'

2993 self.builder_host = None	2975 self.builder_host = None

2994 self.builder_port = None	2976 self.builder_port = None

2995 self.bisect_mode = BISECT_MODE_MEAN	2977 self.bisect_mode = BISECT_MODE_MEAN

2996 self.improvement_direction = 0	2978 self.improvement_direction = 0

2997	2979

2998 @staticmethod	2980 @staticmethod

(...skipping 147 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3146 group = optparse.OptionGroup(parser, 'Debug options')	3128 group = optparse.OptionGroup(parser, 'Debug options')

3147 group.add_option('--debug_ignore_build',	3129 group.add_option('--debug_ignore_build',

3148 action='store_true',	3130 action='store_true',

3149 help='DEBUG: Don\'t perform builds.')	3131 help='DEBUG: Don\'t perform builds.')

3150 group.add_option('--debug_ignore_sync',	3132 group.add_option('--debug_ignore_sync',

3151 action='store_true',	3133 action='store_true',

3152 help='DEBUG: Don\'t perform syncs.')	3134 help='DEBUG: Don\'t perform syncs.')

3153 group.add_option('--debug_ignore_perf_test',	3135 group.add_option('--debug_ignore_perf_test',

3154 action='store_true',	3136 action='store_true',

3155 help='DEBUG: Don\'t perform performance tests.')	3137 help='DEBUG: Don\'t perform performance tests.')

3156 group.add_option('--debug_ignore_regression_confidence',

3157 action='store_true',

3158 help='DEBUG: Don\'t score the confidence of the initial '

3159 'good and bad revisions\' test results.')

3160 group.add_option('--debug_fake_first_test_mean',	3138 group.add_option('--debug_fake_first_test_mean',

3161 type='int',	3139 type='int',

3162 default='0',	3140 default='0',

3163 help=('DEBUG: When faking performance tests, return this '	3141 help=('DEBUG: When faking performance tests, return this '

3164 'value as the mean of the first performance test, '	3142 'value as the mean of the first performance test, '

3165 'and return a mean of 0.0 for further tests.'))	3143 'and return a mean of 0.0 for further tests.'))

3166 parser.add_option_group(group)	3144 parser.add_option_group(group)

3167 return parser	3145 return parser

3168	3146

3169 def ParseCommandLine(self):	3147 def ParseCommandLine(self):

(...skipping 141 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3311 # bugs. If you change this, please update the perf dashboard as well.	3289 # bugs. If you change this, please update the perf dashboard as well.

3312 bisect_utils.OutputAnnotationStepStart('Results')	3290 bisect_utils.OutputAnnotationStepStart('Results')

3313 print 'Error: %s' % e.message	3291 print 'Error: %s' % e.message

3314 if opts.output_buildbot_annotations:	3292 if opts.output_buildbot_annotations:

3315 bisect_utils.OutputAnnotationStepClosed()	3293 bisect_utils.OutputAnnotationStepClosed()

3316 return 1	3294 return 1

3317	3295

3318	3296

3319 if __name__ == '__main__':	3297 if __name__ == '__main__':

3320 sys.exit(main())	3298 sys.exit(main())

OLD	NEW

« no previous file with comments | « no previous file | tools/auto_bisect/bisect_perf_regression_test.py » ('j') | no next file with comments »