tools/auto_bisect/bisect_perf_regression.py - Issue 644323002: Requiring confidence in initial regression range before bisecting.

Side by Side Diff: tools/auto_bisect/bisect_perf_regression.py

Issue 644323002: Requiring confidence in initial regression range before bisecting. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Rebasing. Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.	2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """Performance Test Bisect Tool	6 """Performance Test Bisect Tool

7	7

8 This script bisects a series of changelists using binary search. It starts at	8 This script bisects a series of changelists using binary search. It starts at

9 a bad revision where a performance metric has regressed, and asks for a last	9 a bad revision where a performance metric has regressed, and asks for a last

10 known-good revision. It will then binary search across this revision range by	10 known-good revision. It will then binary search across this revision range by

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
43 import shutil	43 import shutil

44 import StringIO	44 import StringIO

45 import sys	45 import sys

46 import time	46 import time

47 import zipfile	47 import zipfile

48	48

49 sys.path.append(os.path.join(	49 sys.path.append(os.path.join(

50 os.path.dirname(__file__), os.path.pardir, 'telemetry'))	50 os.path.dirname(__file__), os.path.pardir, 'telemetry'))

51	51

52 from bisect_results import BisectResults	52 from bisect_results import BisectResults

	53 from bisect_results import ConfidenceScore

53 import bisect_utils	54 import bisect_utils

54 import builder	55 import builder

55 import math_utils	56 import math_utils

56 import request_build	57 import request_build

57 import source_control	58 import source_control

58 from telemetry.util import cloud_storage	59 from telemetry.util import cloud_storage

59	60

60 # Below is the map of "depot" names to information about each depot. Each depot	61 # Below is the map of "depot" names to information about each depot. Each depot

61 # is a repository, and in the process of bisecting, revision ranges in these	62 # is a repository, and in the process of bisecting, revision ranges in these

62 # repositories may also be bisected.	63 # repositories may also be bisected.

(...skipping 99 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
162	163

163 # Maximum time in seconds to wait after posting build request to the try server.	164 # Maximum time in seconds to wait after posting build request to the try server.

164 # TODO: Change these values based on the actual time taken by buildbots on	165 # TODO: Change these values based on the actual time taken by buildbots on

165 # the try server.	166 # the try server.

166 MAX_MAC_BUILD_TIME = 14400	167 MAX_MAC_BUILD_TIME = 14400

167 MAX_WIN_BUILD_TIME = 14400	168 MAX_WIN_BUILD_TIME = 14400

168 MAX_LINUX_BUILD_TIME = 14400	169 MAX_LINUX_BUILD_TIME = 14400

169	170

170 # The percentage at which confidence is considered high.	171 # The percentage at which confidence is considered high.

171 HIGH_CONFIDENCE = 95	172 HIGH_CONFIDENCE = 95

	173 # The confidence percentage we require to consider the initial range a

	174 # regression based on the test results of the initial good and bad revisions.

	175 REGRESSION_CONFIDENCE = 95

172	176

173 # Patch template to add a new file, DEPS.sha under src folder.	177 # Patch template to add a new file, DEPS.sha under src folder.

174 # This file contains SHA1 value of the DEPS changes made while bisecting	178 # This file contains SHA1 value of the DEPS changes made while bisecting

175 # dependency repositories. This patch is sent along with DEPS patch to try server.	179 # dependency repositories. This patch is sent along with DEPS patch to try server.

176 # When a build requested is posted with a patch, bisect builders on try server,	180 # When a build requested is posted with a patch, bisect builders on try server,

177 # once build is produced, it reads SHA value from this file and appends it	181 # once build is produced, it reads SHA value from this file and appends it

178 # to build archive filename.	182 # to build archive filename.

179 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha	183 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha

180 new file mode 100644	184 new file mode 100644

181 --- /dev/null	185 --- /dev/null

(...skipping 2282 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2464 else:	2468 else:

2465 message += "and the metric appears to have decreased. "	2469 message += "and the metric appears to have decreased. "

2466 if ((higher_is_better and metric_increased) or	2470 if ((higher_is_better and metric_increased) or

2467 (not higher_is_better and not metric_increased)):	2471 (not higher_is_better and not metric_increased)):

2468 results.error = (message + 'Then, the test results for the ends of '	2472 results.error = (message + 'Then, the test results for the ends of '

2469 'the given \'good\' - \'bad\' range of revisions '	2473 'the given \'good\' - \'bad\' range of revisions '

2470 'represent an improvement (and not a regression).')	2474 'represent an improvement (and not a regression).')

2471 return results	2475 return results

2472 print message, "Therefore we continue to bisect."	2476 print message, "Therefore we continue to bisect."

2473	2477

	2478 # Check how likely it is that the good and bad results are different

	2479 # beyond chance-induced variation.

	2480 if not self.opts.debug_ignore_regression_confidence:

	2481 regression_confidence = ConfidenceScore(known_bad_value['values'],

	2482 known_good_value['values'])

	2483 if regression_confidence < REGRESSION_CONFIDENCE:

	2484 results.error = ('We could not reproduce the regression with this '

	2485 'test/metric/platform combination with enough '

	2486 'confidence. There\'s still a chance that this is '

	2487 'actually a regression, but you may need to bisect '

	2488 'a different platform.')

	2489 return results

	2490

2474 # Can just mark the good and bad revisions explicitly here since we	2491 # Can just mark the good and bad revisions explicitly here since we

2475 # already know the results.	2492 # already know the results.

2476 bad_revision_data = revision_data[revision_list[0]]	2493 bad_revision_data = revision_data[revision_list[0]]

2477 bad_revision_data['external'] = bad_results[2]	2494 bad_revision_data['external'] = bad_results[2]

2478 bad_revision_data['perf_time'] = bad_results[3]	2495 bad_revision_data['perf_time'] = bad_results[3]

2479 bad_revision_data['build_time'] = bad_results[4]	2496 bad_revision_data['build_time'] = bad_results[4]

2480 bad_revision_data['passed'] = False	2497 bad_revision_data['passed'] = False

2481 bad_revision_data['value'] = known_bad_value	2498 bad_revision_data['value'] = known_bad_value

2482	2499

2483 good_revision_data = revision_data[revision_list[max_revision]]	2500 good_revision_data = revision_data[revision_list[max_revision]]

(...skipping 477 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2961 self.max_time_minutes = 20	2978 self.max_time_minutes = 20

2962 self.metric = None	2979 self.metric = None

2963 self.command = None	2980 self.command = None

2964 self.output_buildbot_annotations = None	2981 self.output_buildbot_annotations = None

2965 self.no_custom_deps = False	2982 self.no_custom_deps = False

2966 self.working_directory = None	2983 self.working_directory = None

2967 self.extra_src = None	2984 self.extra_src = None

2968 self.debug_ignore_build = None	2985 self.debug_ignore_build = None

2969 self.debug_ignore_sync = None	2986 self.debug_ignore_sync = None

2970 self.debug_ignore_perf_test = None	2987 self.debug_ignore_perf_test = None

	2988 self.debug_ignore_regression_confidence = None

2971 self.debug_fake_first_test_mean = 0	2989 self.debug_fake_first_test_mean = 0

2972 self.gs_bucket = None	2990 self.gs_bucket = None

2973 self.target_arch = 'ia32'	2991 self.target_arch = 'ia32'

2974 self.target_build_type = 'Release'	2992 self.target_build_type = 'Release'

2975 self.builder_host = None	2993 self.builder_host = None

2976 self.builder_port = None	2994 self.builder_port = None

2977 self.bisect_mode = BISECT_MODE_MEAN	2995 self.bisect_mode = BISECT_MODE_MEAN

2978 self.improvement_direction = 0	2996 self.improvement_direction = 0

2979	2997

2980 @staticmethod	2998 @staticmethod

(...skipping 147 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3128 group = optparse.OptionGroup(parser, 'Debug options')	3146 group = optparse.OptionGroup(parser, 'Debug options')

3129 group.add_option('--debug_ignore_build',	3147 group.add_option('--debug_ignore_build',

3130 action='store_true',	3148 action='store_true',

3131 help='DEBUG: Don\'t perform builds.')	3149 help='DEBUG: Don\'t perform builds.')

3132 group.add_option('--debug_ignore_sync',	3150 group.add_option('--debug_ignore_sync',

3133 action='store_true',	3151 action='store_true',

3134 help='DEBUG: Don\'t perform syncs.')	3152 help='DEBUG: Don\'t perform syncs.')

3135 group.add_option('--debug_ignore_perf_test',	3153 group.add_option('--debug_ignore_perf_test',

3136 action='store_true',	3154 action='store_true',

3137 help='DEBUG: Don\'t perform performance tests.')	3155 help='DEBUG: Don\'t perform performance tests.')

	3156 group.add_option('--debug_ignore_regression_confidence',

	3157 action='store_true',

	3158 help='DEBUG: Don\'t score the confidence of the initial '

	3159 'good and bad revisions\' test results.')

3138 group.add_option('--debug_fake_first_test_mean',	3160 group.add_option('--debug_fake_first_test_mean',

3139 type='int',	3161 type='int',

3140 default='0',	3162 default='0',

3141 help=('DEBUG: When faking performance tests, return this '	3163 help=('DEBUG: When faking performance tests, return this '

3142 'value as the mean of the first performance test, '	3164 'value as the mean of the first performance test, '

3143 'and return a mean of 0.0 for further tests.'))	3165 'and return a mean of 0.0 for further tests.'))

3144 parser.add_option_group(group)	3166 parser.add_option_group(group)

3145 return parser	3167 return parser

3146	3168

3147 def ParseCommandLine(self):	3169 def ParseCommandLine(self):

(...skipping 141 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3289 # bugs. If you change this, please update the perf dashboard as well.	3311 # bugs. If you change this, please update the perf dashboard as well.

3290 bisect_utils.OutputAnnotationStepStart('Results')	3312 bisect_utils.OutputAnnotationStepStart('Results')

3291 print 'Error: %s' % e.message	3313 print 'Error: %s' % e.message

3292 if opts.output_buildbot_annotations:	3314 if opts.output_buildbot_annotations:

3293 bisect_utils.OutputAnnotationStepClosed()	3315 bisect_utils.OutputAnnotationStepClosed()

3294 return 1	3316 return 1

3295	3317

3296	3318

3297 if __name__ == '__main__':	3319 if __name__ == '__main__':

3298 sys.exit(main())	3320 sys.exit(main())

OLD	NEW

« no previous file with comments | « no previous file | tools/auto_bisect/bisect_perf_regression_test.py » ('j') | no next file with comments »