Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(509)

Side by Side Diff: tools/auto_bisect/bisect_perf_regression.py

Issue 665893003: Re-applying reverted changes for regression confidence check + fix: ConfidenceScore takes flat lists (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressing comments Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tools/auto_bisect/bisect_perf_regression_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Performance Test Bisect Tool 6 """Performance Test Bisect Tool
7 7
8 This script bisects a series of changelists using binary search. It starts at 8 This script bisects a series of changelists using binary search. It starts at
9 a bad revision where a performance metric has regressed, and asks for a last 9 a bad revision where a performance metric has regressed, and asks for a last
10 known-good revision. It will then binary search across this revision range by 10 known-good revision. It will then binary search across this revision range by
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
68 BUILD_RESULT_FAIL = 1 68 BUILD_RESULT_FAIL = 1
69 BUILD_RESULT_SKIPPED = 2 69 BUILD_RESULT_SKIPPED = 2
70 70
71 # Maximum time in seconds to wait after posting build request to the try server. 71 # Maximum time in seconds to wait after posting build request to the try server.
72 # TODO: Change these values based on the actual time taken by buildbots on 72 # TODO: Change these values based on the actual time taken by buildbots on
73 # the try server. 73 # the try server.
74 MAX_MAC_BUILD_TIME = 14400 74 MAX_MAC_BUILD_TIME = 14400
75 MAX_WIN_BUILD_TIME = 14400 75 MAX_WIN_BUILD_TIME = 14400
76 MAX_LINUX_BUILD_TIME = 14400 76 MAX_LINUX_BUILD_TIME = 14400
77 77
78 # The confidence percentage we require to consider the initial range a
79 # regression based on the test results of the initial good and bad revisions.
80 REGRESSION_CONFIDENCE = 95
81
78 # Patch template to add a new file, DEPS.sha under src folder. 82 # Patch template to add a new file, DEPS.sha under src folder.
79 # This file contains SHA1 value of the DEPS changes made while bisecting 83 # This file contains SHA1 value of the DEPS changes made while bisecting
80 # dependency repositories. This patch is sent along with the DEPS patch to the try server. 84 # dependency repositories. This patch is sent along with the DEPS patch to the try server.
81 # When a build requested is posted with a patch, bisect builders on try server, 85 # When a build requested is posted with a patch, bisect builders on try server,
82 # once build is produced, it reads SHA value from this file and appends it 86 # once build is produced, it reads SHA value from this file and appends it
83 # to build archive filename. 87 # to build archive filename.
84 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha 88 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha
85 new file mode 100644 89 new file mode 100644
86 --- /dev/null 90 --- /dev/null
87 +++ DEPS.sha 91 +++ DEPS.sha
88 @@ -0,0 +1 @@ 92 @@ -0,0 +1 @@
89 +%(deps_sha)s 93 +%(deps_sha)s
90 """ 94 """
91 95
96 REGRESSION_CONFIDENCE_ERROR_TEMPLATE = """
97 We could not reproduce the regression with this test/metric/platform combination
98 with enough confidence.
99
100 Here are the results for the initial revision range:
101 'Good' revision: {good_rev}
102 \tmean: {good_mean}
103 \tstd.err.:{good_std_err}
104 \tsample size:{good_sample_size}
105 'Bad' revision: {bad_rev}
106 \tmean: {bad_mean}
107 \tstd.err.:{bad_std_err}
108 \tsample size:{bad_sample_size}
109
110 NOTE: There's still a chance that this is actually a regression, but you may
111 need to bisect a different platform."""
112
92 # Git branch name used to run bisect try jobs. 113 # Git branch name used to run bisect try jobs.
93 BISECT_TRYJOB_BRANCH = 'bisect-tryjob' 114 BISECT_TRYJOB_BRANCH = 'bisect-tryjob'
94 # Git master branch name. 115 # Git master branch name.
95 BISECT_MASTER_BRANCH = 'master' 116 BISECT_MASTER_BRANCH = 'master'
96 # File to store 'git diff' content. 117 # File to store 'git diff' content.
97 BISECT_PATCH_FILE = 'deps_patch.txt' 118 BISECT_PATCH_FILE = 'deps_patch.txt'
98 # SVN repo where the bisect try jobs are submitted. 119 # SVN repo where the bisect try jobs are submitted.
99 SVN_REPO_URL = 'svn://svn.chromium.org/chrome-try/try-perf' 120 SVN_REPO_URL = 'svn://svn.chromium.org/chrome-try/try-perf'
100 121
101 class RunGitError(Exception): 122 class RunGitError(Exception):
(...skipping 480 matching lines...) Expand 10 before | Expand all | Expand 10 after
582 603
583 if arg_dict.has_key('--profile-dir') and arg_dict.has_key('--browser'): 604 if arg_dict.has_key('--profile-dir') and arg_dict.has_key('--browser'):
584 profile_path, profile_type = os.path.split(arg_dict['--profile-dir']) 605 profile_path, profile_type = os.path.split(arg_dict['--profile-dir'])
585 return not bisect_utils.RunProcess(['python', path_to_generate, 606 return not bisect_utils.RunProcess(['python', path_to_generate,
586 '--profile-type-to-generate', profile_type, 607 '--profile-type-to-generate', profile_type,
587 '--browser', arg_dict['--browser'], '--output-dir', profile_path]) 608 '--browser', arg_dict['--browser'], '--output-dir', profile_path])
588 return False 609 return False
589 return True 610 return True
590 611
591 612
613 def _CheckRegressionConfidenceError(
614 good_revision,
615 bad_revision,
616 known_good_value,
617 known_bad_value):
618 """Checks whether we can be confident beyond a certain degree that the given
619 metrics represent a regression.
620
621 Args:
622 good_revision: string representing the commit considered 'good'
623 bad_revision: Same as above for 'bad'.
624 known_good_value: A dict with at least: 'values', 'mean' and 'std_err'
625 known_bad_value: Same as above.
626
627 Returns:
628 False if there is no error (i.e. we can be confident there's a regression),
629 a string containing the details of the lack of confidence otherwise.
630 """
631 error = False
632 # Adding good and bad values to a parameter list.
633 confidenceParams = []
634 for l in [known_bad_value['values'], known_good_value['values']]:
635 # Flatten if needed
636 if isinstance(l, list) and all([isinstance(x, list) for x in l]):
637 confidenceParams.append(sum(l, []))
638 else:
639 confidenceParams.append(l)
640 regression_confidence = BisectResults.ConfidenceScore(*confidenceParams)
641 if regression_confidence < REGRESSION_CONFIDENCE:
642 error = REGRESSION_CONFIDENCE_ERROR_TEMPLATE.format(
643 good_rev=good_revision,
644 good_mean=known_good_value['mean'],
645 good_std_err=known_good_value['std_err'],
646 good_sample_size=len(known_good_value['values']),
647 bad_rev=bad_revision,
648 bad_mean=known_bad_value['mean'],
649 bad_std_err=known_bad_value['std_err'],
650 bad_sample_size=len(known_bad_value['values']))
651 return error
652
592 class DepotDirectoryRegistry(object): 653 class DepotDirectoryRegistry(object):
593 654
594 def __init__(self, src_cwd): 655 def __init__(self, src_cwd):
595 self.depot_cwd = {} 656 self.depot_cwd = {}
596 for depot in bisect_utils.DEPOT_NAMES: 657 for depot in bisect_utils.DEPOT_NAMES:
597 # The working directory of each depot is just the path to the depot, but 658 # The working directory of each depot is just the path to the depot, but
598 # since we're already in 'src', we can skip that part. 659 # since we're already in 'src', we can skip that part.
599 path_in_src = bisect_utils.DEPOT_DEPS_NAME[depot]['src'][4:] 660 path_in_src = bisect_utils.DEPOT_DEPS_NAME[depot]['src'][4:]
600 self.AddDepot(depot, os.path.join(src_cwd, path_in_src)) 661 self.AddDepot(depot, os.path.join(src_cwd, path_in_src))
601 662
(...skipping 1608 matching lines...) Expand 10 before | Expand all | Expand 10 after
2210 '\'good\' - \'bad\' range of revisions represent an ' 2271 '\'good\' - \'bad\' range of revisions represent an '
2211 'improvement (and not a regression).') 2272 'improvement (and not a regression).')
2212 return BisectResults(error=error) 2273 return BisectResults(error=error)
2213 print message, "Therefore we continue to bisect." 2274 print message, "Therefore we continue to bisect."
2214 2275
2215 bisect_state = BisectState(target_depot, revision_list) 2276 bisect_state = BisectState(target_depot, revision_list)
2216 revision_states = bisect_state.GetRevisionStates() 2277 revision_states = bisect_state.GetRevisionStates()
2217 2278
2218 min_revision = 0 2279 min_revision = 0
2219 max_revision = len(revision_states) - 1 2280 max_revision = len(revision_states) - 1
2281 # Check how likely it is that the good and bad results are different
2282 # beyond chance-induced variation.
2283 if not self.opts.debug_ignore_regression_confidence:
2284 error = _CheckRegressionConfidenceError(good_revision,
2285 bad_revision,
2286 known_good_value,
2287 known_bad_value)
2288 if error:
2289 return BisectResults(error=error)
2220 2290
2221 # Can just mark the good and bad revisions explicitly here since we 2291 # Can just mark the good and bad revisions explicitly here since we
2222 # already know the results. 2292 # already know the results.
2223 bad_revision_state = revision_states[min_revision] 2293 bad_revision_state = revision_states[min_revision]
2224 bad_revision_state.external = bad_results[2] 2294 bad_revision_state.external = bad_results[2]
2225 bad_revision_state.perf_time = bad_results[3] 2295 bad_revision_state.perf_time = bad_results[3]
2226 bad_revision_state.build_time = bad_results[4] 2296 bad_revision_state.build_time = bad_results[4]
2227 bad_revision_state.passed = False 2297 bad_revision_state.passed = False
2228 bad_revision_state.value = known_bad_value 2298 bad_revision_state.value = known_bad_value
2229 2299
(...skipping 188 matching lines...) Expand 10 before | Expand all | Expand 10 after
2418 self.max_time_minutes = 20 2488 self.max_time_minutes = 20
2419 self.metric = None 2489 self.metric = None
2420 self.command = None 2490 self.command = None
2421 self.output_buildbot_annotations = None 2491 self.output_buildbot_annotations = None
2422 self.no_custom_deps = False 2492 self.no_custom_deps = False
2423 self.working_directory = None 2493 self.working_directory = None
2424 self.extra_src = None 2494 self.extra_src = None
2425 self.debug_ignore_build = None 2495 self.debug_ignore_build = None
2426 self.debug_ignore_sync = None 2496 self.debug_ignore_sync = None
2427 self.debug_ignore_perf_test = None 2497 self.debug_ignore_perf_test = None
2498 self.debug_ignore_regression_confidence = None
2428 self.debug_fake_first_test_mean = 0 2499 self.debug_fake_first_test_mean = 0
2429 self.gs_bucket = None 2500 self.gs_bucket = None
2430 self.target_arch = 'ia32' 2501 self.target_arch = 'ia32'
2431 self.target_build_type = 'Release' 2502 self.target_build_type = 'Release'
2432 self.builder_host = None 2503 self.builder_host = None
2433 self.builder_port = None 2504 self.builder_port = None
2434 self.bisect_mode = bisect_utils.BISECT_MODE_MEAN 2505 self.bisect_mode = bisect_utils.BISECT_MODE_MEAN
2435 self.improvement_direction = 0 2506 self.improvement_direction = 0
2436 2507
2437 @staticmethod 2508 @staticmethod
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after
2586 group = optparse.OptionGroup(parser, 'Debug options') 2657 group = optparse.OptionGroup(parser, 'Debug options')
2587 group.add_option('--debug_ignore_build', 2658 group.add_option('--debug_ignore_build',
2588 action='store_true', 2659 action='store_true',
2589 help='DEBUG: Don\'t perform builds.') 2660 help='DEBUG: Don\'t perform builds.')
2590 group.add_option('--debug_ignore_sync', 2661 group.add_option('--debug_ignore_sync',
2591 action='store_true', 2662 action='store_true',
2592 help='DEBUG: Don\'t perform syncs.') 2663 help='DEBUG: Don\'t perform syncs.')
2593 group.add_option('--debug_ignore_perf_test', 2664 group.add_option('--debug_ignore_perf_test',
2594 action='store_true', 2665 action='store_true',
2595 help='DEBUG: Don\'t perform performance tests.') 2666 help='DEBUG: Don\'t perform performance tests.')
2667 group.add_option('--debug_ignore_regression_confidence',
2668 action='store_true',
2669 help='DEBUG: Don\'t score the confidence of the initial '
2670 'good and bad revisions\' test results.')
2596 group.add_option('--debug_fake_first_test_mean', 2671 group.add_option('--debug_fake_first_test_mean',
2597 type='int', 2672 type='int',
2598 default='0', 2673 default='0',
2599 help=('DEBUG: When faking performance tests, return this ' 2674 help=('DEBUG: When faking performance tests, return this '
2600 'value as the mean of the first performance test, ' 2675 'value as the mean of the first performance test, '
2601 'and return a mean of 0.0 for further tests.')) 2676 'and return a mean of 0.0 for further tests.'))
2602 parser.add_option_group(group) 2677 parser.add_option_group(group)
2603 return parser 2678 return parser
2604 2679
2605 def ParseCommandLine(self): 2680 def ParseCommandLine(self):
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
2747 # bugs. If you change this, please update the perf dashboard as well. 2822 # bugs. If you change this, please update the perf dashboard as well.
2748 bisect_utils.OutputAnnotationStepStart('Results') 2823 bisect_utils.OutputAnnotationStepStart('Results')
2749 print 'Error: %s' % e.message 2824 print 'Error: %s' % e.message
2750 if opts.output_buildbot_annotations: 2825 if opts.output_buildbot_annotations:
2751 bisect_utils.OutputAnnotationStepClosed() 2826 bisect_utils.OutputAnnotationStepClosed()
2752 return 1 2827 return 1
2753 2828
2754 2829
2755 if __name__ == '__main__': 2830 if __name__ == '__main__':
2756 sys.exit(main()) 2831 sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | tools/auto_bisect/bisect_perf_regression_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698