Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(352)

Unified Diff: tools/auto_bisect/bisect_perf_regression.py

Issue 665893003: Re-applying reverted changes for regression confidence check + fix: ConfidenceScore takes flat lists (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebasing after significant refactoring. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tools/auto_bisect/bisect_perf_regression_test.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/auto_bisect/bisect_perf_regression.py
diff --git a/tools/auto_bisect/bisect_perf_regression.py b/tools/auto_bisect/bisect_perf_regression.py
index f90074f4d7533adda3937ef5e8740afa4898efef..becd04dde7281df5a302a74826d028454cb37f21 100755
--- a/tools/auto_bisect/bisect_perf_regression.py
+++ b/tools/auto_bisect/bisect_perf_regression.py
@@ -75,6 +75,10 @@ MAX_MAC_BUILD_TIME = 14400
MAX_WIN_BUILD_TIME = 14400
MAX_LINUX_BUILD_TIME = 14400
+# The confidence percentage we require to consider the initial range a
+# regression based on the test results of the initial good and bad revisions.
+REGRESSION_CONFIDENCE = 95
+
# Patch template to add a new file, DEPS.sha under src folder.
# This file contains SHA1 value of the DEPS changes made while bisecting
# dependency repositories. This patch send along with DEPS patch to try server.
@@ -89,6 +93,23 @@ new file mode 100644
+%(deps_sha)s
"""
+REGRESSION_CONFIDENCE_ERROR_TEMPLATE = """
+We could not reproduce the regression with this test/metric/platform combination
+with enough confidence.
+
+Here are the results for the initial revision range:
+\'Good\' revision: {}
qyearsley 2014/10/23 00:38:39 Giving names to each of these template fields woul
RobertoCN 2014/10/23 19:51:45 Done.
+\tmean: {}
+\tstd.err.:{}
+\tsample size:{}
+\'Bad\' revision: {}
+\tmean: {}
+\tstd.err.:{}
+\tsample size:{}
+
+NOTE: There\'s still a chance that this is actually a regression, but you may
qyearsley 2014/10/23 00:38:39 Escaping single quotes isn't necessary inside a "
RobertoCN 2014/10/23 19:51:45 Done.
+ need to bisect a different platform."""
+
# Git branch name used to run bisect try jobs.
BISECT_TRYJOB_BRANCH = 'bisect-tryjob'
# Git master branch name.
@@ -2217,6 +2238,25 @@ class BisectPerformanceMetrics(object):
min_revision = 0
max_revision = len(revision_states) - 1
+ # Check how likely it is that the good and bad results are different
+ # beyond chance-induced variation.
+ if not self.opts.debug_ignore_regression_confidence:
+ # Adding good and bad values to a parameter list.
+ confidenceParams = []
+ for l in [known_bad_value['values'], known_good_value['values']]:
+ # Flatten if needed
+ if isinstance(l, list) and all([isinstance(x, list) for x in l]):
+ confidenceParams.append(sum(l, []))
+ else:
+ confidenceParams.append(l)
+ regression_confidence = BisectResults.ConfidenceScore(*confidenceParams)
+ if regression_confidence < REGRESSION_CONFIDENCE:
+ error = REGRESSION_CONFIDENCE_ERROR_TEMPLATE.format(
+ good_revision, known_good_value['mean'],
+ known_good_value['std_err'], len(known_good_value['values']),
+ bad_revision, known_bad_value['mean'],
+ known_bad_value['std_err'], len(known_bad_value['values']))
+ return BisectResults(error=error)
qyearsley 2014/10/23 00:38:38 Might be a good idea to extract everything under t
RobertoCN 2014/10/23 19:51:45 Done.
# Can just mark the good and bad revisions explicitly here since we
# already know the results.
@@ -2425,6 +2465,7 @@ class BisectOptions(object):
self.debug_ignore_build = None
self.debug_ignore_sync = None
self.debug_ignore_perf_test = None
+ self.debug_ignore_regression_confidence = None
self.debug_fake_first_test_mean = 0
self.gs_bucket = None
self.target_arch = 'ia32'
@@ -2593,6 +2634,10 @@ class BisectOptions(object):
group.add_option('--debug_ignore_perf_test',
action='store_true',
help='DEBUG: Don\'t perform performance tests.')
+ group.add_option('--debug_ignore_regression_confidence',
+ action='store_true',
+ help='DEBUG: Don\'t score the confidence of the initial '
+ 'good and bad revisions\' test results.')
group.add_option('--debug_fake_first_test_mean',
type='int',
default='0',
« no previous file with comments | « no previous file | tools/auto_bisect/bisect_perf_regression_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698