Chromium Code Reviews

Unified Diff: tools/auto_bisect/bisect_perf_regression.py

Issue 644323002: Requiring confidence in initial regression range before bisecting. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebasing. Created 6 years, 2 months ago
Index: tools/auto_bisect/bisect_perf_regression.py
diff --git a/tools/auto_bisect/bisect_perf_regression.py b/tools/auto_bisect/bisect_perf_regression.py
index e80fd43999e03584150e54fe4cd943c9b84485fb..01b4fabc46dcaf47ad6d3d380b40a05d19c3b22d 100755
--- a/tools/auto_bisect/bisect_perf_regression.py
+++ b/tools/auto_bisect/bisect_perf_regression.py
@@ -50,6 +50,7 @@ sys.path.append(os.path.join(
     os.path.dirname(__file__), os.path.pardir, 'telemetry'))
 from bisect_results import BisectResults
+from bisect_results import ConfidenceScore
 import bisect_utils
 import builder
 import math_utils
@@ -169,6 +170,9 @@ MAX_LINUX_BUILD_TIME = 14400
 # The percentage at which confidence is considered high.
 HIGH_CONFIDENCE = 95
 
+# The confidence percentage we require to consider the initial range a
+# regression, based on the test results of the initial good and bad revisions.
+REGRESSION_CONFIDENCE = 95
 
 # Patch template to add a new file, DEPS.sha under src folder.
 # This file contains SHA1 value of the DEPS changes made while bisecting
@@ -2471,6 +2475,19 @@ class BisectPerformanceMetrics(object):
           return results
         print message, "Therefore we continue to bisect."
 
+      # Check how likely it is that the good and bad results are different
+      # beyond chance-induced variation.
+      if not self.opts.debug_ignore_regression_confidence:
+        regression_confidence = ConfidenceScore(known_bad_value['values'],
+                                                known_good_value['values'])
+        if regression_confidence < REGRESSION_CONFIDENCE:
+          results.error = ('We could not reproduce the regression with this '
+                           'test/metric/platform combination with enough '
+                           'confidence. There\'s still a chance that this is '
+                           'actually a regression, but you may need to bisect '
+                           'a different platform.')
+          return results
+
       # Can just mark the good and bad revisions explicitly here since we
       # already know the results.
       bad_revision_data = revision_data[revision_list[0]]
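
ConfidenceScore (imported from bisect_results above) is what gates the bisect here. As a rough sketch of the idea, not the actual implementation in bisect_results.py, a Welch's t-test maps naturally onto a 0-100 score; the function name confidence_score and the scipy dependency below are illustrative assumptions:

# Illustrative stand-in for bisect_results.ConfidenceScore; the real
# implementation may compute its score differently.
from scipy import stats

def confidence_score(bad_values, good_values):
  """Scores how confidently two samples differ, on a 0-100 scale.

  Uses Welch's t-test, which does not assume the two samples share a
  variance. A score of 95 corresponds to a p-value of 0.05.
  """
  if len(bad_values) < 2 or len(good_values) < 2:
    return 0.0  # Too few samples to estimate variance.
  _, p_value = stats.ttest_ind(bad_values, good_values, equal_var=False)
  return 100.0 * (1.0 - p_value)

Under REGRESSION_CONFIDENCE = 95, bisection proceeds only when the initial good and bad samples differ at roughly the p < 0.05 level; noisy, overlapping samples fail the gate and return the error above instead of starting a long, likely fruitless bisect.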
@@ -2968,6 +2985,7 @@ class BisectOptions(object):
     self.debug_ignore_build = None
     self.debug_ignore_sync = None
     self.debug_ignore_perf_test = None
+    self.debug_ignore_regression_confidence = None
     self.debug_fake_first_test_mean = 0
     self.gs_bucket = None
     self.target_arch = 'ia32'
@@ -3135,6 +3153,10 @@ class BisectOptions(object):
     group.add_option('--debug_ignore_perf_test',
                      action='store_true',
                      help='DEBUG: Don\'t perform performance tests.')
+    group.add_option('--debug_ignore_regression_confidence',
+                     action='store_true',
+                     help='DEBUG: Don\'t score the confidence of the initial '
+                          'good and bad revisions\' test results.')
     group.add_option('--debug_fake_first_test_mean',
                      type='int',
                      default='0',
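
For reviewers trying the new flag locally, a hypothetical invocation (the command, revisions, and metric below are placeholders; only --debug_ignore_regression_confidence is introduced by this patch):

tools/auto_bisect/bisect_perf_regression.py \
    --command="tools/perf/run_benchmark ..." \
    --good_revision=289000 \
    --bad_revision=289200 \
    --metric=some_chart/some_trace \
    --debug_ignore_regression_confidence

Passing the flag skips the confidence gate entirely, matching the previous behavior of bisecting even when the initial good and bad samples are statistically indistinguishable.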