Chromium Code Reviews

Unified Diff: tools/bisect-perf-regression.py

Issue 463743002: Return 0 for confidence when there's only results for one "good" or one "bad" rev. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 4 months ago
Index: tools/bisect-perf-regression.py
diff --git a/tools/bisect-perf-regression.py b/tools/bisect-perf-regression.py
index 41b999a07078523d22e0b24596049b0be61055b3..363aa1b5ffb36b814e85d49d35a2e1b40176448a 100755
--- a/tools/bisect-perf-regression.py
+++ b/tools/bisect-perf-regression.py
@@ -171,6 +171,9 @@ MAX_MAC_BUILD_TIME = 14400
MAX_WIN_BUILD_TIME = 14400
MAX_LINUX_BUILD_TIME = 14400
+# The confidence percentage at or above which confidence is considered "high".
+HIGH_CONFIDENCE = 95
+
# Patch template to add a new file, DEPS.sha, under the src folder. This file
# contains the SHA1 value of the DEPS changes made while bisecting dependency
# repositories. This patch is sent along with the DEPS patch to the tryserver.
@@ -191,9 +194,9 @@ BISECT_MODE_MEAN = 'mean'
BISECT_MODE_STD_DEV = 'std_dev'
BISECT_MODE_RETURN_CODE = 'return_code'
-# The perf dashboard specifically looks for the string
-# "Estimated Confidence: 95%" to decide whether or not to cc the author(s).
-# If you change this, please update the perf dashboard as well.
+# The perf dashboard looks for a string like "Estimated Confidence: 95%"
+# to decide whether or not to cc the author(s). If you change this, please
+# update the perf dashboard as well.
RESULTS_BANNER = """
===== BISECT JOB RESULTS =====
Status: %(status)s
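
For context on the comment above, a minimal sketch of the kind of string match the dashboard could perform on the bisect output; the regex and helper name here are illustrative assumptions, not the dashboard's actual code:

# Sketch only: the dashboard's real matching logic is not shown in this CL.
import re

ESTIMATED_CONFIDENCE_RE = re.compile(r'Estimated Confidence:\s*([\d.]+)%')

def ShouldCcAuthors(bisect_output, threshold=95):
  """Returns True if the output reports confidence at or above threshold."""
  match = ESTIMATED_CONFIDENCE_RE.search(bisect_output)
  return bool(match) and float(match.group(1)) >= threshold
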
@@ -280,12 +283,18 @@ def ConfidenceScore(good_results_lists, bad_results_lists):
Returns:
A number in the range [0, 100].
"""
- if not good_results_lists or not bad_results_lists:
+ # If there's only one item in either list, this means only one revision was
+ # classified good or bad; this isn't good enough evidence to make a decision.
+ # If an empty list was passed, that also implies zero confidence.
+ if len(good_results_lists) <= 1 or len(bad_results_lists) <= 1:
return 0.0
# Flatten the lists of results lists.
sample1 = sum(good_results_lists, [])
sample2 = sum(bad_results_lists, [])
+
+  # If either list contained only empty lists (this is unexpected and
+  # normally shouldn't happen), then we also want to return 0.
if not sample1 or not sample2:
return 0.0
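
A small sketch of the inputs this guard targets, assuming each inner list holds the values measured at one revision (the sample data below is made up):

# Each inner list holds the values measured at one revision (made-up data).
good_results_lists = [[1.0, 1.1]]          # only one "good" revision
bad_results_lists = [[2.0, 2.1], [2.2]]    # two "bad" revisions

# len(good_results_lists) <= 1, so ConfidenceScore() returns 0.0 here even
# though the measurements themselves are non-empty.
print(ConfidenceScore(good_results_lists, bad_results_lists))  # 0.0

# sum(lists, []) concatenates the inner lists into one flat sample:
print(sum(bad_results_lists, []))  # [2.0, 2.1, 2.2]
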
@@ -2889,7 +2898,7 @@ class BisectPerformanceMetrics(object):
if not results_dict['confidence']:
return None
confidence_status = 'Successful with %(level)s confidence%(warning)s.'
- if results_dict['confidence'] >= 95:
+ if results_dict['confidence'] >= HIGH_CONFIDENCE:
level = 'high'
else:
level = 'low'
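
How the status line resolves under the new constant, with an illustrative results_dict (the values here are assumed, not from a real run):

# Illustrative values; results_dict normally comes from the bisect run.
HIGH_CONFIDENCE = 95
results_dict = {'confidence': 97.5}
level = 'high' if results_dict['confidence'] >= HIGH_CONFIDENCE else 'low'
confidence_status = 'Successful with %(level)s confidence%(warning)s.'
print(confidence_status % {'level': level, 'warning': ''})
# -> Successful with high confidence.
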
@@ -3173,18 +3182,13 @@ class BisectPerformanceMetrics(object):
if self.opts.repeat_test_count == 1:
self.warnings.append('Tests were only set to run once. This may '
'be insufficient to get meaningful results.')
- if results_dict['confidence'] < 100:
- if results_dict['confidence']:
- self.warnings.append(
- 'Confidence is less than 100%. There could be other candidates '
- 'for this regression. Try bisecting again with increased '
- 'repeat_count or on a sub-metric that shows the regression more '
- 'clearly.')
- else:
- self.warnings.append(
- 'Confidence is 0%. Try bisecting again on another platform, with '
- 'increased repeat_count or on a sub-metric that shows the '
- 'regression more clearly.')
+ if 0 < results_dict['confidence'] < HIGH_CONFIDENCE:
+ self.warnings.append('Confidence is not high. Try bisecting again '
+ 'with increased repeat_count, larger range, or '
+ 'on another metric.')
+ if not results_dict['confidence']:
+ self.warnings.append('Confidence score is 0%. Try bisecting again on '
+ 'another platform or another metric.')
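
Python chains comparisons, so the new condition is a single range test; a quick sketch with assumed confidence values showing which warning each triggers:

# Assumed confidence values in [0, 100].
HIGH_CONFIDENCE = 95
for confidence in (0.0, 50.0, 95.0):
  print('%s -> %s' % (confidence, 0 < confidence < HIGH_CONFIDENCE))
# 0.0  -> False  (falls through to the separate "confidence score is 0" warning)
# 50.0 -> True   (triggers the "confidence is not high" warning)
# 95.0 -> False  (high confidence, no warning)
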
def FormatAndPrintResults(self, bisect_results):
"""Prints the results from a bisection run in a readable format.