Index: tools/bisect-perf-regression.py
diff --git a/tools/bisect-perf-regression.py b/tools/bisect-perf-regression.py
index 41b999a07078523d22e0b24596049b0be61055b3..363aa1b5ffb36b814e85d49d35a2e1b40176448a 100755
--- a/tools/bisect-perf-regression.py
+++ b/tools/bisect-perf-regression.py
@@ -171,6 +171,9 @@ MAX_MAC_BUILD_TIME = 14400
MAX_WIN_BUILD_TIME = 14400
MAX_LINUX_BUILD_TIME = 14400
+# The confidence percentage at or above which confidence is considered "high".
+HIGH_CONFIDENCE = 95
+
# Patch template to add a new file, DEPS.sha under src folder.
# This file contains SHA1 value of the DEPS changes made while bisecting
# dependency repositories. This patch send along with DEPS patch to tryserver.
@@ -191,9 +194,9 @@ BISECT_MODE_MEAN = 'mean'
BISECT_MODE_STD_DEV = 'std_dev'
BISECT_MODE_RETURN_CODE = 'return_code'
-# The perf dashboard specifically looks for the string
-# "Estimated Confidence: 95%" to decide whether or not to cc the author(s).
-# If you change this, please update the perf dashboard as well.
+# The perf dashboard looks for a string like "Estimated Confidence: 95%"
+# to decide whether or not to cc the author(s). If you change this, please
+# update the perf dashboard as well.
RESULTS_BANNER = """
===== BISECT JOB RESULTS =====
Status: %(status)s
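Note: the dashboard-side parsing is not part of this patch. Purely as a hypothetical sketch of why the exact "Estimated Confidence: NN%" wording matters, a consumer could detect the line with a regular expression along these lines (sample_output and the regex are illustrative assumptions, not the dashboard's actual code):

import re

# Hypothetical sketch only; not taken from the perf dashboard source.
sample_output = 'Estimated Confidence: 95%'
match = re.search(r'Estimated Confidence:\s*(\d+(?:\.\d+)?)%', sample_output)
if match and float(match.group(1)) >= 95:
    print('would cc the author(s)')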
@@ -280,12 +283,18 @@ def ConfidenceScore(good_results_lists, bad_results_lists):
Returns:
A number in the range [0, 100].
"""
- if not good_results_lists or not bad_results_lists:
+ # If there's only one item in either list, this means only one revision was
+ # classified good or bad; this isn't good enough evidence to make a decision.
+ # If an empty list was passed, that also implies zero confidence.
+ if len(good_results_lists) <= 1 or len(bad_results_lists) <= 1:
return 0.0
# Flatten the lists of results lists.
sample1 = sum(good_results_lists, [])
sample2 = sum(bad_results_lists, [])
+
+ # If either list contained only empty lists (which is unexpected and normally
+ # shouldn't happen), we also want to return 0.
if not sample1 or not sample2:
return 0.0
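As a rough illustration of the new guard (the values below are made up, not from the patch): a single classified-bad revision is now rejected before any statistics are computed, and sum(list_of_lists, []) is what flattens the per-revision result lists.

# Hypothetical inputs, for illustration only.
good = [[10.1, 10.2], [10.0, 10.3]]  # results from two revisions classified good
bad = [[12.4, 12.6]]                 # results from only one revision classified bad

# len(bad) <= 1, so the new check makes ConfidenceScore(good, bad) return 0.0
# before any statistics are computed.

# Flattening with sum(list_of_lists, []) simply concatenates the sublists:
sample1 = sum(good, [])
print(sample1)  # [10.1, 10.2, 10.0, 10.3]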
@@ -2889,7 +2898,7 @@ class BisectPerformanceMetrics(object):
if not results_dict['confidence']:
return None
confidence_status = 'Successful with %(level)s confidence%(warning)s.'
- if results_dict['confidence'] >= 95:
+ if results_dict['confidence'] >= HIGH_CONFIDENCE:
level = 'high'
else:
level = 'low'
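A small sketch of how the status line comes out on either side of the threshold; the confidence values are hypothetical, and HIGH_CONFIDENCE is repeated here only so the snippet stands alone:

HIGH_CONFIDENCE = 95  # repeated so the sketch is self-contained

confidence_status = 'Successful with %(level)s confidence%(warning)s.'
for confidence in (99.9, 80.0):  # hypothetical confidence scores
    level = 'high' if confidence >= HIGH_CONFIDENCE else 'low'
    print(confidence_status % {'level': level, 'warning': ''})
# -> Successful with high confidence.
# -> Successful with low confidence.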
@@ -3173,18 +3182,13 @@ class BisectPerformanceMetrics(object):
if self.opts.repeat_test_count == 1:
self.warnings.append('Tests were only set to run once. This may '
'be insufficient to get meaningful results.')
- if results_dict['confidence'] < 100:
- if results_dict['confidence']:
- self.warnings.append(
- 'Confidence is less than 100%. There could be other candidates '
- 'for this regression. Try bisecting again with increased '
- 'repeat_count or on a sub-metric that shows the regression more '
- 'clearly.')
- else:
- self.warnings.append(
- 'Confidence is 0%. Try bisecting again on another platform, with '
- 'increased repeat_count or on a sub-metric that shows the '
- 'regression more clearly.')
+ if 0 < results_dict['confidence'] < HIGH_CONFIDENCE:
+ self.warnings.append('Confidence is not high. Try bisecting again '
+ 'with increased repeat_count, larger range, or '
+ 'on another metric.')
+ if not results_dict['confidence']:
+ self.warnings.append('Confidence score is 0%. Try bisecting again on '
+ 'another platform or another metric.')
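To make the new branching concrete, here is a standalone sketch of which warning, if any, each confidence range now produces; warning_for is an illustrative helper, not part of the patch, and HIGH_CONFIDENCE is repeated so the snippet runs on its own:

HIGH_CONFIDENCE = 95  # repeated so the sketch is self-contained

def warning_for(confidence):
    # Mirrors the new logic: one warning for low-but-nonzero confidence,
    # another for zero confidence, none at or above HIGH_CONFIDENCE.
    if 0 < confidence < HIGH_CONFIDENCE:
        return 'Confidence is not high. ...'
    if not confidence:
        return 'Confidence score is 0%. ...'
    return None

for c in (0.0, 42.0, 99.5):  # hypothetical confidence scores
    print(c, warning_for(c))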
def FormatAndPrintResults(self, bisect_results):
"""Prints the results from a bisection run in a readable format.