Chromium Code Reviews| Index: tools/auto_bisect/bisect_perf_regression.py |
| diff --git a/tools/auto_bisect/bisect_perf_regression.py b/tools/auto_bisect/bisect_perf_regression.py |
| index f90074f4d7533adda3937ef5e8740afa4898efef..becd04dde7281df5a302a74826d028454cb37f21 100755 |
| --- a/tools/auto_bisect/bisect_perf_regression.py |
| +++ b/tools/auto_bisect/bisect_perf_regression.py |
| @@ -75,6 +75,10 @@ MAX_MAC_BUILD_TIME = 14400 |
| MAX_WIN_BUILD_TIME = 14400 |
| MAX_LINUX_BUILD_TIME = 14400 |
| +# The confidence percentage we require to consider the initial range a |
| +# regression based on the test results of the initial good and bad revisions. |
| +REGRESSION_CONFIDENCE = 95 |
| + |
| # Patch template to add a new file, DEPS.sha under src folder. |
| # This file contains SHA1 value of the DEPS changes made while bisecting |
| # dependency repositories. This patch send along with DEPS patch to try server. |
| @@ -89,6 +93,23 @@ new file mode 100644 |
| +%(deps_sha)s |
| """ |
| +REGRESSION_CONFIDENCE_ERROR_TEMPLATE = """ |
| +We could not reproduce the regression with this test/metric/platform combination |
| +with enough confidence. |
| + |
| +Here are the results for the initial revision range: |
| +\'Good\' revision: {} |
|
qyearsley
2014/10/23 00:38:39
Giving names to each of these template fields would make the format call easier to read.
RobertoCN
2014/10/23 19:51:45
Done.
|
| +\tmean: {} |
| +\tstd.err.:{} |
| +\tsample size:{} |
| +\'Bad\' revision: {} |
| +\tmean: {} |
| +\tstd.err.:{} |
| +\tsample size:{} |
| + |
| +NOTE: There\'s still a chance that this is actually a regression, but you may |
|
qyearsley
2014/10/23 00:38:39
Escaping single quotes isn't necessary inside a "-quoted string.
RobertoCN
2014/10/23 19:51:45
Done.
|
| + need to bisect a different platform.""" |
| + |
| # Git branch name used to run bisect try jobs. |
| BISECT_TRYJOB_BRANCH = 'bisect-tryjob' |
| # Git master branch name. |
| @@ -2217,6 +2238,25 @@ class BisectPerformanceMetrics(object): |
| min_revision = 0 |
| max_revision = len(revision_states) - 1 |
| + # Check how likely it is that the good and bad results are different |
| + # beyond chance-induced variation. |
| + if not self.opts.debug_ignore_regression_confidence: |
| + # Adding good and bad values to a parameter list. |
| + confidenceParams = [] |
| + for l in [known_bad_value['values'], known_good_value['values']]: |
| + # Flatten if needed |
| + if isinstance(l, list) and all([isinstance(x, list) for x in l]): |
| + confidenceParams.append(sum(l, [])) |
| + else: |
| + confidenceParams.append(l) |
| + regression_confidence = BisectResults.ConfidenceScore(*confidenceParams) |
| + if regression_confidence < REGRESSION_CONFIDENCE: |
| + error = REGRESSION_CONFIDENCE_ERROR_TEMPLATE.format( |
| + good_revision, known_good_value['mean'], |
| + known_good_value['std_err'], len(known_good_value['values']), |
| + bad_revision, known_bad_value['mean'], |
| + known_bad_value['std_err'], len(known_bad_value['values'])) |
| + return BisectResults(error=error) |
|
qyearsley
2014/10/23 00:38:38
Might be a good idea to extract everything under this check into a separate method.
RobertoCN
2014/10/23 19:51:45
Done.
|
| # Can just mark the good and bad revisions explicitly here since we |
| # already know the results. |
| @@ -2425,6 +2465,7 @@ class BisectOptions(object): |
| self.debug_ignore_build = None |
| self.debug_ignore_sync = None |
| self.debug_ignore_perf_test = None |
| + self.debug_ignore_regression_confidence = None |
| self.debug_fake_first_test_mean = 0 |
| self.gs_bucket = None |
| self.target_arch = 'ia32' |
| @@ -2593,6 +2634,10 @@ class BisectOptions(object): |
| group.add_option('--debug_ignore_perf_test', |
| action='store_true', |
| help='DEBUG: Don\'t perform performance tests.') |
| + group.add_option('--debug_ignore_regression_confidence', |
| + action='store_true', |
| + help='DEBUG: Don\'t score the confidence of the initial ' |
| + 'good and bad revisions\' test results.') |
| group.add_option('--debug_fake_first_test_mean', |
| type='int', |
| default='0', |