Chromium Code Reviews

Unified Diff: tools/auto_bisect/bisect_results.py

Issue 665893003: Re-applying reverted changes for regression confidence check + fix: ConfidenceScore takes flat lists (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressing comments. Created 6 years, 2 months ago.
Index: tools/auto_bisect/bisect_results.py
diff --git a/tools/auto_bisect/bisect_results.py b/tools/auto_bisect/bisect_results.py
index 9bcaeb683a0fc6d146d3de043afba0e2148c46a9..281afe61dd9131b549c0aa0fd5c6bdd3c7e47be7 100644
--- a/tools/auto_bisect/bisect_results.py
+++ b/tools/auto_bisect/bisect_results.py
@@ -109,7 +109,7 @@ class BisectResults(object):
return warnings
@staticmethod
- def ConfidenceScore(good_results_lists, bad_results_lists,
+ def ConfidenceScore(sample1, sample2,
accept_single_bad_or_good=False):
"""Calculates a confidence score.
@@ -119,8 +119,8 @@ class BisectResults(object):
Args:
- good_results_lists: A list of lists of "good" result numbers.
- bad_results_lists: A list of lists of "bad" result numbers.
+ sample1: A flat list of "good" result numbers.
+ sample2: A flat list of "bad" result numbers.
accept_single_bad_or_good: If True, computes confidence even if there is
just one bad or good revision; otherwise a single good or bad revision
always returns 0.0 confidence. This flag will probably go away when
@@ -134,13 +134,9 @@ class BisectResults(object):
# classified good or bad; this isn't good enough evidence to make a
# decision. If an empty list was passed, that also implies zero confidence.
if not accept_single_bad_or_good:
- if len(good_results_lists) <= 1 or len(bad_results_lists) <= 1:
+ if len(sample1) <= 1 or len(sample2) <= 1:
return 0.0
- # Flatten the lists of results lists.
- sample1 = sum(good_results_lists, [])
- sample2 = sum(bad_results_lists, [])
-
# If there were only empty lists in either of the lists (this is unexpected
# and normally shouldn't happen), then we also want to return 0.
if not sample1 or not sample2:
@@ -171,7 +167,9 @@ class BisectResults(object):
if revision_state.value:
current_values = revision_state.value['values']
if previous_values:
- confidence = cls.ConfidenceScore(previous_values, [current_values],
+ confidence_params = (sum(previous_values, []),
+ sum([current_values], []))
+ confidence = cls.ConfidenceScore(*confidence_params,
accept_single_bad_or_good=True)
mean_of_prev_runs = math_utils.Mean(sum(previous_values, []))
mean_of_current_runs = math_utils.Mean(current_values)
@@ -253,7 +251,8 @@ class BisectResults(object):
# Give a "confidence" in the bisect. At the moment we use how distinct the
# values are before and after the last broken revision, and how noisy the
# overall graph is.
- confidence = cls.ConfidenceScore(working_means, broken_means)
+ confidence_params = (sum(working_means, []), sum(broken_means, []))
+ confidence = cls.ConfidenceScore(*confidence_params)
bad_greater_than_good = mean_of_bad_runs > mean_of_good_runs
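
The change above moves list flattening out of ConfidenceScore and into its callers, which now build flat lists with sum(lists, []). Below is a minimal sketch of the new contract, using a hypothetical stand-in confidence_score() that reproduces only the guard behavior visible in this diff; the actual statistical comparison in BisectResults is not reproduced, and the sample data is invented for illustration.

def confidence_score(sample1, sample2, accept_single_bad_or_good=False):
  """Returns 0.0 unless both flat samples carry enough evidence."""
  # One or zero measurements on either side is not enough evidence,
  # unless the caller explicitly opts in.
  if not accept_single_bad_or_good:
    if len(sample1) <= 1 or len(sample2) <= 1:
      return 0.0
  # Empty samples (unexpected, but possible) also mean zero confidence.
  if not sample1 or not sample2:
    return 0.0
  # ... the real method would compare the two samples statistically ...
  return 1.0  # placeholder, not the real computation

# Callers flatten their lists-of-lists first, exactly as the patch does:
working_means = [[19.0, 19.2], [18.9, 19.1]]  # hypothetical measurements
broken_means = [[23.0, 23.4], [22.8]]
flat_good = sum(working_means, [])  # -> [19.0, 19.2, 18.9, 19.1]
flat_bad = sum(broken_means, [])    # -> [23.0, 23.4, 22.8]
print(confidence_score(flat_good, flat_bad))

Two side notes on the idiom: sum(lists, []) re-allocates its accumulator on every step, so it is quadratic in the total number of elements (itertools.chain.from_iterable is the usual linear alternative), and sum([current_values], []) in the second caller hunk simply produces a copy of current_values, since the outer list holds a single element.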