Chromium Code Reviews

Unified Diff: tools/auto_bisect/bisect_results.py

Issue 665893003: Re-applying reverted changes for regression confidence check + fix: ConfidenceScore takes flat lists (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressing comments. Created 6 years, 2 months ago.
Index: tools/auto_bisect/bisect_results.py
diff --git a/tools/auto_bisect/bisect_results.py b/tools/auto_bisect/bisect_results.py
index 9bcaeb683a0fc6d146d3de043afba0e2148c46a9..281afe61dd9131b549c0aa0fd5c6bdd3c7e47be7 100644
--- a/tools/auto_bisect/bisect_results.py
+++ b/tools/auto_bisect/bisect_results.py
@@ -109,7 +109,7 @@ class BisectResults(object):
return warnings
@staticmethod
- def ConfidenceScore(good_results_lists, bad_results_lists,
+ def ConfidenceScore(sample1, sample2,
accept_single_bad_or_good=False):
"""Calculates a confidence score.
@@ -119,8 +119,8 @@ class BisectResults(object):
Args:
- good_results_lists: A list of lists of "good" result numbers.
- bad_results_lists: A list of lists of "bad" result numbers.
+ sample1: A flat list of "good" result numbers.
+ sample2: A flat list of "bad" result numbers.
accept_single_bad_or_good: If True, computes confidence even if there is
just one bad or good revision; otherwise a single good or bad revision
always returns 0.0 confidence. This flag will probably go away when
@@ -134,13 +134,9 @@ class BisectResults(object):
# classified good or bad; this isn't good enough evidence to make a
# decision. If an empty list was passed, that also implies zero confidence.
if not accept_single_bad_or_good:
- if len(good_results_lists) <= 1 or len(bad_results_lists) <= 1:
+ if len(sample1) <= 1 or len(sample2) <= 1:
return 0.0
- # Flatten the lists of results lists.
- sample1 = sum(good_results_lists, [])
- sample2 = sum(bad_results_lists, [])
-
# If there were only empty lists in either of the lists (this is unexpected
# and normally shouldn't happen), then we also want to return 0.
if not sample1 or not sample2:
@@ -171,7 +167,9 @@ class BisectResults(object):
if revision_state.value:
current_values = revision_state.value['values']
if previous_values:
- confidence = cls.ConfidenceScore(previous_values, [current_values],
+ confidence_params = (sum(previous_values, []),
+ sum([current_values], []))
+ confidence = cls.ConfidenceScore(*confidence_params,
accept_single_bad_or_good=True)
mean_of_prev_runs = math_utils.Mean(sum(previous_values, []))
mean_of_current_runs = math_utils.Mean(current_values)
@@ -253,7 +251,8 @@ class BisectResults(object):
# Give a "confidence" in the bisect. At the moment we use how distinct the
# values are before and after the last broken revision, and how noisy the
# overall graph is.
- confidence = cls.ConfidenceScore(working_means, broken_means)
+ confidence_params = (sum(working_means, []), sum(broken_means, []))
+ confidence = cls.ConfidenceScore(*confidence_params)
bad_greater_than_good = mean_of_bad_runs > mean_of_good_runs
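
The change above moves list flattening out of ConfidenceScore and into its callers, which now build flat lists with sum(lists, []). Below is a minimal sketch of the new contract, using a hypothetical stand-in confidence_score() that reproduces only the guard behavior visible in this diff; the actual statistical comparison in BisectResults is not reproduced, and the sample data is invented for illustration.

def confidence_score(sample1, sample2, accept_single_bad_or_good=False):
  """Returns 0.0 unless both flat samples carry enough evidence."""
  # One or zero measurements on either side is not enough evidence,
  # unless the caller explicitly opts in.
  if not accept_single_bad_or_good:
    if len(sample1) <= 1 or len(sample2) <= 1:
      return 0.0
  # Empty samples (unexpected, but possible) also mean zero confidence.
  if not sample1 or not sample2:
    return 0.0
  # ... the real method would compare the two samples statistically ...
  return 1.0  # placeholder, not the real computation

# Callers flatten their lists-of-lists first, exactly as the patch does:
working_means = [[19.0, 19.2], [18.9, 19.1]]  # hypothetical measurements
broken_means = [[23.0, 23.4], [22.8]]
flat_good = sum(working_means, [])  # -> [19.0, 19.2, 18.9, 19.1]
flat_bad = sum(broken_means, [])    # -> [23.0, 23.4, 22.8]
print(confidence_score(flat_good, flat_bad))

Two side notes on the idiom: sum(lists, []) re-allocates its accumulator on every step, so it is quadratic in the total number of elements (itertools.chain.from_iterable is the usual linear alternative), and sum([current_values], []) in the second caller hunk simply produces a copy of current_values, since the outer list holds a single element.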