Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(473)

Unified Diff: scripts/slave/recipe_modules/auto_bisect/bisector.py

Issue 1610203003: Iteratively increase sample size for good/bad classification. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master
Patch Set: Making initial regression check have a timeout. Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: scripts/slave/recipe_modules/auto_bisect/bisector.py
diff --git a/scripts/slave/recipe_modules/auto_bisect/bisector.py b/scripts/slave/recipe_modules/auto_bisect/bisector.py
index e7677c13d0e15a503dbf7444676fb88bcd116f82..5e2ebe9acc10e093a6a0fc97eae0818bdc116e86 100644
--- a/scripts/slave/recipe_modules/auto_bisect/bisector.py
+++ b/scripts/slave/recipe_modules/auto_bisect/bisector.py
@@ -4,6 +4,7 @@
import json
import re
+import time
from . import bisect_results
from . import depot_config
@@ -42,6 +43,14 @@ VALID_RESULT_CODES = (
# to move *too* far from the original revision, we'll cap the search at 25%.
DEFAULT_SEARCH_RANGE_PERCENTAGE = 0.25
+# How long to re-test the initial good-bad range for until significant
+# difference is established.
+REGRESSION_CHECK_TIMEOUT = 6 * 60 * 60
dtu 2016/02/05 00:18:01 Is that six hours? Do we really have that much tim
+
+# Significance level to use for determining difference between revisions via
+# hypothesis testing.
+SIGNIFICANCE_LEVEL = 0.05
+
class Bisector(object):
"""This class abstracts an ongoing bisect (or n-sect) job."""
@@ -114,6 +123,38 @@ class Bisector(object):
if init_revisions:
self._expand_chromium_revision_range()
+ def significantly_different(
+ self, list_a, list_b,
+ significance_level=SIGNIFICANCE_LEVEL): # pragma: no cover
+ """Uses an external script to run hypothesis testing with scipy.
+
+ The reason why we need an external script is that scipy is not available to
+ the default python installed in all platforms. We instead rely on an
+ anaconda environment to provide those packages.
+
+ Args:
+ list_a, list_b: Two lists representing samples to be compared.
+ significance_level: Self-describing. As a decimal fraction.
+
+ Returns:
+ A boolean indicating whether the null hypothesis ~(that the lists are
+ samples from the same population) can be rejected at the specified
+ significance level.
+ """
+ step_result = self.api.m.python(
+ 'Checking sample difference',
+ self.api.resource('significantly_different.py'),
+ [json.dumps(list_a), json.dumps(list_b), str(significance_level)],
+ stdout=self.api.m.json.output())
+ results = step_result.stdout
+ if results is None:
+ assert self.dummy_builds
+ return True
+ significantly_different = results['significantly_different']
+ step_result.presentation.logs[str(significantly_different)] = [
+ 'See json.output for details']
+ return significantly_different
+
def config_step(self):
"""Yields a simple echo step that outputs the bisect config."""
api = self.api
@@ -412,7 +453,7 @@ class Bisector(object):
'an improvement rather than a regression, given the '
'expected direction of improvement.')
- def check_initial_confidence(self):
+ def check_initial_confidence(self): # pragma: no cover
"""Checks that the initial range presents a clear enough regression.
We calculate the confidence of the results of the given 'good'
@@ -428,41 +469,44 @@ class Bisector(object):
if self.required_initial_confidence is None:
return True # pragma: no cover
+ # TODO(robertocn): Remove all uses of "confidence".
if self.dummy_initial_confidence is not None:
self.initial_confidence = float(
self.dummy_initial_confidence)
+ if (float(self.initial_confidence) <
+ float(self.required_initial_confidence)):
+ self._set_insufficient_confidence_warning()
+ return False
+ return True
+
+ if self.dummy_builds:
+ dummy_result = self.good_rev.values != self.bad_rev.values
+ if not dummy_result:
+ self._set_insufficient_confidence_warning()
+ return dummy_result
dtu 2016/02/05 00:18:01 Seems like there's a bunch of technical debt relat
+
+ expiration_time = time.time() + REGRESSION_CHECK_TIMEOUT
+ while time.time() < expiration_time:
+ if len(self.good_rev.values) >= 5 and len(self.bad_rev.values) >= 5:
+ if self.significantly_different(self.good_rev.values,
+ self.bad_rev.values):
+ return True
+ min(self.good_rev, self.bad_rev, key=lambda x: len(x.values)).retest()
+ self._set_insufficient_confidence_warning()
+ return False
- else: # pragma: no cover
- if len(self.good_rev.values) < 5 or len(self.bad_rev.values) < 5:
- # If there are too few values, the confidence score is not a good way to
- # determine whether the regression is reproducible.
- # TODO(robertocn): Investigate a straightforward approach to deal with
- # these cases. Such as the mean of one group lying within the range of
- # the other.
- return True
- self.initial_confidence = (
- self.api.m.math_utils.confidence_score(
- self.good_rev.values,
- self.bad_rev.values))
- if (self.initial_confidence <
- self.required_initial_confidence): # pragma: no cover
- self._set_insufficient_confidence_warning(self.initial_confidence)
- return False
- return True
def get_exception(self):
raise NotImplementedError() # pragma: no cover
# TODO: should return an exception with the details of the failure.
def _set_insufficient_confidence_warning(
- self, actual_confidence): # pragma: no cover
+ self): # pragma: no cover
"""Adds a warning about the lack of initial regression confidence."""
self.failed_initial_confidence = True
self.surface_result('LO_INIT_CONF')
self.warnings.append(
- ('Bisect failed to reproduce the regression with enough confidence. '
- 'Needed {:.2f}%, got {:.2f}%.').format(
- self.required_initial_confidence, actual_confidence))
+ 'Bisect failed to reproduce the regression with enough confidence.')
def _results_debug_message(self):
"""Returns a string with values used to debug a bisect result."""

Powered by Google App Engine
This is Rietveld 408576698