Index: tools/auto_bisect/bisect_perf_regression.py
diff --git a/tools/auto_bisect/bisect_perf_regression.py b/tools/auto_bisect/bisect_perf_regression.py
index 429624a1e1ab0126d55c3e43388cd2797712e55c..5167b1d3c1a1376fd295b22119de45bfb5a5d7f3 100755
--- a/tools/auto_bisect/bisect_perf_regression.py
+++ b/tools/auto_bisect/bisect_perf_regression.py
@@ -76,9 +76,6 @@ BUILD_RESULT_SUCCEED = 0
 BUILD_RESULT_FAIL = 1
 BUILD_RESULT_SKIPPED = 2
 
-# The confidence percentage we require to consider the initial range a
-# regression based on the test results of the initial good and bad revisions.
-REGRESSION_CONFIDENCE = 80
 # How many times to repeat the test on the last known good and first known bad
 # revisions in order to assess a more accurate confidence score in the
 # regression culprit.
@@ -475,12 +472,15 @@ def _GenerateProfileIfNecessary(command_args):
   return True
 
 
-def _IsRegressionReproduced(known_good_result, known_bad_result):
+def _IsRegressionReproduced(known_good_result, known_bad_result,
+                            required_initial_confidence):
   """Checks whether the regression was reproduced based on the initial values.
 
   Args:
     known_good_result: A dict with the keys "values", "mean" and "std_err".
     known_bad_result: Same as above.
+    required_initial_confidence: Minimum confidence score for the given
+        good and bad revisions; if it is not met, the bisect aborts early.
 
   Returns:
     True if there is a clear change between the result values for the given
@@ -492,12 +492,12 @@ def _IsRegressionReproduced(known_good_result, known_bad_result):
       return map(math_utils.Mean, values)
     return values
 
-  p_value = BisectResults.ConfidenceScore(
+  initial_confidence = BisectResults.ConfidenceScore(
       PossiblyFlatten(known_bad_result['values']),
       PossiblyFlatten(known_good_result['values']),
       accept_single_bad_or_good=True)
 
-  return p_value > REGRESSION_CONFIDENCE
+  return initial_confidence >= required_initial_confidence
 
 
 def _RegressionNotReproducedWarningMessage(
@@ -2329,7 +2329,8 @@ class BisectPerformanceMetrics(object):
     # beyond chance-induced variation.
     if not (self.opts.debug_ignore_regression_confidence or
             self._IsBisectModeReturnCode()):
-      if not _IsRegressionReproduced(known_good_value, known_bad_value):
+      if not _IsRegressionReproduced(known_good_value, known_bad_value,
+                                     self.opts.required_initial_confidence):
         # If there is no significant difference between "good" and "bad"
         # revision results, then the "bad revision" is considered "good".
         # TODO(qyearsley): Remove this if it is not necessary.
@@ -2582,6 +2583,7 @@ class BisectOptions(object):
     self.bisect_mode = bisect_utils.BISECT_MODE_MEAN
     self.improvement_direction = 0
     self.bug_id = ''
+    self.required_initial_confidence = 80.0
 
   @staticmethod
   def _AddBisectOptionsGroup(parser):
@@ -2633,6 +2635,11 @@
                             'If this number is given, bisect will attempt to ' +
                             'verify that the bug is not closed before '
                             'starting.')
+    group.add_argument('--required_initial_confidence', type=float,
+                       default=80.0,
+                       help='The confidence score required by the initial '
+                            'check for a significant difference between '
+                            'the given good and bad revisions.')
 
   @staticmethod
   def _AddBuildOptionsGroup(parser):
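
The change above replaces the hard-coded REGRESSION_CONFIDENCE constant
(checked with a strict >) with a configurable --required_initial_confidence
option (checked with >=, defaulting to 80.0), so callers can tune how
decisively the initial good and bad runs must differ before the bisect
proceeds. Below is a minimal, self-contained sketch of that gate.
BisectResults.ConfidenceScore is not reproduced here; confidence_score() is a
hypothetical stand-in that assumes only that the real function returns a
score in the 0-100 range.

# Sketch of the patched gate. confidence_score() is a hypothetical stand-in
# for BisectResults.ConfidenceScore, assumed only to return a 0-100 score.


def confidence_score(bad_values, good_values):
  # Stand-in scoring: the percentage of (bad, good) pairs that are ordered
  # the same way. 100 means the two samples separate cleanly.
  pairs = [(b, g) for b in bad_values for g in good_values]
  if not pairs:
    return 0.0
  greater = sum(1 for b, g in pairs if b > g)
  lesser = sum(1 for b, g in pairs if b < g)
  return 100.0 * max(greater, lesser) / len(pairs)


def is_regression_reproduced(known_good_values, known_bad_values,
                             required_initial_confidence=80.0):
  # Mirrors the new comparison: a configurable threshold checked with >=,
  # instead of the old hard-coded REGRESSION_CONFIDENCE checked with >.
  score = confidence_score(known_bad_values, known_good_values)
  return score >= required_initial_confidence


# Overlapping samples score low, so the bisect would abort early;
# well-separated samples clear the default 80.0 threshold.
print(is_regression_reproduced([10.1, 9.9, 10.0], [10.2, 9.8, 10.1]))  # False
print(is_regression_reproduced([10.1, 9.9, 10.0], [12.3, 12.1, 12.4]))  # True

In practice this lets a noisy benchmark be bisected with a lower bar (e.g.
passing --required_initial_confidence 60 on the bisect command line), while a
stable one can demand a higher score before the bisect proceeds to
intermediate revisions; the rest of the command line is unchanged.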
|