Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(676)

Unified Diff: scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py

Issue 1702013004: Revert of Iteratively increase sample size for good/bad classification. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py
diff --git a/scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py b/scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py
index d3b3092ecdb071cc6447976d96ffc01081f19beb..28b149303858a44f28daa847fb8f43f8fcbcf05e 100644
--- a/scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py
+++ b/scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py
@@ -3,7 +3,6 @@
# found in the LICENSE file.
import json
-import math
import tempfile
import os
import uuid
@@ -13,9 +12,6 @@
if 'CACHE_TEST_RESULTS' in os.environ: # pragma: no cover
from . import test_results_cache
-# These relate to how to increase the number of repetitions during re-test
-MINIMUM_SAMPLE_SIZE = 5
-INCREASE_FACTOR = 1.5
class PerfRevisionState(revision_state.RevisionState):
"""Contains the state and results for one revision in a perf bisect job."""
@@ -25,10 +21,9 @@
self.values = []
self.mean_value = None
self.std_dev = None
- self.repeat_count = MINIMUM_SAMPLE_SIZE
self._test_config = None
- def _read_test_results(self, check_revision_goodness=True):
+ def _read_test_results(self):
"""Gets the test results from GS and checks if the rev is good or bad."""
test_results = self._get_test_results()
# Results will contain the keys 'results' and 'output' where output is the
@@ -41,7 +36,7 @@
if 'MISSING_METRIC' in results.get('errors'): # pragma: no cover
self.bisector.surface_result('MISSING_METRIC')
return
- self.values += results['values']
+ self.values = results['values']
if self.bisector.is_return_code_mode():
retcodes = test_results['retcodes']
overall_return_code = 0 if all(v == 0 for v in retcodes) else 1
@@ -55,13 +50,8 @@
self.status = PerfRevisionState.FAILED
self.bisector.surface_result('MISSING_METRIC')
return
- # If we have already decided on the goodness of this revision, we shouldn't
- # recheck it.
- if self.good or self.bad:
- check_revision_goodness = False
# We cannot test the goodness of the initial rev range.
- if (self.bisector.good_rev != self and self.bisector.bad_rev != self and
- check_revision_goodness):
+ if self.bisector.good_rev != self and self.bisector.bad_rev != self:
if self._check_revision_good():
self.good = True
else:
@@ -123,13 +113,14 @@
required_test_properties = {
'truncate_percent',
'metric',
+ 'max_time_minutes',
'command',
+ 'repeat_count',
'test_type'
}
for k, v in self.bisector.bisect_config.iteritems():
if k in required_test_properties:
result[k] = v
- result['repeat_count'] = self.repeat_count
self._test_config = result
return result
@@ -164,10 +155,8 @@
self.test_results_url = (self.bisector.api.GS_RESULTS_URL +
self.job_name + '.results')
if api.m.bisect_tester.local_test_enabled(): # pragma: no cover
- skip_download = self.bisector.last_tested_revision == self
- self.bisector.last_tested_revision = self
overrides = perf_test_properties['properties']
- api.run_local_test_run(api.m, overrides, skip_download=skip_download)
+ api.run_local_test_run(api.m, overrides)
else:
step_name = 'Triggering test job for ' + str(self.revision_string)
api.m.trigger(perf_test_properties, name=step_name)
@@ -202,15 +191,6 @@
'job_name': self.job_name,
}
- def retest(self): # pragma: no cover
- # We need at least 5 samples for applying Mann-Whitney U test
- # with P < 0.01, two-tailed.
- target_sample_size = max(5, math.ceil(len(self.values) * 1.5))
- self.status = PerfRevisionState.NEED_MORE_DATA
- self.repeat_count = target_sample_size - len(self.values)
- self.start_job()
- self.bisector.wait_for_any([self])
-
def _get_test_results(self):
"""Tries to get the results of a test job from cloud storage."""
api = self.bisector.api
@@ -228,62 +208,20 @@
def _check_revision_good(self):
"""Determines if a revision is good or bad.
- Iteratively increment the sample size of the revision being tested, the last
- known good revision, and the first known bad revision until a relationship
- of significant difference can be established between the results of the
- revision being tested and one of the other two.
-
- If the results do not converge towards finding a significant difference in
- either direction, this is expected to timeout eventually. This scenario
- should be rather rare, since it is expected that the fkbr and lkgr are
- significantly different as a precondition.
+ Note that our current approach is to determine whether it is closer to
+ either the 'good' and 'bad' revisions given for the bisect job.
Returns:
- True if the results of testing this revision are significantly different
- from those of testing the earliest known bad revision.
- False if they are instead significantly different from those of testing
- the latest known good revision.
+ True if this revision is closer to the initial good revision's value than
+ to the initial bad revision's value. False otherwise.
"""
-
- while True:
- diff_from_good = self.bisector.significantly_different(
- self.bisector.lkgr.values, self.values)
- diff_from_bad = self.bisector.significantly_different(
- self.bisector.fkbr.values, self.values)
-
- if diff_from_good and diff_from_bad:
- # Multiple regressions.
- # For now, proceed bisecting the biggest difference of the means.
- dist_from_good = abs(self.mean_value - self.bisector.lkgr.mean_value)
- dist_from_bad = abs(self.mean_value - self.bisector.fkbr.mean_value)
- if dist_from_good > dist_from_bad:
- # TODO(robertocn): Add way to handle the secondary regression
- #self.bisector.handle_secondary_regression(self, self.bisector.fkbr)
- return False
- else:
- #self.bisector.handle_secondary_regression(self.bisector.lkgr, self)
- return True
-
- if diff_from_good or diff_from_bad: # pragma: no cover
- return diff_from_bad
-
- self._next_retest() # pragma: no cover
-
-
- def _next_retest(self): # pragma: no cover
- """Chooses one of current, lkgr, fkbr to retest.
-
- Look for the smallest sample and retest that. If the last tested revision
- is tied for the smallest sample, use that to take advantage of the fact
- that it is already downloaded and unzipped.
- """
- next_revision_to_test = min(self.bisector.lkgr, self, self.bisector.fkbr,
- key=lambda x: len(x.values))
- if (len(self.bisector.last_tested_revision.values) ==
- next_revision_to_test.values):
- self.bisector.last_tested_revision.retest()
- else:
- next_revision_to_test.retest()
+ # TODO: Reevaluate this approach
+ bisector = self.bisector
+ distance_to_good = abs(self.mean_value - bisector.good_rev.mean_value)
+ distance_to_bad = abs(self.mean_value - bisector.bad_rev.mean_value)
+ if distance_to_good < distance_to_bad:
+ return True
+ return False
def __repr__(self):
return ('PerfRevisionState(cp=%s, values=%r, mean_value=%r, std_dev=%r)' %

Powered by Google App Engine
This is Rietveld 408576698