Chromium Code Reviews

Unified Diff: scripts/slave/recipe_modules/auto_bisect/revision_state.py

Issue 2247373002: Refactor stages 1, 2 and test_api overhaul. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master
Patch Set: Addressing all early feedback. Created 4 years, 3 months ago
Index: scripts/slave/recipe_modules/auto_bisect/revision_state.py
diff --git a/scripts/slave/recipe_modules/auto_bisect/revision_state.py b/scripts/slave/recipe_modules/auto_bisect/revision_state.py
index 4e0644bc4e14f7b9ef344d2258b9cfc6353f2b4d..fbd02936b17d4a00cbb58d531db0b78b0c4bd5be 100644
--- a/scripts/slave/recipe_modules/auto_bisect/revision_state.py
+++ b/scripts/slave/recipe_modules/auto_bisect/revision_state.py
@@ -10,6 +10,7 @@ class so that the bisect module and recipe can use it.
See perf_revision_state for an example.
"""
+import collections
import hashlib
import json
import math
@@ -18,31 +19,18 @@ import tempfile
import re
import uuid
-from . import depot_config
+from auto_bisect import depot_config
+from auto_bisect import bisect_exceptions
# These relate to how to increase the number of repetitions during re-test
MINIMUM_SAMPLE_SIZE = 5
INCREASE_FACTOR = 1.5
+NOT_SIGNIFICANTLY_DIFFERENT, SIGNIFICANTLY_DIFFERENT, NEED_MORE_DATA = range(3)
dtu 2016/09/13 23:57:27 I think Nat or Tony? said that whenever you're goi
RobertoCN 2016/09/21 22:22:48 Done.
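Note: the review thread above concerns the range-unpacking line, a common Python 2 idiom for lightweight enums; the patch trades the old eight-state class enum for these three module-level significance constants plus a plain `failed` flag. A minimal standalone sketch of how the tri-state constants read at their call sites (the classify() helper is hypothetical and mirrors the decision logic in _check_revision_good further down):

    NOT_SIGNIFICANTLY_DIFFERENT, SIGNIFICANTLY_DIFFERENT, NEED_MORE_DATA = range(3)

    def classify(diff_from_good, diff_from_bad):
        # Differs from the last known good revision -> behaves like a bad one.
        if diff_from_good == SIGNIFICANTLY_DIFFERENT:
            return 'bad'
        # Differs from the first known bad revision -> behaves like a good one.
        if diff_from_bad == SIGNIFICANTLY_DIFFERENT:
            return 'good'
        return 'need more data'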
+
class RevisionState(object):
"""Abstracts the state of a single revision on a bisect job."""
- # Possible values for the status attribute of RevisionState:
- (
- NEW, # A revision_state object that has just been initialized.
- BUILDING, # Requested a build for this revision, waiting for it.
- TESTING, # A test job for this revision was triggered, waiting for it.
- TESTED, # The test job completed with non-failing results.
- FAILED, # Either the build or the test jobs failed or timed out.
- ABORTED, # The build or test job was aborted. (For use in multi-secting).
- SKIPPED, # A revision that was not built or tested for a special reason,
- # such as those ranges that we know are broken, or when nudging
- # revisions.
- NEED_MORE_DATA, # Current number of test values is too small to establish
- # a statistically significant difference between this
- # revision and the revisions known to be good and bad.
- ) = xrange(8)
-
def __init__(self, bisector, commit_hash, depot_name=None,
base_revision=None):
"""Creates a new instance to track the state of a revision.
@@ -58,10 +46,10 @@ class RevisionState(object):
super(RevisionState, self).__init__()
self.bisector = bisector
self._good = None
+ self.failed = False
self.deps = None
self.test_results_url = None
self.build_archived = False
- self.status = RevisionState.NEW
self.next_revision = None
self.previous_revision = None
self.job_name = None
@@ -75,23 +63,18 @@ class RevisionState(object):
self.revision_overrides = {}
self.build_id = None
if self.base_revision:
- assert self.base_revision.deps_file_contents
self.needs_patch = True
self.revision_overrides[self.depot['src']] = self.commit_hash
- self.deps_patch, self.deps_file_contents = self.bisector.make_deps_patch(
- self.base_revision, self.base_revision.deps_file_contents,
- self.depot, self.commit_hash)
- self.deps_sha = hashlib.sha1(self.deps_patch).hexdigest()
- self.deps_sha_patch = self.bisector.make_deps_sha_file(self.deps_sha)
+ self.deps_sha = hashlib.sha1(self.revision_string()).hexdigest()
self.deps = dict(base_revision.deps)
self.deps[self.depot_name] = self.commit_hash
else:
self.needs_patch = False
self.build_url = self.bisector.get_platform_gs_prefix() + self._gs_suffix()
- self.values = []
- self.mean_value = None
- self.overall_return_code = None
- self.std_dev = None
+ self.valueset_paths = []
+ self.chartjson_paths = []
+ self.debug_values = []
+ self.return_codes = []
self._test_config = None
if self.bisector.test_type == 'perf':
@@ -101,23 +84,6 @@ class RevisionState(object):
'repeat_count', MINIMUM_SAMPLE_SIZE)
@property
- def tested(self):
- return self.status in (RevisionState.TESTED,)
-
- @property
- def in_progress(self):
- return self.status in (RevisionState.BUILDING, RevisionState.TESTING,
- RevisionState.NEED_MORE_DATA)
-
- @property
- def failed(self):
- return self.status == RevisionState.FAILED
-
- @property
- def aborted(self):
- return self.status == RevisionState.ABORTED
-
- @property
def good(self):
return self._good == True
@@ -133,18 +99,51 @@ class RevisionState(object):
def bad(self, value):
self._good = not value
+ @property
+ def test_run_count(self):
+ return max(
+ len(self.valueset_paths),
+ len(self.chartjson_paths),
+ len(self.return_codes))
+
+ @property
+ def mean(self):
+ if self.debug_values:
+ return float(sum(self.debug_values)) / len(self.debug_values)
+
+ @property
+ def std_dev(self):
+ if self.debug_values:
+ mn = self.mean
+ # Population standard deviation: divide the sum of squared
+ # deviations by the sample size before taking the square root.
+ return math.sqrt(
+ sum((x - mn) ** 2 for x in self.debug_values) /
+ len(self.debug_values))
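Note: a quick worked check of the two properties above, using a made-up sample of eight debug values: the mean is 40.0 / 8 = 5.0, and the population standard deviation is sqrt(32.0 / 8) = 2.0. A standalone sketch:

    import math

    debug_values = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]  # invented sample
    mean = float(sum(debug_values)) / len(debug_values)       # 5.0
    std_dev = math.sqrt(
        sum((x - mean) ** 2 for x in debug_values) / len(debug_values))  # 2.0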
+
def start_job(self):
- """Starts a build, or a test job if the build is available."""
- if self.status == RevisionState.NEW and not self._is_build_archived():
- self._request_build()
- self.status = RevisionState.BUILDING
- return
-
- if self._is_build_archived() and self.status in (
- RevisionState.NEW, RevisionState.BUILDING,
- RevisionState.NEED_MORE_DATA):
+ api = self.bisector.api
+ try:
+ if not self._is_build_archived():
+ self._request_build()
+ with api.m.step.nest('Waiting for build'):
+ while not self._is_build_archived():
+ api.m.python.inline(
+ 'sleeping',
+ """
+ import sys
+ import time
+ time.sleep(20*60)
+ sys.exit(0)
+ """)
+ if self._is_build_failed():
+ self.failed = True
+ return
+
self._do_test()
- self.status = RevisionState.TESTING
+ # TODO(robertocn): Add a test covering this retest loop so that the
+ # pragma below can be removed.
+ while not self._check_revision_good(): # pragma: no cover
+ min(self, self.bisector.lkgr, self.bisector.fkbr,
+ key=lambda x: x.test_run_count)._do_test()
+
+ except bisect_exceptions.UntestableRevisionException:
+ self.failed = True
def deps_change(self):
"""Uses `git show` to see if a given commit contains a DEPS change."""
@@ -159,9 +158,9 @@ class RevisionState(object):
name = 'Checking DEPS for ' + self.commit_hash
step_result = api.m.git(
'show', '--name-only', '--pretty=format:',
- self.commit_hash, cwd=cwd, stdout=api.m.raw_io.output(), name=name)
- if self.bisector.dummy_builds and not self.commit_hash.startswith('dcdc'):
- return False
+ self.commit_hash, cwd=cwd, stdout=api.m.raw_io.output(), name=name,
+ step_test_data=lambda: api._test_data['deps_change'][self.commit_hash]
+ )
if 'DEPS' in step_result.stdout.splitlines(): # pragma: no cover
return True
return False # pragma: no cover
@@ -189,17 +188,15 @@ class RevisionState(object):
def read_deps(self, recipe_tester_name):
"""Sets the dependencies for this revision from the contents of DEPS."""
api = self.bisector.api
- if self.deps:
- return
if self.bisector.internal_bisect: # pragma: no cover
- self.deps_file_contents = self._read_content(
+ deps_file_contents = self._read_content(
depot_config.DEPOT_DEPS_NAME[self.depot_name]['url'],
depot_config.DEPOT_DEPS_NAME[self.depot_name]['deps_file'],
self.commit_hash)
# On April 5th, 2016 .DEPS.git was changed to DEPS on android-chrome repo,
# we are doing this in order to support both deps files.
- if not self.deps_file_contents:
- self.deps_file_contents = self._read_content(
+ if not deps_file_contents:
+ deps_file_contents = self._read_content(
depot_config.DEPOT_DEPS_NAME[self.depot_name]['url'],
depot_config.DEPS_FILENAME,
self.commit_hash)
@@ -208,11 +205,13 @@ class RevisionState(object):
'fetch file %s:%s' % (self.commit_hash, depot_config.DEPS_FILENAME),
api.resource('fetch_file.py'),
[depot_config.DEPS_FILENAME, '--commit', self.commit_hash],
- stdout=api.m.raw_io.output())
- self.deps_file_contents = step_result.stdout
+ stdout=api.m.raw_io.output(),
+ step_test_data=lambda: api._test_data['deps'][self.commit_hash]
+ )
+ deps_file_contents = step_result.stdout
try:
deps_data = self._gen_deps_local_scope()
- exec(self.deps_file_contents or 'deps = {}', {}, deps_data)
+ exec (deps_file_contents or 'deps = {}') in {}, deps_data
deps_data = deps_data['deps']
except ImportError: # pragma: no cover
# TODO(robertocn): Implement manual parsing of DEPS when exec fails.
@@ -243,40 +242,16 @@ class RevisionState(object):
self.deps = results
return
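Note: the exec in read_deps parses a DEPS file by evaluating it against a prepared local scope; a DEPS file is ordinary Python that assigns a `deps` dict. A minimal Python 2 illustration (the file contents and revision hash are invented, and the real code seeds the scope via _gen_deps_local_scope):

    deps_file_contents = (
        "deps = {'src/v8': "
        "'https://chromium.googlesource.com/v8/v8.git@deadbeef'}")
    scope = {}
    # Python 2 exec statement form, as used in the patch above.
    exec (deps_file_contents or 'deps = {}') in {}, scope
    print scope['deps']['src/v8']  # .../v8/v8.git@deadbeef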
- def update_status(self):
- """Checks on the pending jobs and updates status accordingly.
-
- This method will check for the build to complete and then trigger the test,
- or will wait for the test as appropriate.
-
- To wait for the test we try to get the buildbot job url from GS, and if
- available, we query the status of such job.
- """
- if self.status == RevisionState.BUILDING:
- if self._is_build_archived():
- self.start_job()
- elif self._is_build_failed(): # pragma: no cover
- self.status = RevisionState.FAILED
- elif (self.status in (RevisionState.TESTING, RevisionState.NEED_MORE_DATA)
- and self._results_available()):
- # If we have already decided whether the revision is good or bad we
- # shouldn't check again
- check_revision_goodness = not(self.good or self.bad)
- self._read_test_results(
- check_revision_goodness=check_revision_goodness)
- # We assume _read_test_results may have changed the status to a broken
- # state such as FAILED or ABORTED.
- if self.status in (RevisionState.TESTING, RevisionState.NEED_MORE_DATA):
- self.status = RevisionState.TESTED
-
def _is_build_archived(self):
"""Checks if the revision is already built and archived."""
if not self.build_archived:
api = self.bisector.api
- self.build_archived = api.gsutil_file_exists(self.build_url)
-
- if self.bisector.dummy_builds:
- self.build_archived = self.in_progress
+ self.build_archived = api.gsutil_file_exists(
+ self.build_url,
+ step_test_data=lambda: api._test_data.get(
+ 'gsutil_exists', {}).get(self.commit_hash).pop()
+ if api._test_data.get('gsutil_exists', {}).get(self.commit_hash)
+ else collections.namedtuple('retcode_attr', ['retcode'])(0))
dtu 2016/09/13 23:57:27 Maybe this is: api._test_data.get('gsutil_exists'
RobertoCN 2016/09/21 22:22:48 That wouldn't work because after popping the last
return self.build_archived
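Note on the review exchange above: the step_test_data lambda drains a per-commit queue of fake gsutil retcodes so that successive polls can simulate a build that is missing at first and archived later, and once the queue is empty it falls back to a success retcode; that fallback is why the reviewer's simpler one-liner would not work. A condensed standalone sketch of the consume-then-default pattern (the test-data shape is inferred from this diff, and a zero retcode is assumed to mean the file exists):

    import collections

    RetcodeAttr = collections.namedtuple('retcode_attr', ['retcode'])
    # pop() consumes from the end: one miss (1) first, then a hit (0).
    test_data = {'gsutil_exists': {'deadbeef': [RetcodeAttr(0), RetcodeAttr(1)]}}

    def next_gsutil_result(commit_hash):
        queue = test_data.get('gsutil_exists', {}).get(commit_hash)
        if queue:
            return queue.pop()  # queued response, drained last-first
        return RetcodeAttr(0)   # default to success once exhausted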
@@ -285,20 +260,11 @@ class RevisionState(object):
result = api.m.buildbucket.get_build(
self.build_id,
api.m.service_account.get_json_path(api.SERVICE_ACCOUNT),
- step_test_data=lambda: api.m.json.test_api.output_stream(
- {'build': {'result': 'SUCCESS', 'status': 'COMPLETED'}}
- ))
+ step_test_data=lambda: api.test_api.buildbot_job_status_mock(
+ api._test_data.get('build_status', {}).get(self.commit_hash, [])))
return (result.stdout['build']['status'] == 'COMPLETED' and
result.stdout['build'].get('result') != 'SUCCESS')
- def _results_available(self):
- """Checks if the results for the test job have been uploaded."""
- api = self.bisector.api
- result = api.gsutil_file_exists(self.test_results_url)
- if self.bisector.dummy_builds:
- return self.in_progress
- return result # pragma: no cover
-
def _gs_suffix(self):
"""Provides the expected right half of the build filename.
@@ -312,58 +278,23 @@ class RevisionState(object):
name_parts.append(self.deps_sha)
return '%s.zip' % '_'.join(name_parts)
- def _read_test_results(self, check_revision_goodness=True):
- """Gets the test results from GS and checks if the rev is good or bad."""
- test_results = self._get_test_results()
- # Results will contain the keys 'results' and 'output' where output is the
- # stdout of the command, and 'results' is itself a dict with the key
- # 'values' unless the test failed, in which case 'results' will contain
- # the 'error' key explaining the type of error.
- results = test_results['results']
- if results.get('errors'):
- self.status = RevisionState.FAILED
- if 'MISSING_METRIC' in results.get('errors'): # pragma: no cover
+ def _read_test_results(self, results):
+ # `results` is a dictionary containing paths to chartjson files, paths
+ # to valueset files, and a list of return codes.
+ self.return_codes.extend(results.get('retcodes', []))
+ if results.get('errors'): # pragma: no cover
+ self.failed = True
+ if 'MISSING_METRIC' in results.get('errors'):
self.bisector.surface_result('MISSING_METRIC')
- return
- self.values += results['values']
- api = self.bisector.api
- if test_results.get('retcodes') and test_results['retcodes'][-1] != 0 and (
- api.m.chromium.c.TARGET_PLATFORM == 'android'): #pragma: no cover
- api.m.chromium_android.device_status()
- current_connected_devices = api.m.chromium_android.devices
- current_device = api.m.bisect_tester.device_to_test
- if current_device not in current_connected_devices:
- # We need to manually raise step failure here because we are catching
- # them further down the line to enable return_code bisects and bisecting
- # on benchmarks that are a little flaky.
- raise api.m.step.StepFailure('Test device disconnected.')
- if self.bisector.is_return_code_mode():
- retcodes = test_results['retcodes']
- self.overall_return_code = 0 if all(v == 0 for v in retcodes) else 1
- # Keeping mean_value for compatibility with dashboard.
- # TODO(robertocn): refactor mean_value, specially when uploading results
- # to dashboard.
- self.mean_value = self.overall_return_code
- elif self.values:
- api = self.bisector.api
- self.mean_value = api.m.math_utils.mean(self.values)
- self.std_dev = api.m.math_utils.standard_deviation(self.values)
- # Values were not found, but the test did not otherwise fail.
+ raise bisect_exceptions.UntestableRevisionException(results['errors'])
+ elif self.bisector.is_return_code_mode():
+ assert len(results['retcodes'])
else:
- self.status = RevisionState.FAILED
- self.bisector.surface_result('MISSING_METRIC')
- return
- # If we have already decided on the goodness of this revision, we shouldn't
- # recheck it.
- if self.good or self.bad:
- check_revision_goodness = False
- # We cannot test the goodness of the initial rev range.
- if (self.bisector.good_rev != self and self.bisector.bad_rev != self and
- check_revision_goodness):
- if self._check_revision_good():
- self.good = True
- else:
- self.bad = True
+ self.valueset_paths.extend(results.get('valueset_paths') or [])
+ self.chartjson_paths.extend(results.get('chartjson_paths') or [])
+ if results.get('retcodes') and 0 not in results['retcodes']:
+ raise bisect_exceptions.UntestableRevisionException(
+ 'got non-zero return code on all runs for ' + self.commit_hash)
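Note: a hypothetical `results` payload matching the shape the comment at the top of this method describes (all paths are invented for illustration):

    results = {
        'valueset_paths': ['/tmp/run_1/valueset.json'],
        'chartjson_paths': ['/tmp/run_1/chartjson.json'],
        'retcodes': [0],
        # 'errors': ['MISSING_METRIC'],  # only present when the run failed
    }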
def _request_build(self):
"""Posts a request to buildbot to build this revision and archive it."""
@@ -431,10 +362,10 @@ class RevisionState(object):
the test will be run on the same machine. Otherwise, this posts
a request to buildbot to download and perf-test this build.
"""
- if self.bisector.bisect_config.get('dummy_job_names'):
- self.job_name = self.commit_hash + '-test'
- else: # pragma: no cover
- self.job_name = uuid.uuid4().hex
+ if self.test_run_count: # pragma: no cover
+ self.repeat_count = max(MINIMUM_SAMPLE_SIZE, math.ceil(
+ self.test_run_count * INCREASE_FACTOR)) - self.test_run_count
+
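Note: a worked pass through the resampling arithmetic above. It matches the retest() logic this patch deletes further down, which targeted at least five samples so a two-tailed Mann-Whitney U test at p < 0.01 could apply:

    import math

    def target_sample_size(n, minimum=5, factor=1.5):
        # minimum/factor mirror MINIMUM_SAMPLE_SIZE and INCREASE_FACTOR.
        return max(minimum, int(math.ceil(n * factor)))

    # target_sample_size(5) == 8  -> repeat_count of 3 more runs
    # target_sample_size(8) == 12 -> repeat_count of 4 more runs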
api = self.bisector.api
# Stores revision map for different repos eg, android-chrome, src, v8 etc.
revision_ladder = {}
@@ -444,102 +375,82 @@ class RevisionState(object):
revision_ladder[top_revision.depot_name] = top_revision.commit_hash
top_revision = top_revision.base_revision
perf_test_properties = {
- 'builder_name': self.bisector.get_perf_tester_name(),
'properties': {
'revision': top_revision.commit_hash,
'parent_got_revision': top_revision.commit_hash,
'parent_build_archive_url': self.build_url,
'bisect_config': self._get_bisect_config_for_tester(),
- 'job_name': self.job_name,
'revision_ladder': revision_ladder,
},
}
- self.test_results_url = (self.bisector.api.GS_RESULTS_URL +
- self.job_name + '.results')
- if (api.m.bisect_tester.local_test_enabled() or
- self.bisector.internal_bisect): # pragma: no cover
- skip_download = self.bisector.last_tested_revision == self
- self.bisector.last_tested_revision = self
- overrides = perf_test_properties['properties']
- api.run_local_test_run(overrides, skip_download=skip_download)
- else:
- step_name = 'Triggering test job for ' + self.commit_hash
- api.m.trigger(perf_test_properties, name=step_name)
-
- def retest(self): # pragma: no cover
- # We need at least 5 samples for applying Mann-Whitney U test
- # with P < 0.01, two-tailed .
- target_sample_size = max(5, math.ceil(len(self.values) * 1.5))
- self.status = RevisionState.NEED_MORE_DATA
- self.repeat_count = target_sample_size - len(self.values)
- self.start_job()
- self.bisector.wait_for(self)
-
- def _get_test_results(self):
- """Tries to get the results of a test job from cloud storage."""
- api = self.bisector.api
- try:
- stdout = api.m.raw_io.output()
- name = 'Get test results for build ' + self.commit_hash
- step_result = api.m.gsutil.cat(self.test_results_url, stdout=stdout,
- name=name)
- if not step_result.stdout:
- raise api.m.step.StepFailure('Test for build %s failed' %
- self.revision_string())
- except api.m.step.StepFailure as sf: # pragma: no cover
- self.bisector.surface_result('TEST_FAILURE')
- return {'results': {'errors': str(sf)}}
- else:
- return json.loads(step_result.stdout)
+ skip_download = self.bisector.last_tested_revision == self
+ self.bisector.last_tested_revision = self
+ overrides = perf_test_properties['properties']
- def _check_revision_good(self):
- """Determines if a revision is good or bad.
+ def run_test_step_test_data():
dtu 2016/09/13 23:57:27 Hmm, this feels out of place. Is there a way to pu
RobertoCN 2016/09/21 22:22:48 That makes sense. I'll give it a try in a separate
+ """Returns a single step data object when called.
- Iteratively increment the sample size of the revision being tested, the last
- known good revision, and the first known bad revision until a relationship
- of significant difference can be established between the results of the
- revision being tested and one of the other two.
+ These are expected to be populated by the test_api.
+ """
+ if api._test_data['run_results'].get(self.commit_hash):
+ return api._test_data['run_results'][self.commit_hash].pop(0)
+ return api._test_data['run_results']['default']
- If the results do not converge towards finding a significant difference in
- either direction, this is expected to timeout eventually. This scenario
- should be rather rare, since it is expected that the fkbr and lkgr are
- significantly different as a precondition.
+ self._read_test_results(api.run_local_test_run(
+ overrides, skip_download=skip_download,
+ step_test_data=run_test_step_test_data
+ ))
+
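Note: unlike the gsutil queue in _is_build_archived (drained with pop() from the end), run_test_step_test_data above consumes per-commit run results front-first with pop(0) and then falls back to a shared 'default' entry; the test_api is expected to populate both. A condensed sketch (dict contents invented):

    run_results = {
        'default': {'retcodes': [0]},
        'deadbeef': [{'retcodes': [1]}, {'retcodes': [0]}],
    }

    def next_run_result(commit_hash):
        if run_results.get(commit_hash):
            return run_results[commit_hash].pop(0)  # queued per-commit result
        return run_results['default']               # shared fallback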
+ def _check_revision_good(self): # pragma: no cover
+ """Determines if a revision is good or bad.
Returns:
- True if the results of testing this revision are significantly different
- from those of testing the earliest known bad revision.
- False if they are instead significantly different from those of testing
- the latest known good revision.
+ True if the revision is either good or bad, False if it cannot be
+ determined from the available data.
"""
+ # Do not reclassify revisions. Important for reference range.
+ if self.good or self.bad:
+ return True
+
lkgr = self.bisector.lkgr
fkbr = self.bisector.fkbr
-
if self.bisector.is_return_code_mode():
- return self.overall_return_code == lkgr.overall_return_code
-
- while True:
- diff_from_good = self.bisector.significantly_different(
- lkgr.values[:len(fkbr.values)], self.values)
- diff_from_bad = self.bisector.significantly_different(
- fkbr.values[:len(lkgr.values)], self.values)
-
- if diff_from_good and diff_from_bad:
- # Multiple regressions.
- # For now, proceed bisecting the biggest difference of the means.
- dist_from_good = abs(self.mean_value - lkgr.mean_value)
- dist_from_bad = abs(self.mean_value - fkbr.mean_value)
- if dist_from_good > dist_from_bad:
- # TODO(robertocn): Add way to handle the secondary regression
- #self.bisector.handle_secondary_regression(self, fkbr)
- return False
- else:
- #self.bisector.handle_secondary_regression(lkgr, self)
- return True
-
- if diff_from_good or diff_from_bad: # pragma: no cover
- return diff_from_bad
+ if self.overall_return_code == lkgr.overall_return_code:
+ self.good = True
+ else:
+ self.bad = True
+ return True
+ diff_from_good = self.bisector.compare_revisions(self, lkgr)
+ diff_from_bad = self.bisector.compare_revisions(self, fkbr)
+ if (diff_from_good == NOT_SIGNIFICANTLY_DIFFERENT and
+ diff_from_bad == NOT_SIGNIFICANTLY_DIFFERENT):
+ # We have reached the max number of samples and have not established
+ # difference, give up.
+ raise bisect_exceptions.InconclusiveBisectException()
+ if (diff_from_good == SIGNIFICANTLY_DIFFERENT and
+ diff_from_bad == SIGNIFICANTLY_DIFFERENT):
+ # Multiple regressions.
+ # For now, proceed bisecting the biggest difference of the means.
+ dist_from_good = abs(self.mean - lkgr.mean)
+ dist_from_bad = abs(self.mean - fkbr.mean)
+ if dist_from_good > dist_from_bad:
+ # TODO(robertocn): Add way to handle the secondary regression
+ #self.bisector.handle_secondary_regression(self, fkbr)
+ self.bad = True
+ return True
+ else:
+ #self.bisector.handle_secondary_regression(lkgr, self)
+ self.good = True
+ return True
+ if diff_from_good == SIGNIFICANTLY_DIFFERENT:
+ self.bad = True
+ return True
+ elif diff_from_bad == SIGNIFICANTLY_DIFFERENT:
+ self.good = True
+ return True
+ # At least one comparison returned NEED_MORE_DATA; caller should retest.
+ return False
- self._next_retest() # pragma: no cover
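Note: the four possible outcomes of _check_revision_good above, summarized (lkgr and fkbr are the last known good and first known bad revisions):

    diff_from_good    diff_from_bad     outcome
    not significant   not significant   raise InconclusiveBisectException
    significant       significant       multiple regressions: nearer mean decides
    significant       anything else     self.bad = True, return True
    anything else     significant       self.good = True, return True
    need more data    (no decision)     return False, caller retests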
def revision_string(self):
if self._rev_str:
@@ -558,25 +469,24 @@ class RevisionState(object):
self._rev_str = result
return self._rev_str
- def _next_retest(self): # pragma: no cover
- """Chooses one of current, lkgr, fkbr to retest.
-
- Look for the smallest sample and retest that. If the last tested revision
- is tied for the smallest sample, use that to take advantage of the fact
- that it is already downloaded and unzipped.
- """
- next_revision_to_test = min(self.bisector.lkgr, self, self.bisector.fkbr,
- key=lambda x: len(x.values))
- if (len(self.bisector.last_tested_revision.values) ==
- next_revision_to_test.values):
- self.bisector.last_tested_revision.retest()
- else:
- next_revision_to_test.retest()
-
- def __repr__(self):
- if self.overall_return_code is not None:
- return ('RevisionState(rev=%s, values=%r, overall_return_code=%r, '
- 'std_dev=%r)') % (self.revision_string(), self.values,
+ def __repr__(self): # pragma: no cover
+ if not self.test_run_count:
+ return 'RevisionState(rev=%s, values=[])' % self.revision_string()
+ if self.bisector.is_return_code_mode():
+ return ('RevisionState(rev=%s, mean=%r, overall_return_code=%r, '
+ 'std_dev=%r)') % (self.revision_string(), self.mean,
self.overall_return_code, self.std_dev)
- return ('RevisionState(rev=%s, values=%r, mean_value=%r, std_dev=%r)' % (
- self.revision_string(), self.values, self.mean_value, self.std_dev))
+ return ('RevisionState(rev=%s, mean_value=%r, std_dev=%r)' % (
+ self.revision_string(), self.mean, self.std_dev))
+
+ @property
+ def overall_return_code(self):
+ if self.bisector.is_return_code_mode():
+ if self.return_codes:
+ if max(self.return_codes):
+ return 1
+ return 0
+ raise ValueError('overall_return_code needs non-empty sample'
+ ) # pragma: no cover
+ raise ValueError('overall_return_code only applies to return_code bisects'
+ ) # pragma: no cover
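Note: the property above collapses per-run return codes into a single pass/fail bit for return-code bisects; any non-zero run marks the revision as failing. A quick standalone check of the same reduction:

    return_codes = [0, 0, 1]
    overall = 1 if max(return_codes) else 0  # any non-zero retcode -> 1
    assert overall == 1
    assert (1 if max([0, 0, 0]) else 0) == 0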
