| Index: verification/try_server.py
|
| diff --git a/verification/try_server.py b/verification/try_server.py
|
| index d9c17da49b003546e2e9e178403fe9c60813007f..c6d410dd634a8c9bf389c4328875f62544d5df82 100644
|
| --- a/verification/try_server.py
|
| +++ b/verification/try_server.py
|
| @@ -14,7 +14,6 @@ import trychange
|
|
|
| import buildbot_json
|
| import model
|
| -from thirdparty.datastructures import SortedDict
|
| from verification import base
|
|
|
|
|
| @@ -80,115 +79,61 @@ class TryJobs(base.IVerifierStatus):
|
| return max(states)
|
|
|
|
|
| +def steps_quality(steps):
|
| + if not steps:
|
| + return None
|
| + return all(v in (True, None) for v in steps)
|
| +
|
| +
|
| class StepDb(object):
|
| - """Lists all steps for each revision known to have passed at least *once*."""
|
| +  """Keeps statistics about all steps for each revision."""
|
| max_cache = 200
|
|
|
| - def __init__(self, builders):
|
| + def __init__(self, builders, buildbot):
|
| self._need_full = True
|
| - # Builds with a patch.
|
| - self.patched_builds = dict((b, SortedDict()) for b in builders)
|
| - # Builds without a patch (with or without a clobber).
|
| - self.clean_builds = dict((b, SortedDict()) for b in builders)
|
| + self.builders = builders
|
| + self.buildbot = buildbot
|
|
|
| def need_full(self):
|
| result = self._need_full
|
| self._need_full = False
|
| return result
|
|
|
| - def step_quality(self, builder, revision, step):
|
| - """Returns if a step is known to have passed at least one time, in the
|
| - closed revision.
|
| -
|
| - Warning: A step index is not comparable across builders since each builder
|
| - has different steps in a different order.
|
| - """
|
| - if (revision not in self.patched_builds[builder] and
|
| - revision not in self.clean_builds[builder]):
|
| - return None
|
| - return (
|
| - (revision in self.patched_builds[builder] and
|
| - self.patched_builds[builder][revision][step]) or
|
| - (revision in self.clean_builds[builder] and
|
| - self.clean_builds[builder][revision][step]))
|
| -
|
| - def revision_quality(self, revision):
|
| - """Returns True if a revision succeeded at least one time on at least one
|
| - builder.
|
| - """
|
| - return reduce(or_3_way,
|
| - (self.revision_quality_builder(b, revision)
|
| - for b in self.patched_builds))
|
| -
|
| - def revision_quality_builder(self, builder, revision):
|
| - """Returns if a revision succeeded at least one time.
|
| -
|
| - Warning: A step index is not comparable across builders since each builder
|
| - has different steps in a different order.
|
| - """
|
| - if (revision not in self.patched_builds[builder] and
|
| - revision not in self.clean_builds[builder]):
|
| + def revision_quality_builder_steps(self, builder, revision):
|
| + steps = None
|
| + nb_builds = 0
|
| + for build in self.buildbot.builders[builder].builds.cached_children:
|
| + if build.revision != revision:
|
| + continue
|
| + nb_builds += 1
|
| + assert not steps or len(steps) == len(build.steps)
|
| + if not steps or len(steps) != len(build.steps):
|
| + # If the number of steps changed after a master restart, we need to
|
| + # ditch the previous steps.
|
| +        # A more robust fix would be to key steps by name instead of index.
|
| + steps = [None] * len(build.steps)
|
| + for step in build.steps:
|
| + steps[step.number] = or_3_way(
|
| + steps[step.number], step.simplified_result)
|
| + return steps, nb_builds
|
| +
|
| + def last_good_revision_builder(self, builder):
|
| + """Returns LKGR for this builder."""
|
| + state = {}
|
| + for build in self.buildbot.builders[builder].builds.cached_children:
|
| + state.setdefault(build.revision, [None] * len(build.steps))
|
| + for step in build.steps:
|
| + state[build.revision][step.number] = or_3_way(
|
| + state[build.revision][step.number],
|
| + step.simplified_result)
|
| +
|
| + revisions = [
|
| + revision for revision in sorted(state)
|
| + if all(v in (True, None) for v in state[revision])
|
| + ]
|
| + if not revisions:
|
| return None
|
| - bad_steps = []
|
| - for i, value in enumerate(self.patched_builds[builder].get(revision, [])):
|
| - if value is False:
|
| - bad_steps.append(i)
|
| - for i, value in enumerate(self.clean_builds[builder].get(revision, [])):
|
| - if value is False:
|
| - bad_steps.append(i)
|
| - if value and i in bad_steps:
|
| - bad_steps.remove(i)
|
| - return not bad_steps
|
| -
|
| - def seen_revisions(self):
|
| - """Returns all revisions that returned some status."""
|
| - revisions = set()
|
| - for builder in self.patched_builds:
|
| - revisions |= set(self.patched_builds[builder].keys())
|
| - revisions |= set(self.clean_builds[builder].keys())
|
| - return sorted(revisions)
|
| -
|
| - def good_revisions(self):
|
| - """Returns all revisions that succeeded on all builders."""
|
| - for revision in self.seen_revisions():
|
| - if self.revision_quality(revision):
|
| - yield revision
|
| -
|
| - def bad_revisions(self):
|
| - """Returns all revisions that never succeeded on any builder."""
|
| - for revision in self.seen_revisions():
|
| - if self.revision_quality(revision) is False:
|
| - yield revision
|
| -
|
| - def update(self, buildbot):
|
| - """Updates the internal db."""
|
| - for builder in self.clean_builds:
|
| - # Only access builds already cached.
|
| - for build in buildbot.builders[builder].builds.cached_children:
|
| - if build.data['sourceStamp'].get('hasPatch', False):
|
| - b = self.patched_builds[builder]
|
| - else:
|
| - b = self.clean_builds[builder]
|
| - new_values = [s.simplified_result for s in build.steps]
|
| - if build.revision not in b:
|
| - b[build.revision] = new_values
|
| - else:
|
| - len_b = len(b[build.revision])
|
| - len_n = len(new_values)
|
| - new_length = max(len_b, len_n)
|
| - b[build.revision].extend([None] * (new_length - len_b))
|
| - new_values.extend([None] * (new_length - len_n))
|
| - b[build.revision] = [
|
| - or_3_way(old_value, new_values[i])
|
| - for i, old_value in enumerate(b[build.revision])
|
| - ]
|
| -
|
| - for builds in self.patched_builds.itervalues():
|
| - while len(builds) > self.max_cache:
|
| - builds.popitem(builds.keyOrders[0])
|
| - for builds in self.clean_builds.itervalues():
|
| - while len(builds) > self.max_cache:
|
| - builds.popitem(builds.keyOrders[0])
|
| + return revisions[-1]
|
|
|
|
|
| class TryRunner(base.Verifier):
|
| @@ -219,7 +164,8 @@ class TryRunner(base.Verifier):
|
| # Only updates a job status once every 60 seconds.
|
| update_latency = 60
|
|
|
| - def __init__(self, try_server_url, commit_user, builders, tests, extra_flags):
|
| + def __init__(self, try_server_url, commit_user, builders, tests, extra_flags,
|
| + lkgr):
|
| super(TryRunner, self).__init__()
|
| self.commit_user = commit_user
|
| self.try_server_url = try_server_url
|
| @@ -227,8 +173,9 @@ class TryRunner(base.Verifier):
|
| self.tests = tests
|
| self.extra_flags = extra_flags or []
|
| self.status = buildbot_json.Buildbot(self.try_server_url)
|
| - self.step_db = StepDb(builders)
|
| + self.step_db = StepDb(self.builders, self.status)
|
| self.last_update = time.time() - self.update_latency
|
| + self.lkgr = lkgr
|
|
|
| def verify(self, pending, revision):
|
| """Sends a try job to the try server and returns a TryJob list."""
|
| @@ -258,7 +205,6 @@ class TryRunner(base.Verifier):
|
| self.last_update = time.time()
|
|
|
| self._reset_cache(queue)
|
| - self.step_db.update(self.status)
|
|
|
| # Do the actual processing to update the TryJob status.
|
| for pending, jobs in self.loop(queue, TryJobs, True):
|
| @@ -328,7 +274,7 @@ class TryRunner(base.Verifier):
|
| # First determine what data is needed.
|
| builds_to_cache = {}
|
| if self.step_db.need_full():
|
| - logging.info('Fetching all try jobs status because of good_revisions')
|
| + logging.info('Fetching all try jobs status to fetch good revisions')
|
| builders_to_cache = self.builders
|
| else:
|
| builders_to_cache = set()
|
| @@ -361,8 +307,8 @@ class TryRunner(base.Verifier):
|
| def _send_job(self, pending, revision, clobber, builders, tests=None):
|
| """Sends a try job."""
|
| # TODO(maruel): If revision is in self.bad_revisions[builder], choose
|
| - # max(self.good_revisions[builder]) ? That can't easily be done since the
|
| - # patch was already applied.
|
| + # self.last_good_revision_builder(builder) ? That can't easily be done since
|
| + # the patch was already applied.
|
| builders = builders or self.builders
|
| tests = tests or self.tests
|
| cmd = [
|
| @@ -430,6 +376,30 @@ class TryRunner(base.Verifier):
|
| return '%s/buildstatus?builder=%s&number=%s' % (
|
| self.try_server_url.rstrip('/'), job.builder, job.build)
|
|
|
| + def _error_msg(self, name, job, failed_steps):
|
| + """Constructs the error message."""
|
| + def steps_to_str(steps):
|
| + if len(steps) > 1:
|
| + return 'steps "%s"' % ', '.join(steps)
|
| + else:
|
| + return 'step "%s"' % steps[0]
|
| +
|
| + msg = u'Try job failure for %s on %s for %s' % (
|
| + name, job.builder, steps_to_str(failed_steps))
|
| + if job.clobber:
|
| + msg += ' (clobber build)'
|
| + msg += '.'
|
| + if job.failed_steps:
|
| + msg += u'\nIt\'s a second try, previously, %s failed.' % (
|
| + steps_to_str(job.failed_steps))
|
| + msg += '\n%s' % self._build_status_url(job)
|
| + logging.info(msg)
|
| + return msg
|
| +
|
| + def get_lkgr(self, builder):
|
| +    """Returns the best known good revision: max of builder and global LKGR."""
|
| + return max(self.step_db.last_good_revision_builder(builder), self.lkgr())
|
| +
|
| def _handle_try_job(self, pending, jobs, job, build):
|
| """Determines if the try job is a good signal to commit the patch."""
|
| if build.simplified_result is None:
|
| @@ -438,6 +408,7 @@ class TryRunner(base.Verifier):
|
| assert job.result is None
|
| assert job.build is not None
|
| job.result = build.result
|
| + # Warning: This code assumes that steps do not abort build on failure.
|
| failed_steps = [
|
| step.name for step in build.steps if step.simplified_result is False]
|
| if job.get_state() != base.FAILED:
|
| @@ -449,49 +420,60 @@ class TryRunner(base.Verifier):
|
| self._build_status_url(job)))
|
| return
|
|
|
| - assert failed_steps
|
| - msg = (u'Try job failure for %s on %s for step%s %s:\n%s' % (
|
| - pending.pending_name(),
|
| - job.builder,
|
| - 's' if len(failed_steps) > 1 else '',
|
| - ', '.join(failed_steps),
|
| - self._build_status_url(job)))
|
| - logging.info(msg)
|
| -
|
| - # Special case update and compile.
|
| - if 'update' in job.failed_steps:
|
| - logging.debug('update is always a major failure')
|
| - jobs.error_message = msg
|
| + msg = self._error_msg(pending.pending_name(), job, failed_steps)
|
| + steps, _ = self.step_db.revision_quality_builder_steps(
|
| + job.builder, int(job.revision))
|
| + quality = steps_quality(steps)
|
| +
|
| + def retry(msg2, **kwargs):
|
| + """Retry a try job. Will use LKGR if quality is bad."""
|
| + if not quality:
|
| + lkgr = self.get_lkgr(job.builder)
|
| + if lkgr is None:
|
| + logging.error('lkgr should never be None.')
|
| + fail('Couldn\'t find a good revision, aborting.')
|
| + return
|
| + job.revision = lkgr
|
| + logging.info(
|
| + 'Retrying %s on %s, %s; rev=%s; %s' %
|
| + (pending.pending_name(), job.builder, kwargs, job.revision, msg2))
|
| job.failed_steps = failed_steps
|
| - return
|
| - if failed_steps == ['compile'] and not job.clobber:
|
| - logging.info('Trying again with clobber')
|
| - # Note this would reset flaky if tests if there has been. This is fine
|
| - # since a slave could be broken.
|
| + self._send_jobs(pending, [job], **kwargs)
|
| +
|
| + def fail(msg2):
|
| + jobs.error_message = msg + msg2
|
| + logging.info(jobs.error_message)
|
| job.failed_steps = failed_steps
|
| - job.clobber = True
|
| - self._send_jobs(pending, [job])
|
| - return
|
|
|
| - # Look at the quality of the revision on this builder.
|
| - # TODO(maruel): We should record the number of builds that were done on this
|
| - # revision? One or 2 builds don't give much signal.
|
| - quality = self.step_db.revision_quality_builder(
|
| - job.builder, int(job.revision))
|
| + if 'update' in failed_steps:
|
| + # Look at update quality specifically since it's a special step.
|
| + # Do not take in account nb_builds == 1.
|
| + if not quality and not steps[build.steps['update'].number]:
|
| + # 'update' never passed.
|
| + return retry('update has no quality')
|
| +
|
| + return fail(
|
| + '\n\nStep "update" is always a major failure.\n'
|
| + 'Look at the try server FAQ for more details.')
|
| +
|
| + if 'compile' in failed_steps:
|
| + if not job.clobber:
|
| +        # Note: this discards any test failure recorded on the previous
|
| +        # try. This is fine since a slave could be broken.
|
| + job.clobber = True
|
| + return retry('retry compile with clobber')
|
| +
|
| + return fail('')
|
|
|
| if quality:
|
| if job.failed_steps:
|
| - logging.info('It\'s a second retry for %s on %s, abort' % (
|
| - pending.pending_name(), job.builder))
|
| - jobs.error_message = msg
|
| - job.failed_steps = failed_steps
|
| - else:
|
| - logging.info(
|
| - 'Retrying %s on %s' % (pending.pending_name(), job.builder))
|
| - job.failed_steps = failed_steps
|
| - self._send_jobs(pending, [job], tests=job.failed_steps)
|
| - return
|
| + # The job had already failed.
|
| + return fail('')
|
| +
|
| + return retry('Quality but first try', tests=failed_steps)
|
|
|
| - # TODO(maruel): Implement better auto-retry.
|
| - jobs.error_message = msg
|
| - job.failed_steps = failed_steps
|
| + # TODO(maruel): It would make sense to do a clobber build to see if the
|
| + # revision is indeed broken, since this algorithm assumes that the try
|
| + # server is continuously used for recent revisions!
|
| + # The revision looks like it's broken, retry with lkgr instead.
|
| + return retry('No quality, no idea', tests=failed_steps)
|
|
|