| Index: appengine/findit/waterfall/try_job_util.py
|
| diff --git a/appengine/findit/waterfall/try_job_util.py b/appengine/findit/waterfall/try_job_util.py
|
| index 3af3936746d06383d91c5590ff09b5e1e25899a4..0fab748365f3923018f9a09f0a3001dac4ab6f7e 100644
|
| --- a/appengine/findit/waterfall/try_job_util.py
|
| +++ b/appengine/findit/waterfall/try_job_util.py
|
| @@ -2,6 +2,7 @@
|
| # Use of this source code is governed by a BSD-style license that can be
|
| # found in the LICENSE file.
|
|
|
| +from collections import defaultdict
|
| from datetime import datetime
|
| from datetime import timedelta
|
| import logging
|
| @@ -16,73 +17,18 @@ from model.wf_analysis import WfAnalysis
|
| from model.wf_build import WfBuild
|
| from model.wf_failure_group import WfFailureGroup
|
| from model.wf_try_job import WfTryJob
|
| -from waterfall import swarming_tasks_to_try_job_pipeline
|
| from waterfall import waterfall_config
|
| -from waterfall.try_job_type import TryJobType
|
|
|
| # TODO(lijeffrey): Move this to config.
|
| MATCHING_GROUPS_SECONDS_AGO = 24 * 60 * 60 # 24 hours.
|
|
|
|
|
| -def _CheckFailureForTryJobKey(
|
| - master_name, builder_name, build_number,
|
| - failure_result_map, failed_step_or_test, failure):
|
| - """Compares the current_failure and first_failure for each failed_step/test.
|
| -
|
| - If equal, a new try_job needs to start;
|
| - If not, apply the key of the first_failure's try_job to this failure.
|
| - """
|
| - # TODO(chanli): Need to compare failures across builders
|
| - # after the grouping of failures is implemented.
|
| - # TODO(chanli): Need to handle cases where first failure is actually
|
| - # more than 20 builds back. The implementation should not be here,
|
| - # but need to be taken care of.
|
| - if not failure.get('last_pass'):
|
| - # Bail out since cannot figure out the good_revision.
|
| - return False, None
|
| -
|
| - if failure['current_failure'] == failure['first_failure']:
|
| - failure_result_map[failed_step_or_test] = '%s/%s/%s' % (
|
| - master_name, builder_name, build_number)
|
| - return True, failure['last_pass'] # A new try_job is needed.
|
| - else:
|
| - failure_result_map[failed_step_or_test] = '%s/%s/%s' % (
|
| - master_name, builder_name, failure['first_failure'])
|
| - return False, None
|
| -
|
| -
|
| -def _CheckIfNeedNewTryJobForTestFailure(
|
| - failure_level, master_name, builder_name, build_number,
|
| - failure_result_map, failures):
|
| - """Traverses failed steps or tests to check if a new try job is needed."""
|
| - need_new_try_job = False
|
| - last_pass = build_number
|
| - targeted_tests = {} if failure_level == 'step' else []
|
| -
|
| - for failure_name, failure in failures.iteritems():
|
| - if 'tests' in failure:
|
| - failure_result_map[failure_name] = {}
|
| - failure_targeted_tests, failure_need_try_job, failure_last_pass = (
|
| - _CheckIfNeedNewTryJobForTestFailure(
|
| - 'test', master_name, builder_name, build_number,
|
| - failure_result_map[failure_name], failure['tests']))
|
| - if failure_need_try_job:
|
| - targeted_tests[failure_name] = failure_targeted_tests
|
| - else:
|
| - failure_need_try_job, failure_last_pass = _CheckFailureForTryJobKey(
|
| - master_name, builder_name, build_number,
|
| - failure_result_map, failure_name, failure)
|
| - if failure_need_try_job:
|
| - if failure_level == 'step':
|
| - targeted_tests[failure_name] = []
|
| - else:
|
| - targeted_tests.append(failure.get('base_test_name', failure_name))
|
| +def _ShouldBailOutForOutdatedBuild(build):
|
| + return (datetime.utcnow() - build.start_time).days > 0
|
|
|
| - need_new_try_job = need_new_try_job or failure_need_try_job
|
| - last_pass = (failure_last_pass if failure_last_pass and
|
| - failure_last_pass < last_pass else last_pass)
|
|
|
| - return targeted_tests, need_new_try_job, last_pass
|
| +def _CurrentBuildKey(master_name, builder_name, build_number):
|
| + return '%s/%s/%d' % (master_name, builder_name, build_number)
|
|
|
|
|
| def _BlameListsIntersection(blame_list_1, blame_list_2):
|
| @@ -308,95 +254,60 @@ def ReviveOrCreateTryJobEntity(
|
| return try_job_entity_revived_or_created
|
|
|
|
|
| -def _NeedANewTryJob(
|
| - master_name, builder_name, build_number, build_failure_type, failed_steps,
|
| - failure_result_map, builds, signals, heuristic_result, force_try_job=False):
|
| - """Checks if a new try_job is needed."""
|
| - need_new_try_job = False
|
| - last_pass = build_number
|
| -
|
| - if 'compile' in failed_steps:
|
| - try_job_type = TryJobType.COMPILE
|
| - targeted_tests = None
|
| - need_new_try_job, last_pass = _CheckFailureForTryJobKey(
|
| - master_name, builder_name, build_number,
|
| - failure_result_map, TryJobType.COMPILE, failed_steps['compile'])
|
| - else:
|
| - try_job_type = TryJobType.TEST
|
| - targeted_tests, need_new_try_job, last_pass = (
|
| - _CheckIfNeedNewTryJobForTestFailure(
|
| - 'step', master_name, builder_name, build_number, failure_result_map,
|
| - failed_steps))
|
| -
|
| - need_new_try_job = (
|
| - need_new_try_job and ReviveOrCreateTryJobEntity(
|
| - master_name, builder_name, build_number, force_try_job) and
|
| - _IsBuildFailureUniqueAcrossPlatforms(
|
| - master_name, builder_name, build_number, build_failure_type,
|
| - builds[str(build_number)]['blame_list'], failed_steps, signals,
|
| - heuristic_result))
|
| -
|
| - # TODO(josiahk): Integrate _IsBuildFailureUniqueAcrossPlatforms() into
|
| - # need_new_try_job boolean
|
| - if need_new_try_job:
|
| - _IsBuildFailureUniqueAcrossPlatforms(
|
| - master_name, builder_name, build_number, build_failure_type,
|
| - builds[str(build_number)]['blame_list'], failed_steps, signals,
|
| - heuristic_result)
|
| -
|
| - return need_new_try_job, last_pass, try_job_type, targeted_tests
|
| +def _NeedANewCompileTryJob(
|
| + master_name, builder_name, build_number, failure_info):
|
|
|
| + compile_failure = failure_info['failed_steps'].get('compile', {})
|
| + if compile_failure:
|
| + analysis = WfAnalysis.Get(master_name, builder_name, build_number)
|
| + analysis.failure_result_map['compile'] = '%s/%s/%d' % (
|
| + master_name, builder_name, compile_failure['first_failure'])
|
| + analysis.put()
|
|
|
| -def _GetFailedTargetsFromSignals(signals, master_name, builder_name):
|
| - compile_targets = []
|
| + if compile_failure['first_failure'] == compile_failure['current_failure']:
|
| + return True
|
|
|
| - if not signals or 'compile' not in signals:
|
| - return compile_targets
|
| + return False
|
|
|
| - if signals['compile'].get('failed_output_nodes'):
|
| - return signals['compile'].get('failed_output_nodes')
|
|
|
| - strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures(
|
| - master_name, builder_name)
|
| - for source_target in signals['compile'].get('failed_targets', []):
|
| - # For link failures, we pass the executable targets directly to try-job, and
|
| - # there is no 'source' for link failures.
|
| - # For compile failures, only pass the object files as the compile targets
|
| - # for the bots that we use strict regex to extract such information.
|
| - if not source_target.get('source') or strict_regex:
|
| - compile_targets.append(source_target.get('target'))
|
| +def _CurrentBuildKeyInFailureResultMap(master_name, builder_name, build_number):
|
| + analysis = WfAnalysis.Get(master_name, builder_name, build_number)
|
| + failure_result_map = analysis.failure_result_map
|
| + current_build_key = _CurrentBuildKey(master_name, builder_name, build_number)
|
| + for step_keys in failure_result_map.itervalues():
|
| + for test_key in step_keys.itervalues():
|
| + if test_key == current_build_key:
|
| + return True
|
| + return False
|
|
|
| - return compile_targets
|
|
|
| +def _NeedANewTestTryJob(
|
| + master_name, builder_name, build_number, failure_info, force_try_job):
|
|
|
| -def _GetSuspectsFromHeuristicResult(heuristic_result):
|
| - suspected_revisions = set()
|
| - if not heuristic_result:
|
| - return list(suspected_revisions)
|
| - for failure in heuristic_result.get('failures', []):
|
| - for cl in failure['suspected_cls']:
|
| - suspected_revisions.add(cl['revision'])
|
| - return list(suspected_revisions)
|
| + if failure_info['failure_type'] != failure_type.TEST:
|
| + return False
|
|
|
| + if (not force_try_job and
|
| + waterfall_config.ShouldSkipTestTryJobs(master_name, builder_name)):
|
| + logging.info('Test try jobs on %s, %s are not supported yet.',
|
| + master_name, builder_name)
|
| + return False
|
|
|
| -def _ShouldBailOutForOutdatedBuild(build):
|
| - return (datetime.utcnow() - build.start_time).days > 0
|
| + return _CurrentBuildKeyInFailureResultMap(
|
| + master_name, builder_name, build_number)
|
|
|
|
|
| -def ScheduleTryJobIfNeeded(failure_info, signals, heuristic_result,
|
| - force_try_job=False):
|
| - master_name = failure_info['master_name']
|
| - builder_name = failure_info['builder_name']
|
| - build_number = failure_info['build_number']
|
| - failed_steps = failure_info.get('failed_steps', [])
|
| - builds = failure_info.get('builds', {})
|
| +def NeedANewTryJob(
|
| + master_name, builder_name, build_number, failure_info, signals,
|
| + heuristic_result, force_try_job=False):
|
|
|
| tryserver_mastername, tryserver_buildername = (
|
| waterfall_config.GetTrybotForWaterfallBuilder(master_name, builder_name))
|
|
|
| + try_job_type = failure_info['failure_type']
|
| if not tryserver_mastername or not tryserver_buildername:
|
| logging.info('%s, %s is not supported yet.', master_name, builder_name)
|
| - return {}
|
| + return False
|
|
|
| if not force_try_job:
|
| build = WfBuild.Get(master_name, builder_name, build_number)
|
| @@ -404,53 +315,43 @@ def ScheduleTryJobIfNeeded(failure_info, signals, heuristic_result,
|
| if _ShouldBailOutForOutdatedBuild(build):
|
| logging.error('Build time %s is more than 24 hours old. '
|
| 'Try job will not be triggered.' % build.start_time)
|
| - return {}
|
| + return False
|
|
|
| - if (failure_info['failure_type'] == failure_type.TEST and
|
| - waterfall_config.ShouldSkipTestTryJobs(master_name, builder_name)):
|
| - logging.info('Test try jobs on %s, %s are not supported yet.',
|
| - master_name, builder_name)
|
| - return {}
|
| -
|
| - failure_result_map = {}
|
| - need_new_try_job, last_pass, try_job_type, targeted_tests = (
|
| - _NeedANewTryJob(master_name, builder_name, build_number,
|
| - failure_info['failure_type'], failed_steps,
|
| - failure_result_map, builds, signals, heuristic_result,
|
| - force_try_job))
|
| + need_new_try_job = (_NeedANewCompileTryJob(
|
| + master_name, builder_name, build_number, failure_info)
|
| + if try_job_type == failure_type.COMPILE else
|
| + _NeedANewTestTryJob(
|
| + master_name, builder_name, build_number, failure_info, force_try_job))
|
|
|
| if need_new_try_job:
|
| - compile_targets = (_GetFailedTargetsFromSignals(
|
| - signals, master_name, builder_name)
|
| - if try_job_type == TryJobType.COMPILE else None)
|
| - suspected_revisions = _GetSuspectsFromHeuristicResult(heuristic_result)
|
| -
|
| - pipeline = (
|
| - swarming_tasks_to_try_job_pipeline.SwarmingTasksToTryJobPipeline(
|
| - master_name, builder_name, build_number,
|
| - builds[str(last_pass)]['chromium_revision'],
|
| - builds[str(build_number)]['chromium_revision'],
|
| - builds[str(build_number)]['blame_list'],
|
| - try_job_type, compile_targets, targeted_tests, suspected_revisions,
|
| - force_try_job))
|
| -
|
| - pipeline.target = appengine_util.GetTargetNameForModule(
|
| - constants.WATERFALL_BACKEND)
|
| - pipeline.start(queue_name=constants.WATERFALL_TRY_JOB_QUEUE)
|
| -
|
| - if try_job_type == TryJobType.TEST: # pragma: no cover
|
| - logging_str = (
|
| - 'Trying to schedule swarming task(s) for build %s, %s, %s: %s'
|
| - ' because of %s failure. A try job may be triggered if some reliable'
|
| - ' failure is detected in task(s).') % (
|
| - master_name, builder_name, build_number,
|
| - pipeline.pipeline_status_path, try_job_type)
|
| - else: # pragma: no cover
|
| - logging_str = (
|
| - 'Try job was scheduled for build %s, %s, %s: %s because of %s '
|
| - 'failure.') % (
|
| - master_name, builder_name, build_number,
|
| - pipeline.pipeline_status_path, try_job_type)
|
| - logging.info(logging_str)
|
| -
|
| - return failure_result_map
|
| + # TODO(josiahk): Integrate the result of this check into need_new_try_job.
|
| + _IsBuildFailureUniqueAcrossPlatforms(
|
| + master_name, builder_name, build_number, try_job_type,
|
| + failure_info['builds'][str(build_number)]['blame_list'],
|
| + failure_info['failed_steps'], signals, heuristic_result)
|
| +
|
| + need_new_try_job = need_new_try_job and ReviveOrCreateTryJobEntity(
|
| + master_name, builder_name, build_number, force_try_job)
|
| + return need_new_try_job
|
| +
|
| +
|
| +def GetFailedTargetsFromSignals(signals, master_name, builder_name):
|
| + compile_targets = []
|
| +
|
| + if not signals or 'compile' not in signals:
|
| + return compile_targets
|
| +
|
| + if signals['compile'].get('failed_output_nodes'):
|
| + return signals['compile'].get('failed_output_nodes')
|
| +
|
| + strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures(
|
| + master_name, builder_name)
|
| + for source_target in signals['compile'].get('failed_targets', []):
|
| + # For link failures, we pass the executable targets directly to try-job, and
|
| + # there is no 'source' for link failures.
|
| + # For compile failures, only pass the object files as the compile targets
|
| + # for the bots on which we use strict regex to extract such information.
|
| + if not source_target.get('source') or strict_regex:
|
| + compile_targets.append(source_target.get('target'))
|
| +
|
| + return compile_targets
|
|
|