| OLD | NEW |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from collections import defaultdict |
| 5 from datetime import datetime | 6 from datetime import datetime |
| 6 import logging | 7 import logging |
| 7 | 8 |
| 8 from google.appengine.ext import ndb | 9 from google.appengine.ext import ndb |
| 9 | 10 |
| 10 from common import appengine_util | 11 from common import appengine_util |
| 11 from common import constants | 12 from common import constants |
| 12 from common.waterfall import failure_type | 13 from common.waterfall import failure_type |
| 13 from model import analysis_status | 14 from model import analysis_status |
| 14 from model.wf_analysis import WfAnalysis | 15 from model.wf_analysis import WfAnalysis |
| 15 from model.wf_build import WfBuild | 16 from model.wf_build import WfBuild |
| 16 from model.wf_failure_group import WfFailureGroup | 17 from model.wf_failure_group import WfFailureGroup |
| 17 from model.wf_try_job import WfTryJob | 18 from model.wf_try_job import WfTryJob |
| 18 from waterfall import swarming_tasks_to_try_job_pipeline | |
| 19 from waterfall import waterfall_config | 19 from waterfall import waterfall_config |
| 20 from waterfall.try_job_type import TryJobType | |
| 21 | 20 |
| 22 | 21 |
| 23 def _CheckFailureForTryJobKey( | 22 def _ShouldBailOutForOutdatedBuild(build): |
| 24 master_name, builder_name, build_number, | 23 return (datetime.utcnow() - build.start_time).days > 0 |
| 25 failure_result_map, failed_step_or_test, failure): | |
| 26 """Compares the current_failure and first_failure for each failed_step/test. | |
| 27 | 24 |
| 28 If equal, a new try_job needs to start; | 25 def _CurrentBuildKey(master_name, builder_name, build_number): |
| 29 If not, apply the key of the first_failure's try_job to this failure. | 26 return '%s/%s/%d' % (master_name, builder_name, build_number) |
| 30 """ | |
| 31 # TODO(chanli): Need to compare failures across builders | |
| 32 # after the grouping of failures is implemented. | |
| 33 # TODO(chanli): Need to handle cases where first failure is actually | |
| 34 # more than 20 builds back. The implementation should not be here, | |
| 35 # but need to be taken care of. | |
| 36 if not failure.get('last_pass'): | |
| 37 # Bail out since cannot figure out the good_revision. | |
| 38 return False, None | |
| 39 | |
| 40 if failure['current_failure'] == failure['first_failure']: | |
| 41 failure_result_map[failed_step_or_test] = '%s/%s/%s' % ( | |
| 42 master_name, builder_name, build_number) | |
| 43 return True, failure['last_pass'] # A new try_job is needed. | |
| 44 else: | |
| 45 failure_result_map[failed_step_or_test] = '%s/%s/%s' % ( | |
| 46 master_name, builder_name, failure['first_failure']) | |
| 47 return False, None | |
| 48 | |
| 49 | |
| 50 def _CheckIfNeedNewTryJobForTestFailure( | |
| 51 failure_level, master_name, builder_name, build_number, | |
| 52 failure_result_map, failures): | |
| 53 """Traverses failed steps or tests to check if a new try job is needed.""" | |
| 54 need_new_try_job = False | |
| 55 last_pass = build_number | |
| 56 targeted_tests = {} if failure_level == 'step' else [] | |
| 57 | |
| 58 for failure_name, failure in failures.iteritems(): | |
| 59 if 'tests' in failure: | |
| 60 failure_result_map[failure_name] = {} | |
| 61 failure_targeted_tests, failure_need_try_job, failure_last_pass = ( | |
| 62 _CheckIfNeedNewTryJobForTestFailure( | |
| 63 'test', master_name, builder_name, build_number, | |
| 64 failure_result_map[failure_name], failure['tests'])) | |
| 65 if failure_need_try_job: | |
| 66 targeted_tests[failure_name] = failure_targeted_tests | |
| 67 else: | |
| 68 failure_need_try_job, failure_last_pass = _CheckFailureForTryJobKey( | |
| 69 master_name, builder_name, build_number, | |
| 70 failure_result_map, failure_name, failure) | |
| 71 if failure_need_try_job: | |
| 72 if failure_level == 'step': | |
| 73 targeted_tests[failure_name] = [] | |
| 74 else: | |
| 75 targeted_tests.append(failure.get('base_test_name', failure_name)) | |
| 76 | |
| 77 need_new_try_job = need_new_try_job or failure_need_try_job | |
| 78 last_pass = (failure_last_pass if failure_last_pass and | |
| 79 failure_last_pass < last_pass else last_pass) | |
| 80 | |
| 81 return targeted_tests, need_new_try_job, last_pass | |
| 82 | 27 |
| 83 | 28 |
| 84 def _BlameListsIntersection(blame_list_1, blame_list_2): | 29 def _BlameListsIntersection(blame_list_1, blame_list_2): |
| 85 return set(blame_list_1) & set(blame_list_2) | 30 return set(blame_list_1) & set(blame_list_2) |
| 86 | 31 |
| 87 | 32 |
| 88 def _GetStepsAndTests(failed_steps): | 33 def _GetStepsAndTests(failed_steps): |
| 89 """Extracts failed steps and tests from failed_steps data structure. | 34 """Extracts failed steps and tests from failed_steps data structure. |
| 90 | 35 |
| 91 Args: | 36 Args: |
| (...skipping 198 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 290 try_job.put() | 235 try_job.put() |
| 291 else: | 236 else: |
| 292 try_job_entity_revived_or_created = False | 237 try_job_entity_revived_or_created = False |
| 293 else: | 238 else: |
| 294 try_job = WfTryJob.Create(master_name, builder_name, build_number) | 239 try_job = WfTryJob.Create(master_name, builder_name, build_number) |
| 295 try_job.put() | 240 try_job.put() |
| 296 | 241 |
| 297 return try_job_entity_revived_or_created | 242 return try_job_entity_revived_or_created |
| 298 | 243 |
| 299 | 244 |
| 300 def _NeedANewTryJob( | 245 def _NeedANewTryJobForCompile( |
| 301 master_name, builder_name, build_number, build_failure_type, failed_steps, | 246 master_name, builder_name, build_number, failure_info): |
| 302 failure_result_map, builds, signals, heuristic_result, force_try_job=False): | |
| 303 """Checks if a new try_job is needed.""" | |
| 304 need_new_try_job = False | |
| 305 last_pass = build_number | |
| 306 | 247 |
| 307 if 'compile' in failed_steps: | 248 compile_failure = failure_info['failed_steps'].get('compile', {}) |
| 308 try_job_type = TryJobType.COMPILE | 249 if compile_failure: |
| 309 targeted_tests = None | 250 analysis = WfAnalysis.Get(master_name, builder_name, build_number) |
| 310 need_new_try_job, last_pass = _CheckFailureForTryJobKey( | 251 analysis.failure_result_map['compile'] = '%s/%s/%d' % ( |
| 311 master_name, builder_name, build_number, | 252 master_name, builder_name, compile_failure['first_failure']) |
| 312 failure_result_map, TryJobType.COMPILE, failed_steps['compile']) | 253 analysis.put() |
| 313 else: | |
| 314 try_job_type = TryJobType.TEST | |
| 315 targeted_tests, need_new_try_job, last_pass = ( | |
| 316 _CheckIfNeedNewTryJobForTestFailure( | |
| 317 'step', master_name, builder_name, build_number, failure_result_map, | |
| 318 failed_steps)) | |
| 319 | 254 |
| 320 # TODO(josiahk): Integrate this into need_new_try_job boolean | 255 if compile_failure['first_failure'] == compile_failure['current_failure']: |
| 321 _IsBuildFailureUniqueAcrossPlatforms( | 256 return True |
| 322 master_name, builder_name, build_number, build_failure_type, | |
| 323 builds[str(build_number)]['blame_list'], failed_steps, signals, | |
| 324 heuristic_result) | |
| 325 | 257 |
| 326 need_new_try_job = ( | 258 return False |
| 327 need_new_try_job and ReviveOrCreateTryJobEntity( | |
| 328 master_name, builder_name, build_number, force_try_job)) | |
| 329 | 259 |
| 330 return need_new_try_job, last_pass, try_job_type, targeted_tests | 260 def _NeedANewTryJobForTest( |
| 261 master_name, builder_name, build_number, failure_info, force_try_job): |
| 262 |
| 263 if failure_info['failure_type'] != failure_type.TEST: |
| 264 return False |
| 265 |
| 266 if not force_try_job: |
| 267 if waterfall_config.ShouldSkipTestTryJobs(master_name, builder_name): |
| 268 logging.info('Test try jobs on %s, %s are not supported yet.', |
| 269 master_name, builder_name) |
| 270 return False |
| 271 |
| 272 analysis = WfAnalysis.Get(master_name, builder_name, build_number) |
| 273 failure_result_map = analysis.failure_result_map |
| 274 current_build_key = _CurrentBuildKey(master_name, builder_name, build_number) |
| 275 for step_keys in failure_result_map.itervalues(): |
| 276 for test_key in step_keys.itervalues(): |
| 277 if test_key == current_build_key: |
| 278 return True |
| 279 return False |
| 280 |
| 281 def NeedANewTryJob( |
| 282 master_name, builder_name, build_number, failure_info, signals, |
| 283 heuristic_result, force_try_job=False): |
| 284 |
| 285 tryserver_mastername, tryserver_buildername = ( |
| 286 waterfall_config.GetTrybotForWaterfallBuilder(master_name, builder_name)) |
| 287 |
| 288 try_job_type = failure_info['failure_type'] |
| 289 if not tryserver_mastername or not tryserver_buildername: |
| 290 logging.info('%s, %s is not supported yet.', master_name, builder_name) |
| 291 return False |
| 292 |
| 293 if not force_try_job: |
| 294 build = WfBuild.Get(master_name, builder_name, build_number) |
| 295 |
| 296 if _ShouldBailOutForOutdatedBuild(build): |
| 297 logging.error('Build time %s is more than 24 hours old. ' |
| 298 'Try job will not be triggered.' % build.start_time) |
| 299 return False |
| 300 |
| 301 need_new_try_job = (_NeedANewTryJobForCompile( |
| 302 master_name, builder_name, build_number, failure_info) |
| 303 if try_job_type == failure_type.COMPILE else |
| 304 _NeedANewTryJobForTest( |
| 305 master_name, builder_name, build_number, failure_info, force_try_job)) |
| 306 |
| 307 if need_new_try_job: |
| 308 # TODO(josiahk): Integrate this into need_new_try_job boolean |
| 309 _IsBuildFailureUniqueAcrossPlatforms( |
| 310 master_name, builder_name, build_number, try_job_type, |
| 311 failure_info['builds'][str(build_number)]['blame_list'], |
| 312 failure_info['failed_steps'], signals, heuristic_result) |
| 313 |
| 314 need_new_try_job = need_new_try_job and ReviveOrCreateTryJobEntity( |
| 315 master_name, builder_name, build_number, force_try_job) |
| 316 return need_new_try_job |
| 331 | 317 |
| 332 | 318 |
| 333 def _GetFailedTargetsFromSignals(signals, master_name, builder_name): | 319 def GetFailedTargetsFromSignals(signals, master_name, builder_name): |
| 334 compile_targets = [] | 320 compile_targets = [] |
| 335 | 321 |
| 336 if not signals or 'compile' not in signals: | 322 if not signals or 'compile' not in signals: |
| 337 return compile_targets | 323 return compile_targets |
| 338 | 324 |
| 339 if signals['compile'].get('failed_output_nodes'): | 325 if signals['compile'].get('failed_output_nodes'): |
| 340 return signals['compile'].get('failed_output_nodes') | 326 return signals['compile'].get('failed_output_nodes') |
| 341 | 327 |
| 342 strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures( | 328 strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures( |
| 343 master_name, builder_name) | 329 master_name, builder_name) |
| 344 for source_target in signals['compile'].get('failed_targets', []): | 330 for source_target in signals['compile'].get('failed_targets', []): |
| 345 # For link failures, we pass the executable targets directly to try-job, and | 331 # For link failures, we pass the executable targets directly to try-job, and |
| 346 # there is no 'source' for link failures. | 332 # there is no 'source' for link failures. |
| 347 # For compile failures, only pass the object files as the compile targets | 333 # For compile failures, only pass the object files as the compile targets |
| 348 # for the bots that we use strict regex to extract such information. | 334 # for the bots that we use strict regex to extract such information. |
| 349 if not source_target.get('source') or strict_regex: | 335 if not source_target.get('source') or strict_regex: |
| 350 compile_targets.append(source_target.get('target')) | 336 compile_targets.append(source_target.get('target')) |
| 351 | 337 |
| 352 return compile_targets | 338 return compile_targets |
| 353 | |
| 354 | |
| 355 def _GetSuspectsFromHeuristicResult(heuristic_result): | |
| 356 suspected_revisions = set() | |
| 357 if not heuristic_result: | |
| 358 return list(suspected_revisions) | |
| 359 for failure in heuristic_result.get('failures', []): | |
| 360 for cl in failure['suspected_cls']: | |
| 361 suspected_revisions.add(cl['revision']) | |
| 362 return list(suspected_revisions) | |
| 363 | |
| 364 | |
| 365 def _ShouldBailOutForOutdatedBuild(build): | |
| 366 return (datetime.utcnow() - build.start_time).days > 0 | |
| 367 | |
| 368 | |
| 369 def ScheduleTryJobIfNeeded(failure_info, signals, heuristic_result, | |
| 370 force_try_job=False): | |
| 371 master_name = failure_info['master_name'] | |
| 372 builder_name = failure_info['builder_name'] | |
| 373 build_number = failure_info['build_number'] | |
| 374 failed_steps = failure_info.get('failed_steps', []) | |
| 375 builds = failure_info.get('builds', {}) | |
| 376 | |
| 377 tryserver_mastername, tryserver_buildername = ( | |
| 378 waterfall_config.GetTrybotForWaterfallBuilder(master_name, builder_name)) | |
| 379 | |
| 380 if not tryserver_mastername or not tryserver_buildername: | |
| 381 logging.info('%s, %s is not supported yet.', master_name, builder_name) | |
| 382 return {} | |
| 383 | |
| 384 if not force_try_job: | |
| 385 build = WfBuild.Get(master_name, builder_name, build_number) | |
| 386 | |
| 387 if _ShouldBailOutForOutdatedBuild(build): | |
| 388 logging.error('Build time %s is more than 24 hours old. ' | |
| 389 'Try job will not be triggered.' % build.start_time) | |
| 390 return {} | |
| 391 | |
| 392 if (failure_info['failure_type'] == failure_type.TEST and | |
| 393 waterfall_config.ShouldSkipTestTryJobs(master_name, builder_name)): | |
| 394 logging.info('Test try jobs on %s, %s are not supported yet.', | |
| 395 master_name, builder_name) | |
| 396 return {} | |
| 397 | |
| 398 failure_result_map = {} | |
| 399 need_new_try_job, last_pass, try_job_type, targeted_tests = ( | |
| 400 _NeedANewTryJob(master_name, builder_name, build_number, | |
| 401 failure_info['failure_type'], failed_steps, | |
| 402 failure_result_map, builds, signals, heuristic_result, | |
| 403 force_try_job)) | |
| 404 | |
| 405 if need_new_try_job: | |
| 406 compile_targets = (_GetFailedTargetsFromSignals( | |
| 407 signals, master_name, builder_name) | |
| 408 if try_job_type == TryJobType.COMPILE else None) | |
| 409 suspected_revisions = _GetSuspectsFromHeuristicResult(heuristic_result) | |
| 410 | |
| 411 pipeline = ( | |
| 412 swarming_tasks_to_try_job_pipeline.SwarmingTasksToTryJobPipeline( | |
| 413 master_name, builder_name, build_number, | |
| 414 builds[str(last_pass)]['chromium_revision'], | |
| 415 builds[str(build_number)]['chromium_revision'], | |
| 416 builds[str(build_number)]['blame_list'], | |
| 417 try_job_type, compile_targets, targeted_tests, suspected_revisions, | |
| 418 force_try_job)) | |
| 419 | |
| 420 pipeline.target = appengine_util.GetTargetNameForModule( | |
| 421 constants.WATERFALL_BACKEND) | |
| 422 pipeline.start(queue_name=constants.WATERFALL_TRY_JOB_QUEUE) | |
| 423 | |
| 424 if try_job_type == TryJobType.TEST: # pragma: no cover | |
| 425 logging_str = ( | |
| 426 'Trying to schedule swarming task(s) for build %s, %s, %s: %s' | |
| 427 ' because of %s failure. A try job may be triggered if some reliable' | |
| 428 ' failure is detected in task(s).') % ( | |
| 429 master_name, builder_name, build_number, | |
| 430 pipeline.pipeline_status_path, try_job_type) | |
| 431 else: # pragma: no cover | |
| 432 logging_str = ( | |
| 433 'Try job was scheduled for build %s, %s, %s: %s because of %s ' | |
| 434 'failure.') % ( | |
| 435 master_name, builder_name, build_number, | |
| 436 pipeline.pipeline_status_path, try_job_type) | |
| 437 logging.info(logging_str) | |
| 438 | |
| 439 return failure_result_map | |
| OLD | NEW |