| OLD | NEW |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| | 5 from collections import defaultdict |
| 5 from datetime import datetime | 6 from datetime import datetime |
| 6 import logging | 7 import logging |
| 7 | 8 |
| 8 from google.appengine.ext import ndb | 9 from google.appengine.ext import ndb |
| 9 | 10 |
| 10 from common import appengine_util | 11 from common import appengine_util |
| 11 from common import constants | 12 from common import constants |
| 12 from common.waterfall import failure_type | 13 from common.waterfall import failure_type |
| 13 from model import analysis_status | 14 from model import analysis_status |
| 14 from model.wf_analysis import WfAnalysis | 15 from model.wf_analysis import WfAnalysis |
| 15 from model.wf_build import WfBuild | 16 from model.wf_build import WfBuild |
| 16 from model.wf_failure_group import WfFailureGroup | 17 from model.wf_failure_group import WfFailureGroup |
| 17 from model.wf_try_job import WfTryJob | 18 from model.wf_try_job import WfTryJob |
| 18 from waterfall import swarming_tasks_to_try_job_pipeline | |
| 19 from waterfall import waterfall_config | 19 from waterfall import waterfall_config |
| 20 from waterfall.try_job_type import TryJobType | |
| 21 | 20 |
| 22 | 21 |
| 23 def _CheckFailureForTryJobKey( | 22 def _ShouldBailOutForOutdatedBuild(build): |
| 24 master_name, builder_name, build_number, | 23 return (datetime.utcnow() - build.start_time).days > 0 |
| 25 failure_result_map, failed_step_or_test, failure): | |
| 26 """Compares the current_failure and first_failure for each failed_step/test. | |
| 27 | |
| 28 If equal, a new try_job needs to start; | |
| 29 If not, apply the key of the first_failure's try_job to this failure. | |
| 30 """ | |
| 31 # TODO(chanli): Need to compare failures across builders | |
| 32 # after the grouping of failures is implemented. | |
| 33 # TODO(chanli): Need to handle cases where first failure is actually | |
| 34 # more than 20 builds back. The implementation should not be here, | |
| 35 # but need to be taken care of. | |
| 36 if not failure.get('last_pass'): | |
| 37 # Bail out since cannot figure out the good_revision. | |
| 38 return False, None | |
| 39 | |
| 40 if failure['current_failure'] == failure['first_failure']: | |
| 41 failure_result_map[failed_step_or_test] = '%s/%s/%s' % ( | |
| 42 master_name, builder_name, build_number) | |
| 43 return True, failure['last_pass'] # A new try_job is needed. | |
| 44 else: | |
| 45 failure_result_map[failed_step_or_test] = '%s/%s/%s' % ( | |
| 46 master_name, builder_name, failure['first_failure']) | |
| 47 return False, None | |
| 48 | 24 |
| 49 | 25 |
| 50 def _CheckIfNeedNewTryJobForTestFailure( | 26 def _CurrentBuildKey(master_name, builder_name, build_number): |
| 51 failure_level, master_name, builder_name, build_number, | 27 return '%s/%s/%d' % (master_name, builder_name, build_number) |
| 52 failure_result_map, failures): | |
| 53 """Traverses failed steps or tests to check if a new try job is needed.""" | |
| 54 need_new_try_job = False | |
| 55 last_pass = build_number | |
| 56 targeted_tests = {} if failure_level == 'step' else [] | |
| 57 | |
| 58 for failure_name, failure in failures.iteritems(): | |
| 59 if 'tests' in failure: | |
| 60 failure_result_map[failure_name] = {} | |
| 61 failure_targeted_tests, failure_need_try_job, failure_last_pass = ( | |
| 62 _CheckIfNeedNewTryJobForTestFailure( | |
| 63 'test', master_name, builder_name, build_number, | |
| 64 failure_result_map[failure_name], failure['tests'])) | |
| 65 if failure_need_try_job: | |
| 66 targeted_tests[failure_name] = failure_targeted_tests | |
| 67 else: | |
| 68 failure_need_try_job, failure_last_pass = _CheckFailureForTryJobKey( | |
| 69 master_name, builder_name, build_number, | |
| 70 failure_result_map, failure_name, failure) | |
| 71 if failure_need_try_job: | |
| 72 if failure_level == 'step': | |
| 73 targeted_tests[failure_name] = [] | |
| 74 else: | |
| 75 targeted_tests.append(failure.get('base_test_name', failure_name)) | |
| 76 | |
| 77 need_new_try_job = need_new_try_job or failure_need_try_job | |
| 78 last_pass = (failure_last_pass if failure_last_pass and | |
| 79 failure_last_pass < last_pass else last_pass) | |
| 80 | |
| 81 return targeted_tests, need_new_try_job, last_pass | |
| 82 | 28 |
| 83 | 29 |
| 84 def _BlameListsIntersection(blame_list_1, blame_list_2): | 30 def _BlameListsIntersection(blame_list_1, blame_list_2): |
| 85 return set(blame_list_1) & set(blame_list_2) | 31 return set(blame_list_1) & set(blame_list_2) |
| 86 | 32 |
| 87 | 33 |
| 88 def _GetStepsAndTests(failed_steps): | 34 def _GetStepsAndTests(failed_steps): |
| 89 """Extracts failed steps and tests from failed_steps data structure. | 35 """Extracts failed steps and tests from failed_steps data structure. |
| 90 | 36 |
| 91 Args: | 37 Args: |
| (...skipping 199 matching lines...) | (...skipping 199 matching lines...) |
| 291 try_job.put() | 237 try_job.put() |
| 292 else: | 238 else: |
| 293 try_job_entity_revived_or_created = False | 239 try_job_entity_revived_or_created = False |
| 294 else: | 240 else: |
| 295 try_job = WfTryJob.Create(master_name, builder_name, build_number) | 241 try_job = WfTryJob.Create(master_name, builder_name, build_number) |
| 296 try_job.put() | 242 try_job.put() |
| 297 | 243 |
| 298 return try_job_entity_revived_or_created | 244 return try_job_entity_revived_or_created |
| 299 | 245 |
| 300 | 246 |
| 301 def _NeedANewTryJob( | 247 def _NeedANewCompileTryJob( |
| 302 master_name, builder_name, build_number, build_failure_type, failed_steps, | 248 master_name, builder_name, build_number, failure_info): |
| 303 failure_result_map, builds, signals, heuristic_result, force_try_job=False): | |
| 304 """Checks if a new try_job is needed.""" | |
| 305 need_new_try_job = False | |
| 306 last_pass = build_number | |
| 307 | 249 |
| 308 if 'compile' in failed_steps: | 250 compile_failure = failure_info['failed_steps'].get('compile', {}) |
| 309 try_job_type = TryJobType.COMPILE | 251 if compile_failure: |
| 310 targeted_tests = None | 252 analysis = WfAnalysis.Get(master_name, builder_name, build_number) |
| 311 need_new_try_job, last_pass = _CheckFailureForTryJobKey( | 253 analysis.failure_result_map['compile'] = '%s/%s/%d' % ( |
| 312 master_name, builder_name, build_number, | 254 master_name, builder_name, compile_failure['first_failure']) |
| 313 failure_result_map, TryJobType.COMPILE, failed_steps['compile']) | 255 analysis.put() |
| 314 else: | 256 |
| 315 try_job_type = TryJobType.TEST | 257 if compile_failure['first_failure'] == compile_failure['current_failure']: |
| 316 targeted_tests, need_new_try_job, last_pass = ( | 258 return True |
| 317 _CheckIfNeedNewTryJobForTestFailure( | 259 |
| 318 'step', master_name, builder_name, build_number, failure_result_map, | 260 return False |
| 319 failed_steps)) | |
| 320 | 261 |
| 321 | 262 |
| 322 need_new_try_job = ( | 263 def _CurrentBuildKeyInFailureResultMap(master_name, builder_name, build_number): |
| 323 need_new_try_job and ReviveOrCreateTryJobEntity( | 264 analysis = WfAnalysis.Get(master_name, builder_name, build_number) |
| 324 master_name, builder_name, build_number, force_try_job)) | 265 failure_result_map = analysis.failure_result_map |
| 325 | 266 current_build_key = _CurrentBuildKey(master_name, builder_name, build_number) |
| 326 # TODO(josiahk): Integrate _IsBuildFailureUniqueAcrossPlatforms() into | 267 for step_keys in failure_result_map.itervalues(): |
| 327 # need_new_try_job boolean | 268 for test_key in step_keys.itervalues(): |
| 328 if need_new_try_job: | 269 if test_key == current_build_key: |
| 329 _IsBuildFailureUniqueAcrossPlatforms( | 270 return True |
| 330 master_name, builder_name, build_number, build_failure_type, | 271 return False |
| 331 builds[str(build_number)]['blame_list'], failed_steps, signals, | |
| 332 heuristic_result) | |
| 333 | |
| 334 return need_new_try_job, last_pass, try_job_type, targeted_tests | |
| 335 | 272 |
| 336 | 273 |
| 337 def _GetFailedTargetsFromSignals(signals, master_name, builder_name): | 274 def _NeedANewTestTryJob( |
| 338 compile_targets = [] | 275 master_name, builder_name, build_number, failure_info, force_try_job): |
| 339 | 276 |
| 340 if not signals or 'compile' not in signals: | 277 if failure_info['failure_type'] != failure_type.TEST: |
| 341 return compile_targets | 278 return False |
| 342 | 279 |
| 343 if signals['compile'].get('failed_output_nodes'): | 280 if (not force_try_job and |
| 344 return signals['compile'].get('failed_output_nodes') | 281 waterfall_config.ShouldSkipTestTryJobs(master_name, builder_name)): |
| | 282 logging.info('Test try jobs on %s, %s are not supported yet.', |
| | 283 master_name, builder_name) |
| | 284 return False |
| 345 | 285 |
| 346 strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures( | 286 return _CurrentBuildKeyInFailureResultMap( |
| 347 master_name, builder_name) | 287 master_name, builder_name, build_number) |
| 348 for source_target in signals['compile'].get('failed_targets', []): | |
| 349 # For link failures, we pass the executable targets directly to try-job, and | |
| 350 # there is no 'source' for link failures. | |
| 351 # For compile failures, only pass the object files as the compile targets | |
| 352 # for the bots that we use strict regex to extract such information. | |
| 353 if not source_target.get('source') or strict_regex: | |
| 354 compile_targets.append(source_target.get('target')) | |
| 355 | |
| 356 return compile_targets | |
| 357 | 288 |
| 358 | 289 |
| 359 def _GetSuspectsFromHeuristicResult(heuristic_result): | 290 def NeedANewTryJob( |
| 360 suspected_revisions = set() | 291 master_name, builder_name, build_number, failure_info, signals, |
| 361 if not heuristic_result: | 292 heuristic_result, force_try_job=False): |
| 362 return list(suspected_revisions) | |
| 363 for failure in heuristic_result.get('failures', []): | |
| 364 for cl in failure['suspected_cls']: | |
| 365 suspected_revisions.add(cl['revision']) | |
| 366 return list(suspected_revisions) | |
| 367 | |
| 368 | |
| 369 def _ShouldBailOutForOutdatedBuild(build): | |
| 370 return (datetime.utcnow() - build.start_time).days > 0 | |
| 371 | |
| 372 | |
| 373 def ScheduleTryJobIfNeeded(failure_info, signals, heuristic_result, | |
| 374 force_try_job=False): | |
| 375 master_name = failure_info['master_name'] | |
| 376 builder_name = failure_info['builder_name'] | |
| 377 build_number = failure_info['build_number'] | |
| 378 failed_steps = failure_info.get('failed_steps', []) | |
| 379 builds = failure_info.get('builds', {}) | |
| 380 | 293 |
| 381 tryserver_mastername, tryserver_buildername = ( | 294 tryserver_mastername, tryserver_buildername = ( |
| 382 waterfall_config.GetTrybotForWaterfallBuilder(master_name, builder_name)) | 295 waterfall_config.GetTrybotForWaterfallBuilder(master_name, builder_name)) |
| 383 | 296 |
| | 297 try_job_type = failure_info['failure_type'] |
| 384 if not tryserver_mastername or not tryserver_buildername: | 298 if not tryserver_mastername or not tryserver_buildername: |
| 385 logging.info('%s, %s is not supported yet.', master_name, builder_name) | 299 logging.info('%s, %s is not supported yet.', master_name, builder_name) |
| 386 return {} | 300 return False |
| 387 | 301 |
| 388 if not force_try_job: | 302 if not force_try_job: |
| 389 build = WfBuild.Get(master_name, builder_name, build_number) | 303 build = WfBuild.Get(master_name, builder_name, build_number) |
| 390 | 304 |
| 391 if _ShouldBailOutForOutdatedBuild(build): | 305 if _ShouldBailOutForOutdatedBuild(build): |
| 392 logging.error('Build time %s is more than 24 hours old. ' | 306 logging.error('Build time %s is more than 24 hours old. ' |
| 393 'Try job will not be triggered.' % build.start_time) | 307 'Try job will not be triggered.' % build.start_time) |
| 394 return {} | 308 return False |
| 395 | 309 |
| 396 if (failure_info['failure_type'] == failure_type.TEST and | 310 need_new_try_job = (_NeedANewCompileTryJob( |
| 397 waterfall_config.ShouldSkipTestTryJobs(master_name, builder_name)): | 311 master_name, builder_name, build_number, failure_info) |
| 398 logging.info('Test try jobs on %s, %s are not supported yet.', | 312 if try_job_type == failure_type.COMPILE else |
| 399 master_name, builder_name) | 313 _NeedANewTestTryJob( |
| 400 return {} | 314 master_name, builder_name, build_number, failure_info, force_try_job)) |
| 401 | |
| 402 failure_result_map = {} | |
| 403 need_new_try_job, last_pass, try_job_type, targeted_tests = ( | |
| 404 _NeedANewTryJob(master_name, builder_name, build_number, | |
| 405 failure_info['failure_type'], failed_steps, | |
| 406 failure_result_map, builds, signals, heuristic_result, | |
| 407 force_try_job)) | |
| 408 | 315 |
| 409 if need_new_try_job: | 316 if need_new_try_job: |
| 410 compile_targets = (_GetFailedTargetsFromSignals( | 317 # TODO(josiahk): Integrate this into need_new_try_job boolean |
| 411 signals, master_name, builder_name) | 318 _IsBuildFailureUniqueAcrossPlatforms( |
| 412 if try_job_type == TryJobType.COMPILE else None) | 319 master_name, builder_name, build_number, try_job_type, |
| 413 suspected_revisions = _GetSuspectsFromHeuristicResult(heuristic_result) | 320 failure_info['builds'][str(build_number)]['blame_list'], |
| | 321 failure_info['failed_steps'], signals, heuristic_result) |
| 414 | 322 |
| 415 pipeline = ( | 323 need_new_try_job = need_new_try_job and ReviveOrCreateTryJobEntity( |
| 416 swarming_tasks_to_try_job_pipeline.SwarmingTasksToTryJobPipeline( | 324 master_name, builder_name, build_number, force_try_job) |
| 417 master_name, builder_name, build_number, | 325 return need_new_try_job |
| 418 builds[str(last_pass)]['chromium_revision'], | |
| 419 builds[str(build_number)]['chromium_revision'], | |
| 420 builds[str(build_number)]['blame_list'], | |
| 421 try_job_type, compile_targets, targeted_tests, suspected_revisions, | |
| 422 force_try_job)) | |
| 423 | 326 |
| 424 pipeline.target = appengine_util.GetTargetNameForModule( | |
| 425 constants.WATERFALL_BACKEND) | |
| 426 pipeline.start(queue_name=constants.WATERFALL_TRY_JOB_QUEUE) | |
| 427 | 327 |
| 428 if try_job_type == TryJobType.TEST: # pragma: no cover | 328 def GetFailedTargetsFromSignals(signals, master_name, builder_name): |
| 429 logging_str = ( | 329 compile_targets = [] |
| 430 'Trying to schedule swarming task(s) for build %s, %s, %s: %s' | |
| 431 ' because of %s failure. A try job may be triggered if some reliable' | |
| 432 ' failure is detected in task(s).') % ( | |
| 433 master_name, builder_name, build_number, | |
| 434 pipeline.pipeline_status_path, try_job_type) | |
| 435 else: # pragma: no cover | |
| 436 logging_str = ( | |
| 437 'Try job was scheduled for build %s, %s, %s: %s because of %s ' | |
| 438 'failure.') % ( | |
| 439 master_name, builder_name, build_number, | |
| 440 pipeline.pipeline_status_path, try_job_type) | |
| 441 logging.info(logging_str) | |
| 442 | 330 |
| 443 return failure_result_map | 331 if not signals or 'compile' not in signals: |
| | 332 return compile_targets |
| | 333 |
| | 334 if signals['compile'].get('failed_output_nodes'): |
| | 335 return signals['compile'].get('failed_output_nodes') |
| | 336 |
| | 337 strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures( |
| | 338 master_name, builder_name) |
| | 339 for source_target in signals['compile'].get('failed_targets', []): |
| | 340 # For link failures, we pass the executable targets directly to try-job, and |
| | 341 # there is no 'source' for link failures. |
| | 342 # For compile failures, only pass the object files as the compile targets |
| | 343 # for the bots that we use strict regex to extract such information. |
| | 344 if not source_target.get('source') or strict_regex: |
| | 345 compile_targets.append(source_target.get('target')) |
| | 346 |
| | 347 return compile_targets |
| OLD | NEW |