Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from collections import defaultdict | |
| 5 from datetime import datetime | 6 from datetime import datetime |
| 6 import logging | 7 import logging |
| 7 | 8 |
| 8 from google.appengine.ext import ndb | 9 from google.appengine.ext import ndb |
| 9 | 10 |
| 10 from common import appengine_util | 11 from common import appengine_util |
| 11 from common import constants | 12 from common import constants |
| 12 from common.waterfall import failure_type | 13 from common.waterfall import failure_type |
| 13 from model import analysis_status | 14 from model import analysis_status |
| 14 from model.wf_analysis import WfAnalysis | 15 from model.wf_analysis import WfAnalysis |
| 15 from model.wf_build import WfBuild | 16 from model.wf_build import WfBuild |
| 16 from model.wf_failure_group import WfFailureGroup | 17 from model.wf_failure_group import WfFailureGroup |
| 17 from model.wf_try_job import WfTryJob | 18 from model.wf_try_job import WfTryJob |
| 18 from waterfall import swarming_tasks_to_try_job_pipeline | |
| 19 from waterfall import waterfall_config | 19 from waterfall import waterfall_config |
| 20 from waterfall.try_job_type import TryJobType | |
| 21 | 20 |
| 22 | 21 |
| 23 def _CheckFailureForTryJobKey( | 22 def _ShouldBailOutForOutdatedBuild(build): |
| 24 master_name, builder_name, build_number, | 23 return (datetime.utcnow() - build.start_time).days > 0 |
| 25 failure_result_map, failed_step_or_test, failure): | |
| 26 """Compares the current_failure and first_failure for each failed_step/test. | |
| 27 | 24 |
| 28 If equal, a new try_job needs to start; | 25 def _CurrentBuildKey(master_name, builder_name, build_number): |
| 29 If not, apply the key of the first_failure's try_job to this failure. | 26 return '%s/%s/%d' % (master_name, builder_name, build_number) |
| 30 """ | |
| 31 # TODO(chanli): Need to compare failures across builders | |
| 32 # after the grouping of failures is implemented. | |
| 33 # TODO(chanli): Need to handle cases where first failure is actually | |
| 34 # more than 20 builds back. The implementation should not be here, | |
| 35 # but need to be taken care of. | |
| 36 if not failure.get('last_pass'): | |
| 37 # Bail out since cannot figure out the good_revision. | |
| 38 return False, None | |
| 39 | |
| 40 if failure['current_failure'] == failure['first_failure']: | |
| 41 failure_result_map[failed_step_or_test] = '%s/%s/%s' % ( | |
| 42 master_name, builder_name, build_number) | |
| 43 return True, failure['last_pass'] # A new try_job is needed. | |
| 44 else: | |
| 45 failure_result_map[failed_step_or_test] = '%s/%s/%s' % ( | |
| 46 master_name, builder_name, failure['first_failure']) | |
| 47 return False, None | |
| 48 | |
| 49 | |
| 50 def _CheckIfNeedNewTryJobForTestFailure( | |
| 51 failure_level, master_name, builder_name, build_number, | |
| 52 failure_result_map, failures): | |
| 53 """Traverses failed steps or tests to check if a new try job is needed.""" | |
| 54 need_new_try_job = False | |
| 55 last_pass = build_number | |
| 56 targeted_tests = {} if failure_level == 'step' else [] | |
| 57 | |
| 58 for failure_name, failure in failures.iteritems(): | |
| 59 if 'tests' in failure: | |
| 60 failure_result_map[failure_name] = {} | |
| 61 failure_targeted_tests, failure_need_try_job, failure_last_pass = ( | |
| 62 _CheckIfNeedNewTryJobForTestFailure( | |
| 63 'test', master_name, builder_name, build_number, | |
| 64 failure_result_map[failure_name], failure['tests'])) | |
| 65 if failure_need_try_job: | |
| 66 targeted_tests[failure_name] = failure_targeted_tests | |
| 67 else: | |
| 68 failure_need_try_job, failure_last_pass = _CheckFailureForTryJobKey( | |
| 69 master_name, builder_name, build_number, | |
| 70 failure_result_map, failure_name, failure) | |
| 71 if failure_need_try_job: | |
| 72 if failure_level == 'step': | |
| 73 targeted_tests[failure_name] = [] | |
| 74 else: | |
| 75 targeted_tests.append(failure.get('base_test_name', failure_name)) | |
| 76 | |
| 77 need_new_try_job = need_new_try_job or failure_need_try_job | |
| 78 last_pass = (failure_last_pass if failure_last_pass and | |
| 79 failure_last_pass < last_pass else last_pass) | |
| 80 | |
| 81 return targeted_tests, need_new_try_job, last_pass | |
| 82 | 27 |
| 83 | 28 |
| 84 def _BlameListsIntersection(blame_list_1, blame_list_2): | 29 def _BlameListsIntersection(blame_list_1, blame_list_2): |
| 85 return set(blame_list_1) & set(blame_list_2) | 30 return set(blame_list_1) & set(blame_list_2) |
| 86 | 31 |
| 87 | 32 |
| 88 def _GetStepsAndTests(failed_steps): | 33 def _GetStepsAndTests(failed_steps): |
| 89 """Extracts failed steps and tests from failed_steps data structure. | 34 """Extracts failed steps and tests from failed_steps data structure. |
| 90 | 35 |
| 91 Args: | 36 Args: |
| (...skipping 199 matching lines...) | |
| 291 try_job.put() | 236 try_job.put() |
| 292 else: | 237 else: |
| 293 try_job_entity_revived_or_created = False | 238 try_job_entity_revived_or_created = False |
| 294 else: | 239 else: |
| 295 try_job = WfTryJob.Create(master_name, builder_name, build_number) | 240 try_job = WfTryJob.Create(master_name, builder_name, build_number) |
| 296 try_job.put() | 241 try_job.put() |
| 297 | 242 |
| 298 return try_job_entity_revived_or_created | 243 return try_job_entity_revived_or_created |
| 299 | 244 |
| 300 | 245 |
| 301 def _NeedANewTryJob( | 246 def _NeedANewTryJobForCompile( |
|
lijeffrey
2016/08/09 04:07:34
nit: how about _NeedANewCompileTryJob/_NeedANewTestTryJob?
chanli
2016/08/09 17:08:43
Done.
| |
| 302 master_name, builder_name, build_number, build_failure_type, failed_steps, | 247 master_name, builder_name, build_number, failure_info): |
| 303 failure_result_map, builds, signals, heuristic_result, force_try_job=False): | |
| 304 """Checks if a new try_job is needed.""" | |
| 305 need_new_try_job = False | |
| 306 last_pass = build_number | |
| 307 | 248 |
| 308 if 'compile' in failed_steps: | 249 compile_failure = failure_info['failed_steps'].get('compile', {}) |
| 309 try_job_type = TryJobType.COMPILE | 250 if compile_failure: |
| 310 targeted_tests = None | 251 analysis = WfAnalysis.Get(master_name, builder_name, build_number) |
| 311 need_new_try_job, last_pass = _CheckFailureForTryJobKey( | 252 analysis.failure_result_map['compile'] = '%s/%s/%d' % ( |
| 312 master_name, builder_name, build_number, | 253 master_name, builder_name, compile_failure['first_failure']) |
| 313 failure_result_map, TryJobType.COMPILE, failed_steps['compile']) | 254 analysis.put() |
| 314 else: | 255 |
| 315 try_job_type = TryJobType.TEST | 256 if compile_failure['first_failure'] == compile_failure['current_failure']: |
| 316 targeted_tests, need_new_try_job, last_pass = ( | 257 return True |
| 317 _CheckIfNeedNewTryJobForTestFailure( | 258 |
| 318 'step', master_name, builder_name, build_number, failure_result_map, | 259 return False |
| 319 failed_steps)) | 260 |
| 261 def _NeedANewTryJobForTest( | |
| 262 master_name, builder_name, build_number, failure_info, force_try_job): | |
| 263 | |
| 264 if failure_info['failure_type'] != failure_type.TEST: | |
| 265 return False | |
| 266 | |
| 267 if not force_try_job: | |
| 268 if waterfall_config.ShouldSkipTestTryJobs(master_name, builder_name): | |
|
lijeffrey
2016/08/09 04:07:34
nit: It looks like these if statements can be combined.
chanli
2016/08/09 17:08:43
Done.
| |
| 269 logging.info('Test try jobs on %s, %s are not supported yet.', | |
| 270 master_name, builder_name) | |
| 271 return False | |
| 272 | |
| 273 analysis = WfAnalysis.Get(master_name, builder_name, build_number) | |
| 274 failure_result_map = analysis.failure_result_map | |
|
lijeffrey
2016/08/09 04:07:34
nit: break this part of the code into a separate function.
chanli
2016/08/09 17:08:43
Done.
| |
| 275 current_build_key = _CurrentBuildKey(master_name, builder_name, build_number) | |
| 276 for step_keys in failure_result_map.itervalues(): | |
| 277 for test_key in step_keys.itervalues(): | |
| 278 if test_key == current_build_key: | |
| 279 return True | |
| 280 return False | |
| 281 | |
| 282 def NeedANewTryJob( | |
| 283 master_name, builder_name, build_number, failure_info, signals, | |
| 284 heuristic_result, force_try_job=False): | |
| 285 | |
| 286 tryserver_mastername, tryserver_buildername = ( | |
| 287 waterfall_config.GetTrybotForWaterfallBuilder(master_name, builder_name)) | |
| 288 | |
| 289 try_job_type = failure_info['failure_type'] | |
| 290 if not tryserver_mastername or not tryserver_buildername: | |
| 291 logging.info('%s, %s is not supported yet.', master_name, builder_name) | |
| 292 return False | |
| 293 | |
| 294 if not force_try_job: | |
| 295 build = WfBuild.Get(master_name, builder_name, build_number) | |
| 296 | |
| 297 if _ShouldBailOutForOutdatedBuild(build): | |
| 298 logging.error('Build time %s is more than 24 hours old. ' | |
| 299 'Try job will not be triggered.' % build.start_time) | |
| 300 return False | |
| 301 | |
| 302 need_new_try_job = (_NeedANewTryJobForCompile( | |
| 303 master_name, builder_name, build_number, failure_info) | |
| 304 if try_job_type == failure_type.COMPILE else | |
| 305 _NeedANewTryJobForTest( | |
| 306 master_name, builder_name, build_number, failure_info, force_try_job)) | |
| 307 | |
| 308 if need_new_try_job: | |
| 309 # TODO(josiahk): Integrate this into need_new_try_job boolean | |
| 310 _IsBuildFailureUniqueAcrossPlatforms( | |
| 311 master_name, builder_name, build_number, try_job_type, | |
| 312 failure_info['builds'][str(build_number)]['blame_list'], | |
| 313 failure_info['failed_steps'], signals, heuristic_result) | |
| 314 | |
| 315 need_new_try_job = need_new_try_job and ReviveOrCreateTryJobEntity( | |
| 316 master_name, builder_name, build_number, force_try_job) | |
| 317 return need_new_try_job | |
| 320 | 318 |
| 321 | 319 |
| 322 need_new_try_job = ( | 320 def GetFailedTargetsFromSignals(signals, master_name, builder_name): |
| 323 need_new_try_job and ReviveOrCreateTryJobEntity( | |
| 324 master_name, builder_name, build_number, force_try_job)) | |
| 325 | |
| 326 # TODO(josiahk): Integrate _IsBuildFailureUniqueAcrossPlatforms() into | |
| 327 # need_new_try_job boolean | |
| 328 if need_new_try_job: | |
| 329 _IsBuildFailureUniqueAcrossPlatforms( | |
| 330 master_name, builder_name, build_number, build_failure_type, | |
| 331 builds[str(build_number)]['blame_list'], failed_steps, signals, | |
| 332 heuristic_result) | |
| 333 | |
| 334 return need_new_try_job, last_pass, try_job_type, targeted_tests | |
| 335 | |
| 336 | |
| 337 def _GetFailedTargetsFromSignals(signals, master_name, builder_name): | |
| 338 compile_targets = [] | 321 compile_targets = [] |
| 339 | 322 |
| 340 if not signals or 'compile' not in signals: | 323 if not signals or 'compile' not in signals: |
| 341 return compile_targets | 324 return compile_targets |
| 342 | 325 |
| 343 if signals['compile'].get('failed_output_nodes'): | 326 if signals['compile'].get('failed_output_nodes'): |
| 344 return signals['compile'].get('failed_output_nodes') | 327 return signals['compile'].get('failed_output_nodes') |
| 345 | 328 |
| 346 strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures( | 329 strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures( |
| 347 master_name, builder_name) | 330 master_name, builder_name) |
| 348 for source_target in signals['compile'].get('failed_targets', []): | 331 for source_target in signals['compile'].get('failed_targets', []): |
| 349 # For link failures, we pass the executable targets directly to try-job, and | 332 # For link failures, we pass the executable targets directly to try-job, and |
| 350 # there is no 'source' for link failures. | 333 # there is no 'source' for link failures. |
| 351 # For compile failures, only pass the object files as the compile targets | 334 # For compile failures, only pass the object files as the compile targets |
| 352 # for the bots that we use strict regex to extract such information. | 335 # for the bots that we use strict regex to extract such information. |
| 353 if not source_target.get('source') or strict_regex: | 336 if not source_target.get('source') or strict_regex: |
| 354 compile_targets.append(source_target.get('target')) | 337 compile_targets.append(source_target.get('target')) |
| 355 | 338 |
| 356 return compile_targets | 339 return compile_targets |
| 357 | |
| 358 | |
| 359 def _GetSuspectsFromHeuristicResult(heuristic_result): | |
| 360 suspected_revisions = set() | |
| 361 if not heuristic_result: | |
| 362 return list(suspected_revisions) | |
| 363 for failure in heuristic_result.get('failures', []): | |
| 364 for cl in failure['suspected_cls']: | |
| 365 suspected_revisions.add(cl['revision']) | |
| 366 return list(suspected_revisions) | |
| 367 | |
| 368 | |
| 369 def _ShouldBailOutForOutdatedBuild(build): | |
| 370 return (datetime.utcnow() - build.start_time).days > 0 | |
| 371 | |
| 372 | |
| 373 def ScheduleTryJobIfNeeded(failure_info, signals, heuristic_result, | |
| 374 force_try_job=False): | |
| 375 master_name = failure_info['master_name'] | |
| 376 builder_name = failure_info['builder_name'] | |
| 377 build_number = failure_info['build_number'] | |
| 378 failed_steps = failure_info.get('failed_steps', []) | |
| 379 builds = failure_info.get('builds', {}) | |
| 380 | |
| 381 tryserver_mastername, tryserver_buildername = ( | |
| 382 waterfall_config.GetTrybotForWaterfallBuilder(master_name, builder_name)) | |
| 383 | |
| 384 if not tryserver_mastername or not tryserver_buildername: | |
| 385 logging.info('%s, %s is not supported yet.', master_name, builder_name) | |
| 386 return {} | |
| 387 | |
| 388 if not force_try_job: | |
| 389 build = WfBuild.Get(master_name, builder_name, build_number) | |
| 390 | |
| 391 if _ShouldBailOutForOutdatedBuild(build): | |
| 392 logging.error('Build time %s is more than 24 hours old. ' | |
| 393 'Try job will not be triggered.' % build.start_time) | |
| 394 return {} | |
| 395 | |
| 396 if (failure_info['failure_type'] == failure_type.TEST and | |
| 397 waterfall_config.ShouldSkipTestTryJobs(master_name, builder_name)): | |
| 398 logging.info('Test try jobs on %s, %s are not supported yet.', | |
| 399 master_name, builder_name) | |
| 400 return {} | |
| 401 | |
| 402 failure_result_map = {} | |
| 403 need_new_try_job, last_pass, try_job_type, targeted_tests = ( | |
| 404 _NeedANewTryJob(master_name, builder_name, build_number, | |
| 405 failure_info['failure_type'], failed_steps, | |
| 406 failure_result_map, builds, signals, heuristic_result, | |
| 407 force_try_job)) | |
| 408 | |
| 409 if need_new_try_job: | |
| 410 compile_targets = (_GetFailedTargetsFromSignals( | |
| 411 signals, master_name, builder_name) | |
| 412 if try_job_type == TryJobType.COMPILE else None) | |
| 413 suspected_revisions = _GetSuspectsFromHeuristicResult(heuristic_result) | |
| 414 | |
| 415 pipeline = ( | |
| 416 swarming_tasks_to_try_job_pipeline.SwarmingTasksToTryJobPipeline( | |
| 417 master_name, builder_name, build_number, | |
| 418 builds[str(last_pass)]['chromium_revision'], | |
| 419 builds[str(build_number)]['chromium_revision'], | |
| 420 builds[str(build_number)]['blame_list'], | |
| 421 try_job_type, compile_targets, targeted_tests, suspected_revisions, | |
| 422 force_try_job)) | |
| 423 | |
| 424 pipeline.target = appengine_util.GetTargetNameForModule( | |
| 425 constants.WATERFALL_BACKEND) | |
| 426 pipeline.start(queue_name=constants.WATERFALL_TRY_JOB_QUEUE) | |
| 427 | |
| 428 if try_job_type == TryJobType.TEST: # pragma: no cover | |
| 429 logging_str = ( | |
| 430 'Trying to schedule swarming task(s) for build %s, %s, %s: %s' | |
| 431 ' because of %s failure. A try job may be triggered if some reliable' | |
| 432 ' failure is detected in task(s).') % ( | |
| 433 master_name, builder_name, build_number, | |
| 434 pipeline.pipeline_status_path, try_job_type) | |
| 435 else: # pragma: no cover | |
| 436 logging_str = ( | |
| 437 'Try job was scheduled for build %s, %s, %s: %s because of %s ' | |
| 438 'failure.') % ( | |
| 439 master_name, builder_name, build_number, | |
| 440 pipeline.pipeline_status_path, try_job_type) | |
| 441 logging.info(logging_str) | |
| 442 | |
| 443 return failure_result_map | |
| OLD | NEW |