Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(766)

Side by Side Diff: appengine/findit/waterfall/try_job_util.py

Issue 2187763004: [Findit] Refactor Findit pipeline. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: rebase Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « appengine/findit/waterfall/try_job_type.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from collections import defaultdict
5 from datetime import datetime 6 from datetime import datetime
6 from datetime import timedelta 7 from datetime import timedelta
7 import logging 8 import logging
8 9
9 from google.appengine.ext import ndb 10 from google.appengine.ext import ndb
10 11
11 from common import appengine_util 12 from common import appengine_util
12 from common import constants 13 from common import constants
13 from common.waterfall import failure_type 14 from common.waterfall import failure_type
14 from model import analysis_status 15 from model import analysis_status
15 from model.wf_analysis import WfAnalysis 16 from model.wf_analysis import WfAnalysis
16 from model.wf_build import WfBuild 17 from model.wf_build import WfBuild
17 from model.wf_failure_group import WfFailureGroup 18 from model.wf_failure_group import WfFailureGroup
18 from model.wf_try_job import WfTryJob 19 from model.wf_try_job import WfTryJob
19 from waterfall import swarming_tasks_to_try_job_pipeline
20 from waterfall import waterfall_config 20 from waterfall import waterfall_config
21 from waterfall.try_job_type import TryJobType
22 21
23 # TODO(lijeffrey): Move this to config. 22 # TODO(lijeffrey): Move this to config.
24 MATCHING_GROUPS_SECONDS_AGO = 24 * 60 * 60 # 24 hours. 23 MATCHING_GROUPS_SECONDS_AGO = 24 * 60 * 60 # 24 hours.
25 24
26 25
27 def _CheckFailureForTryJobKey( 26 def _ShouldBailOutForOutdatedBuild(build):
28 master_name, builder_name, build_number, 27 return (datetime.utcnow() - build.start_time).days > 0
29 failure_result_map, failed_step_or_test, failure):
30 """Compares the current_failure and first_failure for each failed_step/test.
31
32 If equal, a new try_job needs to start;
33 If not, apply the key of the first_failure's try_job to this failure.
34 """
35 # TODO(chanli): Need to compare failures across builders
36 # after the grouping of failures is implemented.
37 # TODO(chanli): Need to handle cases where first failure is actually
38 # more than 20 builds back. The implementation should not be here,
39 # but need to be taken care of.
40 if not failure.get('last_pass'):
41 # Bail out since cannot figure out the good_revision.
42 return False, None
43
44 if failure['current_failure'] == failure['first_failure']:
45 failure_result_map[failed_step_or_test] = '%s/%s/%s' % (
46 master_name, builder_name, build_number)
47 return True, failure['last_pass'] # A new try_job is needed.
48 else:
49 failure_result_map[failed_step_or_test] = '%s/%s/%s' % (
50 master_name, builder_name, failure['first_failure'])
51 return False, None
52 28
53 29
54 def _CheckIfNeedNewTryJobForTestFailure( 30 def _CurrentBuildKey(master_name, builder_name, build_number):
55 failure_level, master_name, builder_name, build_number, 31 return '%s/%s/%d' % (master_name, builder_name, build_number)
def _CheckIfNeedNewTryJobForTestFailure(
    failure_level, master_name, builder_name, build_number,
    failure_result_map, failures):
  """Walks failed steps (or tests) to decide whether a try job is needed.

  Recurses one level when a step failure carries per-test details under a
  'tests' key, building a nested failure_result_map entry for that step.

  Args:
    failure_level: 'step' or 'test'; controls the shape of targeted_tests.
    failure_result_map: dict updated in place with try-job result keys.
    failures: dict mapping step/test name to its failure info.

  Returns:
    (targeted_tests, need_new_try_job, last_pass) where targeted_tests is
    a dict keyed by step name at 'step' level and a flat list of test
    names at 'test' level, and last_pass is the earliest known passing
    build among failures that need a try job.
  """
  any_failure_needs_try_job = False
  earliest_last_pass = build_number
  targeted_tests = {} if failure_level == 'step' else []

  for name, failure in failures.items():
    if 'tests' in failure:
      # Step failure with per-test breakdown: recurse at 'test' level into
      # a fresh nested map for this step.
      failure_result_map[name] = {}
      sub_targets, needs_try_job, candidate_last_pass = (
          _CheckIfNeedNewTryJobForTestFailure(
              'test', master_name, builder_name, build_number,
              failure_result_map[name], failure['tests']))
      if needs_try_job:
        targeted_tests[name] = sub_targets
    else:
      needs_try_job, candidate_last_pass = _CheckFailureForTryJobKey(
          master_name, builder_name, build_number,
          failure_result_map, name, failure)
      if needs_try_job:
        if failure_level == 'step':
          targeted_tests[name] = []
        else:
          targeted_tests.append(failure.get('base_test_name', name))

    any_failure_needs_try_job = any_failure_needs_try_job or needs_try_job
    # Track the earliest (smallest) known good build across all failures.
    if candidate_last_pass and candidate_last_pass < earliest_last_pass:
      earliest_last_pass = candidate_last_pass

  return targeted_tests, any_failure_needs_try_job, earliest_last_pass
87 33
88 def _BlameListsIntersection(blame_list_1, blame_list_2): 34 def _BlameListsIntersection(blame_list_1, blame_list_2):
89 return set(blame_list_1) & set(blame_list_2) 35 return set(blame_list_1) & set(blame_list_2)
90 36
91 37
92 def _GetStepsAndTests(failed_steps): 38 def _GetStepsAndTests(failed_steps):
93 """Extracts failed steps and tests from failed_steps data structure. 39 """Extracts failed steps and tests from failed_steps data structure.
94 40
95 Args: 41 Args:
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
301 try_job.put() 247 try_job.put()
302 else: 248 else:
303 try_job_entity_revived_or_created = False 249 try_job_entity_revived_or_created = False
304 else: 250 else:
305 try_job = WfTryJob.Create(master_name, builder_name, build_number) 251 try_job = WfTryJob.Create(master_name, builder_name, build_number)
306 try_job.put() 252 try_job.put()
307 253
308 return try_job_entity_revived_or_created 254 return try_job_entity_revived_or_created
309 255
310 256
def _NeedANewTryJob(
    master_name, builder_name, build_number, build_failure_type, failed_steps,
    failure_result_map, builds, signals, heuristic_result, force_try_job=False):
  """Checks if a new try_job is needed.

  Args:
    failed_steps: dict of failed step info; a 'compile' key means this is
        a compile failure, otherwise it is treated as a test failure.
    failure_result_map: dict updated in place with try-job result keys.
    builds: dict keyed by str(build_number) with per-build info, including
        'blame_list'.

  Returns:
    (need_new_try_job, last_pass, try_job_type, targeted_tests);
    targeted_tests is None for compile failures.
  """
  if 'compile' in failed_steps:
    try_job_type = TryJobType.COMPILE
    targeted_tests = None
    need_new_try_job, last_pass = _CheckFailureForTryJobKey(
        master_name, builder_name, build_number,
        failure_result_map, TryJobType.COMPILE, failed_steps['compile'])
  else:
    try_job_type = TryJobType.TEST
    targeted_tests, need_new_try_job, last_pass = (
        _CheckIfNeedNewTryJobForTestFailure(
            'step', master_name, builder_name, build_number, failure_result_map,
            failed_steps))

  # Short-circuit so the try-job entity is only revived/created and the
  # cross-platform uniqueness check only runs when a try job is wanted.
  # BUGFIX: the uniqueness check was previously invoked a second time below
  # whenever need_new_try_job was True, duplicating whatever side effects it
  # has (it appears to persist failure-group state — confirm against
  # _IsBuildFailureUniqueAcrossPlatforms). The single call in this chain
  # already satisfies the TODO about integrating it into the boolean.
  need_new_try_job = (
      need_new_try_job and ReviveOrCreateTryJobEntity(
          master_name, builder_name, build_number, force_try_job) and
      _IsBuildFailureUniqueAcrossPlatforms(
          master_name, builder_name, build_number, build_failure_type,
          builds[str(build_number)]['blame_list'], failed_steps, signals,
          heuristic_result))

  return need_new_try_job, last_pass, try_job_type, targeted_tests
349 272
def _CurrentBuildKeyInFailureResultMap(master_name, builder_name, build_number):
  """Returns True if any test entry maps to the current build's key.

  A test keyed to the current build means this build is the first failure,
  i.e. a try job should be run here.
  NOTE(review): assumes every value in failure_result_map is a dict of
  test-name -> key (test failures only); a plain string value (as written
  for compile failures) would break the inner iteration — confirm callers
  only reach this for test failures.
  """
  analysis = WfAnalysis.Get(master_name, builder_name, build_number)
  current_build_key = _CurrentBuildKey(master_name, builder_name, build_number)
  return any(
      test_key == current_build_key
      for step_keys in analysis.failure_result_map.values()
      for test_key in step_keys.values())
359 strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures(
360 master_name, builder_name)
361 for source_target in signals['compile'].get('failed_targets', []):
362 # For link failures, we pass the executable targets directly to try-job, and
363 # there is no 'source' for link failures.
364 # For compile failures, only pass the object files as the compile targets
365 # for the bots that we use strict regex to extract such information.
366 if not source_target.get('source') or strict_regex:
367 compile_targets.append(source_target.get('target'))
368
369 return compile_targets
370 282
371 283
372 def _GetSuspectsFromHeuristicResult(heuristic_result): 284 def _NeedANewTestTryJob(
373 suspected_revisions = set() 285 master_name, builder_name, build_number, failure_info, force_try_job):
374 if not heuristic_result: 286
375 return list(suspected_revisions) 287 if failure_info['failure_type'] != failure_type.TEST:
376 for failure in heuristic_result.get('failures', []): 288 return False
377 for cl in failure['suspected_cls']: 289
378 suspected_revisions.add(cl['revision']) 290 if (not force_try_job and
379 return list(suspected_revisions) 291 waterfall_config.ShouldSkipTestTryJobs(master_name, builder_name)):
292 logging.info('Test try jobs on %s, %s are not supported yet.',
293 master_name, builder_name)
294 return False
295
296 return _CurrentBuildKeyInFailureResultMap(
297 master_name, builder_name, build_number)
380 298
381 299
def NeedANewTryJob(
    master_name, builder_name, build_number, failure_info, signals,
    heuristic_result, force_try_job=False):
  """Top-level check for whether a try job should be created.

  Bails out when the waterfall builder has no configured trybot or when
  the build is more than 24 hours old (unless forced). Otherwise defers
  to the compile- or test-specific check, and finally requires that a
  try-job entity could be revived or created.

  Returns:
    True if a new try job is needed.
  """
  tryserver_mastername, tryserver_buildername = (
      waterfall_config.GetTrybotForWaterfallBuilder(master_name, builder_name))

  try_job_type = failure_info['failure_type']
  if not (tryserver_mastername and tryserver_buildername):
    logging.info('%s, %s is not supported yet.', master_name, builder_name)
    return False

  if not force_try_job:
    build = WfBuild.Get(master_name, builder_name, build_number)
    if _ShouldBailOutForOutdatedBuild(build):
      logging.error('Build time %s is more than 24 hours old. '
                    'Try job will not be triggered.' % build.start_time)
      return False

  if try_job_type == failure_type.COMPILE:
    need_new_try_job = _NeedANewCompileTryJob(
        master_name, builder_name, build_number, failure_info)
  else:
    need_new_try_job = _NeedANewTestTryJob(
        master_name, builder_name, build_number, failure_info, force_try_job)

  if need_new_try_job:
    # TODO(josiahk): Integrate this into need_new_try_job boolean
    _IsBuildFailureUniqueAcrossPlatforms(
        master_name, builder_name, build_number, try_job_type,
        failure_info['builds'][str(build_number)]['blame_list'],
        failure_info['failed_steps'], signals, heuristic_result)

  return need_new_try_job and ReviveOrCreateTryJobEntity(
      master_name, builder_name, build_number, force_try_job)
def GetFailedTargetsFromSignals(signals, master_name, builder_name):
  """Returns compile targets for a compile try job, from failure signals.

  Failed output nodes take precedence when present. Otherwise, link
  failures (entries without 'source') always contribute their target, and
  object files are included only on bots configured for strict-regex
  extraction.
  """
  if not signals or 'compile' not in signals:
    return []

  compile_signal = signals['compile']
  output_nodes = compile_signal.get('failed_output_nodes')
  if output_nodes:
    return output_nodes

  strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures(
      master_name, builder_name)
  compile_targets = []
  for source_target in compile_signal.get('failed_targets', []):
    # For link failures, we pass the executable targets directly to
    # try-job, and there is no 'source' for link failures. For compile
    # failures, only pass the object files as the compile targets for the
    # bots that we use strict regex to extract such information.
    if strict_regex or not source_target.get('source'):
      compile_targets.append(source_target.get('target'))
  return compile_targets
OLDNEW
« no previous file with comments | « appengine/findit/waterfall/try_job_type.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698