Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1248)

Side by Side Diff: appengine/findit/waterfall/try_job_util.py

Issue 2187763004: [Findit] Refactor Findit pipeline. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Rebase and address comments. Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from collections import defaultdict
5 from datetime import datetime 6 from datetime import datetime
6 import logging 7 import logging
7 8
8 from google.appengine.ext import ndb 9 from google.appengine.ext import ndb
9 10
10 from common import appengine_util 11 from common import appengine_util
11 from common import constants 12 from common import constants
12 from common.waterfall import failure_type 13 from common.waterfall import failure_type
13 from model import analysis_status 14 from model import analysis_status
14 from model.wf_analysis import WfAnalysis 15 from model.wf_analysis import WfAnalysis
15 from model.wf_build import WfBuild 16 from model.wf_build import WfBuild
16 from model.wf_failure_group import WfFailureGroup 17 from model.wf_failure_group import WfFailureGroup
17 from model.wf_try_job import WfTryJob 18 from model.wf_try_job import WfTryJob
18 from waterfall import swarming_tasks_to_try_job_pipeline
19 from waterfall import waterfall_config 19 from waterfall import waterfall_config
20 from waterfall.try_job_type import TryJobType
21 20
22 21
23 def _CheckFailureForTryJobKey( 22 def _ShouldBailOutForOutdatedBuild(build):
24 master_name, builder_name, build_number, 23 return (datetime.utcnow() - build.start_time).days > 0
25 failure_result_map, failed_step_or_test, failure):
26 """Compares the current_failure and first_failure for each failed_step/test.
27 24
28 If equal, a new try_job needs to start; 25 def _CurrentBuildKey(master_name, builder_name, build_number):
29 If not, apply the key of the first_failure's try_job to this failure. 26 return '%s/%s/%d' % (master_name, builder_name, build_number)
30 """
31 # TODO(chanli): Need to compare failures across builders
32 # after the grouping of failures is implemented.
33 # TODO(chanli): Need to handle cases where first failure is actually
34 # more than 20 builds back. The implementation should not be here,
35 # but need to be taken care of.
36 if not failure.get('last_pass'):
37 # Bail out since cannot figure out the good_revision.
38 return False, None
39
40 if failure['current_failure'] == failure['first_failure']:
41 failure_result_map[failed_step_or_test] = '%s/%s/%s' % (
42 master_name, builder_name, build_number)
43 return True, failure['last_pass'] # A new try_job is needed.
44 else:
45 failure_result_map[failed_step_or_test] = '%s/%s/%s' % (
46 master_name, builder_name, failure['first_failure'])
47 return False, None
48
49
50 def _CheckIfNeedNewTryJobForTestFailure(
51 failure_level, master_name, builder_name, build_number,
52 failure_result_map, failures):
53 """Traverses failed steps or tests to check if a new try job is needed."""
54 need_new_try_job = False
55 last_pass = build_number
56 targeted_tests = {} if failure_level == 'step' else []
57
58 for failure_name, failure in failures.iteritems():
59 if 'tests' in failure:
60 failure_result_map[failure_name] = {}
61 failure_targeted_tests, failure_need_try_job, failure_last_pass = (
62 _CheckIfNeedNewTryJobForTestFailure(
63 'test', master_name, builder_name, build_number,
64 failure_result_map[failure_name], failure['tests']))
65 if failure_need_try_job:
66 targeted_tests[failure_name] = failure_targeted_tests
67 else:
68 failure_need_try_job, failure_last_pass = _CheckFailureForTryJobKey(
69 master_name, builder_name, build_number,
70 failure_result_map, failure_name, failure)
71 if failure_need_try_job:
72 if failure_level == 'step':
73 targeted_tests[failure_name] = []
74 else:
75 targeted_tests.append(failure.get('base_test_name', failure_name))
76
77 need_new_try_job = need_new_try_job or failure_need_try_job
78 last_pass = (failure_last_pass if failure_last_pass and
79 failure_last_pass < last_pass else last_pass)
80
81 return targeted_tests, need_new_try_job, last_pass
82 27
83 28
84 def _BlameListsIntersection(blame_list_1, blame_list_2): 29 def _BlameListsIntersection(blame_list_1, blame_list_2):
85 return set(blame_list_1) & set(blame_list_2) 30 return set(blame_list_1) & set(blame_list_2)
86 31
87 32
88 def _GetStepsAndTests(failed_steps): 33 def _GetStepsAndTests(failed_steps):
89 """Extracts failed steps and tests from failed_steps data structure. 34 """Extracts failed steps and tests from failed_steps data structure.
90 35
91 Args: 36 Args:
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
291 try_job.put() 236 try_job.put()
292 else: 237 else:
293 try_job_entity_revived_or_created = False 238 try_job_entity_revived_or_created = False
294 else: 239 else:
295 try_job = WfTryJob.Create(master_name, builder_name, build_number) 240 try_job = WfTryJob.Create(master_name, builder_name, build_number)
296 try_job.put() 241 try_job.put()
297 242
298 return try_job_entity_revived_or_created 243 return try_job_entity_revived_or_created
299 244
300 245
301 def _NeedANewTryJob( 246 def _NeedANewTryJobForCompile(
lijeffrey 2016/08/09 04:07:34 nit: how about _NeedANewCompileTryJob/_NeedANewTes
chanli 2016/08/09 17:08:43 Done.
302 master_name, builder_name, build_number, build_failure_type, failed_steps, 247 master_name, builder_name, build_number, failure_info):
303 failure_result_map, builds, signals, heuristic_result, force_try_job=False):
304 """Checks if a new try_job is needed."""
305 need_new_try_job = False
306 last_pass = build_number
307 248
308 if 'compile' in failed_steps: 249 compile_failure = failure_info['failed_steps'].get('compile', {})
309 try_job_type = TryJobType.COMPILE 250 if compile_failure:
310 targeted_tests = None 251 analysis = WfAnalysis.Get(master_name, builder_name, build_number)
311 need_new_try_job, last_pass = _CheckFailureForTryJobKey( 252 analysis.failure_result_map['compile'] = '%s/%s/%d' % (
312 master_name, builder_name, build_number, 253 master_name, builder_name, compile_failure['first_failure'])
313 failure_result_map, TryJobType.COMPILE, failed_steps['compile']) 254 analysis.put()
314 else: 255
315 try_job_type = TryJobType.TEST 256 if compile_failure['first_failure'] == compile_failure['current_failure']:
316 targeted_tests, need_new_try_job, last_pass = ( 257 return True
317 _CheckIfNeedNewTryJobForTestFailure( 258
318 'step', master_name, builder_name, build_number, failure_result_map, 259 return False
319 failed_steps)) 260
261 def _NeedANewTryJobForTest(
262 master_name, builder_name, build_number, failure_info, force_try_job):
263
264 if failure_info['failure_type'] != failure_type.TEST:
265 return False
266
267 if not force_try_job:
268 if waterfall_config.ShouldSkipTestTryJobs(master_name, builder_name):
lijeffrey 2016/08/09 04:07:34 nit: It looks like these if statements can be comb
chanli 2016/08/09 17:08:43 Done.
269 logging.info('Test try jobs on %s, %s are not supported yet.',
270 master_name, builder_name)
271 return False
272
273 analysis = WfAnalysis.Get(master_name, builder_name, build_number)
274 failure_result_map = analysis.failure_result_map
lijeffrey 2016/08/09 04:07:34 nit: break this part of the code into a separate f
chanli 2016/08/09 17:08:43 Done.
275 current_build_key = _CurrentBuildKey(master_name, builder_name, build_number)
276 for step_keys in failure_result_map.itervalues():
277 for test_key in step_keys.itervalues():
278 if test_key == current_build_key:
279 return True
280 return False
281
282 def NeedANewTryJob(
283 master_name, builder_name, build_number, failure_info, signals,
284 heuristic_result, force_try_job=False):
285
286 tryserver_mastername, tryserver_buildername = (
287 waterfall_config.GetTrybotForWaterfallBuilder(master_name, builder_name))
288
289 try_job_type = failure_info['failure_type']
290 if not tryserver_mastername or not tryserver_buildername:
291 logging.info('%s, %s is not supported yet.', master_name, builder_name)
292 return False
293
294 if not force_try_job:
295 build = WfBuild.Get(master_name, builder_name, build_number)
296
297 if _ShouldBailOutForOutdatedBuild(build):
298 logging.error('Build time %s is more than 24 hours old. '
299 'Try job will not be triggered.' % build.start_time)
300 return False
301
302 need_new_try_job = (_NeedANewTryJobForCompile(
303 master_name, builder_name, build_number, failure_info)
304 if try_job_type == failure_type.COMPILE else
305 _NeedANewTryJobForTest(
306 master_name, builder_name, build_number, failure_info, force_try_job))
307
308 if need_new_try_job:
309 # TODO(josiahk): Integrate this into need_new_try_job boolean
310 _IsBuildFailureUniqueAcrossPlatforms(
311 master_name, builder_name, build_number, try_job_type,
312 failure_info['builds'][str(build_number)]['blame_list'],
313 failure_info['failed_steps'], signals, heuristic_result)
314
315 need_new_try_job = need_new_try_job and ReviveOrCreateTryJobEntity(
316 master_name, builder_name, build_number, force_try_job)
317 return need_new_try_job
320 318
321 319
322 need_new_try_job = ( 320 def GetFailedTargetsFromSignals(signals, master_name, builder_name):
323 need_new_try_job and ReviveOrCreateTryJobEntity(
324 master_name, builder_name, build_number, force_try_job))
325
326 # TODO(josiahk): Integrate _IsBuildFailureUniqueAcrossPlatforms() into
327 # need_new_try_job boolean
328 if need_new_try_job:
329 _IsBuildFailureUniqueAcrossPlatforms(
330 master_name, builder_name, build_number, build_failure_type,
331 builds[str(build_number)]['blame_list'], failed_steps, signals,
332 heuristic_result)
333
334 return need_new_try_job, last_pass, try_job_type, targeted_tests
335
336
337 def _GetFailedTargetsFromSignals(signals, master_name, builder_name):
338 compile_targets = [] 321 compile_targets = []
339 322
340 if not signals or 'compile' not in signals: 323 if not signals or 'compile' not in signals:
341 return compile_targets 324 return compile_targets
342 325
343 if signals['compile'].get('failed_output_nodes'): 326 if signals['compile'].get('failed_output_nodes'):
344 return signals['compile'].get('failed_output_nodes') 327 return signals['compile'].get('failed_output_nodes')
345 328
346 strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures( 329 strict_regex = waterfall_config.EnableStrictRegexForCompileLinkFailures(
347 master_name, builder_name) 330 master_name, builder_name)
348 for source_target in signals['compile'].get('failed_targets', []): 331 for source_target in signals['compile'].get('failed_targets', []):
349 # For link failures, we pass the executable targets directly to try-job, and 332 # For link failures, we pass the executable targets directly to try-job, and
350 # there is no 'source' for link failures. 333 # there is no 'source' for link failures.
351 # For compile failures, only pass the object files as the compile targets 334 # For compile failures, only pass the object files as the compile targets
352 # for the bots that we use strict regex to extract such information. 335 # for the bots that we use strict regex to extract such information.
353 if not source_target.get('source') or strict_regex: 336 if not source_target.get('source') or strict_regex:
354 compile_targets.append(source_target.get('target')) 337 compile_targets.append(source_target.get('target'))
355 338
356 return compile_targets 339 return compile_targets
357
358
359 def _GetSuspectsFromHeuristicResult(heuristic_result):
360 suspected_revisions = set()
361 if not heuristic_result:
362 return list(suspected_revisions)
363 for failure in heuristic_result.get('failures', []):
364 for cl in failure['suspected_cls']:
365 suspected_revisions.add(cl['revision'])
366 return list(suspected_revisions)
367
368
369 def _ShouldBailOutForOutdatedBuild(build):
370 return (datetime.utcnow() - build.start_time).days > 0
371
372
373 def ScheduleTryJobIfNeeded(failure_info, signals, heuristic_result,
374 force_try_job=False):
375 master_name = failure_info['master_name']
376 builder_name = failure_info['builder_name']
377 build_number = failure_info['build_number']
378 failed_steps = failure_info.get('failed_steps', [])
379 builds = failure_info.get('builds', {})
380
381 tryserver_mastername, tryserver_buildername = (
382 waterfall_config.GetTrybotForWaterfallBuilder(master_name, builder_name))
383
384 if not tryserver_mastername or not tryserver_buildername:
385 logging.info('%s, %s is not supported yet.', master_name, builder_name)
386 return {}
387
388 if not force_try_job:
389 build = WfBuild.Get(master_name, builder_name, build_number)
390
391 if _ShouldBailOutForOutdatedBuild(build):
392 logging.error('Build time %s is more than 24 hours old. '
393 'Try job will not be triggered.' % build.start_time)
394 return {}
395
396 if (failure_info['failure_type'] == failure_type.TEST and
397 waterfall_config.ShouldSkipTestTryJobs(master_name, builder_name)):
398 logging.info('Test try jobs on %s, %s are not supported yet.',
399 master_name, builder_name)
400 return {}
401
402 failure_result_map = {}
403 need_new_try_job, last_pass, try_job_type, targeted_tests = (
404 _NeedANewTryJob(master_name, builder_name, build_number,
405 failure_info['failure_type'], failed_steps,
406 failure_result_map, builds, signals, heuristic_result,
407 force_try_job))
408
409 if need_new_try_job:
410 compile_targets = (_GetFailedTargetsFromSignals(
411 signals, master_name, builder_name)
412 if try_job_type == TryJobType.COMPILE else None)
413 suspected_revisions = _GetSuspectsFromHeuristicResult(heuristic_result)
414
415 pipeline = (
416 swarming_tasks_to_try_job_pipeline.SwarmingTasksToTryJobPipeline(
417 master_name, builder_name, build_number,
418 builds[str(last_pass)]['chromium_revision'],
419 builds[str(build_number)]['chromium_revision'],
420 builds[str(build_number)]['blame_list'],
421 try_job_type, compile_targets, targeted_tests, suspected_revisions,
422 force_try_job))
423
424 pipeline.target = appengine_util.GetTargetNameForModule(
425 constants.WATERFALL_BACKEND)
426 pipeline.start(queue_name=constants.WATERFALL_TRY_JOB_QUEUE)
427
428 if try_job_type == TryJobType.TEST: # pragma: no cover
429 logging_str = (
430 'Trying to schedule swarming task(s) for build %s, %s, %s: %s'
431 ' because of %s failure. A try job may be triggered if some reliable'
432 ' failure is detected in task(s).') % (
433 master_name, builder_name, build_number,
434 pipeline.pipeline_status_path, try_job_type)
435 else: # pragma: no cover
436 logging_str = (
437 'Try job was scheduled for build %s, %s, %s: %s because of %s '
438 'failure.') % (
439 master_name, builder_name, build_number,
440 pipeline.pipeline_status_path, try_job_type)
441 logging.info(logging_str)
442
443 return failure_result_map
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698