Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(36)

Side by Side Diff: appengine/chromium_try_flakes/handlers/flake_issues.py

Issue 1660043002: Move flaky run processing into a taskqueue (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | appengine/chromium_try_flakes/handlers/index.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """Task queue endpoints for creating and updating issues on issue tracker.""" 5 """Task queue endpoints for creating and updating issues on issue tracker."""
6 6
7 import datetime 7 import datetime
8 import json
8 import logging 9 import logging
9 import webapp2 10 import webapp2
10 11
11 from google.appengine.api import app_identity 12 from google.appengine.api import app_identity
12 from google.appengine.api import taskqueue 13 from google.appengine.api import taskqueue
14 from google.appengine.api import urlfetch
13 from google.appengine.ext import ndb 15 from google.appengine.ext import ndb
14 16
17 from infra_libs import ts_mon
15 from issue_tracker import issue_tracker_api, issue 18 from issue_tracker import issue_tracker_api, issue
16 from model.flake import FlakeUpdateSingleton, FlakeUpdate, Flake 19 from model.flake import (
17 from infra_libs import ts_mon 20 Flake, FlakeOccurrence, FlakeUpdate, FlakeUpdateSingleton, FlakyRun)
21 from status import build_result, util
18 22
19 23
20 MAX_UPDATED_ISSUES_PER_DAY = 50 24 MAX_UPDATED_ISSUES_PER_DAY = 50
21 MAX_TIME_DIFFERENCE_SECONDS = 12 * 60 * 60 25 MAX_TIME_DIFFERENCE_SECONDS = 12 * 60 * 60
22 MIN_REQUIRED_FLAKY_RUNS = 5 26 MIN_REQUIRED_FLAKY_RUNS = 5
23 DAYS_TILL_STALE = 3 27 DAYS_TILL_STALE = 3
24 USE_MONORAIL = False 28 USE_MONORAIL = False
25 DAYS_TO_REOPEN_ISSUE = 3 29 DAYS_TO_REOPEN_ISSUE = 3
26 FLAKY_RUNS_TEMPLATE = ( 30 FLAKY_RUNS_TEMPLATE = (
27 'Detected %(new_flakes_count)d new flakes for test/step "%(name)s". To see ' 31 'Detected %(new_flakes_count)d new flakes for test/step "%(name)s". To see '
(...skipping 299 matching lines...) Expand 10 before | Expand all | Expand 10 after
327 api.update(flake_issue, comment=BACK_TO_SHERIFF_MESSAGE) 331 api.update(flake_issue, comment=BACK_TO_SHERIFF_MESSAGE)
328 return 332 return
329 333
330 # Report to stale-flakes-reports@ if the issue has no updates for 7 days. 334 # Report to stale-flakes-reports@ if the issue has no updates for 7 days.
331 week_ago = now - datetime.timedelta(days=7) 335 week_ago = now - datetime.timedelta(days=7)
332 if (last_third_party_update < week_ago and 336 if (last_third_party_update < week_ago and
333 STALE_FLAKES_ML not in flake_issue.cc): 337 STALE_FLAKES_ML not in flake_issue.cc):
334 flake_issue.cc.append(STALE_FLAKES_ML) 338 flake_issue.cc.append(STALE_FLAKES_ML)
335 logging.info('Reporting issue %s to %s', flake_issue.id, STALE_FLAKES_ML) 339 logging.info('Reporting issue %s to %s', flake_issue.id, STALE_FLAKES_ML)
336 api.update(flake_issue, comment=VERY_STALE_FLAKES_MESSAGE) 340 api.update(flake_issue, comment=VERY_STALE_FLAKES_MESSAGE)
341
342
343 class CreateFlakyRun(webapp2.RequestHandler):
Sergiy Byelozyorov 2016/02/02 21:24:35 Most of the methods here are copied with minor mod
344 # We execute below method in an independent transaction since otherwise we
345 # would exceed the maximum number of entities allowed within a single
346 # transaction.
@staticmethod
# pylint: disable=E1120
@ndb.transactional(xg=True, propagation=ndb.TransactionOptions.INDEPENDENT)
def add_failure_to_flake(name, flaky_run_key, failure_time):
  """Records one more occurrence on the Flake entity identified by |name|.

  Runs in its own cross-group transaction (INDEPENDENT propagation), so it
  does not join a transaction that is already active in the caller.

  Args:
    name: Id of the Flake entity; also stored as its name when the entity has
        to be created.
    flaky_run_key: Key appended to the flake's list of occurrences.
    failure_time: Time of the failure, handed to
        util.add_occurrence_time_to_flake together with the flake.
  """
  flake_entity = Flake.get_by_id(name)
  if not flake_entity:
    # First time this flake is seen: create and store it before updating.
    flake_entity = Flake(
        name=name, id=name, last_time_seen=datetime.datetime.min)
    flake_entity.put()

  flake_entity.occurrences.append(flaky_run_key)
  util.add_occurrence_time_to_flake(flake_entity, failure_time)
  flake_entity.put()
359
360 # see examples:
361 # compile http://build.chromium.org/p/tryserver.chromium.mac/json/builders/
362 # mac_chromium_compile_dbg/builds/11167?as_text=1
363 # gtest http://build.chromium.org/p/tryserver.chromium.win/json/builders/
364 # win_chromium_x64_rel_swarming/builds/4357?as_text=1
365 # TODO(jam): get specific problem with compile so we can use that as name
366 # TODO(jam): It's unfortunate to have to parse this html. Can we get it from
367 # another place instead of the tryserver's json?
@staticmethod
def get_flakes(step):
  """Extracts the names of the individual failures reported by a build step.

  The step's text fragments are joined with single spaces and then inspected
  for the known output formats, in this order:
    1. Text containing 'TEST RESULTS WERE INVALID': the whole joined text is
       the single failure.
    2. gtest-style text: entries separated by '<br/>' after the first
       'failures:<br/>' marker, stopping at an 'ignored:' entry.
    3. gpu-style text: comma-separated entries after the first '&tests='
       marker, up to the closing '">' when there is one.
    4. Anything else: the whole joined text is the single failure.

  Args:
    step: Step dictionary; only step['text'] (an iterable of strings) is read.

  Returns:
    A list of failure name strings. It may be empty when a marker is present
    but no non-empty entries follow it.
  """
  combined = ' '.join(step['text'])

  # If test results were invalid, report whole step as flaky.
  if 'TEST RESULTS WERE INVALID' in combined:
    return [combined]

  # gtest
  _, gtest_marker_found, gtest_tail = combined.partition('failures:<br/>')
  if gtest_marker_found:
    results = []
    for failure in gtest_tail.split('<br/>'):
      if not failure:
        continue
      if failure == 'ignored:':
        break  # layout test output
      results.append(failure)
    return results

  # gpu
  _, gpu_marker_found, gpu_tail = combined.partition('&tests=')
  if gpu_marker_found:
    end_index = gpu_tail.find('">')
    # Only cut at the terminator when it exists. Slicing with the -1 returned
    # by find() would drop the last character of the final test name.
    if end_index != -1:
      gpu_tail = gpu_tail[:end_index]
    return [failure for failure in gpu_tail.split(',') if failure]

  return [combined]
407
@ndb.transactional(xg=True)  # pylint: disable=E1120
def post(self):
  """Creates a FlakyRun from a failed run and a successful run.

  Request parameters:
    failure_run_key: urlsafe ndb key of the failed run.
    success_run_key: urlsafe ndb key of the successful run.

  The build JSON of the failed run is fetched from build.chromium.org, the
  failed steps that should count as flakes are selected, and one
  FlakeOccurrence per failure reported by get_flakes is attached to the new
  FlakyRun. Each failure is then recorded through add_failure_to_flake.

  Responds with status 400 when a parameter is missing or when either key
  does not resolve to a stored entity. Returns without storing anything when
  the build JSON cannot be decoded.
  """
  failure_run_key = self.request.get('failure_run_key')
  success_run_key = self.request.get('success_run_key')
  if not failure_run_key or not success_run_key:
    self.response.set_status(400, 'Invalid request parameters')
    return

  failure_run = ndb.Key(urlsafe=failure_run_key).get()
  success_run = ndb.Key(urlsafe=success_run_key).get()
  if failure_run is None or success_run is None:
    # Key.get() yields None for a missing entity; report it instead of
    # failing below with an AttributeError.
    logging.error('Run entity not found: failure_run_key=%s success_run_key=%s',
                  failure_run_key, success_run_key)
    self.response.set_status(400, 'Invalid request parameters')
    return

  flaky_run = FlakyRun(
      failure_run=failure_run.key,
      failure_run_time_started=failure_run.time_started,
      failure_run_time_finished=failure_run.time_finished,
      success_run=success_run.key)

  success_time = success_run.time_finished
  failure_time = failure_run.time_finished
  patchset_builder_runs = failure_run.key.parent().get()
  url = ('http://build.chromium.org/p/' + patchset_builder_runs.master +
         '/json/builders/' + patchset_builder_runs.builder + '/builds/' +
         str(failure_run.buildnumber))
  urlfetch.set_default_fetch_deadline(60)
  logging.info('get_flaky_run_reason %s', url)
  content = urlfetch.fetch(url).content
  try:
    json_result = json.loads(content)
  except ValueError:
    logging.exception('couldnt decode json for %s', url)
    return
  steps = json_result['steps']

  # Step names whose failure never counts as a flake:
  # - steps: always red when any other step is red (not a failure)
  # - presubmit: typically red due to missing OWNERs LGTM, not a flake
  # - recipe failure reason: always red when build fails (not a failure)
  # - test results: always red when another step is red (not a failure)
  # - Uncaught Exception: summary step referring to an exception in another
  #   step (e.g. bot_update)
  always_ignored_names = ('steps', 'presubmit', 'recipe failure reason',
                          'test results', 'Uncaught Exception')

  failed_steps = []
  for step in steps:
    result = step['results'][0]
    if not build_result.isResultFailure(result):
      # Successful steps and steps that are neither success nor failure are
      # not flake candidates. The success check is kept so that both result
      # predicates are still evaluated in the original order.
      continue
    step_name = step['name']
    step_text = ' '.join(step['text'])
    # Further failures that are ignored:
    # - [swarming] ...: summary step would also be red (do not double count)
    # - Patch failure: if success run was before failure run, it is
    #   likely a legitimate failure. For example it often happens that
    #   developers use CQ dry run and then wait for a review. Once getting
    #   LGTM they check CQ checkbox, but the patch does not cleanly apply
    #   anymore.
    # - bot_update PATCH FAILED: Corresponds to 'Patch failure' step.
    if (step_name in always_ignored_names or
        step_name.startswith('[swarming]') or
        (step_name == 'Patch failure' and success_time < failure_time) or
        (step_name == 'bot_update' and 'PATCH FAILED' in step_text)):
      continue
    failed_steps.append(step)

  # Name lookups below use a set so each step is checked in constant time
  # instead of rescanning the whole list of failed steps.
  failed_step_names = set(step['name'] for step in failed_steps)
  steps_to_ignore = set()
  for step in failed_steps:
    step_name = step['name']
    # NOTE(review): only ' (with patch)' steps take part in de-duplication;
    # confirm this matches the intended nesting.
    if ' (with patch)' not in step_name:
      continue

    # Android instrumentation tests add a prefix before the step name, which
    # doesn't appear on the summary step (without suffixes). To make sure we
    # correctly ignore duplicate failures, we remove the prefix.
    step_name = step_name.replace('Instrumentation test ', '')
    step_name_with_no_modifier = step_name.replace(' (with patch)', '')

    # A step which fails, and then is retried and also fails, will have
    # its name without the ' (with patch)' again. Don't double count.
    if step_name_with_no_modifier in failed_step_names:
      steps_to_ignore.add(step_name_with_no_modifier)

    # If a step fails without the patch, then the tree is busted. Don't
    # count as flake.
    step_name_without_patch = (
        '%s (without patch)' % step_name_with_no_modifier)
    if step_name_without_patch in failed_step_names:
      steps_to_ignore.add(step['name'])
      steps_to_ignore.add(step_name_without_patch)

  flakes_to_update = []
  for step in failed_steps:
    step_name = step['name']
    if step_name in steps_to_ignore:
      continue
    flakes = self.get_flakes(step)
    if not flakes:
      continue
    for flake in flakes:
      flaky_run.flakes.append(FlakeOccurrence(name=step_name, failure=flake))
      flakes_to_update.append(flake)

  # Store the run first so its key can be referenced from every flake.
  flaky_run_key = flaky_run.put()
  for flake in flakes_to_update:
    self.add_failure_to_flake(flake, flaky_run_key, failure_time)
OLDNEW
« no previous file with comments | « no previous file | appengine/chromium_try_flakes/handlers/index.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698