appengine/chromium_try_flakes/handlers/flake_issues.py - Issue 1873243002: Only report Patch failure on infra error and refactor code

Side by Side Diff: appengine/chromium_try_flakes/handlers/flake_issues.py

Issue 1873243002: Only report Patch failure on infra error and refactor code (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 """Task queue endpoints for creating and updating issues on issue tracker."""	5 """Task queue endpoints for creating and updating issues on issue tracker."""

6	6

7 import datetime	7 import datetime

8 import json	8 import json

9 import logging	9 import logging

10 import urllib2	10 import urllib2

(...skipping 51 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
62 'https://chromium-try-flakes.appspot.com/all_flake_occurrences?key=%s')	62 'https://chromium-try-flakes.appspot.com/all_flake_occurrences?key=%s')

63 TEST_RESULTS_URL_TEMPLATE = (	63 TEST_RESULTS_URL_TEMPLATE = (

64 'http://test-results.appspot.com/testfile?builder=%(buildername)s&name='	64 'http://test-results.appspot.com/testfile?builder=%(buildername)s&name='

65 'full_results.json&master=%(mastername)s&testtype=%(stepname)s&buildnumber='	65 'full_results.json&master=%(mastername)s&testtype=%(stepname)s&buildnumber='

66 '%(buildnumber)s')	66 '%(buildnumber)s')

67 VERY_STALE_FLAKES_MESSAGE = (	67 VERY_STALE_FLAKES_MESSAGE = (

68 'Reporting to stale-flakes-reports@google.com to investigate why this '	68 'Reporting to stale-flakes-reports@google.com to investigate why this '

69 'issue is not being processed despite being in an appropriate queue.')	69 'issue is not being processed despite being in an appropriate queue.')

70 STALE_FLAKES_ML = 'stale-flakes-reports@google.com'	70 STALE_FLAKES_ML = 'stale-flakes-reports@google.com'

71 MAX_GAP_FOR_FLAKINESS_PERIOD = datetime.timedelta(days=3)	71 MAX_GAP_FOR_FLAKINESS_PERIOD = datetime.timedelta(days=3)

72 KNOWN_TROOPER_FAILURES = [	72 KNOWN_TROOPER_FLAKE_NAMES = [

73 'analyze', 'bot_update', 'compile (with patch)', 'compile',	73 'analyze', 'bot_update', 'compile (with patch)', 'compile',

74 'device_status_check', 'gclient runhooks (with patch)', 'Patch failure',	74 'device_status_check', 'gclient runhooks (with patch)', 'Patch failure',

75 'process_dumps', 'provision_devices', 'update_scripts']	75 'process_dumps', 'provision_devices', 'update_scripts']

76	76

	77 # Flakes in these steps are always ignored:

	78 # - steps: always red when any other step is red (duplicates failure)

	79 # - presubmit: typically red due to missing OWNERs LGTM, not a flake

	80 # - recipe failure reason: always red when build fails (not a failure)

	81 # - test results: always red when another step is red (not a failure)

	82 # - Uncaught Exception: summary step referring to an exception in another

	83 # step (duplicates failure)

	84 # There are additional rules for non-trivial cases in the FlakyRun.post method.

	85 IGNORED_STEPS = ['steps', 'presubmit', 'recipe failure reason', 'test results',

	86 'Uncaught Exception']

	87

77	88

78 def is_trooper_flake(flake_name):	89 def is_trooper_flake(flake_name):

79 return flake_name in KNOWN_TROOPER_FAILURES	90 return flake_name in KNOWN_TROOPER_FLAKE_NAMES

80	91

81	92

82 def get_queue_details(flake_name):	93 def get_queue_details(flake_name):

83 if is_trooper_flake(flake_name):	94 if is_trooper_flake(flake_name):

84 return 'Trooper Bug Queue', 'Infra-Troopers'	95 return 'Trooper Bug Queue', 'Infra-Troopers'

85 else:	96 else:

86 return 'Sheriff Bug Queue', 'Sheriff-Chromium'	97 return 'Sheriff Bug Queue', 'Sheriff-Chromium'

87	98

88	99

89 class ProcessIssue(webapp2.RequestHandler):	100 class ProcessIssue(webapp2.RequestHandler):

(...skipping 374 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
464	475

465 failure_run = ndb.Key(urlsafe=self.request.get('failure_run_key')).get()	476 failure_run = ndb.Key(urlsafe=self.request.get('failure_run_key')).get()

466 success_run = ndb.Key(urlsafe=self.request.get('success_run_key')).get()	477 success_run = ndb.Key(urlsafe=self.request.get('success_run_key')).get()

467	478

468 flaky_run = FlakyRun(	479 flaky_run = FlakyRun(

469 failure_run=failure_run.key,	480 failure_run=failure_run.key,

470 failure_run_time_started=failure_run.time_started,	481 failure_run_time_started=failure_run.time_started,

471 failure_run_time_finished=failure_run.time_finished,	482 failure_run_time_finished=failure_run.time_finished,

472 success_run=success_run.key)	483 success_run=success_run.key)

473	484

474 success_time = success_run.time_finished

475 failure_time = failure_run.time_finished	485 failure_time = failure_run.time_finished

476 patchset_builder_runs = failure_run.key.parent().get()	486 patchset_builder_runs = failure_run.key.parent().get()

477	487

478 # TODO(sergiyb): The parsing logic below is very fragile and will break with	488 # TODO(sergiyb): The parsing logic below is very fragile and will break with

479 # any changes to step names and step text. We should move away from parsing	489 # any changes to step names and step text. We should move away from parsing

480 # buildbot to tools like flakiness dashboard (test-results.appspot.com),	490 # buildbot to tools like flakiness dashboard (test-results.appspot.com),

481 # which uses a standardized JSON format.	491 # which uses a standardized JSON format.

482 url = ('http://build.chromium.org/p/' + patchset_builder_runs.master +	492 url = ('http://build.chromium.org/p/' + patchset_builder_runs.master +

483 '/json/builders/' + patchset_builder_runs.builder +'/builds/' +	493 '/json/builders/' + patchset_builder_runs.builder +'/builds/' +

484 str(failure_run.buildnumber))	494 str(failure_run.buildnumber))

(...skipping 11 matching lines...) Expand all Loading...
496 passed_steps = []	506 passed_steps = []

497 for step in steps:	507 for step in steps:

498 result = step['results'][0]	508 result = step['results'][0]

499 if build_result.isResultSuccess(result):	509 if build_result.isResultSuccess(result):

500 passed_steps.append(step)	510 passed_steps.append(step)

501 continue	511 continue

502 if not build_result.isResultFailure(result):	512 if not build_result.isResultFailure(result):

503 continue	513 continue

504 step_name = step['name']	514 step_name = step['name']

505 step_text = ' '.join(step['text'])	515 step_text = ' '.join(step['text'])

506 # The following step failures are ignored:	516 if step_name in IGNORED_STEPS:

507 # - steps: always red when any other step is red (not a failure)	517 continue

	518

	519 # Custom (non-trivial) rules for ignoring flakes in certain steps:

508 # - [swarming] ...: summary step would also be red (do not double count)	520 # - [swarming] ...: summary step would also be red (do not double count)

509 # - presubmit: typically red due to missing OWNERs LGTM, not a flake	521 # - Patch failure: ignore non-infra failures as they are typically due to

510 # - recipe failure reason: always red when build fails (not a failure)	522 # changes in the code on HEAD

511 # - Patch failure: if success run was before failure run, it is	523 # - bot_update PATCH FAILED: Duplicates failure in 'Patch failure' step.

512 # likely a legitimate failure. For example it often happens that

513 # developers use CQ dry run and then wait for a review. Once getting

514 # LGTM they check CQ checkbox, but the patch does not cleanly apply

515 # anymore.

516 # - bot_update PATCH FAILED: Corresponds to 'Patch failure' step.

517 # - test results: always red when another step is red (not a failure)

518 # - Uncaught Exception: summary step referring to an exception in another

519 # step (e.g. bot_update)

520 # - ... (retry summary): this is an artificial step to fail the build due	524 # - ... (retry summary): this is an artificial step to fail the build due

521 # to another step that has failed earlier (do not double count).	525 # to another step that has failed earlier (do not double count).

522 if (step_name == 'steps' or step_name.startswith('[swarming]') or	526 print step_name, '1', build_result

523 step_name == 'presubmit' or step_name == 'recipe failure reason' or	527 if (step_name.startswith('[swarming]') or

524 (step_name == 'Patch failure' and success_time < failure_time) or	528 step_name.endswith(' (retry summary)') or

525 (step_name == 'bot_update' and 'PATCH FAILED' in step_text) or	529 (step_name == 'Patch failure' and result != build_result.EXCEPTION) or

526 step_name == 'test results' or step_name == 'Uncaught Exception' or	530 (step_name == 'bot_update' and 'PATCH FAILED' in step_text)):

527 step_name.endswith(' (retry summary)')):

528 continue	531 continue

	532 print step_name, '2'

	533

529 failed_steps.append(step)	534 failed_steps.append(step)

530	535

531 steps_to_ignore = []	536 steps_to_ignore = []

532 for step in failed_steps:	537 for step in failed_steps:

533 step_name = step['name']	538 step_name = step['name']

534 if ' (with patch)' in step_name:	539 if ' (with patch)' in step_name:

535 # Android instrumentation tests add a prefix before the step name, which	540 # Android instrumentation tests add a prefix before the step name, which

536 # doesn't appear on the summary step (without suffixes). To make sure we	541 # doesn't appear on the summary step (without suffixes). To make sure we

537 # correctly ignore duplicate failures, we remove the prefix.	542 # correctly ignore duplicate failures, we remove the prefix.

538 step_name = step_name.replace('Instrumentation test ', '')	543 step_name = step_name.replace('Instrumentation test ', '')

(...skipping 18 matching lines...) Expand all Loading...
557 failure_run.buildnumber, step)	562 failure_run.buildnumber, step)

558 for flake in flakes:	563 for flake in flakes:

559 flake_occurrence = FlakeOccurrence(name=step_name, failure=flake)	564 flake_occurrence = FlakeOccurrence(name=step_name, failure=flake)

560 flaky_run.flakes.append(flake_occurrence)	565 flaky_run.flakes.append(flake_occurrence)

561 flakes_to_update.append(flake)	566 flakes_to_update.append(flake)

562	567

563 flaky_run_key = flaky_run.put()	568 flaky_run_key = flaky_run.put()

564 for flake in flakes_to_update:	569 for flake in flakes_to_update:

565 self.add_failure_to_flake(flake, flaky_run_key, failure_time)	570 self.add_failure_to_flake(flake, flaky_run_key, failure_time)

566 self.flaky_runs.increment_by(1)	571 self.flaky_runs.increment_by(1)

OLD	NEW

« no previous file with comments | « no previous file | appengine/chromium_try_flakes/handlers/test/flake_issues_test.py » ('j') | no next file with comments »