Chromium Code Reviews| Index: appengine/findit/waterfall/extract_signal_pipeline.py |
| diff --git a/appengine/findit/waterfall/extract_signal_pipeline.py b/appengine/findit/waterfall/extract_signal_pipeline.py |
| index 37eb8f2f1e67b57ae80bb2276b822f4ffb2849eb..43b28c7ba88e16e8004c5b4dac82be361b2bf85e 100644 |
| --- a/appengine/findit/waterfall/extract_signal_pipeline.py |
| +++ b/appengine/findit/waterfall/extract_signal_pipeline.py |
| @@ -2,7 +2,9 @@ |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| +import cStringIO |
| import logging |
| +import json |
| from google.appengine.api.urlfetch import ResponseTooLargeError |
| @@ -45,6 +47,36 @@ class ExtractSignalPipeline(BasePipeline): |
| else: |
| return log_data # pragma: no cover - this won't be reached. |
| + @staticmethod |
| + def _GetReliableTestFailureLog(gtest_result): |
| + """Analyze the archived step log and extract reliable failures only. |
|
stgao
2015/05/22 22:49:43
To be accurate, it is 'archived gtest json results'.
chanli
2015/05/22 23:26:32
Done.
|
| + |
| + Args: |
| + gtest_result (str): A JSON file for failed step log. |
| + |
| + Returns: |
| + A string contains the names of reliable test failures and output_snippets. |
|
stgao
2015/05/22 22:49:43
'output_snippets' is a detail about the gtest output.
chanli
2015/05/22 23:26:32
Done.
|
| + """ |
| + step_failure_data = json.loads(gtest_result) |
| + sio = cStringIO.StringIO() |
| + for iteration in step_failure_data['gtest_results']['per_iteration_data']: |
| + for test_name in iteration.keys(): |
| + is_reliable_failure = True |
| + |
| + for test in iteration[test_name]: |
|
stgao
2015/05/22 22:49:42
test->test_run?
chanli
2015/05/22 23:26:32
Done.
|
| + # We will ignore the test if some of the attempts were success. |
| + if test['status'] == 'SUCCESS': |
| + is_reliable_failure = False |
| + break |
| + |
| + if is_reliable_failure: # all attempts failed, it's a reliable failure |
|
stgao
2015/05/22 22:49:43
Still not correct, please check the Google Python style guide.
chanli
2015/05/22 23:26:32
Done.
|
| + for test in iteration[test_name]: |
| + sio.write("'%s': %s\n" % (test_name, test['output_snippet'])) |
| + |
| + failed_test_log = sio.getvalue() |
| + sio.close() |
| + |
| + return failed_test_log |
| # Arguments number differs from overridden method - pylint: disable=W0221 |
| def run(self, failure_info): |
| @@ -67,44 +99,49 @@ class ExtractSignalPipeline(BasePipeline): |
| for step_name in failure_info.get('failed_steps', []): |
| step = WfStep.Get(master_name, builder_name, build_number, step_name) |
| if step and step.log_data: |
| - stdio_log = step.log_data |
| + failure_log = step.log_data |
| else: |
| - if not lock_util.WaitUntilDownloadAllowed( |
| - master_name): # pragma: no cover |
| - raise pipeline.Retry('Failed to pull stdio of step %s of master %s' |
| - % (step_name, master_name)) |
| - |
| # TODO: do test-level analysis instead of step-level. |
| - try: |
| - stdio_log = buildbot.GetStepStdio( |
| - master_name, builder_name, build_number, step_name, |
| - self.HTTP_CLIENT) |
| - except ResponseTooLargeError: # pragma: no cover. |
| - logging.exception( |
| - 'Log of step "%s" is too large for urlfetch.', step_name) |
| - # If the stdio log of a step is too large, we don't want to pull it |
| - # again in next run, because that might lead to DDoS to the master. |
| - # TODO: Use archived stdio logs in Google Storage instead. |
| - stdio_log = 'Stdio log is too large for urlfetch.' |
| - |
| - if not stdio_log: # pragma: no cover |
| - raise pipeline.Retry('Failed to pull stdio of step %s of master %s' |
| - % (step_name, master_name)) |
| - |
| - # Save stdio in datastore and avoid downloading again during retry. |
| + gtest_result = buildbot.GetGtestResultLog( |
| + master_name, builder_name, build_number, step_name) |
| + if gtest_result: |
|
stgao
2015/05/22 22:49:43
https://code.google.com/p/chromium/codesearch#chro
chanli
2015/05/22 23:26:32
Done.
|
| + failure_log = self._GetReliableTestFailureLog(gtest_result) |
| + else: |
| + if not lock_util.WaitUntilDownloadAllowed( |
| + master_name): # pragma: no cover |
| + raise pipeline.Retry('Failed to pull log of step %s of master %s' |
| + % (step_name, master_name)) |
| + try: |
| + failure_log = buildbot.GetStepStdio( |
| + master_name, builder_name, build_number, step_name, |
| + self.HTTP_CLIENT) |
| + except ResponseTooLargeError: # pragma: no cover. |
| + logging.exception( |
| + 'Log of step "%s" is too large for urlfetch.', step_name) |
| + # If the stdio log of a step is too large, we don't want to pull it |
| + # again in next run, because that might lead to DDoS to the master. |
| + failure_log = 'Stdio log is too large for urlfetch.' |
| + |
| + if not failure_log: # pragma: no cover |
| + raise pipeline.Retry('Failed to pull stdio of step %s of master %s' |
| + % (step_name, master_name)) |
| + |
| + # Save step log in datastore and avoid downloading again during retry. |
| if not step: # pragma: no cover |
| step = WfStep.Create( |
| master_name, builder_name, build_number, step_name) |
| - step.log_data = self._ExtractStorablePortionOfLog(stdio_log) |
| + step.log_data = self._ExtractStorablePortionOfLog(failure_log) |
| + |
| try: |
| step.put() |
| except Exception as e: # pragma: no cover |
| - # Sometimes, the stdio log is too large to save in datastore. |
| + # Sometimes, the step log is too large to save in datastore. |
| logging.exception(e) |
| # TODO: save result in datastore? |
| signals[step_name] = extractors.ExtractSignal( |
| - master_name, builder_name, step_name, None, stdio_log).ToDict() |
| + master_name, builder_name, step_name, None, |
| + failure_log).ToDict() |
| return signals |