appengine/findit/waterfall/extract_signal_pipeline.py - Issue 1149743002: [Findit] Use step level analysis to exclude flaky test failures.

Side by Side Diff: appengine/findit/waterfall/extract_signal_pipeline.py

Issue 1149743002: [Findit] Use step level analysis to exclude flaky test failures. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Fix name style nit. Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

	5 import cStringIO

5 import logging	6 import logging

	7 import json

6	8

7 from google.appengine.api.urlfetch import ResponseTooLargeError	9 from google.appengine.api.urlfetch import ResponseTooLargeError

8	10

9 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline	11 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline

10	12

11 from common.http_client_appengine import HttpClientAppengine as HttpClient	13 from common.http_client_appengine import HttpClientAppengine as HttpClient

12 from model.wf_step import WfStep	14 from model.wf_step import WfStep

13 from waterfall import buildbot	15 from waterfall import buildbot

14 from waterfall import extractors	16 from waterfall import extractors

15 from waterfall import lock_util	17 from waterfall import lock_util

(...skipping 22 matching lines...) Expand all Loading...
38	40

39 lines = log_data.split('\n')	41 lines = log_data.split('\n')

40 size = 0	42 size = 0

41 for line_index in reversed(range(len(lines))):	43 for line_index in reversed(range(len(lines))):

42 size += len(lines[line_index]) + 1	44 size += len(lines[line_index]) + 1

43 if size > ExtractSignalPipeline.LOG_DATA_BYTE_LIMIT:	45 if size > ExtractSignalPipeline.LOG_DATA_BYTE_LIMIT:

44 return '\n'.join(lines[line_index + 1:])	46 return '\n'.join(lines[line_index + 1:])

45 else:	47 else:

46 return log_data # pragma: no cover - this won't be reached.	48 return log_data # pragma: no cover - this won't be reached.

47	49

	50 @staticmethod

	51 def _GetReliableTestFailureLog(gtest_result):

	52 """Analyze the archived gtest json results and extract reliable failures.

	53

	54 Args:

	55 gtest_result (str): A JSON string of the failed step's gtest results.

	56

	57 Returns:

	58 A string containing the names of reliable test failures and related

	59 log content.

	60 If gtest_results in gtest json result is 'invalid', we will return

	61 'invalid' as the result.

	62 If we find out that all the test failures in this step are flaky, we will

	63 return 'flaky' as the result.

	64 """

	65 step_failure_data = json.loads(gtest_result)

	66

	67 if step_failure_data['gtest_results'] == 'invalid': # pragma: no cover

	68 return 'invalid'

	69

	70 sio = cStringIO.StringIO()

	71 for iteration in step_failure_data['gtest_results']['per_iteration_data']:

	72 for test_name in iteration.keys():

	73 is_reliable_failure = True

	74

	75 for test_run in iteration[test_name]:

	76 # We will ignore the test if some of the attempts succeeded.

	77 if test_run['status'] == 'SUCCESS':

	78 is_reliable_failure = False

	79 break

	80

	81 if is_reliable_failure: # all attempts failed

	82 for test_run in iteration[test_name]:

	83 sio.write("'%s': %s\n" % (test_name, test_run['output_snippet']))

	84

	85 failed_test_log = sio.getvalue()

	86 sio.close()

	87

	88 if not failed_test_log:

	89 return 'flaky'

	90

	91 return failed_test_log

48	92

49 # Arguments number differs from overridden method - pylint: disable=W0221	93 # Arguments number differs from overridden method - pylint: disable=W0221

50 def run(self, failure_info):	94 def run(self, failure_info):

51 """	95 """

52 Args:	96 Args:

53 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.run().	97 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.run().

54	98

55 Returns:	99 Returns:

56 A dict like below:	100 A dict like below:

57 {	101 {

58 'step_name1': waterfall.failure_signal.FailureSignal.ToDict(),	102 'step_name1': waterfall.failure_signal.FailureSignal.ToDict(),

59 ...	103 ...

60 }	104 }

61 """	105 """

62 signals = {}	106 signals = {}

63	107

64 master_name = failure_info['master_name']	108 master_name = failure_info['master_name']

65 builder_name = failure_info['builder_name']	109 builder_name = failure_info['builder_name']

66 build_number = failure_info['build_number']	110 build_number = failure_info['build_number']

67 for step_name in failure_info.get('failed_steps', []):	111 for step_name in failure_info.get('failed_steps', []):

68 step = WfStep.Get(master_name, builder_name, build_number, step_name)	112 step = WfStep.Get(master_name, builder_name, build_number, step_name)

69 if step and step.log_data:	113 if step and step.log_data:

70 stdio_log = step.log_data	114 failure_log = step.log_data

71 else:	115 else:

72 if not lock_util.WaitUntilDownloadAllowed(	116 # TODO: do test-level analysis instead of step-level.

73 master_name): # pragma: no cover	117 gtest_result = buildbot.GetGtestResultLog(

74 raise pipeline.Retry('Failed to pull stdio of step %s of master %s'	118 master_name, builder_name, build_number, step_name)

75 % (step_name, master_name))	119 if gtest_result:

	120 failure_log = self._GetReliableTestFailureLog(gtest_result)

	121 if gtest_result is None or failure_log == 'invalid':

	122 if not lock_util.WaitUntilDownloadAllowed(

	123 master_name): # pragma: no cover

	124 raise pipeline.Retry('Failed to pull log of step %s of master %s'

	125 % (step_name, master_name))

	126 try:

	127 failure_log = buildbot.GetStepStdio(

	128 master_name, builder_name, build_number, step_name,

	129 self.HTTP_CLIENT)

	130 except ResponseTooLargeError: # pragma: no cover.

	131 logging.exception(

	132 'Log of step "%s" is too large for urlfetch.', step_name)

	133 # If the stdio log of a step is too large, we don't want to pull it

	134 # again in next run, because that might lead to DDoS to the master.

	135 # TODO: Use archived stdio logs in Google Storage instead.

	136 failure_log = 'Stdio log is too large for urlfetch.'

76	137

77 # TODO: do test-level analysis instead of step-level.	138 if not failure_log: # pragma: no cover

78 try:	139 raise pipeline.Retry('Failed to pull stdio of step %s of master %s'

79 stdio_log = buildbot.GetStepStdio(	140 % (step_name, master_name))

80 master_name, builder_name, build_number, step_name,

81 self.HTTP_CLIENT)

82 except ResponseTooLargeError: # pragma: no cover.

83 logging.exception(

84 'Log of step "%s" is too large for urlfetch.', step_name)

85 # If the stdio log of a step is too large, we don't want to pull it

86 # again in next run, because that might lead to DDoS to the master.

87 # TODO: Use archived stdio logs in Google Storage instead.

88 stdio_log = 'Stdio log is too large for urlfetch.'

89	141

90 if not stdio_log: # pragma: no cover	142 # Save step log in datastore and avoid downloading again during retry.

91 raise pipeline.Retry('Failed to pull stdio of step %s of master %s'

92 % (step_name, master_name))

93

94 # Save stdio in datastore and avoid downloading again during retry.

95 if not step: # pragma: no cover	143 if not step: # pragma: no cover

96 step = WfStep.Create(	144 step = WfStep.Create(

97 master_name, builder_name, build_number, step_name)	145 master_name, builder_name, build_number, step_name)

98	146

99 step.log_data = self._ExtractStorablePortionOfLog(stdio_log)	147 step.log_data = self._ExtractStorablePortionOfLog(failure_log)

	148

100 try:	149 try:

101 step.put()	150 step.put()

102 except Exception as e: # pragma: no cover	151 except Exception as e: # pragma: no cover

103 # Sometimes, the stdio log is too large to save in datastore.	152 # Sometimes, the step log is too large to save in datastore.

104 logging.exception(e)	153 logging.exception(e)

105	154

106 # TODO: save result in datastore?	155 # TODO: save result in datastore?

107 signals[step_name] = extractors.ExtractSignal(	156 signals[step_name] = extractors.ExtractSignal(

108 master_name, builder_name, step_name, None, stdio_log).ToDict()	157 master_name, builder_name, step_name, None, failure_log).ToDict()

109	158

110 return signals	159 return signals

OLD	NEW

« no previous file with comments | « appengine/findit/waterfall/buildbot.py ('k') | appengine/findit/waterfall/test/buildbot_test.py » ('j') | no next file with comments »