Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(358)

Side by Side Diff: appengine/findit/waterfall/extract_signal_pipeline.py

Issue 1149743002: [Findit] Use step level analysis to exclude flaky test failures. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Fix name style nit. Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import cStringIO
5 import logging 6 import logging
7 import json
6 8
7 from google.appengine.api.urlfetch import ResponseTooLargeError 9 from google.appengine.api.urlfetch import ResponseTooLargeError
8 10
9 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline 11 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline
10 12
11 from common.http_client_appengine import HttpClientAppengine as HttpClient 13 from common.http_client_appengine import HttpClientAppengine as HttpClient
12 from model.wf_step import WfStep 14 from model.wf_step import WfStep
13 from waterfall import buildbot 15 from waterfall import buildbot
14 from waterfall import extractors 16 from waterfall import extractors
15 from waterfall import lock_util 17 from waterfall import lock_util
(...skipping 22 matching lines...) Expand all
38 40
39 lines = log_data.split('\n') 41 lines = log_data.split('\n')
40 size = 0 42 size = 0
41 for line_index in reversed(range(len(lines))): 43 for line_index in reversed(range(len(lines))):
42 size += len(lines[line_index]) + 1 44 size += len(lines[line_index]) + 1
43 if size > ExtractSignalPipeline.LOG_DATA_BYTE_LIMIT: 45 if size > ExtractSignalPipeline.LOG_DATA_BYTE_LIMIT:
44 return '\n'.join(lines[line_index + 1:]) 46 return '\n'.join(lines[line_index + 1:])
45 else: 47 else:
46 return log_data # pragma: no cover - this won't be reached. 48 return log_data # pragma: no cover - this won't be reached.
47 49
@staticmethod
def _GetReliableTestFailureLog(gtest_result):
    """Extract the log of reliably failing tests from archived gtest JSON.

    Within one iteration, a test counts as a reliable failure when none of
    its runs has the status 'SUCCESS'. A test with at least one successful
    run is treated as flaky and left out of the result.

    Args:
      gtest_result (str): JSON text of the gtest results of a failed step.

    Returns:
      One of the following strings:
        * 'invalid' if the 'gtest_results' entry of the JSON is 'invalid'.
        * 'flaky' if no test failed reliably, i.e. nothing was collected.
        * Otherwise one "'<test name>': <output snippet>" line per run of
          every reliably failing test.
    """
    step_failure_data = json.loads(gtest_result)

    gtest_results = step_failure_data['gtest_results']
    if gtest_results == 'invalid':  # pragma: no cover
        return 'invalid'

    # Collect the lines in a list and join them once. json.loads() yields
    # unicode strings on Python 2, and cStringIO cannot store unicode text
    # that is not plain ASCII, so a non-ASCII test name or output snippet
    # would raise UnicodeEncodeError there.
    log_lines = []
    for iteration in gtest_results['per_iteration_data']:
        for test_name, test_runs in iteration.items():
            # Ignore the test if any of its attempts succeeded.
            if any(test_run['status'] == 'SUCCESS' for test_run in test_runs):
                continue

            # All attempts failed: keep the output of every attempt.
            for test_run in test_runs:
                log_lines.append(
                    "'%s': %s\n" % (test_name, test_run['output_snippet']))

    failed_test_log = ''.join(log_lines)
    if not failed_test_log:
        return 'flaky'

    if not isinstance(failed_test_log, str):
        # Python 2 only: the joined text is a unicode object. Encode it so
        # callers keep receiving a byte string, as they did from cStringIO.
        failed_test_log = failed_test_log.encode('utf-8')

    return failed_test_log
48 92
# Arguments number differs from overridden method - pylint: disable=W0221
def run(self, failure_info):
    """Extract a failure signal from the log of every failed step.

    For each failed step the log is taken from the first available source:
      1. the log already saved on the WfStep entity,
      2. the reliable test failures from the step's gtest result,
      3. the stdio log of the step.
    A freshly obtained log is saved on the WfStep entity so that a retry of
    this pipeline does not have to download it again.

    Args:
      failure_info (dict): Output of pipeline DetectFirstFailurePipeline.run().

    Returns:
      A dict like below:
      {
        'step_name1': waterfall.failure_signal.FailureSignal.ToDict(),
        ...
      }

    Raises:
      pipeline.Retry: If downloading from the master is not allowed yet, or
          if no log could be obtained for a step.
    """
    signals = {}

    master_name = failure_info['master_name']
    builder_name = failure_info['builder_name']
    build_number = failure_info['build_number']
    for step_name in failure_info.get('failed_steps', []):
        # Reset for every step so that a log from a previous iteration can
        # never be mistaken for the log of the current step.
        failure_log = None

        step = WfStep.Get(master_name, builder_name, build_number, step_name)
        if step and step.log_data:
            failure_log = step.log_data
        else:
            # TODO: do test-level analysis instead of step-level.
            gtest_result = buildbot.GetGtestResultLog(
                master_name, builder_name, build_number, step_name)
            if gtest_result:
                failure_log = self._GetReliableTestFailureLog(gtest_result)

            # Fall back to the stdio log when there is no usable gtest
            # result. Any falsy result is treated as "not available", not
            # only None; otherwise failure_log would stay unassigned here.
            # NOTE(review): presumably GetGtestResultLog returns None when
            # nothing is archived - confirm against buildbot.py.
            if not gtest_result or failure_log == 'invalid':
                if not lock_util.WaitUntilDownloadAllowed(
                        master_name):  # pragma: no cover
                    raise pipeline.Retry(
                        'Failed to pull log of step %s of master %s'
                        % (step_name, master_name))
                try:
                    failure_log = buildbot.GetStepStdio(
                        master_name, builder_name, build_number, step_name,
                        self.HTTP_CLIENT)
                except ResponseTooLargeError:  # pragma: no cover.
                    logging.exception(
                        'Log of step "%s" is too large for urlfetch.',
                        step_name)
                    # If the stdio log of a step is too large, we don't want
                    # to pull it again in next run, because that might lead
                    # to DDoS to the master.
                    # TODO: Use archived stdio logs in Google Storage instead.
                    failure_log = 'Stdio log is too large for urlfetch.'

            if not failure_log:  # pragma: no cover
                raise pipeline.Retry(
                    'Failed to pull stdio of step %s of master %s'
                    % (step_name, master_name))

            # Save step log in datastore and avoid downloading again during
            # retry.
            if not step:  # pragma: no cover
                step = WfStep.Create(
                    master_name, builder_name, build_number, step_name)

            step.log_data = self._ExtractStorablePortionOfLog(failure_log)

            try:
                step.put()
            except Exception as e:  # pragma: no cover
                # Sometimes, the step log is too large to save in datastore.
                # Saving is best-effort: the signal is still extracted below.
                logging.exception(e)

        # TODO: save result in datastore?
        signals[step_name] = extractors.ExtractSignal(
            master_name, builder_name, step_name, None, failure_log).ToDict()

    return signals
OLDNEW
« no previous file with comments | « appengine/findit/waterfall/buildbot.py ('k') | appengine/findit/waterfall/test/buildbot_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698