Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(20)

Side by Side Diff: appengine/findit/waterfall/extract_signal_pipeline.py

Issue 1149743002: [Findit] Use step level analysis to exclude flaky test failures. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Use cStringIO to pull the reliable test failures. Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import cStringIO
5 import logging 6 import logging
7 import json
6 8
7 from google.appengine.api.urlfetch import ResponseTooLargeError 9 from google.appengine.api.urlfetch import ResponseTooLargeError
8 10
9 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline 11 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline
10 12
11 from common.http_client_appengine import HttpClientAppengine as HttpClient 13 from common.http_client_appengine import HttpClientAppengine as HttpClient
12 from model.wf_step import WfStep 14 from model.wf_step import WfStep
13 from waterfall import buildbot 15 from waterfall import buildbot
14 from waterfall import extractors 16 from waterfall import extractors
15 from waterfall import lock_util 17 from waterfall import lock_util
(...skipping 22 matching lines...) Expand all
38 40
39 lines = log_data.split('\n') 41 lines = log_data.split('\n')
40 size = 0 42 size = 0
41 for line_index in reversed(range(len(lines))): 43 for line_index in reversed(range(len(lines))):
42 size += len(lines[line_index]) + 1 44 size += len(lines[line_index]) + 1
43 if size > ExtractSignalPipeline.LOG_DATA_BYTE_LIMIT: 45 if size > ExtractSignalPipeline.LOG_DATA_BYTE_LIMIT:
44 return '\n'.join(lines[line_index + 1:]) 46 return '\n'.join(lines[line_index + 1:])
45 else: 47 else:
46 return log_data # pragma: no cover - this won't be reached. 48 return log_data # pragma: no cover - this won't be reached.
47 49
50 @staticmethod
51 def _GetTestLevelFailures(gtest_result):
stgao 2015/05/22 01:30:37 Maybe rename it to _GetReliableTestFailureLog? or
52 """Analyze the step log and extract reliable failures only.
stgao 2015/05/22 01:30:37 not step log.
chanli 2015/05/22 18:43:27 I think it actually is the 'archived' step log. is
53
54 Args:
55 gtest_result (file): A JSON file for failed step log.
stgao 2015/05/22 01:30:37 This is not a file; it is a string in JSON format.
56
57 Returns:
58 A string contains the names of reliable test failures and output_snippets.
59 """
60
stgao 2015/05/22 01:30:37 no empty line here.
61 step_failure_data = json.loads(gtest_result)
62 sio = cStringIO.StringIO()
63 for iteration in step_failure_data['gtest_results']['per_iteration_data']:
64 for key in iteration.keys(): # Keys are test names.
stgao 2015/05/22 01:30:37 key -> test_name. That makes the code clearer.
65 is_reliable_failure = True
66
67 for test in iteration[key]:
68 # We will ignore the test if some of the attempts were success.
69 if test['status'] == 'SUCCESS':
70 is_reliable_failure = False
71 break
72
73 if is_reliable_failure: # All attempts failed, it's a reliable failure.
stgao 2015/05/22 01:30:37 comment style.
74 for test in iteration[key]:
75 sio.write("'%s': %s\n" % (key, test['output_snippet']))
76
77 failed_test_log = sio.getvalue()
78 sio.close()
79
80 return failed_test_log
48 81
49 # Arguments number differs from overridden method - pylint: disable=W0221 82 # Arguments number differs from overridden method - pylint: disable=W0221
50 def run(self, failure_info): 83 def run(self, failure_info):
51 """ 84 """
52 Args: 85 Args:
53 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.run(). 86 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.run().
54 87
55 Returns: 88 Returns:
56 A dict like below: 89 A dict like below:
57 { 90 {
58 'step_name1': waterfall.failure_signal.FailureSignal.ToDict(), 91 'step_name1': waterfall.failure_signal.FailureSignal.ToDict(),
59 ... 92 ...
60 } 93 }
61 """ 94 """
62 signals = {} 95 signals = {}
63 96
64 master_name = failure_info['master_name'] 97 master_name = failure_info['master_name']
65 builder_name = failure_info['builder_name'] 98 builder_name = failure_info['builder_name']
66 build_number = failure_info['build_number'] 99 build_number = failure_info['build_number']
67 for step_name in failure_info.get('failed_steps', []): 100 for step_name in failure_info.get('failed_steps', []):
68 step = WfStep.Get(master_name, builder_name, build_number, step_name) 101 step = WfStep.Get(master_name, builder_name, build_number, step_name)
69 if step and step.log_data: 102 if step and step.log_data:
70 stdio_log = step.log_data 103 test_failure_log = step.log_data
stgao 2015/05/22 01:30:37 failure_log? Because it could be compile step too,
71 else: 104 else:
72 if not lock_util.WaitUntilDownloadAllowed( 105 # TODO: add test level log info to signal.
73 master_name): # pragma: no cover 106 gtest_result = buildbot.GetGtestResultLog(
74 raise pipeline.Retry('Failed to pull stdio of step %s of master %s' 107 master_name, builder_name, build_number, step_name)
75 % (step_name, master_name)) 108 if gtest_result:
109 test_failure_log = self._GetTestLevelFailures(gtest_result)
110 else:
111 if not lock_util.WaitUntilDownloadAllowed(
112 master_name): # pragma: no cover
113 raise pipeline.Retry('Failed to pull log of step %s of master %s'
114 % (step_name, master_name))
115 try:
116 test_failure_log = buildbot.GetStepStdio(
117 master_name, builder_name, build_number, step_name,
118 self.HTTP_CLIENT)
119 except ResponseTooLargeError: # pragma: no cover.
120 logging.exception(
121 'Log of step "%s" is too large for urlfetch.', step_name)
122 # If the stdio log of a step is too large, we don't want to pull it
123 # again in next run, because that might lead to DDoS to the master.
stgao 2015/05/22 01:30:37 Please keep the original TODO here. See my last co
124 test_failure_log = 'Stdio log is too large for urlfetch.'
76 125
77 # TODO: do test-level analysis instead of step-level. 126 if not test_failure_log: # pragma: no cover
78 try: 127 raise pipeline.Retry('Failed to pull stdio of step %s of master %s'
Sharu Jiang 2015/05/22 17:54:30 I think if we fail to get result from self._GetTes
chanli 2015/05/22 18:43:27 We will go to stdio log if we failed retrieving da
Sharu Jiang 2015/05/22 20:09:36 I mean even though we have gtest_result, we may no
79 stdio_log = buildbot.GetStepStdio( 128 % (step_name, master_name))
80 master_name, builder_name, build_number, step_name,
81 self.HTTP_CLIENT)
82 except ResponseTooLargeError: # pragma: no cover.
83 logging.exception(
84 'Log of step "%s" is too large for urlfetch.', step_name)
85 # If the stdio log of a step is too large, we don't want to pull it
86 # again in next run, because that might lead to DDoS to the master.
87 # TODO: Use archived stdio logs in Google Storage instead.
88 stdio_log = 'Stdio log is too large for urlfetch.'
89 129
90 if not stdio_log: # pragma: no cover 130 # Save step log in datastore and avoid downloading again during retry.
91 raise pipeline.Retry('Failed to pull stdio of step %s of master %s'
92 % (step_name, master_name))
93
94 # Save stdio in datastore and avoid downloading again during retry.
95 if not step: # pragma: no cover 131 if not step: # pragma: no cover
96 step = WfStep.Create( 132 step = WfStep.Create(
97 master_name, builder_name, build_number, step_name) 133 master_name, builder_name, build_number, step_name)
98 134
99 step.log_data = self._ExtractStorablePortionOfLog(stdio_log) 135 step.log_data = self._ExtractStorablePortionOfLog(test_failure_log)
136
100 try: 137 try:
101 step.put() 138 step.put()
102 except Exception as e: # pragma: no cover 139 except Exception as e: # pragma: no cover
103 # Sometimes, the stdio log is too large to save in datastore. 140 # Sometimes, the step log is too large to save in datastore.
104 logging.exception(e) 141 logging.exception(e)
105 142
106 # TODO: save result in datastore? 143 # TODO: save result in datastore?
107 signals[step_name] = extractors.ExtractSignal( 144 signals[step_name] = extractors.ExtractSignal(
108 master_name, builder_name, step_name, None, stdio_log).ToDict() 145 master_name, builder_name, step_name, None,
146 str(test_failure_log)).ToDict()
stgao 2015/05/22 01:30:37 Why do we need str here? Is it for the same reason you told me?
chanli 2015/05/22 18:43:27 No... I forgot this one. Fixed.
109 147
110 return signals 148 return signals
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698