Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Side by Side Diff: appengine/findit/waterfall/extract_signal_pipeline.py

Issue 1149743002: [Findit] Use step level analysis to exclude flaky test failures. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging 5 import logging
6 import json
6 7
7 from google.appengine.api.urlfetch import ResponseTooLargeError 8 from google.appengine.api.urlfetch import ResponseTooLargeError
8 9
9 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline 10 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline
10 11
11 from common.http_client_appengine import HttpClientAppengine as HttpClient 12 from common.http_client_appengine import HttpClientAppengine as HttpClient
12 from model.wf_step import WfStep 13 from model.wf_step import WfStep
13 from waterfall import buildbot 14 from waterfall import buildbot
14 from waterfall import extractors 15 from waterfall import extractors
15 from waterfall import lock_util 16 from waterfall import lock_util
(...skipping 22 matching lines...) Expand all
38 39
39 lines = log_data.split('\n') 40 lines = log_data.split('\n')
40 size = 0 41 size = 0
41 for line_index in reversed(range(len(lines))): 42 for line_index in reversed(range(len(lines))):
42 size += len(lines[line_index]) + 1 43 size += len(lines[line_index]) + 1
43 if size > ExtractSignalPipeline.LOG_DATA_BYTE_LIMIT: 44 if size > ExtractSignalPipeline.LOG_DATA_BYTE_LIMIT:
44 return '\n'.join(lines[line_index + 1:]) 45 return '\n'.join(lines[line_index + 1:])
45 else: 46 else:
46 return log_data # pragma: no cover - this won't be reached. 47 return log_data # pragma: no cover - this won't be reached.
47 48
49 @staticmethod
50 def _GetTestLevelFailures(step_log):
51 """Analyze the step log and extract reliable failures only.
52
53 Args:
54 step_log (file): A JSON file for failed step log.
55
56 Returns:
57 A dict like below:
58 {
59 'test_name1': [
60 {
61 "elapsed_time_ms": ..,
62 "losless_snippet": ..,
63 "output_snippet": ..,
64 "status": "FAILURE",
65 "output_snippet_base64":..
66 },
67 ..
68 ],
69 ..
70 }
71 """
72 failed_test_log = {}
73 step_failure_data = json.loads(step_log)
74
75 for iteration in step_failure_data['gtest_results']['per_iteration_data']:
76 for key in iteration.keys(): # Keys are test names.
77 is_reliable_failure = True
78
79 for test in iteration[key]:
80 # We will ignore the test if one of the attempts passes.
81 if test['status'] != 'FAILURE':
stgao 2015/05/21 00:29:56 This code doesn't match the comment. I think statu
82 is_reliable_failure = False
83 break
84
85 if is_reliable_failure: # All attempts failed, it's a reliable failure.
86 failed_test_log[key] = iteration[key]
87
88 return failed_test_log
48 89
# Arguments number differs from overridden method - pylint: disable=W0221
def run(self, failure_info):
  """Extracts failure signals for each failed step of the given build.

  For each failed step, the log is looked up in the datastore first to avoid
  re-downloading; otherwise it is pulled (archived gtest results from Google
  Storage when available, falling back to the step's stdio from the master),
  cached in a WfStep entity, and fed to the signal extractor.

  Args:
    failure_info (dict): Output of pipeline DetectFirstFailurePipeline.run().

  Returns:
    A dict like below:
    {
      'step_name1': waterfall.failure_signal.FailureSignal.ToJson(),
      ...
    }
  """
  signals = {}

  master_name = failure_info['master_name']
  builder_name = failure_info['builder_name']
  build_number = failure_info['build_number']
  for step_name in failure_info.get('failed_steps', []):
    step = WfStep.Get(master_name, builder_name, build_number, step_name)
    if step and step.log_data:
      # Reuse the log cached by a previous run of this pipeline.
      test_failure_log = step.log_data
    else:
      # Throttle concurrent downloads to avoid overloading the master.
      if not lock_util.WaitUntilDownloadAllowed(
          master_name):  # pragma: no cover
        raise pipeline.Retry('Failed to pull log of step %s of master %s'
                             % (step_name, master_name))

      # TODO: Add test level log info to signal.
      step_log = buildbot.GetGsStepLog(
          master_name, builder_name, build_number, step_name)
      if step_log:
        # Archived gtest results are available: keep only reliable
        # (non-flaky) test failures.
        test_failure_log = str(self._GetTestLevelFailures(step_log))
      else:
        try:
          test_failure_log = buildbot.GetStepStdio(
              master_name, builder_name, build_number, step_name,
              self.HTTP_CLIENT)
        except ResponseTooLargeError:  # pragma: no cover.
          logging.exception(
              'Log of step "%s" is too large for urlfetch.', step_name)
          # If the stdio log of a step is too large, we don't want to pull
          # it again in next run, because that might lead to DDoS to the
          # master.
          test_failure_log = 'Stdio log is too large for urlfetch.'

      if not test_failure_log:  # pragma: no cover
        raise pipeline.Retry('Failed to pull stdio of step %s of master %s'
                             % (step_name, master_name))

      # Save step log in datastore and avoid downloading again during retry.
      if not step:  # pragma: no cover
        step = WfStep.Create(
            master_name, builder_name, build_number, step_name)

      step.log_data = self._ExtractStorablePortionOfLog(test_failure_log)

      try:
        step.put()
      except Exception as e:  # pragma: no cover
        # Sometimes, the step log is too large to save in datastore.
        logging.exception(e)

    # TODO: save result in datastore?
    signals[step_name] = extractors.ExtractSignal(
        master_name, builder_name, step_name, None,
        str(test_failure_log)).ToJson()

  return signals
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698