Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(620)

Side by Side Diff: appengine/findit/waterfall/detect_first_failure_pipeline.py

Issue 820113002: [Findit] Add a sub-pipeline to detect first-known failure. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Address comments Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import collections
6 from datetime import datetime
7 import logging
8 import random
9 import time
10
11 from google.appengine.api import memcache
12
13 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline
14
15 from common.http_client_appengine import HttpClientAppengine as HttpClient
16 from model.build import Build
17 from model.build_analysis import BuildAnalysis
18 from waterfall import buildbot
19 from waterfall.base_pipeline import BasePipeline
20 from waterfall import lock_util
21
22
# Upper bound on how many builds preceding the analyzed build are inspected
# when looking back for the first known failure of each failed step.
_MAX_BUILDS_TO_CHECK = 20
24
25
class DetectFirstFailurePipeline(BasePipeline):
  """A pipeline to detect the first known failure of each failed step.

  TODO(stgao): do test-level detection for gtest.

  Input:
    master_name
    builder_name
    build_number

  Output:
    A json like below:
    {
      "master_name": "chromium.gpu",
      "builder_name": "GPU Linux Builder",
      "build_number": 25410,
      "failed": true,
      "failed_steps": {
        "compile": {
          "last_pass": 25408,
          "current_failure": 25410,
          "first_failure": 25409
        }
      },
      "builds": {
        "25408": {
          "chromium_revision": "474ab324d17d2cd198d3fb067cabc10a775a8df7",
          "blame_list": [
            "474ab324d17d2cd198d3fb067cabc10a775a8df7"
          ]
        },
        "25409": {
          "chromium_revision": "33c6f11de20c5b229e102c51237d96b2d2f1be04",
          "blame_list": [
            "9d5ebc5eb14fc4b3823f6cfd341da023f71f49dd",
            ...
          ]
        },
        "25410": {
          "chromium_revision": "4bffcd598dd89e0016208ce9312a1f477ff105d1",
          "blame_list": [
            "b98e0b320d39a323c81cc0542e6250349183a4df",
            ...
          ]
        }
      }
    }

    When the build has no failure, "failed" is false and the "failed_steps"
    and "builds" entries are omitted.
  """

  # Client handed to buildbot.GetBuildData when build data is downloaded.
  HTTP_CLIENT = HttpClient()

  def _BuildDataNeedUpdating(self, build):
    """Returns True if the data of the given build should be (re)downloaded.

    That is the case when no data has been stored yet, or when the build is
    not marked completed and was last crawled at least 300 seconds ago.
    """
    return (not build.data or (not build.completed and
        (datetime.utcnow() - build.last_crawled_time).total_seconds() >= 300))

  def _DownloadBuildData(self, master_name, builder_name, build_number):
    """Downloads build data and returns a Build instance.

    The Build record is looked up first and created if missing. Its data is
    only downloaded again when _BuildDataNeedUpdating says so.

    Raises:
      pipeline.Retry: if lock_util.WaitUntilDownloadAllowed does not allow a
          download from the given master.
    """
    build = Build.GetBuild(master_name, builder_name, build_number)
    if not build:
      build = Build.CreateBuild(master_name, builder_name, build_number)

    # Cache the data to avoid pulling from master again.
    if self._BuildDataNeedUpdating(build):
      if not lock_util.WaitUntilDownloadAllowed(
          master_name):  # pragma: no cover
        raise pipeline.Retry('Too many download from %s' % master_name)

      build.data = buildbot.GetBuildData(
          build.master_name, build.builder_name, build.build_number,
          self.HTTP_CLIENT)
      build.last_crawled_time = datetime.utcnow()
      build.put()

    return build

  def _ExtractBuildInfo(self, master_name, builder_name, build_number):
    """Returns a BuildInfo instance for the specified build.

    Returns None if no build data is available. As side effects, a Build
    record not yet marked completed is refreshed from the extracted info, and
    an existing BuildAnalysis without a build start time gets one.
    """
    build = self._DownloadBuildData(master_name, builder_name, build_number)
    if not build.data:  # pragma: no cover
      return None

    build_info = buildbot.ExtractBuildInfo(
        master_name, builder_name, build_number, build.data)

    if not build.completed:
      build.start_time = build_info.build_start_time
      build.completed = build_info.completed
      build.result = build_info.result
      build.put()

    analysis = BuildAnalysis.GetBuildAnalysis(
        master_name, builder_name, build_number)
    if analysis and not analysis.build_start_time:
      analysis.build_start_time = build_info.build_start_time
      analysis.put()

    return build_info

  def _SaveBlamelistAndChromiumRevisionIntoDict(self, build_info, builds):
    """Records blame list and chromium revision of a build into a dict.

    Args:
      build_info (BuildInfo): a BuildInfo instance which contains blame list and
          chromium revision.
      builds (dict): to which the blame list and chromium revision is saved. It
          will be updated in place, keyed by build number, and looks like:
          {
            555 : {
              'chromium_revision': 'a_git_hash',
              'blame_list': ['git_hash1', 'git_hash2'],
            },
          }
    """
    builds[build_info.build_number] = {
        'chromium_revision': build_info.chromium_revision,
        'blame_list': build_info.blame_list
    }

  def _CreateADictOfFailedSteps(self, build_info):
    """Returns a dict with build number for failed steps.

    Args:
      build_info (BuildInfo): info of the failed build; its failed_steps and
          build_number are read.

    Returns:
      A dict like this, in which both entries start out as the build number of
      the given build:
      {
        'step_name': {
          'current_failure': 555,
          'first_failure': 555,
        },
      }
    """
    return {
        step_name: {
            'current_failure': build_info.build_number,
            'first_failure': build_info.build_number,
        }
        for step_name in build_info.failed_steps
    }

  def _CheckForFirstKnownFailure(self, master_name, builder_name, build_number,
                                 failed_steps, builds):
    """Checks for first known failures of the given failed steps.

    Walks backwards from the build right before the given one. The walk stops
    at the first of: build 0 was checked, _MAX_BUILDS_TO_CHECK builds were
    checked, build info could not be extracted, a fully successful build was
    found, or every failed step has its 'last_pass' recorded.

    Args:
      master_name (str): master of the failed build.
      builder_name (str): builder of the failed build.
      build_number (int): builder number of the current failed build.
      failed_steps (dict): the failed steps of the current failed build. It will
          be updated with build numbers for 'first_failure' and 'last_pass' of
          each failed step.
      builds (dict): a dict to save blame list and chromium revision.
    """
    # Never look back past build 0: a negative number does not identify a
    # build preceding the current one.
    earliest_build_number = max(0, build_number - _MAX_BUILDS_TO_CHECK)

    # Look back for first known failures.
    for previous_build_number in range(
        build_number - 1, earliest_build_number - 1,
        -1):  # limit not hit - pragma: no cover
      build_info = self._ExtractBuildInfo(
          master_name, builder_name, previous_build_number)

      if not build_info:  # pragma: no cover
        # Failed to extract the build information, bail out.
        return

      self._SaveBlamelistAndChromiumRevisionIntoDict(build_info, builds)

      if build_info.result == buildbot.SUCCESS:
        for step_info in failed_steps.values():
          # Keep a 'last_pass' already found in a more recent build.
          step_info.setdefault('last_pass', build_info.build_number)

        # All steps passed, so stop looking back.
        return

      # If a step is not run due to some bot exception, we are not sure
      # whether the step could pass or not. So we only check failed/passed
      # steps here.
      for step_name, step_info in failed_steps.items():
        if 'last_pass' in step_info:
          # The failure range of this step is already settled by a more recent
          # build; an older build must neither move 'first_failure' back past
          # that pass nor replace 'last_pass' with an older build number.
          continue

        if step_name in build_info.failed_steps:
          step_info['first_failure'] = build_info.build_number
        elif step_name in build_info.passed_steps:
          step_info['last_pass'] = build_info.build_number

      if all('last_pass' in step_info for step_info in failed_steps.values()):
        # All failed steps passed in this build cycle.
        return

  # Arguments number differs from overridden method - pylint: disable=W0221
  def run(self, master_name, builder_name, build_number):
    """Builds the failure info of the given build.

    Returns:
      A dict as described in the class docstring. If the build succeeded or has
      no failed step, 'failed' is False and no 'builds' or 'failed_steps' entry
      is included.

    Raises:
      pipeline.Retry: if the build info could not be extracted.
    """
    build_info = self._ExtractBuildInfo(master_name, builder_name, build_number)

    if not build_info:  # pragma: no cover
      raise pipeline.Retry('Failed to extract build info.')

    failure_info = {
        'failed': True,
        'master_name': master_name,
        'builder_name': builder_name,
        'build_number': build_number
    }

    if (build_info.result == buildbot.SUCCESS or
        not build_info.failed_steps):
      failure_info['failed'] = False
      return failure_info

    builds = dict()
    self._SaveBlamelistAndChromiumRevisionIntoDict(build_info, builds)

    failed_steps = self._CreateADictOfFailedSteps(build_info)

    self._CheckForFirstKnownFailure(
        master_name, builder_name, build_number, failed_steps, builds)

    failure_info['builds'] = builds
    failure_info['failed_steps'] = failed_steps
    return failure_info
OLDNEW
« no previous file with comments | « appengine/findit/waterfall/build_failure_analysis_pipelines.py ('k') | appengine/findit/waterfall/lock_util.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698