OLD | NEW |
---|---|
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import collections | |
6 from datetime import datetime | |
5 import logging | 7 import logging |
8 import random | |
9 import time | |
6 | 10 |
11 from google.appengine.api import memcache | |
7 from google.appengine.ext import ndb | 12 from google.appengine.ext import ndb |
8 | 13 |
9 from pipeline_utils import pipelines | 14 from pipeline_utils import pipelines |
15 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline | |
10 | 16 |
17 from common.http_client_appengine import HttpClientAppengine as HttpClient | |
11 from model.build import Build | 18 from model.build import Build |
19 from model.build_analysis import BuildAnalysis | |
12 from model.build_analysis_status import BuildAnalysisStatus | 20 from model.build_analysis_status import BuildAnalysisStatus |
21 from waterfall import buildbot | |
13 | 22 |
14 | 23 |
# TODO(stgao): remove BasePipeline after http://crrev.com/810193002 is landed.
class BasePipeline(pipelines.AppenginePipeline):  # pragma: no cover
  """Base class for waterfall pipelines.

  Stubs out the test-only hooks of the pipeline framework; subclasses only
  need to implement run().
  """

  def run_test(self, *args, **kwargs):
    """No-op hook, only invoked when running under tests."""
    pass

  def finalized_test(self, *args, **kwargs):
    """No-op hook, only invoked when running under tests."""
    pass

  def callback(self, **kwargs):
    """No-op callback hook."""
    pass

  def run(self, *args, **kwargs):
    """Subclasses must implement the actual pipeline work here."""
    raise NotImplementedError()
28 | 37 |
29 | 38 |
# Memcache key pattern for the per-master download rate-limit lock.
_MEMCACHE_MASTER_DOWNLOAD_LOCK = 'master-download-lock-%s'
# Lifetime of a rate-limit lock entry in memcache.
_MEMCACHE_MASTER_DOWNLOAD_EXPIRATION_SECONDS = 60 * 60
# Minimum spacing between two consecutive downloads from the same master.
_DOWNLOAD_INTERVAL_SECONDS = 5
# Upper bound on how many previous builds are examined when searching for
# the first failure of a step.
_MAX_BUILDS_TO_CHECK_FOR_FIRST_FAILURE = 20
43 | |
44 | |
def _WaitUntilDownloadAllowed(
    master_name, timeout_seconds=90):  # pragma: no cover
  """Waits until next download from the specified master is allowed.

  A shared memcache entry acts as a rate-limit lock so that downloads from
  the same master are spaced at least _DOWNLOAD_INTERVAL_SECONDS apart,
  even across concurrent requests.

  Returns:
    True if download is allowed to proceed.
    False if download is not allowed until the given timeout occurs.
  """
  client = memcache.Client()
  lock_key = _MEMCACHE_MASTER_DOWNLOAD_LOCK % master_name

  deadline = time.time() + timeout_seconds
  while True:
    last_download = client.gets(lock_key)
    if (not last_download or
        time.time() - last_download['time'] >= _DOWNLOAD_INTERVAL_SECONDS):
      record = {
          'time': time.time()
      }
      if last_download:
        # Compare-and-set so that two concurrent requests can not both
        # acquire the download slot.
        acquired = client.cas(
            lock_key, record,
            time=_MEMCACHE_MASTER_DOWNLOAD_EXPIRATION_SECONDS)
      else:
        acquired = client.add(
            lock_key, record,
            time=_MEMCACHE_MASTER_DOWNLOAD_EXPIRATION_SECONDS)

      if acquired:
        logging.info('Download from %s is allowed. Waited %s seconds.',
                     master_name, (time.time() + timeout_seconds - deadline))
        return True

    if time.time() > deadline:
      logging.info('Download from %s is not allowed. Waited %s seconds.',
                   master_name, timeout_seconds)
      return False

    logging.info('Waiting to download from %s', master_name)
    # Randomize the sleep a little to avoid lock-step retries.
    time.sleep(_DOWNLOAD_INTERVAL_SECONDS + random.random())
82 | |
83 | |
class DetectFirstFailurePipeline(BasePipeline):
  """A pipeline to detect the first known failure of each failed step.

  For each failed step of the given build, previous builds on the same
  builder are examined (at most _MAX_BUILDS_TO_CHECK_FOR_FIRST_FAILURE of
  them) to find in which build the step first failed and in which build it
  last passed.
  """

  # Shared client used to download build data from the buildbot master.
  HTTP_CLIENT = HttpClient()

  def _BuildDataNeedUpdating(self, build):
    """Returns True if the cached data of the build should be refreshed.

    A refresh is needed if no data was cached yet, or if the build was still
    running at the last crawl and that crawl is at least 5 minutes old.
    """
    # Bug fix: total_seconds is a method. Without the call parentheses the
    # comparison was against the bound method object, not elapsed seconds.
    return (not build.data or (not build.completed and
        (datetime.utcnow() - build.last_crawled_time).total_seconds() >=
        60 * 5))

  def _DownloadBuildData(self, master_name, builder_name, build_number):
    """Downloads build data and returns a Build instance."""
    build = Build.GetBuild(master_name, builder_name, build_number)
    if not build:  # pragma: no cover
      build = Build.CreateBuild(master_name, builder_name, build_number)

    # Cache the data to avoid pulling from master again.
    if self._BuildDataNeedUpdating(build):  # pragma: no cover
      # Throttle downloads so the master is not overloaded; retry this
      # pipeline task later if a download slot could not be acquired.
      if not _WaitUntilDownloadAllowed(master_name):  # pragma: no cover
        raise pipeline.Retry('Too many download from %s' % master_name)

      build.data = buildbot.GetBuildData(
          build.master_name, build.builder_name, build.build_number,
          self.HTTP_CLIENT)
      build.last_crawled_time = datetime.utcnow()
      build.put()

    return build

  def _ExtractBuildInfo(self, master_name, builder_name, build_number):
    """Returns a BuildInfo instance for the specified build, or None."""
    build = self._DownloadBuildData(master_name, builder_name, build_number)
    if not build.data:  # pragma: no cover
      return None

    build_info = buildbot.ExtractBuildInfo(
        master_name, builder_name, build_number, build.data)

    if not build.completed:  # pragma: no cover
      # Refresh the cached entity with the freshly-parsed build status.
      build.start_time = build_info.build_start_time
      build.completed = build_info.completed
      build.result = build_info.result
      build.put()

    # Record the build start time on the analysis the first time it is seen.
    analysis = BuildAnalysis.GetBuildAnalysis(
        master_name, builder_name, build_number)
    if analysis and not analysis.build_start_time:
      analysis.build_start_time = build_info.build_start_time
      analysis.put()

    return build_info

  def _SaveBlamelistAndChromiumRevisionIntoDict(self, build_info, builds):
    """Saves the blame list and chromium revision of a build into a dict.

    Args:
      build_info (BuildInfo): a BuildInfo instance which contains blame list
        and chromium revision.
      builds (dict): to which the blame list and chromium revision is saved.
        It will be updated and looks like:
        {
          555 : {
            'chromium_revision': 'a_git_hash',
            'blame_list': ['git_hash1', 'git_hash2'],
          },
        }
    """
    builds[build_info.build_number] = {
        'chromium_revision': build_info.chromium_revision,
        'blame_list': build_info.blame_list
    }

  def _CreateADictOfFailedSteps(self, build_info):
    """Returns a dict with build number for failed steps.

    Args:
      build_info (BuildInfo): the build whose failed steps are recorded.

    Returns:
      A dict like this:
      {
        'step_name': {
          'current_failure': 555,
          'first_failure': 553,
        },
      }
    """
    failed_steps = dict()
    for step_name in build_info.failed_steps:
      # Until older builds are examined, the current build is both the
      # current and the first known failure of the step.
      failed_steps[step_name] = {
          'current_failure': build_info.build_number,
          'first_failure': build_info.build_number,
      }

    return failed_steps

  def _CheckForFirstKnownFailure(self, master_name, builder_name, build_number,
                                 failed_steps, builds):
    """Checks for first known failures of the given failed steps.

    Args:
      master_name (str): master of the failed build.
      builder_name (str): builder of the failed build.
      build_number (int): builder number of the current failed build.
      failed_steps (dict): the failed steps of the current failed build. It
        will be updated with build numbers for 'first_failure' and
        'last_pass' of each failed step.
      builds (dict): a dict to save blame list and chromium revision.
    """
    # Look back for first known failures.
    for i in range(_MAX_BUILDS_TO_CHECK_FOR_FIRST_FAILURE):
      build_info = self._ExtractBuildInfo(
          master_name, builder_name, build_number - i - 1)

      if not build_info:  # pragma: no cover
        # Failed to extract the build information, bail out.
        return

      self._SaveBlamelistAndChromiumRevisionIntoDict(build_info, builds)

      if build_info.result == buildbot.SUCCESS:
        for step_name in failed_steps:
          if 'last_pass' not in failed_steps[step_name]:
            failed_steps[step_name]['last_pass'] = build_info.build_number

        # All steps passed, so stop looking back.
        return
      else:
        # If a step is not run due to some bot exception, we are not sure
        # whether the step could pass or not. So we only check failed/passed
        # steps here.

        for step_name in build_info.failed_steps:
          if step_name in failed_steps:
            failed_steps[step_name]['first_failure'] = build_info.build_number

        for step_name in failed_steps:
          if step_name in build_info.passed_steps:
            failed_steps[step_name]['last_pass'] = build_info.build_number

        if all('last_pass' in step_info for step_info in failed_steps.values()):
          # All failed steps passed in this build cycle.
          return  # pragma: no cover

  # Arguments number differs from overridden method - pylint: disable=W0221
  def run(self, master_name, builder_name, build_number):
    """Detects the first failure of each failed step in the given build.

    Returns:
      A dict in the form:
      {
        'failed': True,
        'master_name': 'master',
        'builder_name': 'builder',
        'build_number': 555,
        'builds': {...},        # See _SaveBlamelistAndChromiumRevisionIntoDict.
        'failed_steps': {...},  # See _CheckForFirstKnownFailure.
      }
      'builds' and 'failed_steps' are only present when the build failed.

    Raises:
      pipeline.Retry: if the build information could not be extracted.
    """
    build_info = self._ExtractBuildInfo(master_name, builder_name, build_number)

    if not build_info:  # pragma: no cover
      raise pipeline.Retry('Failed to extract build info.')

    failure_info = {
        'failed': True,
        'master_name': master_name,
        'builder_name': builder_name,
        'build_number': build_number
    }

    if (build_info.result == buildbot.SUCCESS or
        not build_info.failed_steps):  # pragma: no cover
      failure_info['failed'] = False
      return failure_info

    builds = dict()
    self._SaveBlamelistAndChromiumRevisionIntoDict(build_info, builds)

    failed_steps = self._CreateADictOfFailedSteps(build_info)

    self._CheckForFirstKnownFailure(
        master_name, builder_name, build_number, failed_steps, builds)

    failure_info['builds'] = builds
    failure_info['failed_steps'] = failed_steps
    return failure_info
254 | |
255 | |
class BuildFailurePipeline(BasePipeline):
  """Root pipeline that drives the analysis of a failed build."""

  def __init__(self, master_name, builder_name, build_number):
    super(BuildFailurePipeline, self).__init__(
        master_name, builder_name, build_number)
    # Keep the build identifiers around for finalized(), which receives no
    # arguments from the pipeline framework.
    self.master_name = master_name
    self.builder_name = builder_name
    self.build_number = build_number

  def finalized(self):
    """Records the final status of the analysis when the pipeline ends."""
    analysis = BuildAnalysis.GetBuildAnalysis(
        self.master_name, self.builder_name, self.build_number)
    if self.was_aborted:  # pragma: no cover
      analysis.status = BuildAnalysisStatus.ERROR
    else:
      analysis.status = BuildAnalysisStatus.ANALYZED
    analysis.put()

  # Arguments number differs from overridden method - pylint: disable=W0221
  def run(self, master_name, builder_name, build_number):
    """Marks the analysis as started and spawns the sub-pipelines."""
    analysis = BuildAnalysis.GetBuildAnalysis(
        master_name, builder_name, build_number)
    analysis.pipeline_url = self.pipeline_status_url()
    analysis.status = BuildAnalysisStatus.ANALYZING
    analysis.start_time = datetime.utcnow()
    analysis.put()

    yield DetectFirstFailurePipeline(master_name, builder_name, build_number)
39 | 284 |
40 | 285 |
@ndb.transactional
def NeedANewAnalysis(master_name, builder_name, build_number, force):
  """Checks status of analysis for the build and decides if a new one is needed.

  A BuildAnalysis entity for the given build will be created if none exists.

  Returns:
    True if an analysis is needed, otherwise False.
  """
  analysis = BuildAnalysis.GetBuildAnalysis(
      master_name, builder_name, build_number)

  if not analysis:
    # First request for this build: record a pending analysis.
    analysis = BuildAnalysis.CreateBuildAnalysis(
        master_name, builder_name, build_number)
    analysis.status = BuildAnalysisStatus.PENDING
    analysis.put()
    return True

  if force:
    # TODO: avoid concurrent analysis.
    analysis.Reset()
    analysis.put()
    return True

  # TODO: support following cases
  # 1. Automatically retry if last analysis failed with errors.
  # 2. Start another analysis if the build cycle wasn't completed in last
  #    analysis request.
  # 3. Analysis is not complete and no update in the last 5 minutes.
  return False
70 | 316 |
71 | 317 |
def ScheduleAnalysisIfNeeded(master_name, builder_name, build_number, force,
                             queue_name):
  """Schedules an analysis if needed and returns the build analysis.

  Args:
    master_name (str): master of the failed build.
    builder_name (str): builder of the failed build.
    build_number (int): number of the failed build.
    force (bool): whether to start a fresh analysis even if one exists.
    queue_name (str): task queue on which to run the analysis pipeline.

  Returns:
    The BuildAnalysis entity for the given build.
  """
  if NeedANewAnalysis(master_name, builder_name, build_number, force):
    job = BuildFailurePipeline(master_name, builder_name, build_number)
    job.start(queue_name=queue_name)

    logging.info('An analysis triggered on build %s, %s, %s: %s',
                 master_name, builder_name, build_number,
                 job.pipeline_status_url())
  else:  # pragma: no cover
    logging.info('Analysis was already triggered or the result is recent.')

  return BuildAnalysis.GetBuildAnalysis(master_name, builder_name, build_number)
OLD | NEW |