OLD | NEW |
---|---|
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import collections | |
6 from datetime import datetime | |
5 import logging | 7 import logging |
8 import random | |
9 import time | |
6 | 10 |
11 from google.appengine.api import memcache | |
7 from google.appengine.ext import ndb | 12 from google.appengine.ext import ndb |
8 | 13 |
9 from pipeline_utils import pipelines | 14 from pipeline_utils import pipelines |
15 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline | |
10 | 16 |
17 from common.http_client_appengine import HttpClientAppengine as HttpClient | |
11 from model.build import Build | 18 from model.build import Build |
19 from model.build_analysis import BuildAnalysis | |
12 from model.build_analysis_status import BuildAnalysisStatus | 20 from model.build_analysis_status import BuildAnalysisStatus |
21 from waterfall import buildbot | |
13 | 22 |
14 | 23 |
15 # TODO(stgao): remove BasePipeline after http://crrev.com/810193002 is landed. | 24 # TODO(stgao): remove BasePipeline after http://crrev.com/810193002 is landed. |
16 class BasePipeline(pipelines.AppenginePipeline): # pragma: no cover | 25 class BasePipeline(pipelines.AppenginePipeline): # pragma: no cover |
17 def run_test(self, *args, **kwargs): | 26 def run_test(self, *args, **kwargs): |
18 pass | 27 pass |
19 | 28 |
20 def finalized_test(self, *args, **kwargs): | 29 def finalized_test(self, *args, **kwargs): |
21 pass | 30 pass |
22 | 31 |
23 def callback(self, **kwargs): | 32 def callback(self, **kwargs): |
24 pass | 33 pass |
25 | 34 |
26 def run(self, *args, **kwargs): | 35 def run(self, *args, **kwargs): |
27 raise NotImplementedError() | 36 raise NotImplementedError() |
28 | 37 |
29 | 38 |
30 class BuildFailurePipeline(BasePipeline): | 39 MEMCACHE_MASTER_DOWNLOAD_LOCK = 'master-download-lock-%s' |
40 MEMCACHE_MASTER_DOWNLOAD_EXPIRATION_SECONDS = 60 * 60 | |
41 DOWNLOAD_INTERVAL_SECONDS = 5 | |
qyearsley
2014/12/29 19:44:54
If these aren't used in any other modules, you can
stgao
2015/01/03 01:31:47
Done.
| |
42 | |
43 | |
44 def WaitUntilDownloadAllowed( | |
45 master_name, timeout_seconds=90): # pragma: no cover | |
46 """Wait until next download from the specified master is allowed. | |
47 | |
48 Returns: | |
49 True if download is allowed to proceed. | |
50 False if download is not allowed until the given timeout occurs. | |
51 """ | |
qyearsley
2014/12/29 19:44:53
I might put WaitUntilDownloadAllowed below where i
stgao
2015/01/03 01:31:47
In infra repo, it seems the convention is to put h
| |
52 client = memcache.Client() | |
53 key = MEMCACHE_MASTER_DOWNLOAD_LOCK % master_name | |
54 | |
55 deadline = time.time() + timeout_seconds | |
56 while True: | |
57 info = client.gets(key) | |
58 if not info or time.time() - info['time'] >= DOWNLOAD_INTERVAL_SECONDS: | |
59 new_info = { | |
60 'time': time.time() | |
61 } | |
62 if not info: | |
63 success = client.add( | |
64 key, new_info, time=MEMCACHE_MASTER_DOWNLOAD_EXPIRATION_SECONDS) | |
65 else: | |
66 success = client.cas( | |
67 key, new_info, time=MEMCACHE_MASTER_DOWNLOAD_EXPIRATION_SECONDS) | |
68 | |
69 if success: | |
70 return True | |
71 | |
72 if time.time() > deadline: | |
73 return False | |
74 | |
75 logging.info('waiting to download from %s', master_name) | |
76 time.sleep(DOWNLOAD_INTERVAL_SECONDS + random.random()) | |
77 | |
78 | |
79 class DetectFirstFailure(BasePipeline): | |
qyearsley
2014/12/29 19:44:54
How about the name DetectFirstFailurePipeline? (It
stgao
2015/01/03 01:31:47
Good point. Done.
| |
80 HTTP_CLIENT = HttpClient() | |
81 | |
82 def ExtractBuildInfo(self, master_name, builder_name, build_number): | |
83 """Return a BuildInfo instance for the specified build.""" | |
qyearsley
2014/12/29 19:44:53
Return -> Returns
stgao
2015/01/03 01:31:47
Done.
| |
84 build = Build.GetBuild(master_name, builder_name, build_number) | |
85 if not build: | |
86 build = Build.CreateBuild(master_name, builder_name, build_number) | |
87 | |
88 # Cache the data to avoid pulling from master again. | |
89 if (not build.data or (not build.completed and | |
90 (datetime.utcnow() - build.last_crawled_time).total_seconds >= 60 * 5)): | |
91 if not WaitUntilDownloadAllowed(master_name): | |
92 raise pipeline.Retry('Too many download from %s' % master_name) | |
93 | |
94 build.data = buildbot.GetBuildData( | |
95 build.master_name, build.builder_name, build.build_number, | |
96 self.HTTP_CLIENT) | |
97 build.last_crawled_time = datetime.utcnow() | |
98 | |
99 if not build.data: | |
100 return None | |
101 | |
102 build_info = buildbot.ExtractBuildInfo( | |
103 master_name, builder_name, build_number, build.data) | |
104 | |
105 if not build.completed: | |
106 build.start_time = build_info.build_start_time | |
107 build.completed = build_info.completed | |
108 build.result = build_info.result | |
109 build.put() | |
110 | |
111 analysis = BuildAnalysis.GetBuildAnalysis( | |
112 master_name, builder_name, build_number) | |
113 if analysis and not analysis.build_start_time: | |
114 analysis.build_start_time = build_info.build_start_time | |
115 | |
116 return build_info | |
31 | 117 |
32 # Arguments number differs from overridden method - pylint: disable=W0221 | 118 # Arguments number differs from overridden method - pylint: disable=W0221 |
33 def run(self, master_name, builder_name, build_number): | 119 def run(self, master_name, builder_name, build_number): |
34 build = Build.GetBuild(master_name, builder_name, build_number) | 120 build_info = self.ExtractBuildInfo(master_name, builder_name, build_number) |
35 | 121 |
36 # TODO: implement the logic. | 122 if not build_info: |
37 build.analysis_status = BuildAnalysisStatus.ANALYZED | 123 raise pipeline.Retry('Failed to extract build info.') |
38 build.put() | 124 |
125 failure_info = { | |
126 'failed': True, | |
127 } | |
128 | |
129 if build_info.result == buildbot.SUCCESS or not build_info.failed_steps: | |
130 failure_info['failed'] = False | |
131 return failure_info | |
132 | |
133 # Save blame list and tested chromium revision for builds. | |
134 # { | |
135 # 555 : { | |
136 # 'chromium_revision': 'a_git_hash', | |
137 # 'blame_list': ['cl1', 'cl2'], | |
138 # }, | |
139 # } | |
qyearsley
2014/12/29 19:44:53
Is this an example of what the `builds` dict below
stgao
2015/01/03 01:31:47
Done. Moved to a helper function.
| |
140 builds = collections.defaultdict(dict) | |
141 def FillBuildInfo(info): | |
142 builds[info.build_number]['chromium_revision'] = info.chromium_revision | |
143 builds[info.build_number]['blame_list'] = info.blame_list | |
144 | |
145 FillBuildInfo(build_info) | |
146 | |
147 # Save build number for failed steps. | |
148 # { | |
149 # 'step_name': { | |
150 # 'current_failure': 555, | |
151 # 'first_failure': 553, | |
152 # 'last_pass': 551, | |
153 # }, | |
154 # } | |
155 failed_steps = collections.defaultdict(dict) | |
156 for step_name in build_info.failed_steps: | |
157 failed_steps[step_name]['current_failure'] = build_info.build_number | |
158 failed_steps[step_name]['first_failure'] = build_info.build_number | |
159 | |
160 # Look back for first known failure. At most, 20 builds. | |
161 for i in range(20): | |
qyearsley
2014/12/29 19:44:54
This 20 should probably be a module-level constant
stgao
2015/01/03 01:31:47
Done.
| |
162 build_info = self.ExtractBuildInfo( | |
163 master_name, builder_name, build_number - i - 1) | |
164 | |
165 if not build_info: | |
166 # Failed to extract the build information, bail out. | |
167 break | |
168 | |
169 FillBuildInfo(build_info) | |
170 | |
171 if build_info.result == buildbot.SUCCESS: | |
172 for step_name in failed_steps: | |
173 if 'last_pass' not in failed_steps[step_name]: | |
174 failed_steps[step_name]['last_pass'] = build_info.build_number | |
175 | |
176 # All steps passed, so stop looking back. | |
177 break | |
178 else: | |
179 # If a step is not run due to some bot exception, we are not sure | |
180 # whether the step could pass or not. So we only check failed/passed | |
181 # steps here. | |
182 | |
183 for step_name in build_info.failed_steps: | |
184 if step_name in failed_steps: | |
185 failed_steps[step_name]['first_failure'] = build_info.build_number | |
186 | |
187 for step_name in failed_steps: | |
188 if step_name in build_info.passed_steps: | |
189 failed_steps[step_name]['last_pass'] = build_info.build_number | |
190 | |
191 failure_info['builds'] = builds | |
192 failure_info['failed_steps'] = failed_steps | |
193 return failure_info | |
qyearsley
2014/12/29 19:44:53
This function is very long, and would probably be
stgao
2015/01/03 01:31:47
Done. Hope the code is easier to understand.
| |
194 | |
195 | |
196 class BuildFailurePipeline(BasePipeline): | |
197 | |
198 def __init__(self, master_name, builder_name, build_number): | |
199 super(BuildFailurePipeline, self).__init__( | |
200 master_name, builder_name, build_number) | |
201 self.master_name = master_name | |
202 self.builder_name = builder_name | |
203 self.build_number = build_number | |
204 | |
205 def finalized(self): | |
206 if self.was_aborted: | |
207 analysis = BuildAnalysis.GetBuildAnalysis( | |
208 self.master_name, self.builder_name, self.build_number) | |
209 analysis.status = BuildAnalysisStatus.ERROR | |
210 analysis.put() | |
211 | |
212 # Arguments number differs from overridden method - pylint: disable=W0221 | |
qyearsley
2014/12/29 19:44:54
Cool, I didn't know you could add a comment on the
stgao
2015/01/03 01:31:47
Yes, they could fit into one line.
| |
213 def run(self, master_name, builder_name, build_number): | |
214 yield DetectFirstFailure(master_name, builder_name, build_number) | |
39 | 215 |
40 | 216 |
41 @ndb.transactional | 217 @ndb.transactional |
42 def NeedANewAnalysis(master_name, builder_name, build_number, force): | 218 def NeedANewAnalysis(master_name, builder_name, build_number, force): |
43 """Check analysis status of a build and decide if a new analysis is needed. | 219 """Check status of analysis for the build and decide if a new one is needed. |
qyearsley
2014/12/29 19:44:54
Check -> Checks, decide -> decides (third-person s
stgao
2015/01/03 01:31:47
Done.
| |
44 | 220 |
45 Returns: | 221 Returns: |
46 (build, need_analysis) | 222 (build, need_analysis) |
47 build (Build): the build as specified by the input. | 223 build (Build): the build as specified by the input. |
qyearsley
2014/12/29 19:44:53
Make sure to update the docstring to reflect the c
stgao
2015/01/03 01:31:47
Oops, done.
| |
48 need_analysis (bool): True if an analysis is needed, otherwise False. | 224 need_analysis (bool): True if an analysis is needed, otherwise False. |
49 """ | 225 """ |
qyearsley
2014/12/29 19:44:54
Similar to WaitUntilDownloadAllowed, I think NeedA
stgao
2015/01/03 01:31:47
I intentionally made this function public for test
| |
50 build_key = Build.CreateKey(master_name, builder_name, build_number) | 226 analysis = BuildAnalysis.GetBuildAnalysis( |
51 build = build_key.get() | 227 master_name, builder_name, build_number) |
52 | 228 |
53 if not build: | 229 if not analysis: |
54 build = Build.CreateBuild(master_name, builder_name, build_number) | 230 analysis = BuildAnalysis.CreateBuildAnalysis( |
55 build.analysis_status = BuildAnalysisStatus.PENDING | 231 master_name, builder_name, build_number) |
56 build.put() | 232 analysis.status = BuildAnalysisStatus.PENDING |
57 return build, True | 233 analysis.put() |
234 return analysis, True | |
58 elif force: | 235 elif force: |
59 # TODO: avoid concurrent analysis. | 236 # TODO: avoid concurrent analysis. |
60 build.Reset() | 237 analysis.Reset() |
61 build.put() | 238 analysis.put() |
62 return build, True | 239 return analysis, True |
63 else: | 240 else: |
64 # TODO: support following cases | 241 # TODO: support following cases |
65 # 1. Automatically retry if last analysis failed with errors. | 242 # 1. Automatically retry if last analysis failed with errors. |
66 # 2. Start another analysis if the build cycle wasn't completed in last | 243 # 2. Start another analysis if the build cycle wasn't completed in last |
67 # analysis request. | 244 # analysis request. |
68 # 3. Analysis is not complete and no update in the last 5 minutes. | 245 # 3. Analysis is not complete and no update in the last 5 minutes. |
69 return build, False | 246 return analysis, False |
70 | 247 |
71 | 248 |
72 def ScheduleAnalysisIfNeeded(master_name, builder_name, build_number, force, | 249 def ScheduleAnalysisIfNeeded(master_name, builder_name, build_number, force, |
73 queue_name): | 250 queue_name): |
74 """Schedule an analysis if needed and return the build.""" | 251 """Schedule an analysis if needed and return the build.""" |
75 build, need_new_analysis = NeedANewAnalysis( | 252 analysis, need_new_analysis = NeedANewAnalysis( |
76 master_name, builder_name, build_number, force) | 253 master_name, builder_name, build_number, force) |
77 | 254 |
78 if need_new_analysis: | 255 if need_new_analysis: |
79 pipeline_job = BuildFailurePipeline(master_name, builder_name, build_number) | 256 pipeline_job = BuildFailurePipeline(master_name, builder_name, build_number) |
80 pipeline_job.start(queue_name=queue_name) | 257 pipeline_job.start(queue_name=queue_name) |
81 | 258 |
82 logging.info('An analysis triggered on build %s, %s, %s: %s', | 259 logging.info('An analysis triggered on build %s, %s, %s: %s', |
83 master_name, builder_name, build_number, | 260 master_name, builder_name, build_number, |
84 pipeline_job.pipeline_status_url()) | 261 pipeline_job.pipeline_status_url()) |
85 else: # pragma: no cover | 262 else: # pragma: no cover |
86 logging.info('Analysis was already triggered or the result is recent.') | 263 logging.info('Analysis was already triggered or the result is recent.') |
87 | 264 |
88 return build | 265 return analysis |
OLD | NEW |