Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import collections | |
| 6 from datetime import datetime | |
| 5 import logging | 7 import logging |
| 8 import random | |
| 9 import time | |
| 6 | 10 |
| 11 from google.appengine.api import memcache | |
| 7 from google.appengine.ext import ndb | 12 from google.appengine.ext import ndb |
| 8 | 13 |
| 9 from pipeline_utils import pipelines | 14 from pipeline_utils import pipelines |
| 15 from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline | |
| 10 | 16 |
| 17 from common.http_client_appengine import HttpClientAppengine as HttpClient | |
| 11 from model.build import Build | 18 from model.build import Build |
| 19 from model.build_analysis import BuildAnalysis | |
| 12 from model.build_analysis_status import BuildAnalysisStatus | 20 from model.build_analysis_status import BuildAnalysisStatus |
| 21 from waterfall import buildbot | |
| 13 | 22 |
| 14 | 23 |
# TODO(stgao): remove BasePipeline after http://crrev.com/810193002 is landed.
class BasePipeline(pipelines.AppenginePipeline):  # pragma: no cover
  """Common base for pipelines in this module.

  The test and callback hooks are intentionally no-ops; subclasses only need
  to provide run().
  """

  def run_test(self, *args, **kwargs):
    """No-op test hook."""

  def finalized_test(self, *args, **kwargs):
    """No-op test hook."""

  def callback(self, **kwargs):
    """No-op callback hook."""

  def run(self, *args, **kwargs):
    # Subclasses must override this with the actual pipeline work.
    raise NotImplementedError()
| 28 | 37 |
| 29 | 38 |
# Memcache key pattern (per master) used as a distributed rate-limiter for
# downloads from that master.
_MEMCACHE_MASTER_DOWNLOAD_LOCK = 'master-download-lock-%s'
# Expire the rate-limiter entry after an hour so a stale entry cannot block
# downloads forever.
_MEMCACHE_MASTER_DOWNLOAD_EXPIRATION_SECONDS = 60 * 60
# Minimum number of seconds between two consecutive downloads from the same
# master.
_DOWNLOAD_INTERVAL_SECONDS = 5
# How far to look back through earlier builds when searching for the first
# known failure of a step.
_MAX_BUILDS_TO_CHECK_FOR_FIRST_FAILURE = 20
| 43 | |
| 44 | |
def _WaitUntilDownloadAllowed(
    master_name, timeout_seconds=90):  # pragma: no cover
  """Waits until next download from the specified master is allowed.

  A memcache entry (one per master) records the time of the last download.
  Compare-and-set (gets/cas) is used so that concurrent pipeline tasks race
  safely: only one of them wins the slot per interval.

  Returns:
    True if download is allowed to proceed.
    False if download is not allowed until the given timeout occurs.
  """
  client = memcache.Client()
  key = _MEMCACHE_MASTER_DOWNLOAD_LOCK % master_name

  deadline = time.time() + timeout_seconds
  while True:
    info = client.gets(key)
    # A download slot is available when no entry exists yet or the last
    # recorded download is older than the required interval.
    if not info or time.time() - info['time'] >= _DOWNLOAD_INTERVAL_SECONDS:
      new_info = {
          'time': time.time()
      }
      if not info:
        # No entry yet: add() fails if another task created one meanwhile.
        success = client.add(
            key, new_info, time=_MEMCACHE_MASTER_DOWNLOAD_EXPIRATION_SECONDS)
      else:
        # Entry exists: cas() fails if another task updated it since gets().
        success = client.cas(
            key, new_info, time=_MEMCACHE_MASTER_DOWNLOAD_EXPIRATION_SECONDS)

      if success:
        # (time.time() + timeout_seconds - deadline) == elapsed seconds since
        # this function started waiting.
        logging.info('Download from %s is allowed. Waited %s seconds.',
                     master_name, (time.time() + timeout_seconds - deadline))
        return True

    if time.time() > deadline:
      logging.info('Download from %s is not allowed. Waited %s seconds.',
                   master_name, timeout_seconds)
      return False

    logging.info('Waiting to download from %s', master_name)
    # Random jitter reduces the chance that waiting tasks wake up and race
    # for the slot at the same instant.
    time.sleep(_DOWNLOAD_INTERVAL_SECONDS + random.random())
| 82 | |
| 83 | |
class DetectFirstFailurePipeline(BasePipeline):
  """A pipeline to detect the first known failure of each failed step.

  Given a failed build, extracts its failed steps, then walks backwards
  through earlier builds on the same builder (up to
  _MAX_BUILDS_TO_CHECK_FOR_FIRST_FAILURE of them) to determine, for each
  failed step, the first build in which it started failing and the last
  build in which it passed.
  """

  # Shared HTTP client used for all buildbot data downloads.
  HTTP_CLIENT = HttpClient()

  def _BuildDataNeedUpdating(self, build):
    """Returns True if the cached build data should be (re-)downloaded.

    Data is needed when none is cached yet, or when the build was not
    completed at the last crawl and that crawl is more than 5 minutes old.
    """
    # Bug fix: total_seconds must be *called*. The previous code compared the
    # bound method object itself to the threshold, which in Python 2 always
    # evaluates to True, so incomplete builds were re-crawled every time.
    return (not build.data or
            (not build.completed and
             (datetime.utcnow() -
              build.last_crawled_time).total_seconds() >= 60 * 5))

  def _DownloadBuildData(self, master_name, builder_name, build_number):
    """Downloads build data and returns a Build instance."""
    build = Build.GetBuild(master_name, builder_name, build_number)
    if not build:  # pragma: no cover
      build = Build.CreateBuild(master_name, builder_name, build_number)

    # Cache the data to avoid pulling from master again.
    if self._BuildDataNeedUpdating(build):  # pragma: no cover
      # Rate-limit downloads against the master; if no download slot frees up
      # before the timeout, retry the whole pipeline task later.
      if not _WaitUntilDownloadAllowed(master_name):  # pragma: no cover
        raise pipeline.Retry('Too many download from %s' % master_name)

      build.data = buildbot.GetBuildData(
          build.master_name, build.builder_name, build.build_number,
          self.HTTP_CLIENT)
      build.last_crawled_time = datetime.utcnow()
      build.put()

    return build

  def _ExtractBuildInfo(self, master_name, builder_name, build_number):
    """Returns a BuildInfo instance for the specified build.

    Returns None if build data could not be downloaded.
    """
    build = self._DownloadBuildData(master_name, builder_name, build_number)
    if not build.data:  # pragma: no cover
      return None

    build_info = buildbot.ExtractBuildInfo(
        master_name, builder_name, build_number, build.data)

    if not build.completed:  # pragma: no cover
      # Persist the freshly-extracted status so later crawls can tell whether
      # the cached data is final.
      build.start_time = build_info.build_start_time
      build.completed = build_info.completed
      build.result = build_info.result
      build.put()

    # Back-fill the analysis entity's build start time on first sight.
    analysis = BuildAnalysis.GetBuildAnalysis(
        master_name, builder_name, build_number)
    if analysis and not analysis.build_start_time:
      analysis.build_start_time = build_info.build_start_time
      analysis.put()

    return build_info

  def _SaveBlamelistAndChromiumRevisionIntoDict(self, build_info, builds):
    """Records the blame list and chromium revision of a build into `builds`.

    Args:
      build_info (BuildInfo): a BuildInfo instance which contains blame list
          and chromium revision.
      builds (dict): to which the blame list and chromium revision is saved.
          It will be updated and looks like:
          {
            555 : {
              'chromium_revision': 'a_git_hash',
              'blame_list': ['git_hash1', 'git_hash2'],
            },
          }
    """
    builds[build_info.build_number] = {
        'chromium_revision': build_info.chromium_revision,
        'blame_list': build_info.blame_list
    }

  def _CreateADictOfFailedSteps(self, build_info):
    """Returns a dict seeding the failure range of each failed step.

    Args:
      build_info (BuildInfo): the build whose failed steps are recorded.
          (The previous docstring documented a nonexistent `failed_steps`
          argument.)

    Returns:
      A dict like this, with both build numbers initialized to the current
      build (first_failure is refined later by _CheckForFirstKnownFailure):
      {
        'step_name': {
          'current_failure': 555,
          'first_failure': 553,
        },
      }
    """
    failed_steps = dict()
    for step_name in build_info.failed_steps:
      failed_steps[step_name] = {
          'current_failure': build_info.build_number,
          'first_failure': build_info.build_number,
      }

    return failed_steps

  def _CheckForFirstKnownFailure(self, master_name, builder_name, build_number,
                                 failed_steps, builds):
    """Checks for first known failures of the given failed steps.

    Args:
      master_name (str): master of the failed build.
      builder_name (str): builder of the failed build.
      build_number (int): builder number of the current failed build.
      failed_steps (dict): the failed steps of the current failed build. It
          will be updated with build numbers for 'first_failure' and
          'last_pass' of each failed step.
      builds (dict): a dict to save blame list and chromium revision.
    """
    # Look back for first known failures.
    for i in range(_MAX_BUILDS_TO_CHECK_FOR_FIRST_FAILURE):
      build_info = self._ExtractBuildInfo(
          master_name, builder_name, build_number - i - 1)

      if not build_info:  # pragma: no cover
        # Failed to extract the build information, bail out.
        return

      self._SaveBlamelistAndChromiumRevisionIntoDict(build_info, builds)

      if build_info.result == buildbot.SUCCESS:
        for step_name in failed_steps:
          if 'last_pass' not in failed_steps[step_name]:
            failed_steps[step_name]['last_pass'] = build_info.build_number

        # All steps passed, so stop looking back.
        return
      else:
        # If a step is not run due to some bot exception, we are not sure
        # whether the step could pass or not. So we only check failed/passed
        # steps here.

        for step_name in build_info.failed_steps:
          if step_name in failed_steps:
            failed_steps[step_name]['first_failure'] = build_info.build_number

        for step_name in failed_steps:
          if step_name in build_info.passed_steps:
            failed_steps[step_name]['last_pass'] = build_info.build_number

        if all('last_pass' in step_info
               for step_info in failed_steps.values()):
          # All failed steps passed in this build cycle.
          return  # pragma: no cover

  # Arguments number differs from overridden method - pylint: disable=W0221
  def run(self, master_name, builder_name, build_number):
    """Detects the first failure for each failed step of the given build.

    Returns:
      A dict like:
      {
        'failed': True,            # False if the build actually succeeded.
        'master_name': ...,
        'builder_name': ...,
        'build_number': ...,
        'builds': {...},           # only present when failed.
        'failed_steps': {...},     # only present when failed.
      }
    """
    build_info = self._ExtractBuildInfo(master_name, builder_name, build_number)

    if not build_info:  # pragma: no cover
      raise pipeline.Retry('Failed to extract build info.')

    failure_info = {
        'failed': True,
        'master_name': master_name,
        'builder_name': builder_name,
        'build_number': build_number
    }

    if (build_info.result == buildbot.SUCCESS or
        not build_info.failed_steps):  # pragma: no cover
      failure_info['failed'] = False
      return failure_info

    builds = dict()
    self._SaveBlamelistAndChromiumRevisionIntoDict(build_info, builds)

    failed_steps = self._CreateADictOfFailedSteps(build_info)

    self._CheckForFirstKnownFailure(
        master_name, builder_name, build_number, failed_steps, builds)

    failure_info['builds'] = builds
    failure_info['failed_steps'] = failed_steps
    return failure_info
| 254 | |
| 255 | |
class BuildFailurePipeline(BasePipeline):
  """Root pipeline that drives analysis of one failed build."""

  def __init__(self, master_name, builder_name, build_number):
    super(BuildFailurePipeline, self).__init__(
        master_name, builder_name, build_number)
    # Keep the build identifiers on the instance so finalized() can look up
    # the corresponding BuildAnalysis entity.
    self.master_name = master_name
    self.builder_name = builder_name
    self.build_number = build_number

  def finalized(self):
    """Records the terminal status of the analysis when the pipeline ends."""
    analysis = BuildAnalysis.GetBuildAnalysis(
        self.master_name, self.builder_name, self.build_number)
    analysis.status = (
        BuildAnalysisStatus.ERROR if self.was_aborted  # pragma: no cover
        else BuildAnalysisStatus.ANALYZED)
    analysis.put()

  # Arguments number differs from overridden method - pylint: disable=W0221
  def run(self, master_name, builder_name, build_number):
    """Marks the analysis as in progress and spawns the sub-pipeline."""
    analysis = BuildAnalysis.GetBuildAnalysis(
        master_name, builder_name, build_number)
    analysis.status = BuildAnalysisStatus.ANALYZING
    analysis.pipeline_url = self.pipeline_status_url()
    analysis.start_time = datetime.utcnow()
    analysis.put()

    yield DetectFirstFailurePipeline(master_name, builder_name, build_number)
| 39 | 284 |
| 40 | 285 |
@ndb.transactional
def NeedANewAnalysis(master_name, builder_name, build_number, force):
  """Checks status of analysis for the build and decides if a new one is needed.

  A BuildAnalysis entity for the given build will be created if none exists.

  Returns:
    True if an analysis is needed, otherwise False.
  """
  analysis = BuildAnalysis.GetBuildAnalysis(
      master_name, builder_name, build_number)

  if not analysis:
    # First request for this build: create a pending analysis record.
    analysis = BuildAnalysis.CreateBuildAnalysis(
        master_name, builder_name, build_number)
    analysis.status = BuildAnalysisStatus.PENDING
    analysis.put()
    return True

  if force:
    # TODO: avoid concurrent analysis.
    analysis.Reset()
    analysis.put()
    return True

  # TODO: support following cases
  # 1. Automatically retry if last analysis failed with errors.
  # 2. Start another analysis if the build cycle wasn't completed in last
  #    analysis request.
  # 3. Analysis is not complete and no update in the last 5 minutes.
  return False
| 70 | 316 |
| 71 | 317 |
def ScheduleAnalysisIfNeeded(master_name, builder_name, build_number, force,
                             queue_name):
  """Schedules an analysis if needed and returns the build analysis.

  Args:
    master_name (str): master of the build.
    builder_name (str): builder of the build.
    build_number (int): number of the build.
    force (bool): if True, reset any existing analysis and start over.
    queue_name (str): task queue on which to run the pipeline.
  """
  if NeedANewAnalysis(master_name, builder_name, build_number, force):
    job = BuildFailurePipeline(master_name, builder_name, build_number)
    job.start(queue_name=queue_name)

    logging.info('An analysis triggered on build %s, %s, %s: %s',
                 master_name, builder_name, build_number,
                 job.pipeline_status_url())
  else:  # pragma: no cover
    logging.info('Analysis was already triggered or the result is recent.')

  return BuildAnalysis.GetBuildAnalysis(master_name, builder_name, build_number)
| OLD | NEW |