Index: appengine/findit/waterfall/detect_first_failure_pipeline.py |
diff --git a/appengine/findit/waterfall/detect_first_failure_pipeline.py b/appengine/findit/waterfall/detect_first_failure_pipeline.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..fa62e606811981ae02989ac620ac26d9d0b56d92 |
--- /dev/null |
+++ b/appengine/findit/waterfall/detect_first_failure_pipeline.py |
@@ -0,0 +1,244 @@ |
+# Copyright 2014 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+import collections |
+from datetime import datetime |
+import logging |
+import random |
+import time |
+ |
+from google.appengine.api import memcache |
+ |
+from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline |
+ |
+from common.http_client_appengine import HttpClientAppengine as HttpClient |
+from model.build import Build |
+from model.build_analysis import BuildAnalysis |
+from waterfall import buildbot |
+from waterfall.base_pipeline import BasePipeline |
+from waterfall import lock_util |
+ |
+ |
+_MAX_BUILDS_TO_CHECK = 20 |
+ |
+ |
+class DetectFirstFailurePipeline(BasePipeline): |
+ """ A pipeline to detect first failure of each step. |
+ |
+ TODO(stgao): do test-level detection for gtest. |
+ |
+ Input: |
+ master_name |
+ builder_name |
+ build_number |
+ |
+ Output: |
+ A json like below: |
+ { |
+ "master_name": "chromium.gpu", |
+ "builder_name": "GPU Linux Builder" |
+ "build_number": 25410, |
+ "failed": true, |
+ "failed_steps": { |
+ "compile": { |
+ "last_pass": 25408, |
+ "current_failure": 25410, |
+ "first_failure": 25409 |
+ } |
+ }, |
+ "builds": { |
+ "25408": { |
+ "chromium_revision": "474ab324d17d2cd198d3fb067cabc10a775a8df7" |
+ "blame_list": [ |
+ "474ab324d17d2cd198d3fb067cabc10a775a8df7" |
+ ], |
+ }, |
+ "25409": { |
+ "chromium_revision": "33c6f11de20c5b229e102c51237d96b2d2f1be04" |
+ "blame_list": [ |
+ "9d5ebc5eb14fc4b3823f6cfd341da023f71f49dd", |
+ ... |
+ ], |
+ }, |
+ "25410": { |
+ "chromium_revision": "4bffcd598dd89e0016208ce9312a1f477ff105d1" |
+ "blame_list": [ |
+ "b98e0b320d39a323c81cc0542e6250349183a4df", |
+ ... |
+ ], |
+ } |
+ } |
+ } |
+ """ |
+ |
+ HTTP_CLIENT = HttpClient() |
+ |
+ def _BuildDataNeedUpdating(self, build): |
+ return (not build.data or (not build.completed and |
+ (datetime.utcnow() - build.last_crawled_time).total_seconds() >= 300)) |
+ |
+ def _DownloadBuildData(self, master_name, builder_name, build_number): |
+ """Downloads build data and returns a Build instance.""" |
+ build = Build.GetBuild(master_name, builder_name, build_number) |
+ if not build: |
+ build = Build.CreateBuild(master_name, builder_name, build_number) |
+ |
+ # Cache the data to avoid pulling from master again. |
+ if self._BuildDataNeedUpdating(build): |
+ if not lock_util.WaitUntilDownloadAllowed( |
+ master_name): # pragma: no cover |
+ raise pipeline.Retry('Too many download from %s' % master_name) |
+ |
+ build.data = buildbot.GetBuildData( |
+ build.master_name, build.builder_name, build.build_number, |
+ self.HTTP_CLIENT) |
+ build.last_crawled_time = datetime.utcnow() |
+ build.put() |
+ |
+ return build |
+ |
+ def _ExtractBuildInfo(self, master_name, builder_name, build_number): |
+ """Returns a BuildInfo instance for the specified build.""" |
+ build = self._DownloadBuildData(master_name, builder_name, build_number) |
+ if not build.data: # pragma: no cover |
+ return None |
+ |
+ build_info = buildbot.ExtractBuildInfo( |
+ master_name, builder_name, build_number, build.data) |
+ |
+ if not build.completed: |
+ build.start_time = build_info.build_start_time |
+ build.completed = build_info.completed |
+ build.result = build_info.result |
+ build.put() |
+ |
+ analysis = BuildAnalysis.GetBuildAnalysis( |
+ master_name, builder_name, build_number) |
+ if analysis and not analysis.build_start_time: |
+ analysis.build_start_time = build_info.build_start_time |
+ analysis.put() |
+ |
+ return build_info |
+ |
+ def _SaveBlamelistAndChromiumRevisionIntoDict(self, build_info, builds): |
+ """ |
+ Args: |
+ build_info (BuildInfo): a BuildInfo instance which contains blame list and |
+ chromium revision. |
+ builds (dict): to which the blame list and chromium revision is saved. It |
+ will be updated and looks like: |
+ { |
+ 555 : { |
+ 'chromium_revision': 'a_git_hash', |
+ 'blame_list': ['git_hash1', 'git_hash2'], |
+ }, |
+ } |
+ """ |
+ builds[build_info.build_number] = { |
+ 'chromium_revision': build_info.chromium_revision, |
+ 'blame_list': build_info.blame_list |
+ } |
+ |
+ def _CreateADictOfFailedSteps(self, build_info): |
+ """ Returns a dict with build number for failed steps. |
+ |
+ Args: |
+ failed_steps (list): a list of failed steps. |
+ |
+ Returns: |
+ A dict like this: |
+ { |
+ 'step_name': { |
+ 'current_failure': 555, |
+ 'first_failure': 553, |
+ }, |
+ } |
+ """ |
+ failed_steps = dict() |
+ for step_name in build_info.failed_steps: |
+ failed_steps[step_name] = { |
+ 'current_failure': build_info.build_number, |
+ 'first_failure': build_info.build_number, |
+ } |
+ |
+ return failed_steps |
+ |
+ def _CheckForFirstKnownFailure(self, master_name, builder_name, build_number, |
+ failed_steps, builds): |
+ """Checks for first known failures of the given failed steps. |
+ |
+ Args: |
+ master_name (str): master of the failed build. |
+ builder_name (str): builder of the failed build. |
+ build_number (int): builder number of the current failed build. |
+ failed_steps (dict): the failed steps of the current failed build. It will |
+ be updated with build numbers for 'first_failure' and 'last_pass' of |
+ each failed step. |
+ builds (dict): a dict to save blame list and chromium revision. |
+ """ |
+ # Look back for first known failures. |
+ for i in range(_MAX_BUILDS_TO_CHECK): # limit not hit - pragma: no cover |
+ build_info = self._ExtractBuildInfo( |
+ master_name, builder_name, build_number - i - 1) |
+ |
+ if not build_info: # pragma: no cover |
+ # Failed to extract the build information, bail out. |
+ return |
+ |
+ self._SaveBlamelistAndChromiumRevisionIntoDict(build_info, builds) |
+ |
+ if build_info.result == buildbot.SUCCESS: |
+ for step_name in failed_steps: |
+ if 'last_pass' not in failed_steps[step_name]: |
+ failed_steps[step_name]['last_pass'] = build_info.build_number |
+ |
+ # All steps passed, so stop looking back. |
+ return |
+ else: |
+ # If a step is not run due to some bot exception, we are not sure |
+ # whether the step could pass or not. So we only check failed/passed |
+ # steps here. |
+ |
+ for step_name in build_info.failed_steps: |
+ if step_name in failed_steps: |
+ failed_steps[step_name]['first_failure'] = build_info.build_number |
+ |
+ for step_name in failed_steps: |
+ if step_name in build_info.passed_steps: |
+ failed_steps[step_name]['last_pass'] = build_info.build_number |
+ |
+ if all('last_pass' in step_info for step_info in failed_steps.values()): |
+ # All failed steps passed in this build cycle. |
+ return |
+ |
+ # Arguments number differs from overridden method - pylint: disable=W0221 |
+ def run(self, master_name, builder_name, build_number): |
+ build_info = self._ExtractBuildInfo(master_name, builder_name, build_number) |
+ |
+ if not build_info: # pragma: no cover |
+ raise pipeline.Retry('Failed to extract build info.') |
+ |
+ failure_info = { |
+ 'failed': True, |
+ 'master_name': master_name, |
+ 'builder_name': builder_name, |
+ 'build_number': build_number |
+ } |
+ |
+ if (build_info.result == buildbot.SUCCESS or |
+ not build_info.failed_steps): |
+ failure_info['failed'] = False |
+ return failure_info |
+ |
+ builds = dict() |
+ self._SaveBlamelistAndChromiumRevisionIntoDict(build_info, builds) |
+ |
+ failed_steps = self._CreateADictOfFailedSteps(build_info) |
+ |
+ self._CheckForFirstKnownFailure( |
+ master_name, builder_name, build_number, failed_steps, builds) |
+ |
+ failure_info['builds'] = builds |
+ failure_info['failed_steps'] = failed_steps |
+ return failure_info |