Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(92)

Unified Diff: appengine/findit/waterfall/detect_first_failure_pipeline.py

Issue 820113002: [Findit] Add a sub-pipeline to detect first-known failure. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Address comments Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: appengine/findit/waterfall/detect_first_failure_pipeline.py
diff --git a/appengine/findit/waterfall/detect_first_failure_pipeline.py b/appengine/findit/waterfall/detect_first_failure_pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa62e606811981ae02989ac620ac26d9d0b56d92
--- /dev/null
+++ b/appengine/findit/waterfall/detect_first_failure_pipeline.py
@@ -0,0 +1,244 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import collections
+from datetime import datetime
+import logging
+import random
+import time
+
+from google.appengine.api import memcache
+
+from pipeline_utils.appengine_third_party_pipeline_src_pipeline import pipeline
+
+from common.http_client_appengine import HttpClientAppengine as HttpClient
+from model.build import Build
+from model.build_analysis import BuildAnalysis
+from waterfall import buildbot
+from waterfall.base_pipeline import BasePipeline
+from waterfall import lock_util
+
+
+# Maximum number of earlier builds that are examined when looking back for the
+# first known failure of a step.
+_MAX_BUILDS_TO_CHECK = 20
+
+
+class DetectFirstFailurePipeline(BasePipeline):
+  """A pipeline to detect the first known failure of each failed step.
+
+  Starting from the given build, earlier builds of the same builder are
+  examined (at most _MAX_BUILDS_TO_CHECK of them) to find, for each step that
+  failed in the given build, the latest build in which the step passed and the
+  earliest known build in which it has been failing since then.
+
+  TODO(stgao): do test-level detection for gtest.
+
+  Input:
+    master_name
+    builder_name
+    build_number
+
+  Output:
+    A json like below:
+    {
+      "master_name": "chromium.gpu",
+      "builder_name": "GPU Linux Builder",
+      "build_number": 25410,
+      "failed": true,
+      "failed_steps": {
+        "compile": {
+          "last_pass": 25408,
+          "current_failure": 25410,
+          "first_failure": 25409
+        }
+      },
+      "builds": {
+        "25408": {
+          "chromium_revision": "474ab324d17d2cd198d3fb067cabc10a775a8df7",
+          "blame_list": [
+            "474ab324d17d2cd198d3fb067cabc10a775a8df7"
+          ]
+        },
+        "25409": {
+          "chromium_revision": "33c6f11de20c5b229e102c51237d96b2d2f1be04",
+          "blame_list": [
+            "9d5ebc5eb14fc4b3823f6cfd341da023f71f49dd",
+            ...
+          ]
+        },
+        "25410": {
+          "chromium_revision": "4bffcd598dd89e0016208ce9312a1f477ff105d1",
+          "blame_list": [
+            "b98e0b320d39a323c81cc0542e6250349183a4df",
+            ...
+          ]
+        }
+      }
+    }
+
+    If the build did not fail, "failed" is false and the "failed_steps" and
+    "builds" entries are absent.
+  """
+
+  HTTP_CLIENT = HttpClient()
+
+  def _BuildDataNeedUpdating(self, build):
+    """Returns True if the data of the given build should be downloaded.
+
+    That is the case when no data is cached yet, or when the build was not
+    completed and the cached data was crawled at least 300 seconds ago.
+    """
+    if not build.data:
+      return True
+    if build.completed:
+      # Data of a completed build is treated as final and never refreshed.
+      return False
+    seconds_since_last_crawl = (
+        datetime.utcnow() - build.last_crawled_time).total_seconds()
+    return seconds_since_last_crawl >= 300
+
+  def _DownloadBuildData(self, master_name, builder_name, build_number):
+    """Downloads build data and returns a Build instance.
+
+    The Build entity is created if it does not exist yet. Its data is only
+    pulled from the master if _BuildDataNeedUpdating says so, in which case the
+    entity is saved with the new data and crawl time.
+
+    Raises:
+      pipeline.Retry: if downloading from the master is currently not allowed
+          according to lock_util.WaitUntilDownloadAllowed.
+    """
+    build = Build.GetBuild(master_name, builder_name, build_number)
+    if not build:
+      build = Build.CreateBuild(master_name, builder_name, build_number)
+
+    # Cache the data to avoid pulling from master again.
+    if self._BuildDataNeedUpdating(build):
+      if not lock_util.WaitUntilDownloadAllowed(
+          master_name):  # pragma: no cover
+        raise pipeline.Retry('Too many download from %s' % master_name)
+
+      build.data = buildbot.GetBuildData(
+          build.master_name, build.builder_name, build.build_number,
+          self.HTTP_CLIENT)
+      build.last_crawled_time = datetime.utcnow()
+      build.put()
+
+    return build
+
+  def _ExtractBuildInfo(self, master_name, builder_name, build_number):
+    """Returns a BuildInfo instance for the specified build.
+
+    Returns None if no build data is available. As a side effect, the stored
+    Build entity of a not-yet-completed build is updated with the extracted
+    start time, completion status and result, and an existing BuildAnalysis
+    without a build start time gets it filled in.
+    """
+    build = self._DownloadBuildData(master_name, builder_name, build_number)
+    if not build.data:  # pragma: no cover
+      return None
+
+    build_info = buildbot.ExtractBuildInfo(
+        master_name, builder_name, build_number, build.data)
+
+    if not build.completed:
+      build.start_time = build_info.build_start_time
+      build.completed = build_info.completed
+      build.result = build_info.result
+      build.put()
+
+    analysis = BuildAnalysis.GetBuildAnalysis(
+        master_name, builder_name, build_number)
+    if analysis and not analysis.build_start_time:
+      analysis.build_start_time = build_info.build_start_time
+      analysis.put()
+
+    return build_info
+
+  def _SaveBlamelistAndChromiumRevisionIntoDict(self, build_info, builds):
+    """Saves blame list and chromium revision of a build into the given dict.
+
+    Args:
+      build_info (BuildInfo): a BuildInfo instance which contains blame list and
+          chromium revision.
+      builds (dict): to which the blame list and chromium revision is saved. It
+          will be updated and looks like:
+          {
+            555 : {
+              'chromium_revision': 'a_git_hash',
+              'blame_list': ['git_hash1', 'git_hash2'],
+            },
+          }
+    """
+    builds[build_info.build_number] = {
+        'chromium_revision': build_info.chromium_revision,
+        'blame_list': build_info.blame_list
+    }
+
+  def _CreateADictOfFailedSteps(self, build_info):
+    """Returns a dict with build number for failed steps.
+
+    Both 'current_failure' and 'first_failure' of each step start out as the
+    build number of the given build.
+
+    Args:
+      build_info (BuildInfo): a BuildInfo instance of the failed build, whose
+          failed_steps and build_number are used.
+
+    Returns:
+      A dict like this:
+      {
+        'step_name': {
+          'current_failure': 555,
+          'first_failure': 555,
+        },
+      }
+    """
+    return {
+        step_name: {
+            'current_failure': build_info.build_number,
+            'first_failure': build_info.build_number,
+        }
+        for step_name in build_info.failed_steps
+    }
+
+  def _CheckForFirstKnownFailure(self, master_name, builder_name, build_number,
+                                 failed_steps, builds):
+    """Checks for first known failures of the given failed steps.
+
+    Earlier builds are visited from the newest to the oldest. For each step,
+    looking back stops as soon as a build in which the step passed is found, so
+    that 'first_failure' is never older than 'last_pass'.
+
+    Args:
+      master_name (str): master of the failed build.
+      builder_name (str): builder of the failed build.
+      build_number (int): builder number of the current failed build.
+      failed_steps (dict): the failed steps of the current failed build. It will
+          be updated with build numbers for 'first_failure' and 'last_pass' of
+          each failed step.
+      builds (dict): a dict to save blame list and chromium revision.
+    """
+    # Look back for first known failures, starting with the build right before
+    # the current one. Stop at build 0 so that a negative build number is never
+    # requested.
+    earliest_build_number = max(0, build_number - _MAX_BUILDS_TO_CHECK)
+    build_numbers_to_check = range(
+        build_number - 1, earliest_build_number - 1, -1)
+    for n in build_numbers_to_check:  # limit not hit - pragma: no cover
+      build_info = self._ExtractBuildInfo(master_name, builder_name, n)
+
+      if not build_info:  # pragma: no cover
+        # Failed to extract the build information, bail out.
+        return
+
+      self._SaveBlamelistAndChromiumRevisionIntoDict(build_info, builds)
+
+      if build_info.result == buildbot.SUCCESS:
+        for step_name in failed_steps:
+          if 'last_pass' not in failed_steps[step_name]:
+            failed_steps[step_name]['last_pass'] = build_info.build_number
+
+        # All steps passed, so stop looking back.
+        return
+
+      # If a step is not run due to some bot exception, we are not sure
+      # whether the step could pass or not. So we only check failed/passed
+      # steps here.
+      for step_name, step_info in failed_steps.items():
+        if 'last_pass' in step_info:
+          # A newer build already showed this step passing. Older builds must
+          # neither move 'first_failure' before that pass nor replace the pass
+          # with an older one.
+          continue
+
+        if step_name in build_info.failed_steps:
+          step_info['first_failure'] = build_info.build_number
+
+        if step_name in build_info.passed_steps:
+          step_info['last_pass'] = build_info.build_number
+
+      if all('last_pass' in step_info for step_info in failed_steps.values()):
+        # The last pass of every failed step is known, so stop looking back.
+        return
+
+  # Arguments number differs from overridden method - pylint: disable=W0221
+  def run(self, master_name, builder_name, build_number):
+    """Detects the first known failure of each failed step of the build.
+
+    Args:
+      master_name (str): master of the build.
+      builder_name (str): builder of the build.
+      build_number (int): build number of the build.
+
+    Returns:
+      A dict as described in the docstring of this class.
+
+    Raises:
+      pipeline.Retry: if the information of the build could not be extracted.
+    """
+    build_info = self._ExtractBuildInfo(master_name, builder_name, build_number)
+
+    if not build_info:  # pragma: no cover
+      raise pipeline.Retry('Failed to extract build info.')
+
+    failure_info = {
+        'failed': True,
+        'master_name': master_name,
+        'builder_name': builder_name,
+        'build_number': build_number
+    }
+
+    if (build_info.result == buildbot.SUCCESS or
+        not build_info.failed_steps):
+      failure_info['failed'] = False
+      return failure_info
+
+    builds = dict()
+    self._SaveBlamelistAndChromiumRevisionIntoDict(build_info, builds)
+
+    failed_steps = self._CreateADictOfFailedSteps(build_info)
+
+    self._CheckForFirstKnownFailure(
+        master_name, builder_name, build_number, failed_steps, builds)
+
+    failure_info['builds'] = builds
+    failure_info['failed_steps'] = failed_steps
+    return failure_info
« no previous file with comments | « appengine/findit/waterfall/build_failure_analysis_pipelines.py ('k') | appengine/findit/waterfall/lock_util.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698