Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1851)

Unified Diff: appengine/findit/waterfall/build_failure_analysis.py

Issue 2230103002: [Findit] Pipeline change to save suspected cls to data store. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@0808-resubmit-suspected_cl_model
Patch Set: Self review. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: appengine/findit/waterfall/build_failure_analysis.py
diff --git a/appengine/findit/waterfall/build_failure_analysis.py b/appengine/findit/waterfall/build_failure_analysis.py
index f3234d000b454bd4dc06112cb37baf175635f17e..1d461065dc91569e55ee6e576a49f31b92951ba9 100644
--- a/appengine/findit/waterfall/build_failure_analysis.py
+++ b/appengine/findit/waterfall/build_failure_analysis.py
@@ -2,13 +2,15 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-import collections
+from collections import defaultdict
import os
import re
from common.diff import ChangeType
from common.git_repository import GitRepository
from common.http_client_appengine import HttpClientAppengine as HttpClient
+from model import analysis_approach_type
+from waterfall import suspected_cl_util
from waterfall import waterfall_config
from waterfall.failure_signal import FailureSignal
@@ -233,7 +235,7 @@ class _Justification(object):
def __init__(self):
self._score = 0
- self._hints = collections.defaultdict(int)
+ self._hints = defaultdict(int)
@property
def score(self):
@@ -565,7 +567,7 @@ def _CheckFiles(failure_signal, change_log, deps_info):
failure; otherwise None.
"""
# Use a dict to map each file name of the touched files to their occurrences.
- file_name_occurrences = collections.defaultdict(int)
+ file_name_occurrences = defaultdict(int)
for touched_file in change_log['touched_files']:
change_type = touched_file['change_type']
if (change_type in (ChangeType.ADD, ChangeType.COPY,
@@ -600,6 +602,45 @@ def _CheckFiles(failure_signal, change_log, deps_info):
return justification.ToDict()
+class _CLInfo(object):
lijeffrey 2016/09/12 22:55:11 So how about making this depend on your other cl h
chanli 2016/09/16 21:24:19 Based on our discussion offline, I'll leave it as
+ """A object of information we need for a suspected CL.
+
+ The information is specific to current build.
+ """
+ def __init__(self):
+ self.failures = defaultdict(list)
+ self.top_score = 0
+
+ @property
+ def not_none_failures(self):
stgao 2016/09/15 17:35:12 What does this mean?
chanli 2016/09/16 21:24:19 For compile and other non-swarming steps, we will
stgao 2016/09/23 18:30:21 Why we will have None in the list? Can we avoid th
chanli 2016/09/24 00:04:59 Since I'm using defaultdict here, I need to make s
+ failures = {
+ key: [t for t in self.failures[key] if t] for key in self.failures
+ }
+ return failures
+
+
+def _SaveFailureToMap(
+ cl_failure_map, new_suspected_cl_dict, step_name, test_name, top_score):
+ """Saves a failure's info to the cl that caused it."""
+ cl_key = (
+ new_suspected_cl_dict['repo_name'], new_suspected_cl_dict['revision'],
+ new_suspected_cl_dict['commit_position'])
+
+ cl_failure_map[cl_key].failures[step_name].append(test_name)
+ cl_failure_map[cl_key].top_score = top_score
stgao 2016/09/15 17:35:12 What if the original score is higher than this one
chanli 2016/09/16 21:24:19 The real problem should be what if the original sc
stgao 2016/09/23 18:30:21 Add a comment for that?
chanli 2016/09/24 00:04:59 Done.
+
+
+def _SaveSuspectedCLs(
+ cl_failure_map, master_name, builder_name, build_number, failure_type):
+ """Saves suspected CLs to dataStore."""
+ for cl_key, cl_object in cl_failure_map.iteritems():
+ repo_name, revision, commit_position = cl_key
+ suspected_cl_util.UpdateSuspectedCL(
+ repo_name, revision, commit_position, analysis_approach_type.HEURISTIC,
+ master_name, builder_name, build_number, failure_type,
+ cl_object.not_none_failures, cl_object.top_score)
+
+
def AnalyzeBuildFailure(
failure_info, change_logs, deps_info, failure_signals):
"""Analyze the given failure signals, and figure out culprit CLs.
@@ -665,7 +706,9 @@ def AnalyzeBuildFailure(
failed_steps = failure_info['failed_steps']
builds = failure_info['builds']
- master_name = failure_info.get('master_name')
+ master_name = failure_info['master_name']
+
+ cl_failure_map = defaultdict(_CLInfo)
for step_name, step_failure_info in failed_steps.iteritems():
is_test_level = step_failure_info.get('tests') is not None
@@ -716,6 +759,10 @@ def AnalyzeBuildFailure(
test_analysis_result['suspected_cls'].append(
new_suspected_cl_dict)
+ _SaveFailureToMap(
stgao 2016/09/15 17:35:12 I'm wondering if it would be possible and cleaner
chanli 2016/09/16 21:24:19 I actually do datastore operations in a patch here
+ cl_failure_map, new_suspected_cl_dict, step_name, test_name,
+ max(justification_dict.values()))
+
# Checks Files on step level using step level signals
# regardless of test level signals so we can make sure
# no duplicate justifications added to the step result.
@@ -726,11 +773,21 @@ def AnalyzeBuildFailure(
if not justification_dict:
continue
- step_analysis_result['suspected_cls'].append(
- CreateCLInfoDict(justification_dict, build_number,
- change_logs[revision]))
+ new_suspected_cl_dict = CreateCLInfoDict(
+ justification_dict, build_number, change_logs[revision])
+ step_analysis_result['suspected_cls'].append(new_suspected_cl_dict)
+
+ if not is_test_level:
+ _SaveFailureToMap(
+ cl_failure_map, new_suspected_cl_dict, step_name, None,
+ max(justification_dict.values()))
# TODO(stgao): sort CLs by score.
analysis_result['failures'].append(step_analysis_result)
+ # Save suspected_cls to data_store.
+ _SaveSuspectedCLs(
+ cl_failure_map, failure_info['master_name'], failure_info['builder_name'],
+ failure_info['build_number'], failure_info['failure_type'])
+
return analysis_result

Powered by Google App Engine
This is Rietveld 408576698