Chromium Code Reviews| Index: appengine/findit/waterfall/build_failure_analysis.py |
| diff --git a/appengine/findit/waterfall/build_failure_analysis.py b/appengine/findit/waterfall/build_failure_analysis.py |
| index f3234d000b454bd4dc06112cb37baf175635f17e..1d461065dc91569e55ee6e576a49f31b92951ba9 100644 |
| --- a/appengine/findit/waterfall/build_failure_analysis.py |
| +++ b/appengine/findit/waterfall/build_failure_analysis.py |
| @@ -2,13 +2,15 @@ |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| -import collections |
| +from collections import defaultdict |
| import os |
| import re |
| from common.diff import ChangeType |
| from common.git_repository import GitRepository |
| from common.http_client_appengine import HttpClientAppengine as HttpClient |
| +from model import analysis_approach_type |
| +from waterfall import suspected_cl_util |
| from waterfall import waterfall_config |
| from waterfall.failure_signal import FailureSignal |
| @@ -233,7 +235,7 @@ class _Justification(object): |
| def __init__(self): |
| self._score = 0 |
| - self._hints = collections.defaultdict(int) |
| + self._hints = defaultdict(int) |
| @property |
| def score(self): |
| @@ -565,7 +567,7 @@ def _CheckFiles(failure_signal, change_log, deps_info): |
| failure; otherwise None. |
| """ |
| # Use a dict to map each file name of the touched files to their occurrences. |
| - file_name_occurrences = collections.defaultdict(int) |
| + file_name_occurrences = defaultdict(int) |
| for touched_file in change_log['touched_files']: |
| change_type = touched_file['change_type'] |
| if (change_type in (ChangeType.ADD, ChangeType.COPY, |
| @@ -600,6 +602,45 @@ def _CheckFiles(failure_signal, change_log, deps_info): |
| return justification.ToDict() |
| +class _CLInfo(object): |
|
lijeffrey
2016/09/12 22:55:11
So how about making this depend on your other cl h
chanli
2016/09/16 21:24:19
Based on our discussion offline, I'll leave it as
|
| + """An object of information we need for a suspected CL. |
| + |
| + The information is specific to the current build. |
| + """ |
| + def __init__(self): |
| + self.failures = defaultdict(list) |
| + self.top_score = 0 |
| + |
| + @property |
| + def not_none_failures(self): |
|
stgao
2016/09/15 17:35:12
What does this mean?
chanli
2016/09/16 21:24:19
For compile and other non-swarming steps, we will
stgao
2016/09/23 18:30:21
Why we will have None in the list? Can we avoid th
chanli
2016/09/24 00:04:59
Since I'm using defaultdict here, I need to make s
|
| + failures = { |
| + key: [t for t in self.failures[key] if t] for key in self.failures |
| + } |
| + return failures |
| + |
| + |
| +def _SaveFailureToMap( |
| + cl_failure_map, new_suspected_cl_dict, step_name, test_name, top_score): |
| + """Saves a failure's info to the cl that caused it.""" |
| + cl_key = ( |
| + new_suspected_cl_dict['repo_name'], new_suspected_cl_dict['revision'], |
| + new_suspected_cl_dict['commit_position']) |
| + |
| + cl_failure_map[cl_key].failures[step_name].append(test_name) |
| + cl_failure_map[cl_key].top_score = top_score |
|
stgao
2016/09/15 17:35:12
What if the original score is higher than this one
chanli
2016/09/16 21:24:19
The real problem should be what if the original sc
stgao
2016/09/23 18:30:21
Add a comment for that?
chanli
2016/09/24 00:04:59
Done.
|
| + |
| + |
| +def _SaveSuspectedCLs( |
| + cl_failure_map, master_name, builder_name, build_number, failure_type): |
| + """Saves suspected CLs to datastore.""" |
| + for cl_key, cl_object in cl_failure_map.iteritems(): |
| + repo_name, revision, commit_position = cl_key |
| + suspected_cl_util.UpdateSuspectedCL( |
| + repo_name, revision, commit_position, analysis_approach_type.HEURISTIC, |
| + master_name, builder_name, build_number, failure_type, |
| + cl_object.not_none_failures, cl_object.top_score) |
| + |
| + |
| def AnalyzeBuildFailure( |
| failure_info, change_logs, deps_info, failure_signals): |
| """Analyze the given failure signals, and figure out culprit CLs. |
| @@ -665,7 +706,9 @@ def AnalyzeBuildFailure( |
| failed_steps = failure_info['failed_steps'] |
| builds = failure_info['builds'] |
| - master_name = failure_info.get('master_name') |
| + master_name = failure_info['master_name'] |
| + |
| + cl_failure_map = defaultdict(_CLInfo) |
| for step_name, step_failure_info in failed_steps.iteritems(): |
| is_test_level = step_failure_info.get('tests') is not None |
| @@ -716,6 +759,10 @@ def AnalyzeBuildFailure( |
| test_analysis_result['suspected_cls'].append( |
| new_suspected_cl_dict) |
| + _SaveFailureToMap( |
|
stgao
2016/09/15 17:35:12
I'm wondering if it would be possible and cleaner
chanli
2016/09/16 21:24:19
I actually do datastore operations in a patch here
|
| + cl_failure_map, new_suspected_cl_dict, step_name, test_name, |
| + max(justification_dict.values())) |
| + |
| # Checks Files on step level using step level signals |
| # regardless of test level signals so we can make sure |
| # no duplicate justifications added to the step result. |
| @@ -726,11 +773,21 @@ def AnalyzeBuildFailure( |
| if not justification_dict: |
| continue |
| - step_analysis_result['suspected_cls'].append( |
| - CreateCLInfoDict(justification_dict, build_number, |
| - change_logs[revision])) |
| + new_suspected_cl_dict = CreateCLInfoDict( |
| + justification_dict, build_number, change_logs[revision]) |
| + step_analysis_result['suspected_cls'].append(new_suspected_cl_dict) |
| + |
| + if not is_test_level: |
| + _SaveFailureToMap( |
| + cl_failure_map, new_suspected_cl_dict, step_name, None, |
| + max(justification_dict.values())) |
| # TODO(stgao): sort CLs by score. |
| analysis_result['failures'].append(step_analysis_result) |
| + # Save suspected_cls to data_store. |
| + _SaveSuspectedCLs( |
| + cl_failure_map, failure_info['master_name'], failure_info['builder_name'], |
| + failure_info['build_number'], failure_info['failure_type']) |
| + |
| return analysis_result |