Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(218)

Side by Side Diff: appengine/findit/crash/findit_for_chromecrash.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)
Patch Set: Finally fixed the mock tests! Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « appengine/findit/crash/findit.py ('k') | appengine/findit/crash/findit_for_client.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging 5 import logging
6 from collections import namedtuple 6
7 from google.appengine.ext import ndb
7 8
8 from common import chrome_dependency_fetcher 9 from common import chrome_dependency_fetcher
9 from crash import detect_regression_range 10 from crash import detect_regression_range
10 from crash import findit_for_crash 11 from crash.azalea import Azalea
12 from crash.changelist_classifier import ChangelistClassifier
11 from crash.chromecrash_parser import ChromeCrashParser 13 from crash.chromecrash_parser import ChromeCrashParser
12 from crash.project_classifier import ProjectClassifier
13 from crash.component_classifier import Component 14 from crash.component_classifier import Component
14 from crash.component_classifier import ComponentClassifier 15 from crash.component_classifier import ComponentClassifier
16 from crash.crash_report import CrashReport
17 from crash.culprit import NullCulprit
18 from crash.findit import Findit
19 from crash.project_classifier import ProjectClassifier
20 from crash.type_enums import CrashClient
21 from model.crash.cracas_crash_analysis import CracasCrashAnalysis
15 from model.crash.crash_config import CrashConfig 22 from model.crash.crash_config import CrashConfig
23 from model.crash.fracas_crash_analysis import FracasCrashAnalysis
16 24
17 # TODO(katesonia): Remove the default value after adding validity check to 25 # TODO(katesonia): Remove the default value after adding validity check to
18 # config. 26 # config.
19 _DEFAULT_TOP_N = 7 27 _DEFAULT_TOP_N = 7
20 28
29 # TODO(wrengr): [Note#1] in many places below we have to do some ugly
30 # defaulting in case crash_data is missing certain keys. If we had
31 # crash_data be a proper class, rather than an anonymous dict, then we
32 # could clean all this up by having the properties themselves do the check
33 # and return the default whenever keys are missing. This would also
34 # let us do things like have regression_range be automatically computed
35 # from historical_metadata (when historical_metadata is provided and
36 # regression_range is not).
21 37
22 # TODO(wrengr): move this to its own file, so it can be shared. When we do 38 class FinditForChromeCrash(Findit):
23 # so, we'll need to also pass in the 'solution' argument for the tag_dict. 39 """Find culprits for crash reports from the Chrome Crash server."""
24 class Culprit(namedtuple('Culprit',
25 ['project', 'components', 'cls', 'regression_range'])):
26 40
27 @property 41 @classmethod
28 def fields(self): 42 def _ClientID(cls): # pragma: no cover
29 return self._fields 43 if cls is FinditForChromeCrash:
44 logging.warning('FinditForChromeCrash is abstract, '
45 'but someone constructed an instance and called _ClientID')
46 else:
47 logging.warning(
48 'FinditForChromeCrash subclass %s forgot to implement _ClientID',
49 cls.__name__)
50 raise NotImplementedError()
30 51
31 # TODO(wrengr): better name for this method. 52 # TODO(http://crbug.com/659354): remove the dependency on CrashConfig
32 def ToDicts(self): 53 # entirely, by passing the relevant data as arguments to this constructor.
33 """Convert this object to a pair of anonymous dicts for JSON. 54 def __init__(self, repository, pipeline_cls):
55 super(FinditForChromeCrash, self).__init__(repository, pipeline_cls)
56 component_classifier_config = CrashConfig.Get().component_classifier
34 57
35 Returns: 58 self._stacktrace_parser = ChromeCrashParser()
36 (analysis_result_dict, tag_dict)
37 The analysis result is a dict like below:
38 {
39 # Indicate if Findit found any suspects_cls, project,
40 # components or regression_range.
41 "found": true,
42 "suspected_project": "chromium-v8", # Which project is most suspected.
43 "feedback_url": "https://.."
44 "suspected_cls": [
45 {
46 "revision": "commit-hash",
47 "url": "https://chromium.googlesource.com/chromium/src/+/...",
48 "review_url": "https://codereview.chromium.org/issue-number",
49 "project_path": "third_party/pdfium",
50 "author": "who@chromium.org",
51 "time": "2015-08-17 03:38:16",
52 "reason": "a plain string with '\n' as line break to expla..."
53 "reason": [('MinDistance', 1, 'minimum distance is 0.'),
54 ('TopFrame', 0.9, 'top frame is2nd frame.')],
55 "changed_files": [
56 {"file": "file_name1.cc",
57 "blame_url": "https://...",
58 "info": "minimum distance (LOC) 0, frame #2"},
59 {"file": "file_name2.cc",
60 "blame_url": "https://...",
61 "info": "minimum distance (LOC) 20, frame #4"},
62 ...
63 ],
64 "confidence": 0.60
65 },
66 ...,
67 ],
68 "regression_range": [ # Detected regression range.
69 "53.0.2765.0",
70 "53.0.2766.0"
71 ],
72 "suspected_components": [ # A list of crbug components to file bugs.
73 "Blink>JavaScript"
74 ]
75 }
76 59
77 The code review url might not always be available, because not all 60 # For how these two "top n" differ, see http://crbug.com/644476#c4
78 commits go through code review. In that case, commit url should 61 self._azalea = Azalea(
79 be used instead. 62 cl_classifier = ChangelistClassifier(
63 repository = repository,
64 top_n_frames = self.config.get('top_n', _DEFAULT_TOP_N)),
65 component_classifier = ComponentClassifier(
66 [Component(component_name, path_regex, function_regex)
67 for path_regex, function_regex, component_name
68 in component_classifier_config['path_function_component']],
69 component_classifier_config['top_n']),
70 project_classifier = ProjectClassifier())
80 71
81 The tag dict are allowed key/value pairs to tag the analysis result 72 def _InitializeAnalysis(self, model, crash_data):
82 for query and monitoring purpose on Findit side. For allowed keys, 73 super(FinditForChromeCrash, self)._InitializeAnalysis(model, crash_data)
83 please refer to crash_analysis.py and fracas_crash_analysis.py: 74 # TODO(wrengr): see Note#1
84 For results with normal culprit-finding algorithm: { 75 model.regression_range = crash_data.get('regression_range', None)
85 'found_suspects': True, 76 customized_data = crash_data.get('customized_data', {})
86 'has_regression_range': True, 77 model.channel = customized_data.get('channel', None)
87 'solution': 'core_algorithm', 78 model.historical_metadata = customized_data.get('historical_metadata', [])
88 }
89 For results using git blame without a regression range: {
90 'found_suspects': True,
91 'has_regression_range': False,
92 'solution': 'blame',
93 }
94 If nothing is found: {
95 'found_suspects': False,
96 }
97 """
98 cls_list = [result.ToDict() for result in self.cls]
99 79
100 # TODO(wrengr): reformulate the JSON stuff so we can drop fields which 80 # TODO(wrengr): see Note#1, which would allow us to lift this
101 # are empty; so that, in turn, we can get rid of the NullCulprit class. 81 # implementation to the Findit base class.
102 return ( 82 @ndb.transactional
103 { 83 def _NeedsNewAnalysis(self, crash_data):
104 'found': (bool(self.project) or 84 crash_identifiers = crash_data['crash_identifiers']
105 bool(self.components) or 85 historical_metadata = crash_data['customized_data']['historical_metadata']
106 bool(cls_list) or 86 model = self.GetAnalysis(crash_identifiers)
107 bool(self.regression_range)), 87 # N.B., for mocking reasons, we must not call DetectRegressionRange
108 'regression_range': self.regression_range, 88 # directly, but rather must access it indirectly through the module.
109 'suspected_project': self.project, 89 new_regression_range = detect_regression_range.DetectRegressionRange(
110 'suspected_components': self.components, 90 historical_metadata)
111 'suspected_cls': cls_list, 91 if (model and not model.failed and
112 }, 92 new_regression_range == model.regression_range):
113 { 93 logging.info('The analysis of %s has already been done.',
114 'found_suspects': bool(cls_list), 94 repr(crash_identifiers))
115 'found_project': bool(self.project), 95 return False
116 'found_components': bool(self.components), 96
117 'has_regression_range': bool(self.regression_range), 97 if not model:
118 'solution': 'core_algorithm', 98 model = self.CreateAnalysis(crash_identifiers)
119 } 99
120 ) 100 crash_data['regression_range'] = new_regression_range
101 self._InitializeAnalysis(model, crash_data)
102 model.put()
103 return True
104
105 def CheckPolicy(self, crash_data):
106 crash_identifiers = crash_data['crash_identifiers']
107 platform = crash_data['platform']
108 # TODO(wrengr): see Note#1
109 channel = crash_data.get('customized_data', {}).get('channel', None)
110 # TODO(katesonia): Remove the default value after adding validity check to
111 # config.
112 if platform not in self.config.get(
113 'supported_platform_list_by_channel', {}).get(channel, []):
114 # Bail out if either the channel or platform is not supported yet.
115 logging.info('Analysis of channel %s, platform %s is not supported. '
116 'No analysis is scheduled for %s',
117 channel, platform, repr(crash_identifiers))
118 return None
119
120 signature = crash_data['signature']
121 # TODO(wrengr): can this blacklist stuff be lifted to the base class?
122 # TODO(katesonia): Remove the default value after adding validity check to
123 # config.
124 for blacklist_marker in self.config.get('signature_blacklist_markers', []):
125 if blacklist_marker in signature:
126 logging.info('%s signature is not supported. '
127 'No analysis is scheduled for %s', blacklist_marker,
128 repr(crash_identifiers))
129 return None
130
131 # TODO(wrengr): should we clone |crash_data| rather than mutating it?
132 crash_data['platform'] = self.RenamePlatform(platform)
133 return crash_data
121 134
122 135
123 # TODO(wrengr): Either (a) reformulate the unit tests so that FindCulprit 136 # TODO(http://crbug.com/659346): we misplaced the coverage tests; find them!
124 # can, in fact, return None; or else (b) reformulate the JSON stuff so 137 class FinditForCracas(FinditForChromeCrash): # pragma: no cover
125 # that the various fields are optional, so that we can just use 138 @classmethod
126 # Culprit('', [], [], None) directly. 139 def _ClientID(cls):
127 class NullCulprit(object): 140 return CrashClient.CRACAS
128 """A helper class so FindCulprit doesn't return None.
129 141
130 This class is analogous to Culprit(None, [], [], None), except that the 142 def CreateAnalysis(self, crash_identifiers):
131 result of the ToDicts method is more minimalistic.""" 143 # TODO: inline CracasCrashAnalysis.Create stuff here.
144 return CracasCrashAnalysis.Create(crash_identifiers)
132 145
133 def ToDicts(self): 146 def GetAnalysis(self, crash_identifiers):
134 return ( 147 # TODO: inline CracasCrashAnalysis.Get stuff here.
135 {'found': False}, 148 return CracasCrashAnalysis.Get(crash_identifiers)
136 {'found_suspects': False,
137 'has_regression_range': False}
138 )
139 149
140 150
141 # TODO(wrengr): better name for this class. Given the "findit_for_*.py" 151 class FinditForFracas(FinditForChromeCrash):
142 # file names, one might suspect that each of those files implements 152 @classmethod
143 # something analogous to this file, and hence we could make a superclass 153 def _ClientID(cls):
144 # to factor out the common bits. However, in truth, all those files 154 return CrashClient.FRACAS
145 # are unrelated and do completely different things.
146 class FinditForChromeCrash(object):
147 """Process crashes from Chrome crash server and find culprits for them.
148 155
149 Even though this class has only one method, it is helpful because it 156 def CreateAnalysis(self, crash_identifiers):
150 allows us to cache things which should outlive each call to that method. 157 # TODO: inline FracasCrashAnalysis.Create stuff here.
151 For example, we store a single ComponentClassifier object so that we 158 return FracasCrashAnalysis.Create(crash_identifiers)
152 only compile the regexes for each Component object once, rather than
153 doing so on each call to FindCulprit. In addition, the class lets
154 us cache various configuration options so that we need not depend
155 on CrashConfig; thereby decoupling the analysis itself from UX concerns
156 about deciding how to run those analyses.
157 """
158 # TODO(wrengr): remove the dependency on CrashConfig entirely, by
159 # passing the relevant data as arguments to this constructor.
160 def __init__(self, repository):
161 crash_config = CrashConfig.Get()
162 component_classifier_config = crash_config.component_classifier
163 159
164 # TODO(wrengr): why are these two different? 160 def GetAnalysis(self, crash_identifiers):
165 component_classifier_top_n = component_classifier_config['top_n'] 161 # TODO: inline FracasCrashAnalysis.Get stuff here.
166 self._fracas_top_n = crash_config.fracas.get('top_n', _DEFAULT_TOP_N) 162 return FracasCrashAnalysis.Get(crash_identifiers)
167
168 self.component_classifier = ComponentClassifier(
169 [Component(component_name, path_regex, function_regex)
170 for path_regex, function_regex, component_name
171 in component_classifier_config['path_function_component']],
172 component_classifier_top_n)
173
174 # TODO(wrengr); fix ProjectClassifier so it doesn't depend on CrashConfig.
175 self.project_classifier = ProjectClassifier()
176 # TODO(katesonia): Lift this to Findit superclass after refatoring cl
177 # committed.
178 self.repository = repository
179 self.dep_fetcher = chrome_dependency_fetcher.ChromeDependencyFetcher(
180 repository)
181
182 # TODO(wrengr): since this is the only method of interest, it would
183 # be better IMO to rename it to __call__ to reduce verbosity.
184 def FindCulprit(self, signature, platform, stack_trace, crashed_version,
185 regression_range):
186 """Finds culprits for a Chrome crash.
187
188 Args:
189 signature (str): The signature of a crash on the Chrome crash server.
190 platform (str): The platform name, could be 'win', 'mac', 'linux',
191 'android', 'ios', etc.
192 stack_trace (str): A string containing the stack trace of a crash.
193 crashed_version (str): The version of Chrome in which the crash occurred.
194 regression_range (list or None): [good_version, bad_revision] or None.
195
196 Returns:
197 A Culprit object.
198 """
199 crash_deps = self.dep_fetcher.GetDependency(crashed_version, platform)
200 stacktrace = ChromeCrashParser().Parse(stack_trace, crash_deps, signature)
201 if not stacktrace:
202 logging.warning('Failed to parse the stacktrace %s', stack_trace)
203 # TODO(wrengr): refactor things so we don't need the NullCulprit class.
204 return NullCulprit()
205
206 # Get regression deps and crash deps.
207 regression_deps_rolls = {}
208 if regression_range:
209 last_good_version, first_bad_version = regression_range
210 logging.info('Find regression range %s:%s', last_good_version,
211 first_bad_version)
212 regression_deps_rolls = self.dep_fetcher.GetDependencyRollsDict(
213 last_good_version, first_bad_version, platform)
214
215 suspected_cls = findit_for_crash.FindItForCrash(
216 stacktrace, regression_deps_rolls, crash_deps, self._fracas_top_n,
217 self.repository)
218
219 crash_stack = stacktrace.crash_stack
220 suspected_project = self.project_classifier.Classify(
221 suspected_cls, crash_stack)
222
223 suspected_components = self.component_classifier.Classify(
224 suspected_cls, crash_stack)
225
226 return Culprit(suspected_project, suspected_components, suspected_cls,
227 regression_range)
OLDNEW
« no previous file with comments | « appengine/findit/crash/findit.py ('k') | appengine/findit/crash/findit_for_client.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698