Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(191)

Side by Side Diff: appengine/findit/crash/findit.py

Issue 2663063007: [Predator] Switch from anonymous dict to CrashData. (Closed)
Patch Set: Rebase and fix delta test. Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import copy 5 import copy
6 import json 6 import json
7 import logging 7 import logging
8 8
9 from google.appengine.ext import ndb 9 from google.appengine.ext import ndb
10 10
11 from common import appengine_util 11 from common import appengine_util
12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher 12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher
13 from common import constants 13 from common import constants
14 from crash.component import Component 14 from crash.component import Component
15 from crash.component_classifier import ComponentClassifier 15 from crash.component_classifier import ComponentClassifier
16 from crash.crash_report import CrashReport
17 from crash.project import Project 16 from crash.project import Project
18 from crash.project_classifier import ProjectClassifier 17 from crash.project_classifier import ProjectClassifier
19 from libs import time_util 18 from libs import time_util
20 from model import analysis_status 19 from model import analysis_status
21 from model.crash.crash_config import CrashConfig
22 20
23 21
24 # TODO(http://crbug.com/659346): since most of our unit tests are 22 # TODO(http://crbug.com/659346): since most of our unit tests are
25 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py. 23 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py.
26 # However, now we're missing coverage for most of this file (due to the 24 # However, now we're missing coverage for most of this file (due to the
27 # buggy way coverage is computed). Need to add a bunch of new unittests 25 # buggy way coverage is computed). Need to add a bunch of new unittests
28 # to get coverage back up. 26 # to get coverage back up.
29 27
30 # TODO: this class depends on ndb stuff, and should therefore move to 28 # TODO: this class depends on ndb stuff, and should therefore move to
31 # cr-culprit-finder/service/predator as part of the big reorganization. 29 # cr-culprit-finder/service/predator as part of the big reorganization.
32 # This class should be renamed to avoid confustion between Findit and Predator. 30 # This class should be renamed to avoid confustion between Findit and Predator.
33 # Think of a good name (e.g.'PredatorApp') for this class. 31 # Think of a good name (e.g.'PredatorApp') for this class.
34 class Findit(object): 32 class Findit(object):
35 33
36 def __init__(self, get_repository, config): 34 def __init__(self, get_repository, config):
37 """ 35 """
38 Args: 36 Args:
39 get_repository (callable): a function from DEP urls to ``Repository`` 37 get_repository (callable): a function from DEP urls to ``Repository``
40 objects, so we can get changelogs and blame for each dep. Notably, 38 objects, so we can get changelogs and blame for each dep. Notably,
41 to keep the code here generic, we make no assumptions about 39 to keep the code here generic, we make no assumptions about
42 which subclass of ``Repository`` this function returns. Thus, 40 which subclass of ``Repository`` this function returns. Thus,
43 it is up to the caller to decide what class to return and handle 41 it is up to the caller to decide what class to return and handle
44 any other arguments that class may require (e.g., an http client 42 any other arguments that class may require (e.g., an http client
45 for ``GitilesRepository``). 43 for ``GitilesRepository``).
46 config (ndb.CrashConfig): Config for clients and project and component 44 config (ndb.CrashConfig): Config for clients and project and component
47 classifiers. 45 classifiers.
48 """ 46 """
49 self._get_repository = get_repository 47 self._get_repository = get_repository
50 self._dep_fetcher = ChromeDependencyFetcher(get_repository)
51 48
52 # The top_n is the number of frames we want to check to get project 49 # The top_n is the number of frames we want to check to get project
53 # classifications. 50 # classifications.
54 projects = [Project(name, path_regexs, function_regexs, host_directories) 51 projects = [Project(name, path_regexs, function_regexs, host_directories)
55 for name, path_regexs, function_regexs, host_directories 52 for name, path_regexs, function_regexs, host_directories
56 in config.project_classifier['project_path_function_hosts']] 53 in config.project_classifier['project_path_function_hosts']]
57 self._project_classifier = ProjectClassifier( 54 self._project_classifier = ProjectClassifier(
58 projects, config.project_classifier['top_n'], 55 projects, config.project_classifier['top_n'],
59 config.project_classifier['non_chromium_project_rank_priority']) 56 config.project_classifier['non_chromium_project_rank_priority'])
60 57
58 # The top_n is the number of frames we want to check to get component
59 # classifications.
61 components = [Component(component_name, path_regex, function_regex) 60 components = [Component(component_name, path_regex, function_regex)
62 for path_regex, function_regex, component_name 61 for path_regex, function_regex, component_name
63 in config.component_classifier['path_function_component']] 62 in config.component_classifier['path_function_component']]
64 self._component_classifier = ComponentClassifier( 63 self._component_classifier = ComponentClassifier(
65 components, config.component_classifier['top_n']) 64 components, config.component_classifier['top_n'])
66 65
67 self._config = config 66 self._config = config
68 67
69 # TODO(http://crbug.com/659354): because self.client is volatile,
70 # we need some way of updating the Predator instance whenever the
71 # config changes. How to do that cleanly?
72 self._predator = None
73 self._stacktrace_parser = None
74
75 # This is a class method because it should be the same for all 68 # This is a class method because it should be the same for all
76 # instances of this class. We can in fact call class methods on 69 # instances of this class. We can in fact call class methods on
77 # instances (not just on the class itself), so we could in principle 70 # instances (not just on the class itself), so we could in principle
78 # get by with just this method. However, a @classmethod is treated 71 # get by with just this method. However, a @classmethod is treated
79 # syntactically like a method, thus we'd need to have the ``()`` at the 72 # syntactically like a method, thus we'd need to have the ``()`` at the
80 # end, unlike for a @property. Thus we have both the class method and 73 # end, unlike for a @property. Thus we have both the class method and
81 # the property, in order to simulate a class property. 74 # the property, in order to simulate a class property.
82 @classmethod 75 @classmethod
83 def _ClientID(cls): # pragma: no cover 76 def _ClientID(cls): # pragma: no cover
84 """Get the client id for this class. 77 """Get the client id for this class.
85 78
86 This class method is private. Unless you really need to access 79 This class method is private. Unless you really need to access
87 this method directly for some reason, you should use the ``client_id`` 80 this method directly for some reason, you should use the ``client_id``
88 property instead. 81 property instead.
89 82
90 Returns: 83 Returns:
91 A string which is member of the CrashClient enumeration. 84 A string which is member of the CrashClient enumeration.
92 """ 85 """
93 if cls is Findit: 86 if cls is Findit:
94 logging.warning('Findit is abstract, ' 87 logging.warning('Findit is abstract, '
95 'but someone constructed an instance and called _ClientID') 88 'but someone constructed an instance and called _ClientID')
96 else: 89 else:
97 logging.warning('Findit subclass %s forgot to implement _ClientID', 90 logging.warning('Findit subclass %s forgot to implement _ClientID',
98 cls.__name__) 91 cls.__name__)
99 raise NotImplementedError() 92 raise NotImplementedError()
100 93
94 def _Predator(self):
95 raise NotImplementedError()
96
101 @property 97 @property
102 def client_id(self): 98 def client_id(self):
103 """Get the client id from the class of this object. 99 """Get the client id from the class of this object.
104 100
105 N.B., this property is static and should not be overridden.""" 101 N.B., this property is static and should not be overridden."""
106 return self._ClientID() 102 return self._ClientID()
107 103
108 # TODO(http://crbug.com/659354): can we remove the dependency on
109 # CrashConfig entirely? It'd be better to receive method calls
110 # whenever things change, so that we know the change happened (and
111 # what in particular changed) so that we can update our internal state
112 # as appropriate.
113 @property 104 @property
114 def client_config(self): 105 def client_config(self):
115 """Get the current value of the client config for the class of this object. 106 """Get the current value of the client config for the class of this object.
116 107
117 N.B., this property is volatile and may change asynchronously. 108 N.B., this property is volatile and may change asynchronously.
118 109
119 If the event of an error this method will return ``None``. That we do 110 If the event of an error this method will return ``None``. That we do
120 not return the empty dict is intentional. All of the circumstances 111 not return the empty dict is intentional. All of the circumstances
121 which would lead to this method returning ``None`` indicate 112 which would lead to this method returning ``None`` indicate
122 underlying bugs in code elsewhere. (E.g., creating instances of 113 underlying bugs in code elsewhere. (E.g., creating instances of
123 abstract classes, implementing a subclass which is not suppported by 114 abstract classes, implementing a subclass which is not suppported by
124 ``CrashConfig``, etc.) Because we return ``None``, any subsequent 115 ``CrashConfig``, etc.) Because we return ``None``, any subsequent
125 calls to ``__getitem__`` or ``get`` will raise method-missing errors; 116 calls to ``__getitem__`` or ``get`` will raise method-missing errors;
126 which serve to highlight the underlying bug. Whereas, if we silently 117 which serve to highlight the underlying bug. Whereas, if we silently
127 returned the empty dict, then calls to ``get`` would "work" just 118 returned the empty dict, then calls to ``get`` would "work" just
128 fine; thereby masking the underlying bug! 119 fine; thereby masking the underlying bug!
129 """ 120 """
130 return self._config.GetClientConfig(self.client_id) 121 return self._config.GetClientConfig(self.client_id)
131 122
132 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or 123 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or
133 # something like that. 124 # something like that.
134 def RenamePlatform(self, platform): 125 def RenamePlatform(self, platform):
135 """Remap the platform to a different one, based on the config.""" 126 """Remap the platform to a different one, based on the config."""
136 # TODO(katesonia): Remove the default value after adding validity check to 127 return self.client_config['platform_rename'].get(platform, platform)
137 # config.
138 return self.client_config.get('platform_rename', {}).get(platform, platform)
139 128
140 def CheckPolicy(self, crash_data): # pylint: disable=W0613 129 # TODO(http://crbug.com/644476): rename this to something like
141 """Check whether this client supports analyzing the given report. 130 # _NewAnalysis, since it only does the "allocation" and needs to/will
131 # be followed up with _InitializeAnalysis anyways.
132 def CreateAnalysis(self, crash_identifiers): # pragma: no cover
133 raise NotImplementedError()
134
135 def GetAnalysis(self, crash_identifiers): # pragma: no cover
136 """Returns the CrashAnalysis for the ``crash_identifiers``, if one exists.
137
138 Args:
139 crash_identifiers (JSON): key, value pairs to uniquely identify a crash.
140
141 Returns:
142 If a CrashAnalysis ndb.Model already exists for the
143 ``crash_identifiers``, then we return it. Otherwise, returns None.
144 """
145 raise NotImplementedError()
146
147 def GetCrashData(self, raw_crash_data): # pragma: no cover
148 """Gets ``CrashData`` object for raw json crash data from clients.
149
150 Args:
151 crash_data (JSON): Json input message from clients.
152
153 Returns:
154 Parsed ``CrashData`` object.
155 """
156 raise NotImplementedError()
157
158 def _CheckPolicy(self, crash_data): # pylint: disable=W0613
159 """Checks whether this client supports analyzing the given report.
142 160
143 Some clients only support analysis for crashes on certain platforms 161 Some clients only support analysis for crashes on certain platforms
144 or channels, etc. This method checks to see whether this client can 162 or channels, etc. This method checks to see whether this client can
145 analyze the given crash. The default implementation on the Findit 163 analyze the given crash. The default implementation on the Findit
146 base class returns None for everything, so that unsupported clients 164 base class returns None for everything, so that unsupported clients
147 reject everything, as expected. 165 reject everything, as expected.
148 166
149 Args: 167 Args:
150 crash_data (JSON): ?? 168 crash_data (CrashData): Parsed crash data from clients.
151 169
152 Returns: 170 Returns:
153 If satisfied, we return the ``crash_data`` which may have had some 171 Boolean to indicate whether the crash passed the policy check.
154 fields modified. Otherwise returns None.
155 """ 172 """
156 return None 173 if not self.client_config:
174 logging.info('No configuration of client %s, analysis is not supported',
175 self.client_id)
176 return True
157 177
158 # TODO(http://crbug.com/644476): rename this to something like 178 for blacklist_marker in self.client_config['signature_blacklist_markers']:
159 # _NewAnalysis, since it only does the "allocation" and needs to/will 179 if blacklist_marker in crash_data.signature:
160 # be followed up with _InitializeAnalysis anyways. 180 logging.info('%s signature is not supported.', blacklist_marker)
161 def CreateAnalysis(self, crash_identifiers): # pylint: disable=W0613 181 return False
162 return None
163 182
164 def GetAnalysis(self, crash_identifiers): # pylint: disable=W0613 183 return True
165 """Returns the CrashAnalysis for the ``crash_identifiers``, if one exists. 184
185 @ndb.transactional
186 def NeedsNewAnalysis(self, crash_data):
187 """Checks if the crash needs new anlysis.
188
189 If a new analysis needs to be scheduled, initialize a ``CrashAnalysis``
190 model using crash_data and put it in datastore for later culprit
191 analyzing.
166 192
167 Args: 193 Args:
168 crash_identifiers (JSON): ?? 194 crash_data (CrashData): Parsed crash data that contains all the
195 information about the crash.
169 196
170 Returns: 197 Returns:
171 If a CrashAnalysis ndb.Model already exists for the 198 Boolean of whether a new analysis needs to be scheduled.
172 ``crash_identifiers``, then we return it. Otherwise, returns None.
173 """ 199 """
174 return None 200 # Check policy and modify the raw_crash_data as needed.
201 if not self._CheckPolicy(crash_data):
202 logging.info('The analysis of %s is not supported, task will not be '
203 'scheduled.', str(crash_data.identifiers))
204 return False
175 205
176 # TODO(wrengr): this should be a method on CrashAnalysis, not on Findit. 206 # Rename platform if configured.
177 # TODO(http://crbug.com/659346): coverage tests for this class, not 207 crash_data.platform = self.RenamePlatform(crash_data.platform)
178 # just for FinditForFracas.
179 def _InitializeAnalysis(self, model, crash_data): # pragma: no cover
180 """(Re)Initialize a CrashAnalysis ndb.Model, but do not ``put()`` it yet.
181 208
182 This method is only ever called from _NeedsNewAnalysis which is only 209 model = self.GetAnalysis(crash_data.identifiers)
183 ever called from ScheduleNewAnalysis. It is used for filling in the 210 if (model and not model.failed and
184 fields of a CrashAnalysis ndb.Model for the first time (though it 211 crash_data.regression_range == (tuple(model.regression_range)
185 can also be used to re-initialize a given CrashAnalysis). Subclasses 212 if model.regression_range else None)):
186 should extend (not override) this to (re)initialize any 213 logging.info('The analysis of %s has already been done.',
187 client-specific fields they may have.""" 214 repr(crash_data.identifiers))
188 # Get rid of any previous values there may have been. 215 return False
189 model.Reset()
190 216
191 # Set the version. 217 model = model or self.CreateAnalysis(crash_data.identifiers)
192 # ``handlers.crash.test.crash_handler_test.testAnalysisScheduled`` 218 model.Initialize(crash_data)
193 # provides and expects this field to be called 'chrome_version', 219 model.put()
194 # whereas everyone else (e.g., in ``crash.test.crash_pipeline_test`` 220 return True
195 # the tests ``testAnalysisNeededIfNoAnalysisYet``,
196 # ``testRunningAnalysisNoSuspectsFound``, ``testRunningAnalysis``,
197 # ``testAnalysisNeededIfLastOneFailed``,
198 # ``testRunningAnalysisWithSuspectsCls``) expects it to be called
199 # 'crashed_version'. The latter is the better/more general name,
200 # so the former needs to be changed in order to get rid of this
201 # defaulting ugliness.
202 model.crashed_version = crash_data.get('crashed_version',
203 crash_data.get('chrome_version', None))
204 221
205 # Set (other) common properties. 222 def ProcessResultForPublishing(self, result, key): # pragma: no cover
206 model.stack_trace = crash_data['stack_trace']
207 model.signature = crash_data['signature']
208 model.platform = crash_data['platform']
209 # TODO(wrengr): The only reason to have _InitializeAnalysis as a
210 # method of the Findit class rather than as a method on CrashAnalysis
211 # is so we can assert that crash_data['client_id'] == self.client_id.
212 # So, either we should do that, or else we should move this to be
213 # a method on CrashAnalysis.
214 model.client_id = self.client_id
215 model.regression_range = crash_data.get('regression_range', None)
216
217 # Set progress properties.
218 model.status = analysis_status.PENDING
219 model.requested_time = time_util.GetUTCNow()
220
221 @ndb.transactional
222 def _NeedsNewAnalysis(self, crash_data):
223 raise NotImplementedError()
224
225 # TODO(wrengr): does the parser actually need the version, signature,
226 # and platform? If not, then we should be able to just pass the string
227 # to be parsed (which would make a lot more sense than passing the
228 # whole model).
229 # TODO(http://crbug.com/659346): coverage tests for this class, not
230 # just for FinditForFracas.
231 def ParseStacktrace(self, model): # pragma: no cover
232 """Parse a CrashAnalysis's ``stack_trace`` string into a Stacktrace object.
233
234 Args:
235 model (CrashAnalysis): The model containing the stack_trace string
236 to be parsed.
237
238 Returns:
239 On success, returns a Stacktrace object; on failure, returns None.
240 """
241 # Use up-to-date ``top_n`` in self.client_config to filter top n frames.
242 stacktrace = self._stacktrace_parser.Parse(
243 model.stack_trace,
244 self._dep_fetcher.GetDependency(model.crashed_version, model.platform),
245 model.signature,
246 self.client_config.get('top_n'))
247 if not stacktrace:
248 logging.warning('Failed to parse the stacktrace %s', model.stack_trace)
249 return None
250
251 return stacktrace
252
253 def ProcessResultForPublishing(self, result, key):
254 """Client specific processing of result data for publishing.""" 223 """Client specific processing of result data for publishing."""
255 raise NotImplementedError() 224 raise NotImplementedError()
256 225
257 def GetPublishableResult(self, crash_identifiers, analysis): 226 def GetPublishableResult(self, crash_identifiers, analysis):
258 """Convert a culprit result into a publishable result for client. 227 """Converts a culprit result into a publishable result for client.
259 228
260 Note, this function must be called by a concrete subclass of CrashAnalysis 229 Note, this function must be called by a concrete subclass of CrashAnalysis
261 which implements the ProcessResultForPublishing method. 230 which implements the ProcessResultForPublishing method.
262 231
263 Args: 232 Args:
264 crash_identifiers (dict): Dict containing identifiers that can uniquely 233 crash_identifiers (dict): Dict containing identifiers that can uniquely
265 identify CrashAnalysis entity. 234 identify CrashAnalysis entity.
266 analysis (CrashAnalysis model): Model containing culprit result and other 235 analysis (CrashAnalysis model): Model containing culprit result and other
267 analysis information. 236 analysis information.
268 237
(...skipping 15 matching lines...) Expand all
284 'client_id': self.client_id, 253 'client_id': self.client_id,
285 'result': result, 254 'result': result,
286 } 255 }
287 256
288 # TODO(wrengr): This is only called by ``CrashAnalysisPipeline.run``; 257 # TODO(wrengr): This is only called by ``CrashAnalysisPipeline.run``;
289 # we should be able to adjust things so that we only need to take in 258 # we should be able to adjust things so that we only need to take in
290 # ``crash_identifiers``, or a CrashReport, rather than taking in the 259 # ``crash_identifiers``, or a CrashReport, rather than taking in the
291 # whole model. And/or, we should just inline this there. 260 # whole model. And/or, we should just inline this there.
292 # TODO(http://crbug.com/659346): coverage tests for this class, not 261 # TODO(http://crbug.com/659346): coverage tests for this class, not
293 # just for FinditForFracas. 262 # just for FinditForFracas.
294 def FindCulprit(self, model): # pragma: no cover 263 def FindCulprit(self, crash_report): # pragma: no cover
295 """Given a CrashAnalysis ndb.Model, return a Culprit.""" 264 """Given a ``CrashReport``, returns a ``Culprit``."""
296 stacktrace = self.ParseStacktrace(model) 265 return self._Predator().FindCulprit(crash_report)
297 if stacktrace is None:
298 return None
299
300 return self._predator.FindCulprit(CrashReport(
301 crashed_version = model.crashed_version,
302 signature = model.signature,
303 platform = model.platform,
304 stacktrace = stacktrace,
305 regression_range = model.regression_range))
OLDNEW
« no previous file with comments | « appengine/findit/crash/crash_report_with_dependencies.py ('k') | appengine/findit/crash/findit_for_chromecrash.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698