Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import copy | 5 import copy |
| 6 import json | 6 import json |
| 7 import logging | 7 import logging |
| 8 | 8 |
| 9 from google.appengine.ext import ndb | 9 from google.appengine.ext import ndb |
| 10 | 10 |
| 11 from common import appengine_util | 11 from common import appengine_util |
| 12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher | 12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher |
| 13 from common import constants | 13 from common import constants |
| 14 from crash.component import Component | 14 from crash.component import Component |
| 15 from crash.component_classifier import ComponentClassifier | 15 from crash.component_classifier import ComponentClassifier |
| 16 from crash.crash_report import CrashReport | |
| 17 from crash.project import Project | 16 from crash.project import Project |
| 18 from crash.project_classifier import ProjectClassifier | 17 from crash.project_classifier import ProjectClassifier |
| 19 from libs import time_util | 18 from libs import time_util |
| 20 from model import analysis_status | 19 from model import analysis_status |
| 21 from model.crash.crash_config import CrashConfig | |
| 22 | 20 |
| 23 | 21 |
| 24 # TODO(http://crbug.com/659346): since most of our unit tests are | 22 # TODO(http://crbug.com/659346): since most of our unit tests are |
| 25 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py. | 23 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py. |
| 26 # However, now we're missing coverage for most of this file (due to the | 24 # However, now we're missing coverage for most of this file (due to the |
| 27 # buggy way coverage is computed). Need to add a bunch of new unittests | 25 # buggy way coverage is computed). Need to add a bunch of new unittests |
| 28 # to get coverage back up. | 26 # to get coverage back up. |
| 29 | 27 |
| 30 # TODO: this class depends on ndb stuff, and should therefore move to | 28 # TODO: this class depends on ndb stuff, and should therefore move to |
| 31 # cr-culprit-finder/service/predator as part of the big reorganization. | 29 # cr-culprit-finder/service/predator as part of the big reorganization. |
| 32 # This class should be renamed to avoid confustion between Findit and Predator. | 30 # This class should be renamed to avoid confustion between Findit and Predator. |
| 33 # Think of a good name (e.g.'PredatorApp') for this class. | 31 # Think of a good name (e.g.'PredatorApp') for this class. |
| 34 class Findit(object): | 32 class Findit(object): |
| 35 | 33 |
| 36 def __init__(self, get_repository, config): | 34 def __init__(self, get_repository, config): |
| 37 """ | 35 """ |
| 38 Args: | 36 Args: |
| 39 get_repository (callable): a function from DEP urls to ``Repository`` | 37 get_repository (callable): a function from DEP urls to ``Repository`` |
| 40 objects, so we can get changelogs and blame for each dep. Notably, | 38 objects, so we can get changelogs and blame for each dep. Notably, |
| 41 to keep the code here generic, we make no assumptions about | 39 to keep the code here generic, we make no assumptions about |
| 42 which subclass of ``Repository`` this function returns. Thus, | 40 which subclass of ``Repository`` this function returns. Thus, |
| 43 it is up to the caller to decide what class to return and handle | 41 it is up to the caller to decide what class to return and handle |
| 44 any other arguments that class may require (e.g., an http client | 42 any other arguments that class may require (e.g., an http client |
| 45 for ``GitilesRepository``). | 43 for ``GitilesRepository``). |
| 46 config (ndb.CrashConfig): Config for clients and project and component | 44 config (ndb.CrashConfig): Config for clients and project and component |
| 47 classifiers. | 45 classifiers. |
| 48 """ | 46 """ |
| 49 self._get_repository = get_repository | 47 self._get_repository = get_repository |
| 50 self._dep_fetcher = ChromeDependencyFetcher(get_repository) | |
| 51 | 48 |
| 52 # The top_n is the number of frames we want to check to get project | 49 # The top_n is the number of frames we want to check to get project |
| 53 # classifications. | 50 # classifications. |
| 54 projects = [Project(name, path_regexs, function_regexs, host_directories) | 51 projects = [Project(name, path_regexs, function_regexs, host_directories) |
| 55 for name, path_regexs, function_regexs, host_directories | 52 for name, path_regexs, function_regexs, host_directories |
| 56 in config.project_classifier['project_path_function_hosts']] | 53 in config.project_classifier['project_path_function_hosts']] |
| 57 self._project_classifier = ProjectClassifier( | 54 self._project_classifier = ProjectClassifier( |
| 58 projects, config.project_classifier['top_n'], | 55 projects, config.project_classifier['top_n'], |
| 59 config.project_classifier['non_chromium_project_rank_priority']) | 56 config.project_classifier['non_chromium_project_rank_priority']) |
| 60 | 57 |
| 58 # The top_n is the number of frames we want to check to get component | |
| 59 # classifications. | |
| 61 components = [Component(component_name, path_regex, function_regex) | 60 components = [Component(component_name, path_regex, function_regex) |
| 62 for path_regex, function_regex, component_name | 61 for path_regex, function_regex, component_name |
| 63 in config.component_classifier['path_function_component']] | 62 in config.component_classifier['path_function_component']] |
| 64 self._component_classifier = ComponentClassifier( | 63 self._component_classifier = ComponentClassifier( |
| 65 components, config.component_classifier['top_n']) | 64 components, config.component_classifier['top_n']) |
| 66 | 65 |
| 67 self._config = config | 66 self._config = config |
| 68 | 67 |
| 69 # TODO(http://crbug.com/659354): because self.client is volatile, | |
| 70 # we need some way of updating the Predator instance whenever the | |
| 71 # config changes. How to do that cleanly? | |
| 72 self._predator = None | |
| 73 self._stacktrace_parser = None | |
| 74 | |
| 75 # This is a class method because it should be the same for all | 68 # This is a class method because it should be the same for all |
| 76 # instances of this class. We can in fact call class methods on | 69 # instances of this class. We can in fact call class methods on |
| 77 # instances (not just on the class itself), so we could in principle | 70 # instances (not just on the class itself), so we could in principle |
| 78 # get by with just this method. However, a @classmethod is treated | 71 # get by with just this method. However, a @classmethod is treated |
| 79 # syntactically like a method, thus we'd need to have the ``()`` at the | 72 # syntactically like a method, thus we'd need to have the ``()`` at the |
| 80 # end, unlike for a @property. Thus we have both the class method and | 73 # end, unlike for a @property. Thus we have both the class method and |
| 81 # the property, in order to simulate a class property. | 74 # the property, in order to simulate a class property. |
| 82 @classmethod | 75 @classmethod |
| 83 def _ClientID(cls): # pragma: no cover | 76 def _ClientID(cls): # pragma: no cover |
| 84 """Get the client id for this class. | 77 """Get the client id for this class. |
| 85 | 78 |
| 86 This class method is private. Unless you really need to access | 79 This class method is private. Unless you really need to access |
| 87 this method directly for some reason, you should use the ``client_id`` | 80 this method directly for some reason, you should use the ``client_id`` |
| 88 property instead. | 81 property instead. |
| 89 | 82 |
| 90 Returns: | 83 Returns: |
| 91 A string which is member of the CrashClient enumeration. | 84 A string which is member of the CrashClient enumeration. |
| 92 """ | 85 """ |
| 93 if cls is Findit: | 86 if cls is Findit: |
| 94 logging.warning('Findit is abstract, ' | 87 logging.warning('Findit is abstract, ' |
| 95 'but someone constructed an instance and called _ClientID') | 88 'but someone constructed an instance and called _ClientID') |
| 96 else: | 89 else: |
| 97 logging.warning('Findit subclass %s forgot to implement _ClientID', | 90 logging.warning('Findit subclass %s forgot to implement _ClientID', |
| 98 cls.__name__) | 91 cls.__name__) |
| 99 raise NotImplementedError() | 92 raise NotImplementedError() |
| 100 | 93 |
| 94 def _Predator(self): | |
| 95 raise NotImplementedError() | |
| 96 | |
| 101 @property | 97 @property |
| 102 def client_id(self): | 98 def client_id(self): |
| 103 """Get the client id from the class of this object. | 99 """Get the client id from the class of this object. |
| 104 | 100 |
| 105 N.B., this property is static and should not be overridden.""" | 101 N.B., this property is static and should not be overridden.""" |
| 106 return self._ClientID() | 102 return self._ClientID() |
| 107 | 103 |
| 108 # TODO(http://crbug.com/659354): can we remove the dependency on | |
| 109 # CrashConfig entirely? It'd be better to receive method calls | |
| 110 # whenever things change, so that we know the change happened (and | |
| 111 # what in particular changed) so that we can update our internal state | |
| 112 # as appropriate. | |
| 113 @property | 104 @property |
| 114 def client_config(self): | 105 def client_config(self): |
| 115 """Get the current value of the client config for the class of this object. | 106 """Get the current value of the client config for the class of this object. |
| 116 | 107 |
| 117 N.B., this property is volatile and may change asynchronously. | 108 N.B., this property is volatile and may change asynchronously. |
| 118 | 109 |
| 119 If the event of an error this method will return ``None``. That we do | 110 If the event of an error this method will return ``None``. That we do |
| 120 not return the empty dict is intentional. All of the circumstances | 111 not return the empty dict is intentional. All of the circumstances |
| 121 which would lead to this method returning ``None`` indicate | 112 which would lead to this method returning ``None`` indicate |
| 122 underlying bugs in code elsewhere. (E.g., creating instances of | 113 underlying bugs in code elsewhere. (E.g., creating instances of |
| 123 abstract classes, implementing a subclass which is not suppported by | 114 abstract classes, implementing a subclass which is not suppported by |
| 124 ``CrashConfig``, etc.) Because we return ``None``, any subsequent | 115 ``CrashConfig``, etc.) Because we return ``None``, any subsequent |
| 125 calls to ``__getitem__`` or ``get`` will raise method-missing errors; | 116 calls to ``__getitem__`` or ``get`` will raise method-missing errors; |
| 126 which serve to highlight the underlying bug. Whereas, if we silently | 117 which serve to highlight the underlying bug. Whereas, if we silently |
| 127 returned the empty dict, then calls to ``get`` would "work" just | 118 returned the empty dict, then calls to ``get`` would "work" just |
| 128 fine; thereby masking the underlying bug! | 119 fine; thereby masking the underlying bug! |
| 129 """ | 120 """ |
| 130 return self._config.GetClientConfig(self.client_id) | 121 return self._config.GetClientConfig(self.client_id) |
| 131 | 122 |
| 132 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or | 123 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or |
| 133 # something like that. | 124 # something like that. |
| 134 def RenamePlatform(self, platform): | 125 def RenamePlatform(self, platform): |
| 135 """Remap the platform to a different one, based on the config.""" | 126 """Remap the platform to a different one, based on the config.""" |
| 136 # TODO(katesonia): Remove the default value after adding validity check to | 127 return self.client_config['platform_rename'].get(platform, platform) |
| 137 # config. | |
| 138 return self.client_config.get('platform_rename', {}).get(platform, platform) | |
| 139 | |
| 140 def CheckPolicy(self, crash_data): # pylint: disable=W0613 | |
| 141 """Check whether this client supports analyzing the given report. | |
| 142 | |
| 143 Some clients only support analysis for crashes on certain platforms | |
| 144 or channels, etc. This method checks to see whether this client can | |
| 145 analyze the given crash. The default implementation on the Findit | |
| 146 base class returns None for everything, so that unsupported clients | |
| 147 reject everything, as expected. | |
| 148 | |
| 149 Args: | |
| 150 crash_data (JSON): ?? | |
| 151 | |
| 152 Returns: | |
| 153 If satisfied, we return the ``crash_data`` which may have had some | |
| 154 fields modified. Otherwise returns None. | |
| 155 """ | |
| 156 return None | |
| 157 | 128 |
| 158 # TODO(http://crbug.com/644476): rename this to something like | 129 # TODO(http://crbug.com/644476): rename this to something like |
| 159 # _NewAnalysis, since it only does the "allocation" and needs to/will | 130 # _NewAnalysis, since it only does the "allocation" and needs to/will |
| 160 # be followed up with _InitializeAnalysis anyways. | 131 # be followed up with _InitializeAnalysis anyways. |
| 161 def CreateAnalysis(self, crash_identifiers): # pylint: disable=W0613 | 132 def CreateAnalysis(self, crash_identifiers): # pragma: no cover |
| 162 return None | 133 raise NotImplementedError() |
| 163 | 134 |
| 164 def GetAnalysis(self, crash_identifiers): # pylint: disable=W0613 | 135 def GetAnalysis(self, crash_identifiers): # pragma: no cover |
| 165 """Returns the CrashAnalysis for the ``crash_identifiers``, if one exists. | 136 """Returns the CrashAnalysis for the ``crash_identifiers``, if one exists. |
| 166 | 137 |
| 167 Args: | 138 Args: |
| 168 crash_identifiers (JSON): ?? | 139 crash_identifiers (JSON): ?? |
| 169 | 140 |
| 170 Returns: | 141 Returns: |
| 171 If a CrashAnalysis ndb.Model already exists for the | 142 If a CrashAnalysis ndb.Model already exists for the |
| 172 ``crash_identifiers``, then we return it. Otherwise, returns None. | 143 ``crash_identifiers``, then we return it. Otherwise, returns None. |
| 173 """ | 144 """ |
| 174 return None | 145 raise NotImplementedError() |
| 175 | 146 |
| 176 # TODO(wrengr): this should be a method on CrashAnalysis, not on Findit. | 147 def GetCrashData(self, raw_crash_data): # pragma: no cover |
| 177 # TODO(http://crbug.com/659346): coverage tests for this class, not | 148 raise NotImplementedError() |
| 178 # just for FinditForFracas. | |
| 179 def _InitializeAnalysis(self, model, crash_data): # pragma: no cover | |
| 180 """(Re)Initialize a CrashAnalysis ndb.Model, but do not ``put()`` it yet. | |
| 181 | 149 |
| 182 This method is only ever called from _NeedsNewAnalysis which is only | 150 def _CheckPolicy(self, crash_data): # pylint: disable=W0613 |
| 183 ever called from ScheduleNewAnalysis. It is used for filling in the | 151 """Check whether this client supports analyzing the given report. |
| 184 fields of a CrashAnalysis ndb.Model for the first time (though it | |
| 185 can also be used to re-initialize a given CrashAnalysis). Subclasses | |
| 186 should extend (not override) this to (re)initialize any | |
| 187 client-specific fields they may have.""" | |
| 188 # Get rid of any previous values there may have been. | |
| 189 model.Reset() | |
| 190 | 152 |
| 191 # Set the version. | 153 Some clients only support analysis for crashes on certain platforms |
| 192 # ``handlers.crash.test.crash_handler_test.testAnalysisScheduled`` | 154 or channels, etc. This method checks to see whether this client can |
| 193 # provides and expects this field to be called 'chrome_version', | 155 analyze the given crash. The default implementation on the Findit |
| 194 # whereas everyone else (e.g., in ``crash.test.crash_pipeline_test`` | 156 base class returns None for everything, so that unsupported clients |
| 195 # the tests ``testAnalysisNeededIfNoAnalysisYet``, | 157 reject everything, as expected. |
| 196 # ``testRunningAnalysisNoSuspectsFound``, ``testRunningAnalysis``, | |
| 197 # ``testAnalysisNeededIfLastOneFailed``, | |
| 198 # ``testRunningAnalysisWithSuspectsCls``) expects it to be called | |
| 199 # 'crashed_version'. The latter is the better/more general name, | |
| 200 # so the former needs to be changed in order to get rid of this | |
| 201 # defaulting ugliness. | |
| 202 model.crashed_version = crash_data.get('crashed_version', | |
| 203 crash_data.get('chrome_version', None)) | |
| 204 | 158 |
| 205 # Set (other) common properties. | 159 Args: |
| 206 model.stack_trace = crash_data['stack_trace'] | 160 raw_crash_data (JSON): ?? |
| 207 model.signature = crash_data['signature'] | |
| 208 model.platform = crash_data['platform'] | |
| 209 # TODO(wrengr): The only reason to have _InitializeAnalysis as a | |
| 210 # method of the Findit class rather than as a method on CrashAnalysis | |
| 211 # is so we can assert that crash_data['client_id'] == self.client_id. | |
| 212 # So, either we should do that, or else we should move this to be | |
| 213 # a method on CrashAnalysis. | |
| 214 model.client_id = self.client_id | |
| 215 model.regression_range = crash_data.get('regression_range', None) | |
| 216 | 161 |
| 217 # Set progress properties. | 162 Returns: |
| 218 model.status = analysis_status.PENDING | 163 Boolean to indicate whether the crash passed the policy check. |
| 219 model.requested_time = time_util.GetUTCNow() | 164 """ |
| 165 if not self.client_config: | |
| 166 logging.info('No configuration of client %s, analysis is not supported', | |
| 167 self.client_id) | |
| 168 return True | |
| 169 | |
| 170 for blacklist_marker in self.client_config['signature_blacklist_markers']: | |
| 171 if blacklist_marker in crash_data.signature: | |
| 172 logging.info('%s signature is not supported.', blacklist_marker) | |
| 173 return False | |
| 174 | |
| 175 return True | |
| 220 | 176 |
| 221 @ndb.transactional | 177 @ndb.transactional |
| 222 def _NeedsNewAnalysis(self, crash_data): | 178 def NeedsNewAnalysis(self, crash_data): |
| 223 raise NotImplementedError() | 179 """Checks if the crash needs new anlysis. |
| 224 | 180 |
| 225 # TODO(wrengr): does the parser actually need the version, signature, | 181 If a new analysis needs to be scheduled, initialize a ``CrashAnalysis`` |
| 226 # and platform? If not, then we should be able to just pass the string | 182 model using crash_data and put it in datastore for later culprit |
| 227 # to be parsed (which would make a lot more sense than passing the | 183 analyzing. |
| 228 # whole model). | |
| 229 # TODO(http://crbug.com/659346): coverage tests for this class, not | |
| 230 # just for FinditForFracas. | |
| 231 def ParseStacktrace(self, model): # pragma: no cover | |
| 232 """Parse a CrashAnalysis's ``stack_trace`` string into a Stacktrace object. | |
| 233 | 184 |
| 234 Args: | 185 Args: |
| 235 model (CrashAnalysis): The model containing the stack_trace string | 186 crash_data (CrashData): Crash buffer that contains all the information |
| 236 to be parsed. | 187 about the crash. |
| 237 | 188 |
| 238 Returns: | 189 Returns: |
| 239 On success, returns a Stacktrace object; on failure, returns None. | 190 Boolean of whether a new analysis needs to be scheduled. |
| 240 """ | 191 """ |
| 241 # Use up-to-date ``top_n`` in self.client_config to filter top n frames. | 192 # Check policy and modify the raw_crash_data as needed. |
| 242 stacktrace = self._stacktrace_parser.Parse( | 193 if not self._CheckPolicy(crash_data): |
| 243 model.stack_trace, | 194 logging.info('The analysis of %s is not supported, task will not be ' |
| 244 self._dep_fetcher.GetDependency(model.crashed_version, model.platform), | 195 'scheduled.', str(crash_data.identifiers)) |
| 245 model.signature, | 196 return False |
| 246 self.client_config.get('top_n')) | |
| 247 if not stacktrace: | |
| 248 logging.warning('Failed to parse the stacktrace %s', model.stack_trace) | |
| 249 return None | |
| 250 | 197 |
| 251 return stacktrace | 198 # Rename platform if configured. |
| 199 crash_data.platform = self.RenamePlatform(crash_data.platform) | |
| 252 | 200 |
| 253 def ProcessResultForPublishing(self, result, key): | 201 model = self.GetAnalysis(crash_data.identifiers) |
| 202 # N.B., for mocking reasons, we must not call DetectRegressionRange | |
| 203 # directly, but rather must access it indirectly through the module. | |
|
wrengr (wrong one)
2017/02/11 00:11:34
This comment is no longer relevant, since we don't
Sharu Jiang
2017/02/11 00:41:20
Done.
| |
| 204 if (model and not model.failed and | |
| 205 crash_data.regression_range == (tuple(model.regression_range) | |
| 206 if model.regression_range else | |
| 207 model.regression_range)): | |
|
wrengr (wrong one)
2017/02/11 00:11:34
this seems a bit strange/convoluted. Why not do so
Sharu Jiang
2017/02/11 00:41:20
If a crash doesn't have regression range, it shoul
wrengr (wrong one)
2017/02/11 01:14:54
ah.
Maybe make the else-branch explicitly return
Sharu Jiang
2017/02/13 18:40:05
Done.
| |
| 208 logging.info('The analysis of %s has already been done.', | |
| 209 repr(crash_data.identifiers)) | |
| 210 return False | |
| 211 | |
| 212 model = model or self.CreateAnalysis(crash_data.identifiers) | |
| 213 model.Initialize(crash_data) | |
| 214 model.put() | |
| 215 return True | |
| 216 | |
| 217 def ProcessResultForPublishing(self, result, key): # pragma: no cover | |
| 254 """Client specific processing of result data for publishing.""" | 218 """Client specific processing of result data for publishing.""" |
| 255 raise NotImplementedError() | 219 raise NotImplementedError() |
| 256 | 220 |
| 257 def GetPublishableResult(self, crash_identifiers, analysis): | 221 def GetPublishableResult(self, crash_identifiers, analysis): |
| 258 """Convert a culprit result into a publishable result for client. | 222 """Converts a culprit result into a publishable result for client. |
| 259 | 223 |
| 260 Note, this function must be called by a concrete subclass of CrashAnalysis | 224 Note, this function must be called by a concrete subclass of CrashAnalysis |
| 261 which implements the ProcessResultForPublishing method. | 225 which implements the ProcessResultForPublishing method. |
| 262 | 226 |
| 263 Args: | 227 Args: |
| 264 crash_identifiers (dict): Dict containing identifiers that can uniquely | 228 crash_identifiers (dict): Dict containing identifiers that can uniquely |
| 265 identify CrashAnalysis entity. | 229 identify CrashAnalysis entity. |
| 266 analysis (CrashAnalysis model): Model containing culprit result and other | 230 analysis (CrashAnalysis model): Model containing culprit result and other |
| 267 analysis information. | 231 analysis information. |
| 268 | 232 |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 284 'client_id': self.client_id, | 248 'client_id': self.client_id, |
| 285 'result': result, | 249 'result': result, |
| 286 } | 250 } |
| 287 | 251 |
| 288 # TODO(wrengr): This is only called by ``CrashAnalysisPipeline.run``; | 252 # TODO(wrengr): This is only called by ``CrashAnalysisPipeline.run``; |
| 289 # we should be able to adjust things so that we only need to take in | 253 # we should be able to adjust things so that we only need to take in |
| 290 # ``crash_identifiers``, or a CrashReport, rather than taking in the | 254 # ``crash_identifiers``, or a CrashReport, rather than taking in the |
| 291 # whole model. And/or, we should just inline this there. | 255 # whole model. And/or, we should just inline this there. |
| 292 # TODO(http://crbug.com/659346): coverage tests for this class, not | 256 # TODO(http://crbug.com/659346): coverage tests for this class, not |
| 293 # just for FinditForFracas. | 257 # just for FinditForFracas. |
| 294 def FindCulprit(self, model): # pragma: no cover | 258 def FindCulprit(self, crash_report): # pragma: no cover |
| 295 """Given a CrashAnalysis ndb.Model, return a Culprit.""" | 259 """Given a CrashAnalysis ndb.Model, returns a Culprit.""" |
| 296 stacktrace = self.ParseStacktrace(model) | 260 return self._Predator().FindCulprit(crash_report) |
| 297 if stacktrace is None: | |
| 298 return None | |
| 299 | |
| 300 return self._predator.FindCulprit(CrashReport( | |
| 301 crashed_version = model.crashed_version, | |
| 302 signature = model.signature, | |
| 303 platform = model.platform, | |
| 304 stacktrace = stacktrace, | |
| 305 regression_range = model.regression_range)) | |
| OLD | NEW |