Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import copy | 5 import copy |
| 6 import json | 6 import json |
| 7 import logging | 7 import logging |
| 8 | 8 |
| 9 from google.appengine.ext import ndb | 9 from google.appengine.ext import ndb |
| 10 | 10 |
| 11 from common import appengine_util | 11 from common import appengine_util |
| 12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher | 12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher |
| 13 from common import constants | 13 from common import constants |
| 14 from crash.crash_report import CrashReport | 14 from crash.component import Component |
| 15 from crash.component_classifier import ComponentClassifier | |
| 16 from crash.project import Project | |
| 17 from crash.project_classifier import ProjectClassifier | |
| 15 from libs import time_util | 18 from libs import time_util |
| 16 from model import analysis_status | 19 from model import analysis_status |
| 17 from model.crash.crash_config import CrashConfig | |
| 18 | 20 |
| 19 | 21 |
| 20 # TODO(http://crbug.com/659346): since most of our unit tests are | 22 # TODO(http://crbug.com/659346): since most of our unit tests are |
| 21 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py. | 23 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py. |
| 22 # However, now we're missing coverage for most of this file (due to the | 24 # However, now we're missing coverage for most of this file (due to the |
| 23 # buggy way coverage is computed). Need to add a bunch of new unittests | 25 # buggy way coverage is computed). Need to add a bunch of new unittests |
| 24 # to get coverage back up. | 26 # to get coverage back up. |
| 25 | 27 |
| 26 # TODO: this class depends on ndb stuff, and should therefore move to | 28 # TODO: this class depends on ndb stuff, and should therefore move to |
| 27 # cr-culprit-finder/service/predator as part of the big reorganization. | 29 # cr-culprit-finder/service/predator as part of the big reorganization. |
| 28 # This class should be renamed to avoid confustion between Findit and Predator. | 30 # This class should be renamed to avoid confustion between Findit and Predator. |
| 29 # Think of a good name (e.g.'PredatorApp') for this class. | 31 # Think of a good name (e.g.'PredatorApp') for this class. |
| 30 class Findit(object): | 32 class Findit(object): |
| 31 | 33 |
| 32 def __init__(self, get_repository): | 34 def __init__(self, get_repository, config): |
| 33 """ | 35 """ |
| 34 Args: | 36 Args: |
| 35 get_repository (callable): a function from DEP urls to ``Repository`` | 37 get_repository (callable): a function from DEP urls to ``Repository`` |
| 36 objects, so we can get changelogs and blame for each dep. Notably, | 38 objects, so we can get changelogs and blame for each dep. Notably, |
| 37 to keep the code here generic, we make no assumptions about | 39 to keep the code here generic, we make no assumptions about |
| 38 which subclass of ``Repository`` this function returns. Thus, | 40 which subclass of ``Repository`` this function returns. Thus, |
| 39 it is up to the caller to decide what class to return and handle | 41 it is up to the caller to decide what class to return and handle |
| 40 any other arguments that class may require (e.g., an http client | 42 any other arguments that class may require (e.g., an http client |
| 41 for ``GitilesRepository``). | 43 for ``GitilesRepository``). |
| 44 config (ndb.CrashConfig): Config for clients and project and component | |
| 45 classifiers. | |
| 42 """ | 46 """ |
| 43 self._get_repository = get_repository | 47 self._get_repository = get_repository |
| 44 self._dep_fetcher = ChromeDependencyFetcher(get_repository) | 48 |
| 45 # TODO(http://crbug.com/659354): because self.client is volatile, | 49 # The top_n is the number of frames we want to check to get project |
| 46 # we need some way of updating the Predator instance whenever the | 50 # classifications. |
| 47 # config changes. How to do that cleanly? | 51 projects = [Project(name, path_regexs, function_regexs, host_directories) |
| 48 self._predator = None | 52 for name, path_regexs, function_regexs, host_directories |
| 49 self._stacktrace_parser = None | 53 in config.project_classifier['project_path_function_hosts']] |
| 54 self._project_classifier = ProjectClassifier( | |
| 55 projects, config.project_classifier['top_n'], | |
| 56 config.project_classifier['non_chromium_project_rank_priority']) | |
|
wrengr
2017/02/03 18:34:47
newline between 56 and 57?
Sharu Jiang
2017/02/03 23:13:38
Done.
| |
| 57 # The top_n is the number of frames we want to check to get component | |
| 58 # classifications. | |
| 59 components = [Component(component_name, path_regex, function_regex) | |
| 60 for path_regex, function_regex, component_name | |
| 61 in config.component_classifier['path_function_component']] | |
| 62 self._component_classifier = ComponentClassifier( | |
| 63 components, config.component_classifier['top_n']) | |
| 64 | |
| 65 # Get client-specific config. | |
| 66 self._client_config = config.GetClientConfig(self.client_id) | |
| 50 | 67 |
| 51 # This is a class method because it should be the same for all | 68 # This is a class method because it should be the same for all |
| 52 # instances of this class. We can in fact call class methods on | 69 # instances of this class. We can in fact call class methods on |
| 53 # instances (not just on the class itself), so we could in principle | 70 # instances (not just on the class itself), so we could in principle |
| 54 # get by with just this method. However, a @classmethod is treated | 71 # get by with just this method. However, a @classmethod is treated |
| 55 # syntactically like a method, thus we'd need to have the ``()`` at the | 72 # syntactically like a method, thus we'd need to have the ``()`` at the |
| 56 # end, unlike for a @property. Thus we have both the class method and | 73 # end, unlike for a @property. Thus we have both the class method and |
| 57 # the property, in order to simulate a class property. | 74 # the property, in order to simulate a class property. |
| 58 @classmethod | 75 @classmethod |
| 59 def _ClientID(cls): # pragma: no cover | 76 def _ClientID(cls): # pragma: no cover |
| 60 """Get the client id for this class. | 77 """Get the client id for this class. |
| 61 | 78 |
| 62 This class method is private. Unless you really need to access | 79 This class method is private. Unless you really need to access |
| 63 this method directly for some reason, you should use the ``client_id`` | 80 this method directly for some reason, you should use the ``client_id`` |
| 64 property instead. | 81 property instead. |
| 65 | 82 |
| 66 Returns: | 83 Returns: |
| 67 A string which is member of the CrashClient enumeration. | 84 A string which is member of the CrashClient enumeration. |
| 68 """ | 85 """ |
| 69 if cls is Findit: | 86 if cls is Findit: |
| 70 logging.warning('Findit is abstract, ' | 87 logging.warning('Findit is abstract, ' |
| 71 'but someone constructed an instance and called _ClientID') | 88 'but someone constructed an instance and called _ClientID') |
| 72 else: | 89 else: |
| 73 logging.warning('Findit subclass %s forgot to implement _ClientID', | 90 logging.warning('Findit subclass %s forgot to implement _ClientID', |
| 74 cls.__name__) | 91 cls.__name__) |
| 75 raise NotImplementedError() | 92 raise NotImplementedError() |
| 76 | 93 |
| 94 def _Predator(self): | |
| 95 raise NotImplementedError() | |
|
wrengr
2017/02/03 18:34:46
What's this for?
Sharu Jiang
2017/02/03 23:13:37
Since the ``FindCulprit`` will use self._predator,
| |
| 96 | |
| 77 @property | 97 @property |
| 78 def client_id(self): | 98 def client_id(self): |
| 79 """Get the client id from the class of this object. | 99 """Get the client id from the class of this object. |
| 80 | 100 |
| 81 N.B., this property is static and should not be overridden.""" | 101 N.B., this property is static and should not be overridden.""" |
| 82 return self._ClientID() | 102 return self._ClientID() |
| 83 | 103 |
| 84 # TODO(http://crbug.com/659354): can we remove the dependency on | |
| 85 # CrashConfig entirely? It'd be better to receive method calls | |
| 86 # whenever things change, so that we know the change happened (and | |
| 87 # what in particular changed) so that we can update our internal state | |
| 88 # as appropriate. | |
| 89 @property | 104 @property |
| 90 def config(self): | 105 def client_config(self): |
| 91 """Get the current value of the client config for the class of this object. | 106 """Get the current value of the client config for the class of this object. |
| 92 | 107 |
| 93 N.B., this property is volatile and may change asynchronously. | 108 N.B., this property is volatile and may change asynchronously. |
| 94 | 109 |
| 95 If the event of an error this method will return ``None``. That we do | 110 If the event of an error this method will return ``None``. That we do |
| 96 not return the empty dict is intentional. All of the circumstances | 111 not return the empty dict is intentional. All of the circumstances |
| 97 which would lead to this method returning ``None`` indicate | 112 which would lead to this method returning ``None`` indicate |
| 98 underlying bugs in code elsewhere. (E.g., creating instances of | 113 underlying bugs in code elsewhere. (E.g., creating instances of |
| 99 abstract classes, implementing a subclass which is not suppported by | 114 abstract classes, implementing a subclass which is not suppported by |
| 100 ``CrashConfig``, etc.) Because we return ``None``, any subsequent | 115 ``CrashConfig``, etc.) Because we return ``None``, any subsequent |
| 101 calls to ``__getitem__`` or ``get`` will raise method-missing errors; | 116 calls to ``__getitem__`` or ``get`` will raise method-missing errors; |
| 102 which serve to highlight the underlying bug. Whereas, if we silently | 117 which serve to highlight the underlying bug. Whereas, if we silently |
| 103 returned the empty dict, then calls to ``get`` would "work" just | 118 returned the empty dict, then calls to ``get`` would "work" just |
| 104 fine; thereby masking the underlying bug! | 119 fine; thereby masking the underlying bug! |
| 105 """ | 120 """ |
| 106 return CrashConfig.Get().GetClientConfig(self.client_id) | 121 return self._client_config |
| 107 | 122 |
| 108 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or | 123 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or |
| 109 # something like that. | 124 # something like that. |
| 110 def RenamePlatform(self, platform): | 125 def RenamePlatform(self, platform): |
| 111 """Remap the platform to a different one, based on the config.""" | 126 """Remap the platform to a different one, based on the config.""" |
| 112 # TODO(katesonia): Remove the default value after adding validity check to | 127 return self.client_config['platform_rename'].get(platform, platform) |
|
wrengr
2017/02/03 18:34:46
would be nice if ``platform_rename`` were a proper
Sharu Jiang
2017/02/03 23:13:38
Now the client config are all dicts, later we may
| |
| 113 # config. | |
| 114 return self.config.get('platform_rename', {}).get(platform, platform) | |
| 115 | 128 |
| 116 def CheckPolicy(self, crash_data): # pylint: disable=W0613 | 129 # TODO(http://crbug.com/644476): rename this to something like |
| 130 # _NewAnalysis, since it only does the "allocation" and needs to/will | |
| 131 # be followed up with _InitializeAnalysis anyways. | |
| 132 def CreateAnalysis(self, crash_identifiers): # pragma: no cover | |
| 133 raise NotImplementedError() | |
| 134 | |
| 135 def GetAnalysis(self, crash_identifiers): # pragma: no cover | |
| 136 """Returns the CrashAnalysis for the ``crash_identifiers``, if one exists. | |
| 137 | |
| 138 Args: | |
| 139 crash_identifiers (JSON): ?? | |
| 140 | |
| 141 Returns: | |
| 142 If a CrashAnalysis ndb.Model already exists for the | |
| 143 ``crash_identifiers``, then we return it. Otherwise, returns None. | |
| 144 """ | |
| 145 raise NotImplementedError() | |
| 146 | |
| 147 def GetCrashBuffer(self, crash_data): # pragma: no cover | |
| 148 raise NotImplementedError() | |
| 149 | |
| 150 def _CheckPolicy(self, crash_buffer): # pylint: disable=W0613 | |
| 117 """Check whether this client supports analyzing the given report. | 151 """Check whether this client supports analyzing the given report. |
| 118 | 152 |
| 119 Some clients only support analysis for crashes on certain platforms | 153 Some clients only support analysis for crashes on certain platforms |
| 120 or channels, etc. This method checks to see whether this client can | 154 or channels, etc. This method checks to see whether this client can |
| 121 analyze the given crash. The default implementation on the Findit | 155 analyze the given crash. The default implementation on the Findit |
| 122 base class returns None for everything, so that unsupported clients | 156 base class returns None for everything, so that unsupported clients |
| 123 reject everything, as expected. | 157 reject everything, as expected. |
| 124 | 158 |
| 125 Args: | 159 Args: |
| 126 crash_data (JSON): ?? | 160 crash_data (JSON): ?? |
| 127 | 161 |
| 128 Returns: | 162 Returns: |
| 129 If satisfied, we return the ``crash_data`` which may have had some | 163 If satisfied, we return the ``crash_data`` which may have had some |
| 130 fields modified. Otherwise returns None. | 164 fields modified. Otherwise returns None. |
| 131 """ | 165 """ |
| 132 return None | 166 if not self.client_config: |
| 167 logging.info('No configuration of client %s, analysis is not supported', | |
| 168 self.client_id) | |
| 169 return False | |
| 133 | 170 |
| 134 # TODO(http://crbug.com/644476): rename this to something like | 171 for blacklist_marker in self.client_config['signature_blacklist_markers']: |
| 135 # _NewAnalysis, since it only does the "allocation" and needs to/will | 172 if blacklist_marker in crash_buffer.signature: |
| 136 # be followed up with _InitializeAnalysis anyways. | 173 logging.info('%s signature is not supported.', blacklist_marker) |
| 137 def CreateAnalysis(self, crash_identifiers): # pylint: disable=W0613 | 174 return False |
| 138 return None | |
| 139 | 175 |
| 140 def GetAnalysis(self, crash_identifiers): # pylint: disable=W0613 | 176 return True |
| 141 """Returns the CrashAnalysis for the ``crash_identifiers``, if one exists. | 177 |
| 178 @ndb.transactional | |
| 179 def NeedsNewAnalysis(self, crash_buffer): | |
| 180 """Checks if the crash needs new anlysis. | |
| 181 | |
| 182 If a new analysis needs to be scheduled, initialize a ``CrashAnalysis`` | |
| 183 model using crash_buffer and put it in datastore for later culprit | |
| 184 analyzing. | |
| 142 | 185 |
| 143 Args: | 186 Args: |
| 144 crash_identifiers (JSON): ?? | 187 crash_buffer (CrashBuffer): Crash buffer that contains all the information |
| 188 about the crash. | |
| 145 | 189 |
| 146 Returns: | 190 Returns: |
| 147 If a CrashAnalysis ndb.Model already exists for the | 191 Boolean of whether a new analysis needs to be scheduled. |
| 148 ``crash_identifiers``, then we return it. Otherwise, returns None. | |
| 149 """ | 192 """ |
| 150 return None | 193 # Check policy and modify the crash_data as needed. |
| 194 if not self._CheckPolicy(crash_buffer): | |
| 195 logging.info('The analysis of %s is not supported, task will not be ' | |
| 196 'scheduled.', str(crash_buffer.identifiers)) | |
| 197 return False | |
| 151 | 198 |
| 152 # TODO(wrengr): this should be a method on CrashAnalysis, not on Findit. | 199 # Rename platform if configured. |
| 153 # TODO(http://crbug.com/659346): coverage tests for this class, not | 200 crash_buffer.platform = self.RenamePlatform(crash_buffer.platform) |
| 154 # just for FinditForFracas. | |
| 155 def _InitializeAnalysis(self, model, crash_data): # pragma: no cover | |
| 156 """(Re)Initialize a CrashAnalysis ndb.Model, but do not ``put()`` it yet. | |
| 157 | 201 |
| 158 This method is only ever called from _NeedsNewAnalysis which is only | 202 model = self.GetAnalysis(crash_buffer.identifiers) |
| 159 ever called from ScheduleNewAnalysis. It is used for filling in the | 203 # N.B., for mocking reasons, we must not call DetectRegressionRange |
| 160 fields of a CrashAnalysis ndb.Model for the first time (though it | 204 # directly, but rather must access it indirectly through the module. |
| 161 can also be used to re-initialize a given CrashAnalysis). Subclasses | 205 if (model and not model.failed and |
| 162 should extend (not override) this to (re)initialize any | 206 crash_buffer.regression_range == (tuple(model.regression_range) |
| 163 client-specific fields they may have.""" | 207 if model.regression_range else |
| 164 # Get rid of any previous values there may have been. | 208 model.regression_range)): |
| 165 model.Reset() | 209 logging.info('The analysis of %s has already been done.', |
| 210 repr(crash_buffer.identifiers)) | |
| 211 return False | |
| 166 | 212 |
| 167 # Set the version. | 213 model = model or self.CreateAnalysis(crash_buffer.identifiers) |
| 168 # ``handlers.crash.test.crash_handler_test.testAnalysisScheduled`` | 214 model.Initialize(crash_buffer) |
| 169 # provides and expects this field to be called 'chrome_version', | 215 model.put() |
| 170 # whereas everyone else (e.g., in ``crash.test.crash_pipeline_test`` | 216 return True |
| 171 # the tests ``testAnalysisNeededIfNoAnalysisYet``, | |
| 172 # ``testRunningAnalysisNoSuspectsFound``, ``testRunningAnalysis``, | |
| 173 # ``testAnalysisNeededIfLastOneFailed``, | |
| 174 # ``testRunningAnalysisWithSuspectsCls``) expects it to be called | |
| 175 # 'crashed_version'. The latter is the better/more general name, | |
| 176 # so the former needs to be changed in order to get rid of this | |
| 177 # defaulting ugliness. | |
| 178 model.crashed_version = crash_data.get('crashed_version', | |
| 179 crash_data.get('chrome_version', None)) | |
| 180 | 217 |
| 181 # Set (other) common properties. | 218 def ProcessResultForPublishing(self, result, key): # pragma: no cover |
| 182 model.stack_trace = crash_data['stack_trace'] | |
| 183 model.signature = crash_data['signature'] | |
| 184 model.platform = crash_data['platform'] | |
| 185 # TODO(wrengr): The only reason to have _InitializeAnalysis as a | |
| 186 # method of the Findit class rather than as a method on CrashAnalysis | |
| 187 # is so we can assert that crash_data['client_id'] == self.client_id. | |
| 188 # So, either we should do that, or else we should move this to be | |
| 189 # a method on CrashAnalysis. | |
| 190 model.client_id = self.client_id | |
| 191 model.regression_range = crash_data.get('regression_range', None) | |
| 192 | |
| 193 # Set progress properties. | |
| 194 model.status = analysis_status.PENDING | |
| 195 model.requested_time = time_util.GetUTCNow() | |
| 196 | |
| 197 @ndb.transactional | |
| 198 def _NeedsNewAnalysis(self, crash_data): | |
| 199 raise NotImplementedError() | |
| 200 | |
| 201 # TODO(wrengr): does the parser actually need the version, signature, | |
| 202 # and platform? If not, then we should be able to just pass the string | |
| 203 # to be parsed (which would make a lot more sense than passing the | |
| 204 # whole model). | |
| 205 # TODO(http://crbug.com/659346): coverage tests for this class, not | |
| 206 # just for FinditForFracas. | |
| 207 def ParseStacktrace(self, model): # pragma: no cover | |
| 208 """Parse a CrashAnalysis's ``stack_trace`` string into a Stacktrace object. | |
| 209 | |
| 210 Args: | |
| 211 model (CrashAnalysis): The model containing the stack_trace string | |
| 212 to be parsed. | |
| 213 | |
| 214 Returns: | |
| 215 On success, returns a Stacktrace object; on failure, returns None. | |
| 216 """ | |
| 217 # Use up-to-date ``top_n`` in self.config to filter top n frames. | |
| 218 stacktrace = self._stacktrace_parser.Parse( | |
| 219 model.stack_trace, | |
| 220 self._dep_fetcher.GetDependency(model.crashed_version, model.platform), | |
| 221 model.signature, | |
| 222 self.config.get('top_n')) | |
| 223 if not stacktrace: | |
| 224 logging.warning('Failed to parse the stacktrace %s', model.stack_trace) | |
| 225 return None | |
| 226 | |
| 227 return stacktrace | |
| 228 | |
| 229 def ProcessResultForPublishing(self, result, key): | |
| 230 """Client specific processing of result data for publishing.""" | 219 """Client specific processing of result data for publishing.""" |
| 231 raise NotImplementedError() | 220 raise NotImplementedError() |
| 232 | 221 |
| 233 def GetPublishableResult(self, crash_identifiers, analysis): | 222 def GetPublishableResult(self, crash_identifiers, analysis): |
| 234 """Convert a culprit result into a publishable result for client. | 223 """Converts a culprit result into a publishable result for client. |
| 235 | 224 |
| 236 Note, this function must be called by a concrete subclass of CrashAnalysis | 225 Note, this function must be called by a concrete subclass of CrashAnalysis |
| 237 which implements the ProcessResultForPublishing method. | 226 which implements the ProcessResultForPublishing method. |
| 238 | 227 |
| 239 Args: | 228 Args: |
| 240 crash_identifiers (dict): Dict containing identifiers that can uniquely | 229 crash_identifiers (dict): Dict containing identifiers that can uniquely |
| 241 identify CrashAnalysis entity. | 230 identify CrashAnalysis entity. |
| 242 analysis (CrashAnalysis model): Model containing culprit result and other | 231 analysis (CrashAnalysis model): Model containing culprit result and other |
| 243 analysis information. | 232 analysis information. |
| 244 | 233 |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 260 'client_id': self.client_id, | 249 'client_id': self.client_id, |
| 261 'result': result, | 250 'result': result, |
| 262 } | 251 } |
| 263 | 252 |
| 264 # TODO(wrengr): This is only called by ``CrashAnalysisPipeline.run``; | 253 # TODO(wrengr): This is only called by ``CrashAnalysisPipeline.run``; |
| 265 # we should be able to adjust things so that we only need to take in | 254 # we should be able to adjust things so that we only need to take in |
| 266 # ``crash_identifiers``, or a CrashReport, rather than taking in the | 255 # ``crash_identifiers``, or a CrashReport, rather than taking in the |
| 267 # whole model. And/or, we should just inline this there. | 256 # whole model. And/or, we should just inline this there. |
| 268 # TODO(http://crbug.com/659346): coverage tests for this class, not | 257 # TODO(http://crbug.com/659346): coverage tests for this class, not |
| 269 # just for FinditForFracas. | 258 # just for FinditForFracas. |
| 270 def FindCulprit(self, model): # pragma: no cover | 259 def FindCulprit(self, crash_report): # pragma: no cover |
| 271 """Given a CrashAnalysis ndb.Model, return a Culprit.""" | 260 """Given a CrashAnalysis ndb.Model, returns a Culprit.""" |
| 272 stacktrace = self.ParseStacktrace(model) | 261 return self._Predator().FindCulprit(crash_report) |
| 273 if stacktrace is None: | |
| 274 return None | |
| 275 | |
| 276 return self._predator.FindCulprit(CrashReport( | |
| 277 crashed_version = model.crashed_version, | |
| 278 signature = model.signature, | |
| 279 platform = model.platform, | |
| 280 stacktrace = stacktrace, | |
| 281 regression_range = model.regression_range)) | |
| OLD | NEW |