| OLD | NEW |
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 import copy |
| 6 import logging |
| 7 |
| 8 from google.appengine.ext import ndb |
| 9 |
| 10 from common import appengine_util |
| 11 from common import chrome_dependency_fetcher |
| 12 from common import constants |
| 13 from common import time_util |
| 14 from crash.crash_report import CrashReport |
| 15 from crash.culprit import NullCulprit |
| 16 from model import analysis_status |
| 17 from model.crash.crash_config import CrashConfig |
| 18 from model.crash.cracas_crash_analysis import CracasCrashAnalysis |
| 19 from model.crash.fracas_crash_analysis import FracasCrashAnalysis |
| 20 |
| 21 # TODO(http://crbug.com/659346): since most of our unit tests are |
| 22 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py. |
| 23 # However, now we're missing coverage for most of this file (due to the |
| 24 # buggy way coverage is computed). Need to add a bunch of new unittests |
| 25 # to get coverage back up. |
| 26 |
| 27 # TODO: this class depends on ndb stuff, and should therefore move to |
| 28 # cr-culprit-finder/service/predator as part of the big reorganization. |
| 29 # Also, this class should be renamed to "PredatorApp" (alas, "Azalea" |
| 30 # was renamed to "Predator"). |
| 31 class Findit(object): |
| 32 def __init__(self, repository, pipeline_cls): |
| 33 """ |
| 34 Args: |
| 35 repository (Repository): the Git repository for getting CLs to classify. |
| 36 pipeline_cls (class): the class for constructing pipelines in |
| 37 ScheduleNewAnalysis. This will almost surely be |
| 38 |crash.crash_pipeline.CrashWrapperPipeline|; but we must pass |
| 39 the class in as a parameter in order to break an import cycle. |
| 40 """ |
| 41 self._repository = repository |
| 42 # TODO(http://crbug.com/659354): because self.client is volatile, |
| 43 # we need some way of updating the Azelea instance whenever the |
| 44 # config changes. How to do that cleanly? |
| 45 self._azalea = None |
| 46 self._stacktrace_parser = None |
| 47 self._pipeline_cls = pipeline_cls |
| 48 |
| 49 # This is a class method because it should be the same for all |
| 50 # instances of this class. We can in fact call class methods on |
| 51 # instances (not just on the class itself), so we could in principle |
| 52 # get by with just this method. However, a @classmethod is treated |
| 53 # syntactically like a method, thus we'd need to have the |()| at the |
| 54 # end, unlike for a @property. Thus we have both the class method and |
| 55 # the property, in order to simulate a class property. |
| 56 @classmethod |
| 57 def _ClientID(cls): # pragma: no cover |
| 58 """Get the client id for this class. |
| 59 |
| 60 This class method is private. Unless you really need to access |
| 61 this method directly for some reason, you should use the |client_id| |
| 62 property instead. |
| 63 |
| 64 Returns: |
| 65 A string which is member of the CrashClient enumeration. |
| 66 """ |
| 67 if cls is Findit: |
| 68 logging.warning('Findit is abstract, ' |
| 69 'but someone constructed an instance and called _ClientID') |
| 70 else: |
| 71 logging.warning('Findit subclass %s forgot to implement _ClientID', |
| 72 cls.__name__) |
| 73 raise NotImplementedError() |
| 74 |
| 75 @property |
| 76 def client_id(self): |
| 77 """Get the client id from the class of this object. |
| 78 |
| 79 N.B., this property is static and should not be overridden.""" |
| 80 return self._ClientID() |
| 81 |
| 82 # TODO(http://crbug.com/659354): can we remove the dependency on |
| 83 # CrashConfig entirely? It'd be better to receive method calls |
| 84 # whenever things change, so that we know the change happened (and |
| 85 # what in particular changed) so that we can update our internal state |
| 86 # as appropriate. |
| 87 @property |
| 88 def config(self): |
| 89 """Get the current value of the client config for the class of this object. |
| 90 |
| 91 N.B., this property is volatile and may change asynchronously. |
| 92 |
| 93 If the event of an error this method will return ``None``. That we do |
| 94 not return the empty dict is intentional. All of the circumstances |
| 95 which would lead to this method returning ``None`` indicate |
| 96 underlying bugs in code elsewhere. (E.g., creating instances of |
| 97 abstract classes, implementing a subclass which is not suppported by |
| 98 ``CrashConfig``, etc.) Because we return ``None``, any subsequent |
| 99 calls to ``__getitem__`` or ``get`` will raise method-missing errors; |
| 100 which serve to highlight the underlying bug. Whereas, if we silently |
| 101 returned the empty dict, then calls to ``get`` would "work" just |
| 102 fine; thereby masking the underlying bug! |
| 103 """ |
| 104 return CrashConfig.Get().GetClientConfig(self.client_id) |
| 105 |
| 106 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or |
| 107 # something like that. |
| 108 def RenamePlatform(self, platform): |
| 109 """Remap the platform to a different one, based on the config.""" |
| 110 # TODO(katesonia): Remove the default value after adding validity check to |
| 111 # config. |
| 112 return self.config.get('platform_rename', {}).get(platform, platform) |
| 113 |
| 114 def CheckPolicy(self, crash_data): # pylint: disable=W0613 |
| 115 """Check whether this client supports analyzing the given report. |
| 116 |
| 117 Some clients only support analysis for crashes on certain platforms |
| 118 or channels, etc. This method checks to see whether this client can |
| 119 analyze the given crash. The default implementation on the Findit |
| 120 base class returns None for everything, so that unsupported clients |
| 121 reject everything, as expected. |
| 122 |
| 123 Args: |
| 124 crash_data (JSON): ?? |
| 125 |
| 126 Returns: |
| 127 If satisfied, we return the |crash_data| which may have had some |
| 128 fields modified. Otherwise returns None. |
| 129 """ |
| 130 return None |
| 131 |
| 132 # TODO(http://crbug.com/644476): rename this to something like |
| 133 # _NewAnalysis, since it only does the "allocation" and needs to/will |
| 134 # be followed up with _InitializeAnalysis anyways. |
| 135 def CreateAnalysis(self, crash_identifiers): # pylint: disable=W0613 |
| 136 return None |
| 137 |
| 138 def GetAnalysis(self, crash_identifiers): # pylint: disable=W0613 |
| 139 """Return the CrashAnalysis for the |crash_identifiers|, if one exists. |
| 140 |
| 141 Args: |
| 142 crash_identifiers (JSON): ?? |
| 143 |
| 144 Returns: |
| 145 If a CrashAnalysis ndb.Model already exists for the |
| 146 |crash_identifiers|, then we return it. Otherwise, returns None. |
| 147 """ |
| 148 return None |
| 149 |
| 150 # TODO(wrengr): this should be a method on CrashAnalysis, not on Findit. |
| 151 # TODO(http://crbug.com/659346): coverage tests for this class, not |
| 152 # just for FinditForFracas. |
| 153 def _InitializeAnalysis(self, model, crash_data): # pragma: no cover |
| 154 """(Re)Initialize a CrashAnalysis ndb.Model, but do not |put()| it yet. |
| 155 |
| 156 This method is only ever called from _NeedsNewAnalysis which is only |
| 157 ever called from ScheduleNewAnalysis. It is used for filling in the |
| 158 fields of a CrashAnalysis ndb.Model for the first time (though it |
| 159 can also be used to re-initialize a given CrashAnalysis). Subclasses |
| 160 should extend (not override) this to (re)initialize any |
| 161 client-specific fields they may have.""" |
| 162 # Get rid of any previous values there may have been. |
| 163 model.Reset() |
| 164 |
| 165 # Set the version. |
| 166 # |handlers.crash.test.crash_handler_test.testAnalysisScheduled| |
| 167 # provides and expects this field to be called 'chrome_version', |
| 168 # whereas everyone else (e.g., in |crash.test.crash_pipeline_test| |
| 169 # the tests |testAnalysisNeededIfNoAnalysisYet|, |
| 170 # |testRunningAnalysisNoSuspectsFound|, |testRunningAnalysis|, |
| 171 # |testAnalysisNeededIfLastOneFailed|, |testRunningAnalysisWithSuspectsCls|) |
| 172 # expects it to be called 'crashed_version'. The latter is the |
| 173 # better/more general name, so the former needs to be changed in |
| 174 # order to get rid of this defaulting ugliness. |
| 175 model.crashed_version = crash_data.get('crashed_version', |
| 176 crash_data.get('chrome_version', None)) |
| 177 |
| 178 # Set (other) common properties. |
| 179 model.stack_trace = crash_data['stack_trace'] |
| 180 model.signature = crash_data['signature'] |
| 181 model.platform = crash_data['platform'] |
| 182 # TODO(wrengr): The only reason to have _InitializeAnalysis as a |
| 183 # method of the Findit class rather than as a method on CrashAnalysis |
| 184 # is so we can assert that crash_data['client_id'] == self.client_id. |
| 185 # So, either we should do that, or else we should move this to be |
| 186 # a method on CrashAnalysis. |
| 187 model.client_id = self.client_id |
| 188 |
| 189 # Set progress properties. |
| 190 model.status = analysis_status.PENDING |
| 191 model.requested_time = time_util.GetUTCNow() |
| 192 |
| 193 @ndb.transactional |
| 194 def _NeedsNewAnalysis(self, crash_data): |
| 195 raise NotImplementedError() |
| 196 |
| 197 # TODO(http://crbug.com/659345): try to break the cycle re pipeline_cls. |
| 198 # TODO(http://crbug.com/659346): we don't cover anything after the |
| 199 # call to _NeedsNewAnalysis. |
| 200 def ScheduleNewAnalysis(self, crash_data, |
| 201 queue_name=constants.DEFAULT_QUEUE): # pragma: no cover |
| 202 """Create a pipeline object to perform the analysis, and start it. |
| 203 |
| 204 If we can detect that the analysis doesn't need to be performed |
| 205 (e.g., it was already performed, or the |crash_data| is empty so |
| 206 there's nothig we can do), then we will skip creating the pipeline |
| 207 at all. |
| 208 |
| 209 Args: |
| 210 crash_data (JSON): ?? |
| 211 queue_name (??): the name of the AppEngine queue we should start |
| 212 the pipeline on. |
| 213 |
| 214 Returns: |
| 215 True if we started the pipeline; False otherwise. |
| 216 """ |
| 217 # Check policy and tune arguments if needed. |
| 218 crash_data = self.CheckPolicy(crash_data) |
| 219 if crash_data is None: |
| 220 return False |
| 221 |
| 222 # Detect the regression range, and decide if we actually need to |
| 223 # run a new anlaysis or not. |
| 224 if not self._NeedsNewAnalysis(crash_data): |
| 225 return False |
| 226 |
| 227 crash_identifiers = crash_data['crash_identifiers'] |
| 228 # N.B., we cannot pass |self| directly to the _pipeline_cls, because |
| 229 # it is not JSON-serializable (and there's no way to make it such, |
| 230 # since JSON-serializability is defined by JSON-encoders rather than |
| 231 # as methods on the objects being encoded). |
| 232 analysis_pipeline = self._pipeline_cls(self.client_id, crash_identifiers) |
| 233 # Attribute defined outside __init__ - pylint: disable=W0201 |
| 234 analysis_pipeline.target = appengine_util.GetTargetNameForModule( |
| 235 constants.CRASH_BACKEND[self.client_id]) |
| 236 analysis_pipeline.start(queue_name=queue_name) |
| 237 logging.info('New %s analysis is scheduled for %s', self.client_id, |
| 238 repr(crash_identifiers)) |
| 239 return True |
| 240 |
| 241 # TODO(wrengr): does the parser actually need the version, signature, |
| 242 # and platform? If not, then we should be able to just pass the string |
| 243 # to be parsed (which would make a lot more sense than passing the |
| 244 # whole model). |
| 245 # TODO(http://crbug.com/659346): coverage tests for this class, not |
| 246 # just for FinditForFracas. |
| 247 def ParseStacktrace(self, model): # pragma: no cover |
| 248 """Parse a CrashAnalysis's |stack_trace| string into a Stacktrace object. |
| 249 |
| 250 Args: |
| 251 model (CrashAnalysis): The model containing the stack_trace string |
| 252 to be parsed. |
| 253 |
| 254 Returns: |
| 255 On success, returns a Stacktrace object; on failure, returns None. |
| 256 """ |
| 257 stacktrace = self._stacktrace_parser.Parse( |
| 258 model.stack_trace, |
| 259 chrome_dependency_fetcher.ChromeDependencyFetcher( |
| 260 self._repository |
| 261 ).GetDependency( |
| 262 model.crashed_version, |
| 263 model.platform), |
| 264 model.signature) |
| 265 if not stacktrace: |
| 266 logging.warning('Failed to parse the stacktrace %s', model.stack_trace) |
| 267 return None |
| 268 |
| 269 return stacktrace |
| 270 |
| 271 # TODO(wrengr): This is only called by |CrashAnalysisPipeline.run|; |
| 272 # we should be able to adjust things so that we only need to take in |
| 273 # |crash_identifiers|, or a CrashReport, rather than taking in the |
| 274 # whole model. And/or, we should just inline this there. |
| 275 # TODO(http://crbug.com/659346): coverage tests for this class, not |
| 276 # just for FinditForFracas. |
| 277 def FindCulprit(self, model): # pragma: no cover |
| 278 """Given a CrashAnalysis ndb.Model, return a Culprit.""" |
| 279 stacktrace = self.ParseStacktrace(model) |
| 280 if stacktrace is None: |
| 281 # TODO(http://crbug.com/659359): refactor things so we don't need |
| 282 # the NullCulprit class. |
| 283 return NullCulprit() |
| 284 |
| 285 return self._azalea.FindCulprit(CrashReport( |
| 286 crashed_version = model.crashed_version, |
| 287 signature = model.signature, |
| 288 platform = model.platform, |
| 289 stacktrace = stacktrace, |
| 290 regression_range = model.regression_range)) |
| OLD | NEW |