appengine/findit/crash/findit.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Side by Side Diff: appengine/findit/crash/findit.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: rebasing against recently landed cls Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 # Copyright 2016 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 import copy

	6 import logging

	7

	8 from google.appengine.ext import ndb

	9

	10 from common import appengine_util

	11 from common import chrome_dependency_fetcher

	12 from common import constants

	13 from common import time_util

	14 from crash.crash_report import CrashReport

	15 from crash.culprit import NullCulprit

	16 from model import analysis_status

	17 from model.crash.crash_config import CrashConfig

	18 from model.crash.cracas_crash_analysis import CracasCrashAnalysis

	19 from model.crash.fracas_crash_analysis import FracasCrashAnalysis

	20

	21 # TODO(http://crbug.com/659346): since most of our unit tests are

	22 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py.

	23 # However, now we're missing coverage for most of this file (due to the

	24 # buggy way coverage is computed). Need to add a bunch of new unittests

	25 # to get coverage back up.

	26

	27 # TODO: this class depends on ndb stuff, and should therefore move to

	28 # cr-culprit-finder/service/predator as part of the big reorganization.

	29 # Also, this class should be renamed to "PredatorApp" (alas, "Azalea"

	30 # was renamed to "Predator").

	31 class Findit(object):

	32 def __init__(self, repository, pipeline_cls):

	33 """

	34 Args:

	35 repository (Repository): the Git repository for getting CLs to classify.

	36 pipeline_cls (class): the class for constructing pipelines in

	37 ScheduleNewAnalysis. This will almost surely be

	38 \|crash.crash_pipeline.CrashWrapperPipeline\|; but we must pass

	39 the class in as a parameter in order to break an import cycle.

	40 """

	41 self._repository = repository

	42 # TODO(http://crbug.com/659354): because self.client is volatile,

	43 # we need some way of updating the Azelea instance whenever the

	44 # config changes. How to do that cleanly?

	45 self._azalea = None

	46 self._stacktrace_parser = None

	47 self._pipeline_cls = pipeline_cls

	48

	49 # This is a class method because it should be the same for all

	50 # instances of this class. We can in fact call class methods on

	51 # instances (not just on the class itself), so we could in principle

	52 # get by with just this method. However, a @classmethod is treated

	53 # syntactically like a method, thus we'd need to have the \|()\| at the

	54 # end, unlike for a @property. Thus we have both the class method and

	55 # the property, in order to simulate a class property.

	56 @classmethod

	57 def _ClientID(cls):

	58 """Get the client id for this class.

	59

	60 This class method is private. Unless you really need to access

	61 this method directly for some reason, you should use the \|client_id\|

	62 property instead.

	63

	64 Returns:

	65 A string which is member of the CrashClient enumeration.

	66 """

	67 if cls is Findit:

	68 logging.warning('Findit is abstract, '

	69 'but someone constructed an instance and called _ClientID')

	70 else:

	71 logging.warning('Findit subclass %s forgot to implement _ClientID',

	72 cls.__name__)

	73 raise NotImplementedError()

	74

	75 @property

	76 def client_id(self):

	77 """Get the client id from the class of this object.

	78

	79 N.B., this property is static and should not be overridden."""

	80 return self._ClientID()

	81

	82 # TODO(http://crbug.com/659354): can we remove the dependency on

	83 # CrashConfig entirely? It'd be better to receive method calls

	84 # whenever things change, so that we know the change happened (and

	85 # what in particular changed) so that we can update our internal state

	86 # as appropriate.

	87 @property

	88 def config(self):

	89 """Get the current value of the client config for the class of this object.

	90

	91 N.B., this property is volatile and may change asynchronously."""

	92 return CrashConfig.Get().GetClientConfig(self.client_id)

	93

	94 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or

	95 # something like that.

	96 def RenamePlatform(self, platform):

	97 """Remap the platform to a different one, based on the config."""

	98 # TODO(katesonia): Remove the default value after adding validity check to

	99 # config.

	100 return self.config.get('platform_rename', {}).get(platform, platform)

	101

	102 def CheckPolicy(self, crash_data): # pylint: disable=W0613

	103 """Check whether this client supports analyzing the given report.

	104

	105 Some clients only support analysis for crashes on certain platforms

	106 or channels, etc. This method checks to see whether this client can

	107 analyze the given crash.

	108

	109 Args:

	110 crash_data (JSON): ??

	111

	112 Returns:

	113 If satisfied, we return the \|crash_data\| which may have had some

	114 fields modified. Otherwise returns None.

	115 """

	116 return None

	117

	118 # TODO(http://crbug.com/644476): rename this to something like

	119 # _NewAnalysis, since it only does the "allocation" and needs to/will

	120 # be followed up with _InitializeAnalysis anyways.

	121 def CreateAnalysis(self, crash_identifiers): # pylint: disable=W0613

	122 return None

	123

	124 def GetAnalysis(self, crash_identifiers): # pylint: disable=W0613

	125 """Return the CrashAnalysis for the \|crash_identifiers\|, if one exists.

	126

	127 Args:

	128 crash_identifiers (JSON): ??

	129

	130 Returns:

	131 If a CrashAnalysis ndb.Model already exists for the

	132 \|crash_identifiers\|, then we return it. Otherwise, returns None.

	133 """

	134 return None

	135

	136 # TODO(wrengr): this should be a method on CrashAnalysis, not on Findit.

	137 # TODO(http://crbug.com/659346): coverage tests for this class, not

	138 # just for FinditForFracas.

	139 def _InitializeAnalysis(self, model, crash_data):

	140 """(Re)Initialize a CrashAnalysis ndb.Model, but do not \|put()\| it yet.

	141

	142 This method is only ever called from _NeedsNewAnalysis which is only

	143 ever called from ScheduleNewAnalysis. It is used for filling in the

	144 fields of a CrashAnalysis ndb.Model for the first time (though it

	145 can also be used to re-initialize a given CrashAnalysis). Subclasses

	146 should extend (not override) this to (re)initialize any

	147 client-specific fields they may have."""

	148 # Get rid of any previous values there may have been.

	149 model.Reset()

	150

	151 # Set the version.

	152 # \|handlers.crash.test.crash_handler_test.testAnalysisScheduled\|

	153 # provides and expects this field to be called 'chrome_version',

	154 # whereas everyone else (e.g., in \|crash.test.crash_pipeline_test\|

	155 # the tests \|testAnalysisNeededIfNoAnalysisYet\|,

	156 # \|testRunningAnalysisNoSuspectsFound\|, \|testRunningAnalysis\|,

	157 # \|testAnalysisNeededIfLastOneFailed\|, \|testRunningAnalysisWithSuspectsCls\|)

	158 # expects it to be called 'crashed_version'. The latter is the

	159 # better/more general name, so the former needs to be changed in

	160 # order to get rid of this defaulting ugliness.

	161 model.crashed_version = crash_data.get('crashed_version',

	162 crash_data.get('chrome_version', None))

	163

	164 # Set (other) common properties.

	165 model.stack_trace = crash_data['stack_trace']

	166 model.signature = crash_data['signature']

	167 model.platform = crash_data['platform']

	168 # TODO(wrengr): The only reason to have _InitializeAnalysis as a

	169 # method of the Findit class rather than as a method on CrashAnalysis

	170 # is so we can assert that crash_data['client_id'] == self.client_id.

	171 # So, either we should do that, or else we should move this to be

	172 # a method on CrashAnalysis.

	173 model.client_id = self.client_id

	174

	175 # Set progress properties.

	176 model.status = analysis_status.PENDING

	177 model.requested_time = time_util.GetUTCNow()

	178

	179 @ndb.transactional

	180 def _NeedsNewAnalysis(self, crash_data):

	181 raise NotImplementedError()

	182

	183 # TODO(http://crbug.com/659345): try to break the cycle re pipeline_cls.

	184 # TODO(http://crbug.com/659346): coverage tests for when CheckPolicy succeeds.

	185 def ScheduleNewAnalysis(self, crash_data, queue_name=constants.DEFAULT_QUEUE):

	186 """Create a pipeline object to perform the analysis, and start it.

	187

	188 If we can detect that the analysis doesn't need to be performed

	189 (e.g., it was already performed, or the \|crash_data\| is empty so

	190 there's nothig we can do), then we will skip creating the pipeline

	191 at all.

	192

	193 Args:

	194 crash_data (JSON): ??

	195 queue_name (??): the name of the AppEngine queue we should start

	196 the pipeline on.

	197

	198 Returns:

	199 True if we started the pipeline; False otherwise.

	200 """

	201 # Check policy and tune arguments if needed.

	202 crash_data = self.CheckPolicy(crash_data)

	203 if crash_data is None:

	204 return False

	205

	206 # Detect the regression range, and decide if we actually need to

	207 # run a new anlaysis or not.

	208 if not self._NeedsNewAnalysis(crash_data):

	209 return False

	210

	211 crash_identifiers = crash_data['crash_identifiers']

	212 # N.B., we cannot pass \|self\| directly to the _pipeline_cls, because

	213 # it is not JSON-serializable (and there's no way to make it such,

	214 # since JSON-serializability is defined by JSON-encoders rather than

	215 # as methods on the objects being encoded).

	216 analysis_pipeline = self._pipeline_cls(self.client_id, crash_identifiers)

	217 # Attribute defined outside __init__ - pylint: disable=W0201

	218 analysis_pipeline.target = appengine_util.GetTargetNameForModule(

	219 constants.CRASH_BACKEND[self.client_id])

	220 analysis_pipeline.start(queue_name=queue_name)

	221 logging.info('New %s analysis is scheduled for %s', self.client_id,

	222 repr(crash_identifiers))

	223 return True

	224

	225 # TODO(wrengr): does the parser actually need the version, signature,

	226 # and platform? If not, then we should be able to just pass the string

	227 # to be parsed (which would make a lot more sense than passing the

	228 # whole model).

	229 # TODO(http://crbug.com/659346): coverage tests for this class, not

	230 # just for FinditForFracas.

	231 def ParseStacktrace(self, model):

	232 """Parse a CrashAnalysis's \|stack_trace\| string into a Stacktrace object.

	233

	234 Args:

	235 model (CrashAnalysis): The model containing the stack_trace string

	236 to be parsed.

	237

	238 Returns:

	239 On success, returns a Stacktrace object; on failure, returns None.

	240 """

	241 stacktrace = self._stacktrace_parser.Parse(

	242 model.stack_trace,

	243 chrome_dependency_fetcher.ChromeDependencyFetcher(

	244 self._repository

	245 ).GetDependency(

	246 model.crashed_version,

	247 model.platform),

	248 model.signature)

	249 if not stacktrace:

	250 logging.warning('Failed to parse the stacktrace %s', model.stack_trace)

	251 return None

	252

	253 return stacktrace

	254

	255 # TODO(wrengr): This is only called by \|CrashAnalysisPipeline.run\|;

	256 # we should be able to adjust things so that we only need to take in

	257 # \|crash_identifiers\|, or a CrashReport, rather than taking in the

	258 # whole model. And/or, we should just inline this there.

	259 # TODO(http://crbug.com/659346): coverage tests for this class, not

	260 # just for FinditForFracas.

	261 def FindCulprit(self, model):

	262 """Given a CrashAnalysis ndb.Model, return a Culprit."""

	263 stacktrace = self.ParseStacktrace(model)

	264 if stacktrace is None:

	265 # TODO(http://crbug.com/659359): refactor things so we don't need

	266 # the NullCulprit class.

	267 return NullCulprit()

	268

	269 return self._azalea.FindCulprit(CrashReport(

	270 crashed_version = model.crashed_version,

	271 signature = model.signature,

	272 platform = model.platform,

	273 stacktrace = stacktrace,

	274 regression_range = model.regression_range))

OLD	NEW