appengine/findit/crash/findit.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Side by Side Diff: appengine/findit/crash/findit.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: Addressing the crash_config.fracas issue Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« appengine/findit/crash/culprit.py ('K') | « appengine/findit/crash/culprit.py ('k') | appengine/findit/crash/findit_for_chromecrash.py » ('j') | appengine/findit/crash/findit_for_chromecrash.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 # Copyright 2016 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 import copy

	6 import logging

	7

	8 from google.appengine.ext import ndb

	9

	10 from common import appengine_util

	11 from common import chromium_deps

	12 from common import constants

	13 from common import time_util

	14 from crash.crash_report import CrashReport

	15 from crash.culprit import NullCulprit

	16 from model import analysis_status

	17 from model.crash.crash_config import CrashConfig

	18 from model.crash.cracas_crash_analysis import CracasCrashAnalysis

	19 from model.crash.fracas_crash_analysis import FracasCrashAnalysis

	20

	21 # TODO(wrengr): since most of our unit tests are FinditForFracas-specific,

	22 # I moved them to findit_for_chromecrash_test.py. However, now we're

	23 # missing coverage for most of this file (due to the buggy way coverage is

	24 # computed). Need to add a bunch of new unittests to get coverage back up.

	25

	26 # TODO: this class depends on ndb stuff, and should therefore move to

	27 # cr-culprit-finder/service/predator as part of the big reorganization.

	28 # Also, this class should be renamed to "PredatorApp" (alas, "Azalea"

	29 # was renamed to "Predator").

	30 class Findit(object):

	31 def __init__(self, pipeline_cls):

	32 """

	33 Args:

	34 pipeline_cls (class): the class for constructing pipelines in

	35 ScheduleNewAnalysis. This will almost surely be

	36 \|crash.crash_pipeline.CrashWrapperPipeline\|; but we must pass

	37 the class in as a parameter in order to break an import cycle.

	38 """

	39 # TODO(wrengr): because self.client is volatile, we need some way

	40 # of updating the Azelea instance whenever the config changes. How to

	41 # do that cleanly?

	42 self._azalea = None

	43 self._stacktrace_parser = None

	44 self._pipeline_cls = pipeline_cls

	45

	46 # This is a class method because it should be the same for all

	47 # instances of this class. We can in fact call class methods on

	48 # instances (not just on the class itself), so we could in principle

	49 # get by with just this method. However, a @classmethod is treated

	50 # syntactically like a method, thus we'd need to have the \|()\| at the

	51 # end, unlike for a @property. Thus we have both the class method and

	52 # the property, in order to simulate a class property.

	53 # TODO(wrengr): should we use @staticmethod instead?

	54 @classmethod

	55 def _ClientID(cls):

	56 """Get the client id for this class.

	57

	58 This class method is private. Unless you really need to access

	59 this method directly for some reason, you should use the \|client_id\|

	60 property instead.

	61

	62 Returns:

	63 A string which is member of the CrashClient enumeration.

	64 """

	65 if cls is Findit:

	66 logging.warning('Findit is abstract, '

	67 'but someone constructed an instance and called _ClientID')

	68 else:

	69 logging.warning('Findit subclass %s forgot to implement _ClientID',

	70 cls.__name__)

	71 raise NotImplementedError()

	72

	73 @property

	74 def client_id(self):

	75 """Get the client id from the class of this object.

	76

	77 N.B., this property is static and should not be overridden."""

	78 return self._ClientID()

	79

	80 # TODO(wrengr): can we remove the dependency on CrashConfig

	81 # entirely? It'd be better to receive method calls whenever things

	82 # change, so that we know the change happened (and what in particular

	83 # changed) so that we can update our internal state as appropriate.

	84 @property

	85 def config(self):

	86 """Get the current value of the client config for the class of this object.

	87

	88 N.B., this property is volatile and may change asynchronously."""

	89 return CrashConfig.Get().GetClientConfig(self.client_id)

	90

	91 # TODO(wrengr): rename to CanonicalizePlatform or something like that.

	92 def RenamePlatform(self, platform):

	93 """Remap the platform to a different one, based on the config."""

	94 # TODO(katesonia): Remove the default value after adding validity check to

	95 # config.

	96 return self.config.get('platform_rename', {}).get(platform, platform)

	97

	98 def CheckPolicy(self, crash_data): # pylint: disable=W0613

	99 """Check whether this client supports analyzing the given report.

	100

	101 Some clients only support analysis for crashes on certain platforms

	102 or channels, etc. This method checks to see whether this client can

	103 analyze the given crash.

	104

	105 Args:

	106 crash_data (JSON): ??

	107

	108 Returns:

	109 If satisfied, we return the \|crash_data\| which may have had some

	110 fields modified. Otherwise returns None.

	111 """

	112 return None

	113

	114 # TODO(wrengr): rename this to something like _NewAnalysis, since

	115 # it only does the "allocation" and needs to/will be followed up with

	116 # _InitializeAnalysis anyways.

	117 def CreateAnalysis(self, crash_identifiers): # pylint: disable=W0613

	118 return None

	119

	120 def GetAnalysis(self, crash_identifiers): # pylint: disable=W0613

	121 """Return the CrashAnalysis for the \|crash_identifiers\|, if one exists.

	122

	123 Args:

	124 crash_identifiers (JSON): ??

	125

	126 Returns:

	127 If a CrashAnalysis ndb.Model already exists for the

	128 \|crash_identifiers\|, then we return it. Otherwise, returns None.

	129 """

	130 return None

	131

	132 # TODO(wrengr): this should be a method on CrashAnalysis, not on Findit.

	133 # TODO(wrengr): coverage tests for this class, not just for FinditForFracas.

	134 def _InitializeAnalysis(self, model, crash_data):

	135 """(Re)Initialize a CrashAnalysis ndb.Model, but do not \|put()\| it yet.

	136

	137 This method is only ever called from _NeedsNewAnalysis which is only

	138 ever called from ScheduleNewAnalysis. It is used for filling in the

	139 fields of a CrashAnalysis ndb.Model for the first time (though it

	140 can also be used to re-initialize a given CrashAnalysis). Subclasses

	141 should extend (not override) this to (re)initialize any

	142 client-specific fields they may have."""

	143 # Get rid of any previous values there may have been.

	144 model.Reset()

	145

	146 # Set the version.

	147 # \|handlers.crash.test.crash_handler_test.testAnalysisScheduled\|

	148 # provides and expects this field to be called 'chrome_version',

	149 # whereas everyone else (e.g., in \|crash.test.crash_pipeline_test\|

	150 # the tests \|testAnalysisNeededIfNoAnalysisYet\|,

	151 # \|testRunningAnalysisNoSuspectsFound\|, \|testRunningAnalysis\|,

	152 # \|testAnalysisNeededIfLastOneFailed\|, \|testRunningAnalysisWithSuspectsCls\|)

	153 # expects it to be called 'crashed_version'. The latter is the

	154 # better/more general name, so the former needs to be changed in

	155 # order to get rid of this defaulting ugliness.

	156 model.crashed_version = crash_data.get('crashed_version',

	157 crash_data.get('chrome_version', None))

	158

	159 # Set (other) common properties.

	160 model.stack_trace = crash_data['stack_trace']

	161 model.signature = crash_data['signature']

	162 model.platform = crash_data['platform']

	163 # TODO(wrengr): The only reason to have _InitializeAnalysis as a

	164 # method of the Findit class rather than as a method on CrashAnalysis

	165 # is so we can assert that crash_data['client_id'] == self.client_id.

	166 # So, either we should do that, or else we should move this to be

	167 # a method on CrashAnalysis.

	168 model.client_id = self.client_id

	169

	170 # Set progress properties.

	171 model.status = analysis_status.PENDING

	172 model.requested_time = time_util.GetUTCNow()

	173

	174 @ndb.transactional

	175 def _NeedsNewAnalysis(self, crash_data):

	176 raise NotImplementedError()

	177

	178 # TODO(wrengr): coverage tests for when CheckPolicy succeeds.

	179 def ScheduleNewAnalysis(self, crash_data, queue_name=constants.DEFAULT_QUEUE):

	180 """Create a pipeline object to perform the analysis, and start it.

	181

	182 If we can detect that the analysis doesn't need to be performed

	183 (e.g., it was already performed, or the \|crash_data\| is empty so

	184 there's nothig we can do), then we will skip creating the pipeline

	185 at all.

	186

	187 Args:

	188 crash_data (JSON): ??

	189 queue_name (??): the name of the AppEngine queue we should start

	190 the pipeline on.

	191

	192 Returns:

	193 True if we started the pipeline; False otherwise.

	194 """

	195 # Check policy and tune arguments if needed.

	196 crash_data = self.CheckPolicy(crash_data)

	197 if crash_data is None:

	198 return False

	199

	200 # Detect the regression range, and decide if we actually need to

	201 # run a new anlaysis or not.

	202 if not self._NeedsNewAnalysis(crash_data):

	203 return False

	204

	205 crash_identifiers = crash_data['crash_identifiers']

	206 # N.B., we cannot pass \|self\| directly to the _pipeline_cls, because

	207 # it is not JSON-serializable (and there's no way to make it such,

	208 # since JSON-serializability is defined by JSON-encoders rather than

	209 # as methods on the objects being encoded).

	210 analysis_pipeline = self._pipeline_cls(self.client_id, crash_identifiers)
	stgao 2016/10/25 18:03:41 This is a bit unfortunate for the cycle dependency This is a bit unfortunate for the cycle dependency. Later, we could consider factoring this out and make the caller of this code to start the pipeline. Sharu Jiang 2016/10/25 18:31:44 I have the same concern, we can discuss this in th Show quoted text On 2016/10/25 18:03:41, stgao wrote: > This is a bit unfortunate for the cycle dependency. Later, we could consider > factoring this out and make the caller of this code to start the pipeline. I have the same concern, we can discuss this in the meeting. wrengr 2016/10/25 19:49:54 Yeah, the cyclic dependency problem is unfortunate Show quoted text On 2016/10/25 18:03:41, stgao wrote: > This is a bit unfortunate for the cycle dependency. Later, we could consider > factoring this out and make the caller of this code to start the pipeline. Yeah, the cyclic dependency problem is unfortunate. Let's set up a meeting to see if we can't come up with some cleaner way of breaking the cycle.
	211 # Attribute defined outside __init__ - pylint: disable=W0201

	212 analysis_pipeline.target = appengine_util.GetTargetNameForModule(

	213 constants.CRASH_BACKEND[self.client_id])

	214 analysis_pipeline.start(queue_name=queue_name)

	215 logging.info('New %s analysis is scheduled for %s', self.client_id,

	216 repr(crash_identifiers))

	217 return True

	218

	219 # TODO(wrengr): does the parser actually need the version, signature,

	220 # and platform? If not, then we should be able to just pass the string

	221 # to be parsed (which would make a lot more sense than passing the

	222 # whole model).

	223 # TODO(wrengr): coverage tests for this class, not just for FinditForFracas.

	224 def ParseStacktrace(self, model):

	225 """Parse a CrashAnalysis's \|stack_trace\| string into a Stacktrace object.

	226

	227 Args:

	228 model (CrashAnalysis): The model containing the stack_trace string

	229 to be parsed.

	230

	231 Returns:

	232 On success, returns a Stacktrace object; on failure, returns None.

	233 """

	234 stacktrace = self._stacktrace_parser.Parse(model.stack_trace,

	235 chromium_deps.GetChromeDependency(

	236 model.crashed_version, model.platform),

	237 model.signature)

	238 if not stacktrace:

	239 logging.warning('Failed to parse the stacktrace %s', model.stack_trace)

	240 return None

	241

	242 return stacktrace

	243

	244 # TODO(wrengr): This is only called by \|CrashAnalysisPipeline.run\|;

	245 # we should be able to adjust things so that we only need to take in

	246 # \|crash_identifiers\|, or a CrashReport, rather than taking in the

	247 # whole model. And/or, we should just inline this there.

	248 # TODO(wrengr): coverage tests for this class, not just for FinditForFracas.

	249 def FindCulprit(self, model):

	250 """Given a CrashAnalysis ndb.Model, return a Culprit."""

	251 stacktrace = self.ParseStacktrace(model)

	252 if stacktrace is None:

	253 # TODO(wrengr): refactor things so we don't need the NullCulprit class.

	254 return NullCulprit()

	255

	256 return self._azalea.FindCulprit(CrashReport(

	257 crashed_version = model.crashed_version,

	258 signature = model.signature,

	259 platform = model.platform,

	260 stacktrace = stacktrace,

	261 regression_range = model.regression_range))

OLD	NEW