appengine/findit/crash/findit.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Side by Side Diff: appengine/findit/crash/findit.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: linting for coverage Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 # Copyright 2016 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 import copy

	6 import logging

	7

	8 from google.appengine.ext import ndb

	9

	10 from common import appengine_util

	11 from common import chromium_deps

	12 from common import constants

	13 from common import time_util

	14 from crash.crash_report import CrashReport

	15 from crash.culprit import NullCulprit

	16 from model import analysis_status

	17 from model.crash.crash_config import CrashConfig

	18 from model.crash.cracas_crash_analysis import CracasCrashAnalysis

	19 from model.crash.fracas_crash_analysis import FracasCrashAnalysis

	20

	21

	22 # TODO: this class depends on ndb stuff, and should therefore move to

	23 # cr-culprit-finder/service/predator as part of the big reorganization.

	24 # Also, this class should be renamed to "PredatorApp" (alas, "Azalea"

	25 # was renamed to "Predator").

	26 class Findit(object):

	27 def __init__(self, pipeline_cls):

	28 """

	29 Args:

	30 pipeline_cls (class): the class for constructing pipelines in

	31 ScheduleNewAnalysis. This will almost surely be

	32 \|crash.crash_pipeline.CrashWrapperPipeline\|; but we must pass

	33 the class in as a parameter in order to break an import cycle.

	34 """

	35 # TODO(wrengr): because self.client is volatile, we need some way

	36 # of updating the Azelea instance whenever the config changes. How to

	37 # do that cleanly?

	38 self._azalea = None

	39 self._stacktrace_parser = None

	40 self._pipeline_cls = pipeline_cls

	41

	42 # This is a method rather than an attribute to ensure it can't be

	43 # changed. And it's a classmethod rather than a property, since we can

	44 # get it directly from the class itself, without needing to allocate

	45 # an instance first.

	46 @classmethod

	47 def ClientID(cls):

	48 """Reify the name of this class as a CrashClient id, for serialization."""

	49 raise NotImplementedError()

	50

	51 @property

	52 def client_id(self):

	53 """Get the client id from the class of this object.

	54

	55 N.B., this property is static and should not be overridden."""

	56 return self.__class__.ClientID()

	57

	58 # TODO(wrengr): can we remove the dependency on CrashConfig

	59 # entirely? It'd be better to receive method calls whenever things

	60 # change, so that we know the change happened (and what in particular

	61 # changed) so that we can update our internal state as appropriate.

	62 @property

	63 def config(self):

	64 """Get the current value of the client config.

	65

	66 N.B., this property is volatile and may change asynchronously."""

	67 return CrashConfig.Get().GetClientConfig(self.client_id)

	68

	69 # TODO(wrengr): rename to CanonicalizePlatform or something like that.

	70 def RenamePlatform(self, platform):

	71 """Remap the platform to a different one, based on the config."""

	72 # TODO(katesonia): Remove the default value after adding validity check to

	73 # config.

	74 return self.config.get('platform_rename', {}).get(platform, platform)

	75

	76 def CheckPolicy(self, crash_data): # pylint: disable=W0613

	77 """Check whether this client supports analyzing the given report.

	78

	79 Some clients only support analysis for crashes on certain platforms

	80 or channels, etc. This method checks to see whether this client can

	81 analyze the given crash.

	82

	83 Args:

	84 crash_data (JSON): ??

	85

	86 Returns:

	87 If satisfied, we return the \|crash_data\| which may have had some

	88 fields modified. Otherwise returns None.

	89 """

	90 return None

	91

	92 # TODO(wrengr): rename this to something like _NewAnalysis, since

	93 # it only does the "allocation" and needs to/will be followed up with

	94 # _InitializeAnalysis anyways.

	95 def CreateAnalysis(self, crash_identifiers): # pylint: disable=W0613

	96 return None

	97

	98 def GetAnalysis(self, crash_identifiers): # pylint: disable=W0613

	99 """Return the CrashAnalysis for the \|crash_identifiers\|, if one exists.

	100

	101 Args:

	102 crash_identifiers (JSON): ??

	103

	104 Returns:

	105 If a CrashAnalysis ndb.Model already exists for the

	106 \|crash_identifiers\|, then we return it. Otherwise, returns None.

	107 """

	108 return None

	109

	110 # TODO(wrengr): this should be a method on CrashAnalysis, not on Findit.

	111 def _InitializeAnalysis(self, model, crash_data):

	112 """(Re)Initialize a CrashAnalysis ndb.Model, but do not \|put()\| it yet.

	113

	114 This method is only ever called from _NeedsNewAnalysis which is only

	115 ever called from ScheduleNewAnalysis. It is used for filling in the

	116 fields of a CrashAnalysis ndb.Model for the first time (though it

	117 can also be used to re-initialize a given CrashAnalysis). Subclasses

	118 should extend (not override) this to (re)initialize any

	119 client-specific fields they may have."""

	120 # Get rid of any previous values there may have been.

	121 model.Reset()

	122

	123 # Set the version.

	124 # \|handlers.crash.test.crash_handler_test.testAnalysisScheduled\|

	125 # provides and expects this field to be called 'chrome_version',

	126 # whereas everyone else (e.g., in \|crash.test.crash_pipeline_test\|

	127 # the tests \|testAnalysisNeededIfNoAnalysisYet\|,

	128 # \|testRunningAnalysisNoSuspectsFound\|, \|testRunningAnalysis\|,

	129 # \|testAnalysisNeededIfLastOneFailed\|, \|testRunningAnalysisWithSuspectsCls\|)

	130 # expects it to be called 'crashed_version'. The latter is the

	131 # better/more general name, so the former needs to be changed in

	132 # order to get rid of this defaulting ugliness.

	133 model.crashed_version = crash_data.get('crashed_version',

	134 crash_data.get('chrome_version', None))

	135

	136 # Set (other) common properties.

	137 model.stack_trace = crash_data['stack_trace']

	138 model.signature = crash_data['signature']

	139 model.platform = crash_data['platform']

	140 # TODO(wrengr): The only reason to have _InitializeAnalysis as a

	141 # method of the Findit class rather than as a method on CrashAnalysis

	142 # is so we can assert that crash_data['client_id'] == self.client_id.

	143 # So, either we should do that, or else we should move this to be

	144 # a method on CrashAnalysis.

	145 model.client_id = self.client_id

	146

	147 # Set progress properties.

	148 model.status = analysis_status.PENDING

	149 model.requested_time = time_util.GetUTCNow()

	150

	151 @ndb.transactional

	152 def _NeedsNewAnalysis(self, crash_data):

	153 raise NotImplementedError()

	154

	155 # TODO(wrengr): we misplaced the coverage test for the True path; find it!

	156 def ScheduleNewAnalysis(self, crash_data, queue_name=constants.DEFAULT_QUEUE):

	157 """Create a pipeline object to perform the analysis, and start it.

	158

	159 If we can detect that the analysis doesn't need to be performed

	160 (e.g., it was already performed, or the \|crash_data\| is empty so

	161 there's nothig we can do), then we will skip creating the pipeline

	162 at all.

	163

	164 Args:

	165 crash_data (JSON): ??

	166 queue_name (??): the name of the AppEngine queue we should start

	167 the pipeline on.

	168

	169 Returns:

	170 True if we started the pipeline; False otherwise.

	171 """

	172 # Check policy and tune arguments if needed.

	173 crash_data = self.CheckPolicy(crash_data)

	174 if crash_data is None:

	175 return False

	176

	177 # Detect the regression range, and decide if we actually need to

	178 # run a new anlaysis or not.

	179 if not self._NeedsNewAnalysis(crash_data):

	180 return False

	181

	182 crash_identifiers = crash_data['crash_identifiers']

	183 # N.B., we cannot pass \|self\| directly to the _pipeline_cls, because

	184 # it is not JSON-serializable (and there's no way to make it such,

	185 # since JSON-serializability is defined by JSON-encoders rather than

	186 # as methods on the objects being encoded).

	187 analysis_pipeline = self._pipeline_cls(self.client_id, crash_identifiers)
	Sharu Jiang 2016/10/21 21:57:06 This is like we construct a Findit object (in cras This is like we construct a Findit object (in crash_handler), and construct a pipeline inside it, and yet construct another identical Findit object in this pipeline and run analysis using this Findit object. How about we move this function out of Findit class, and make this stand-alone function which simply creates the CrashWrapperPipeline and run its _findit for analysis. wrengr 2016/10/22 00:18:49 The Findit object contains all the stuff for actua Show quoted text On 2016/10/21 21:57:06, sharu jiang wrote: > This is like we construct a Findit object (in crash_handler), and construct a > pipeline inside it, and yet construct another identical Findit object in this > pipeline and run analysis using this Findit object. > > How about we move this function out of Findit class, and make this stand-alone > function which simply creates the CrashWrapperPipeline and run its _findit for > analysis. The Findit object contains all the stuff for actually doing the analysis (by calling the Azalea object to do so), but objects in general (and Findit objects in particular) aren't JSON-serializable; thus, no matter how we organize things, the pipelines are going to have to reconstruct Findit objects every time they're initialized. The goal of putting ScheduleNewAnalysis (and _NeedsNewAnalysis, and...) in this class is to try and encapsulate/hide all the gross implementation details of how we spawn off a bunch of processes to actually do things. That is, conceptually the Findit object fires off a bunch of pipeline tasks and outlives all of them (even if, technically, due to implementation details we must actually reconstruct the Findit object every time we start up a new process to execute a given task).
	188 # Attribute defined outside __init__ - pylint: disable=W0201

	189 analysis_pipeline.target = appengine_util.GetTargetNameForModule(

	190 constants.CRASH_BACKEND[self.client_id])

	191 analysis_pipeline.start(queue_name=queue_name)

	192 logging.info('New %s analysis is scheduled for %s', self.client_id,

	193 repr(crash_identifiers))

	194 return True

	195

	196 # TODO(wrengr): does the parser actually need the version, signature,

	197 # and platform? If not, then we should be able to just pass the string

	198 # to be parsed (which would make a lot more sense than passing the

	199 # whole model).

	200 # TODO(wrengr): we misplaced the coverage test; find it!

	201 def ParseStacktrace(self, model):

	202 """Parse a CrashAnalysis's \|stack_trace\| string into a Stacktrace object.

	203

	204 Args:

	205 model (CrashAnalysis): The model containing the stack_trace string

	206 to be parsed.

	207

	208 Returns:

	209 On success, returns a Stacktrace object; on failure, returns None.

	210 """

	211 stacktrace = self._stacktrace_parser.Parse(model.stack_trace,

	212 chromium_deps.GetChromeDependency(

	213 model.crashed_version, model.platform),

	214 model.signature)

	215 if not stacktrace:

	216 logging.warning('Failed to parse the stacktrace %s', model.stack_trace)

	217 return None

	218

	219 return stacktrace

	220

	221 # TODO(wrengr): This is only called by \|CrashAnalysisPipeline.run\|;

	222 # we should be able to adjust things so that we only need to take in

	223 # \|crash_identifiers\|, or a CrashReport, rather than taking in the

	224 # whole model. And/or, we should just inline this there.

	225 def FindCulprit(self, model):

	226 """Given a CrashAnalysis ndb.Model, return a Culprit."""

	227 stacktrace = self.ParseStacktrace(model)

	228 if stacktrace is None:

	229 # TODO(wrengr): refactor things so we don't need the NullCulprit class.

	230 return NullCulprit()

	231

	232 return self._azalea.FindCulprit(CrashReport(

	233 crashed_version = model.crashed_version,

	234 signature = model.signature,

	235 platform = model.platform,

	236 stacktrace = stacktrace,

	237 regression_range = model.regression_range))

OLD	NEW

« no previous file with comments | « appengine/findit/crash/culprit.py ('k') | appengine/findit/crash/findit_for_chromecrash.py » ('j') | appengine/findit/crash/findit_for_chromecrash.py » ('J')