appengine/findit/crash/findit.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Unified Diff: appengine/findit/crash/findit.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: Finally fixed the mock tests! Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: appengine/findit/crash/findit.py

diff --git a/appengine/findit/crash/findit.py b/appengine/findit/crash/findit.py

new file mode 100644

index 0000000000000000000000000000000000000000..c8de5375366f0c13e52ed721cb6b099ad1057c69

--- /dev/null

+++ b/appengine/findit/crash/findit.py

@@ -0,0 +1,290 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+import copy

+import logging

+from google.appengine.ext import ndb

+from common import appengine_util

+from common import chrome_dependency_fetcher

+from common import constants

+from common import time_util

+from crash.crash_report import CrashReport

+from crash.culprit import NullCulprit

+from model import analysis_status

+from model.crash.crash_config import CrashConfig

+from model.crash.cracas_crash_analysis import CracasCrashAnalysis

+from model.crash.fracas_crash_analysis import FracasCrashAnalysis

+# TODO(http://crbug.com/659346): since most of our unit tests are

+# FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py.

+# However, now we're missing coverage for most of this file (due to the

+# buggy way coverage is computed). Need to add a bunch of new unittests

+# to get coverage back up.

+# TODO: this class depends on ndb stuff, and should therefore move to

+# cr-culprit-finder/service/predator as part of the big reorganization.

+# Also, this class should be renamed to "PredatorApp" (alas, "Azalea"

+# was renamed to "Predator").

+class Findit(object):

+ def __init__(self, repository, pipeline_cls):

+ """

+ Args:

+ repository (Repository): the Git repository for getting CLs to classify.

+ pipeline_cls (class): the class for constructing pipelines in

+ ScheduleNewAnalysis. This will almost surely be

+ |crash.crash_pipeline.CrashWrapperPipeline|; but we must pass

+ the class in as a parameter in order to break an import cycle.

+ """

+ self._repository = repository

+ # TODO(http://crbug.com/659354): because self.client is volatile,

+ # we need some way of updating the Azelea instance whenever the

+ # config changes. How to do that cleanly?

+ self._azalea = None

+ self._stacktrace_parser = None

+ self._pipeline_cls = pipeline_cls

+ # This is a class method because it should be the same for all

+ # instances of this class. We can in fact call class methods on

+ # instances (not just on the class itself), so we could in principle

+ # get by with just this method. However, a @classmethod is treated

+ # syntactically like a method, thus we'd need to have the |()| at the

+ # end, unlike for a @property. Thus we have both the class method and

+ # the property, in order to simulate a class property.

+ @classmethod

+ def _ClientID(cls): # pragma: no cover

+ """Get the client id for this class.

+ This class method is private. Unless you really need to access

+ this method directly for some reason, you should use the |client_id|

+ property instead.

+ Returns:

+ A string which is member of the CrashClient enumeration.

+ """

+ if cls is Findit:

+ logging.warning('Findit is abstract, '

+ 'but someone constructed an instance and called _ClientID')

+ else:

+ logging.warning('Findit subclass %s forgot to implement _ClientID',

+ cls.__name__)

+ raise NotImplementedError()

+ @property

+ def client_id(self):

+ """Get the client id from the class of this object.

+ N.B., this property is static and should not be overridden."""

+ return self._ClientID()

+ # TODO(http://crbug.com/659354): can we remove the dependency on

+ # CrashConfig entirely? It'd be better to receive method calls

+ # whenever things change, so that we know the change happened (and

+ # what in particular changed) so that we can update our internal state

+ # as appropriate.

+ @property

+ def config(self):

+ """Get the current value of the client config for the class of this object.

+ N.B., this property is volatile and may change asynchronously.

+ If the event of an error this method will return ``None``. That we do

+ not return the empty dict is intentional. All of the circumstances

+ which would lead to this method returning ``None`` indicate

+ underlying bugs in code elsewhere. (E.g., creating instances of

+ abstract classes, implementing a subclass which is not suppported by

+ ``CrashConfig``, etc.) Because we return ``None``, any subsequent

+ calls to ``__getitem__`` or ``get`` will raise method-missing errors;

+ which serve to highlight the underlying bug. Whereas, if we silently

+ returned the empty dict, then calls to ``get`` would "work" just

+ fine; thereby masking the underlying bug!

+ """

+ return CrashConfig.Get().GetClientConfig(self.client_id)

+ # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or

+ # something like that.

+ def RenamePlatform(self, platform):

+ """Remap the platform to a different one, based on the config."""

+ # TODO(katesonia): Remove the default value after adding validity check to

+ # config.

+ return self.config.get('platform_rename', {}).get(platform, platform)

+ def CheckPolicy(self, crash_data): # pylint: disable=W0613

+ """Check whether this client supports analyzing the given report.

+ Some clients only support analysis for crashes on certain platforms

+ or channels, etc. This method checks to see whether this client can

+ analyze the given crash. The default implementation on the Findit

+ base class returns None for everything, so that unsupported clients

+ reject everything, as expected.

+ Args:

+ crash_data (JSON): ??

+ Returns:

+ If satisfied, we return the |crash_data| which may have had some

+ fields modified. Otherwise returns None.

+ """

+ return None

+ # TODO(http://crbug.com/644476): rename this to something like

+ # _NewAnalysis, since it only does the "allocation" and needs to/will

+ # be followed up with _InitializeAnalysis anyways.

+ def CreateAnalysis(self, crash_identifiers): # pylint: disable=W0613

+ return None

+ def GetAnalysis(self, crash_identifiers): # pylint: disable=W0613

+ """Return the CrashAnalysis for the |crash_identifiers|, if one exists.

+ Args:

+ crash_identifiers (JSON): ??

+ Returns:

+ If a CrashAnalysis ndb.Model already exists for the

+ |crash_identifiers|, then we return it. Otherwise, returns None.

+ """

+ return None

+ # TODO(wrengr): this should be a method on CrashAnalysis, not on Findit.

+ # TODO(http://crbug.com/659346): coverage tests for this class, not

+ # just for FinditForFracas.

+ def _InitializeAnalysis(self, model, crash_data): # pragma: no cover

+ """(Re)Initialize a CrashAnalysis ndb.Model, but do not |put()| it yet.

+ This method is only ever called from _NeedsNewAnalysis which is only

+ ever called from ScheduleNewAnalysis. It is used for filling in the

+ fields of a CrashAnalysis ndb.Model for the first time (though it

+ can also be used to re-initialize a given CrashAnalysis). Subclasses

+ should extend (not override) this to (re)initialize any

+ client-specific fields they may have."""

+ # Get rid of any previous values there may have been.

+ model.Reset()

+ # Set the version.

+ # |handlers.crash.test.crash_handler_test.testAnalysisScheduled|

+ # provides and expects this field to be called 'chrome_version',

+ # whereas everyone else (e.g., in |crash.test.crash_pipeline_test|

+ # the tests |testAnalysisNeededIfNoAnalysisYet|,

+ # |testRunningAnalysisNoSuspectsFound|, |testRunningAnalysis|,

+ # |testAnalysisNeededIfLastOneFailed|, |testRunningAnalysisWithSuspectsCls|)

+ # expects it to be called 'crashed_version'. The latter is the

+ # better/more general name, so the former needs to be changed in

+ # order to get rid of this defaulting ugliness.

+ model.crashed_version = crash_data.get('crashed_version',

+ crash_data.get('chrome_version', None))

+ # Set (other) common properties.

+ model.stack_trace = crash_data['stack_trace']

+ model.signature = crash_data['signature']

+ model.platform = crash_data['platform']

+ # TODO(wrengr): The only reason to have _InitializeAnalysis as a

+ # method of the Findit class rather than as a method on CrashAnalysis

+ # is so we can assert that crash_data['client_id'] == self.client_id.

+ # So, either we should do that, or else we should move this to be

+ # a method on CrashAnalysis.

+ model.client_id = self.client_id

+ # Set progress properties.

+ model.status = analysis_status.PENDING

+ model.requested_time = time_util.GetUTCNow()

+ @ndb.transactional

+ def _NeedsNewAnalysis(self, crash_data):

+ raise NotImplementedError()

+ # TODO(http://crbug.com/659345): try to break the cycle re pipeline_cls.

+ # TODO(http://crbug.com/659346): we don't cover anything after the

+ # call to _NeedsNewAnalysis.

+ def ScheduleNewAnalysis(self, crash_data,

+ queue_name=constants.DEFAULT_QUEUE): # pragma: no cover

+ """Create a pipeline object to perform the analysis, and start it.

+ If we can detect that the analysis doesn't need to be performed

+ (e.g., it was already performed, or the |crash_data| is empty so

+ there's nothig we can do), then we will skip creating the pipeline

+ at all.

+ Args:

+ crash_data (JSON): ??

+ queue_name (??): the name of the AppEngine queue we should start

+ the pipeline on.

+ Returns:

+ True if we started the pipeline; False otherwise.

+ """

+ # Check policy and tune arguments if needed.

+ crash_data = self.CheckPolicy(crash_data)

+ if crash_data is None:

+ return False

+ # Detect the regression range, and decide if we actually need to

+ # run a new anlaysis or not.

+ if not self._NeedsNewAnalysis(crash_data):

+ return False

+ crash_identifiers = crash_data['crash_identifiers']

+ # N.B., we cannot pass |self| directly to the _pipeline_cls, because

+ # it is not JSON-serializable (and there's no way to make it such,

+ # since JSON-serializability is defined by JSON-encoders rather than

+ # as methods on the objects being encoded).

+ analysis_pipeline = self._pipeline_cls(self.client_id, crash_identifiers)

+ # Attribute defined outside __init__ - pylint: disable=W0201

+ analysis_pipeline.target = appengine_util.GetTargetNameForModule(

+ constants.CRASH_BACKEND[self.client_id])

+ analysis_pipeline.start(queue_name=queue_name)

+ logging.info('New %s analysis is scheduled for %s', self.client_id,

+ repr(crash_identifiers))

+ return True

+ # TODO(wrengr): does the parser actually need the version, signature,

+ # and platform? If not, then we should be able to just pass the string

+ # to be parsed (which would make a lot more sense than passing the

+ # whole model).

+ # TODO(http://crbug.com/659346): coverage tests for this class, not

+ # just for FinditForFracas.

+ def ParseStacktrace(self, model): # pragma: no cover

+ """Parse a CrashAnalysis's |stack_trace| string into a Stacktrace object.

+ Args:

+ model (CrashAnalysis): The model containing the stack_trace string

+ to be parsed.

+ Returns:

+ On success, returns a Stacktrace object; on failure, returns None.

+ """

+ stacktrace = self._stacktrace_parser.Parse(

+ model.stack_trace,

+ chrome_dependency_fetcher.ChromeDependencyFetcher(

+ self._repository

+ ).GetDependency(

+ model.crashed_version,

+ model.platform),

+ model.signature)

+ if not stacktrace:

+ logging.warning('Failed to parse the stacktrace %s', model.stack_trace)

+ return None

+ return stacktrace

+ # TODO(wrengr): This is only called by |CrashAnalysisPipeline.run|;

+ # we should be able to adjust things so that we only need to take in

+ # |crash_identifiers|, or a CrashReport, rather than taking in the

+ # whole model. And/or, we should just inline this there.

+ # TODO(http://crbug.com/659346): coverage tests for this class, not

+ # just for FinditForFracas.

+ def FindCulprit(self, model): # pragma: no cover

+ """Given a CrashAnalysis ndb.Model, return a Culprit."""

+ stacktrace = self.ParseStacktrace(model)

+ if stacktrace is None:

+ # TODO(http://crbug.com/659359): refactor things so we don't need

+ # the NullCulprit class.

+ return NullCulprit()

+ return self._azalea.FindCulprit(CrashReport(

+ crashed_version = model.crashed_version,

+ signature = model.signature,

+ platform = model.platform,

+ stacktrace = stacktrace,

+ regression_range = model.regression_range))

« no previous file with comments | « appengine/findit/crash/culprit.py ('k') | appengine/findit/crash/findit_for_chromecrash.py » ('j') | no next file with comments »