appengine/findit/crash/crash_pipeline.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Unified Diff: appengine/findit/crash/crash_pipeline.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: trying to fix some tests Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« appengine/findit/crash/azalea.py ('K') | « appengine/findit/crash/component_classifier.py ('k') | appengine/findit/crash/crash_report.py » ('j') | appengine/findit/crash/crash_report.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: appengine/findit/crash/crash_pipeline.py

diff --git a/appengine/findit/crash/crash_pipeline.py b/appengine/findit/crash/crash_pipeline.py

index 6aa184692e2b6434388bc6de29f846a1b66963d1..ce39fb073dec210b9c3c1ea37477bc45cc334c1c 100644

--- a/appengine/findit/crash/crash_pipeline.py

+++ b/appengine/findit/crash/crash_pipeline.py

@@ -6,7 +6,6 @@ import copy

import json

import logging

-from google.appengine.api import app_identity

from google.appengine.ext import ndb

from common import appengine_util

@@ -15,39 +14,97 @@ from common import pubsub_util

from common import time_util

from common.pipeline_wrapper import BasePipeline

from common.pipeline_wrapper import pipeline

-from crash import findit_for_client

+from crash import findit

+from crash import findit_for_chromecrash

+from crash.type_enums import CrashClient

+from crash.detect_regression_range import DetectRegressionRange

from model import analysis_status

from model.crash.crash_config import CrashConfig

+# TODO(katesonia): Move this to fracas config.

+_FINDIT_FRACAS_FEEDBACK_URL_TEMPLATE = '%s/crash/fracas-result-feedback?key=%s'

+# TODO(wrengr): rename this. Should be "Pub" or "Publish*ed*" at the very least.

+# TODO(wrengr): this should prolly be a method on CrashAnalysis rather

Sharu Jiang 2016/10/12 22:35:03 I prefer to put this in CrashAnalysis.

wrengr 2016/10/18 23:13:53 Done.

+# than a stand-alone function. Or, we could have the CrashAnalysis be part

+# of the Findit class directly, in order to get rid of this last remaining

+# case switching on the |client_id|. With all the FooAnalysis methods, it

+# basically already is a part of that class, just not done cleanly is all.

+def GetPublishResultFromAnalysis(model, crash_identifiers):

+ """Convert the datastore analysis into a publishable result.

+ Args:

+ model (CrashAnalysis): the datastore to be converted.

+ crash_identifiers (??): ??

+ Returns:

+ A dict of ??

+ """

+ model_result = copy.deepcopy(model.result)

+ client_id = model.client_id

+ if (client_id == CrashClient.FRACAS or

+ client_id == CrashClient.CRACAS):

+ model_result['feedback_url'] = _FINDIT_FRACAS_FEEDBACK_URL_TEMPLATE % (

+ appengine_util.GetDefaultVersionHostname(), model.key.urlsafe())

+ if model_result['found']:

+ for cl in model_result['suspected_cls']:

+ cl['confidence'] = round(cl['confidence'], 2)

+ cl.pop('reason', None)

+ elif client_id == CrashClient.CLUSTERFUZZ: # pragma: no cover.

+ # TODO(katesonia): Post process clusterfuzz model result if needed.

+ pass

+ return {

+ 'crash_identifiers': crash_identifiers,

+ 'client_id': client_id,

+ 'result': model_result,

+ }

+# TODO(wrengr): we'd like to move this to findit.py (or somewhere similar)

+# so it's encapsulated along with the Findit classes; but to do that we'd

+# need to merge findit.py and findit_for_chromecrash.py to avoid cyclic

+# dependencies. Is there a clean way to do what we want?

+def FinditForClientID(client_id):

+ if client_id == CrashClient.FRACAS:

+ return findit_for_chromecrash.FinditForFracas()

+ elif client_id == CrashClient.CRACAS:

+ return findit_for_chromecrash.FinditForCracas()

+ elif client_id == CrashClient.CLUSTERFUZZ:

+ return findit.FinditForClusterfuzz()

Sharu Jiang 2016/10/12 22:35:03 We'd better create a findit_for_clusterfuzz for cl

wrengr 2016/10/18 23:13:54 Done.

+ else:

+ logging.info('Client %s is not supported by findit right now', client_id)

+ raise ValueError()

+# TODO(wrengr): make this a member of Findit (or, perhaps, merge the

+# two classes), so we don't need to pass the |client_id| around manually.

class CrashBasePipeline(BasePipeline):

def __init__(self, crash_identifiers, client_id):

super(CrashBasePipeline, self).__init__(crash_identifiers, client_id)

self.crash_identifiers = crash_identifiers

self.client_id = client_id

+ self.findit = FinditForClientID(client_id)

+ # TODO(wrengr): why not just use __call__? Or, if giving it this name,

+ # why not specify the right number of arguments so that we don't need

+ # to disable pylint?

stgao 2016/10/12 15:46:58 Unfornately, we can't do __call__, because this is

wrengr 2016/10/12 21:14:51 Acknowledged.

def run(self, *args, **kwargs):

raise NotImplementedError()

class CrashAnalysisPipeline(CrashBasePipeline):

- def _SetErrorIfAborted(self, aborted):

- if not aborted:

- return

- logging.error('Aborted analysis for %s', repr(self.crash_identifiers))

- analysis = findit_for_client.GetAnalysisForClient(self.crash_identifiers,

- self.client_id)

- analysis.status = analysis_status.ERROR

- analysis.put()

def finalized(self):

- self._SetErrorIfAborted(self.was_aborted)

+ if self.was_aborted:

+ logging.error('Aborted analysis for %s', repr(self.crash_identifiers))

stgao 2016/10/12 15:46:58 For testing purpose, we need to factor this out as

wrengr 2016/10/12 21:14:51 I've been told elsewhere that the main code should

stgao 2016/10/13 06:03:16 Yep, I agreed with this in general.

wrengr 2016/10/18 23:13:54 Done.

+ analysis = self.findit.GetAnalysis(self.crash_identifiers)

+ analysis.status = analysis_status.ERROR

+ analysis.put()

# Arguments number differs from overridden method - pylint: disable=W0221

- def run(self, crash_identifiers, client_id):

- analysis = findit_for_client.GetAnalysisForClient(crash_identifiers,

- client_id)

+ def run(self, crash_identifiers):

+ analysis = self.findit.GetAnalysis(crash_identifiers)

# Update analysis status.

analysis.pipeline_status_path = self.pipeline_status_path()

@@ -57,7 +114,7 @@ class CrashAnalysisPipeline(CrashBasePipeline):

analysis.put()

# Run the analysis.

- result, tags = findit_for_client.FindCulprit(analysis)

+ result, tags = self.findit.FindCulprit(analysis)

# Update analysis status and save the analysis result.

analysis.completed_time = time_util.GetUTCNow()

@@ -76,40 +133,34 @@ class PublishResultPipeline(CrashBasePipeline):

logging.error('Failed to publish %s analysis result for %s',

repr(self.crash_identifiers), self.client_id)

# Arguments number differs from overridden method - pylint: disable=W0221

- def run(self, crash_identifiers, client_id):

- analysis = findit_for_client.GetAnalysisForClient(crash_identifiers,

- client_id)

- result = findit_for_client.GetPublishResultFromAnalysis(analysis,

- crash_identifiers,

- client_id)

+ def run(self, crash_identifiers):

+ analysis = self.findit.GetAnalysis(crash_identifiers)

+ result = GetPublishResultFromAnalysis(analysis, crash_identifiers)

messages_data = [json.dumps(result, sort_keys=True)]

- client_config = CrashConfig.Get().GetClientConfig(client_id)

+ client_config = CrashConfig.Get().GetClientConfig(self.client_id)

# TODO(katesonia): Clean string uses in config.

topic = client_config['analysis_result_pubsub_topic']

pubsub_util.PublishMessagesToTopic(messages_data, topic)

- logging.info('Published %s analysis result for %s', client_id,

+ logging.info('Published %s analysis result for %s', self.client_id,

repr(crash_identifiers))

class CrashWrapperPipeline(BasePipeline):

# Arguments number differs from overridden method - pylint: disable=W0221

- def run(self, crash_identifiers, client_id):

- run_analysis = yield CrashAnalysisPipeline(crash_identifiers, client_id)

+ def run(self, crash_identifiers):

+ run_analysis = yield CrashAnalysisPipeline(crash_identifiers, self.client_id)

with pipeline.After(run_analysis):

- yield PublishResultPipeline(crash_identifiers, client_id)

+ yield PublishResultPipeline(crash_identifiers, self.client_id)

+# TODO(wrengr): this should surely be a method on Findit, rather than

+# a standalone function.

@ndb.transactional

-def _NeedsNewAnalysis(

- crash_identifiers, chrome_version, signature, client_id,

- platform, stack_trace, customized_data):

- analysis = findit_for_client.GetAnalysisForClient(crash_identifiers,

- client_id)

- regression_range = findit_for_client.GetRegressionRange(client_id,

- customized_data)

+def _NeedsNewAnalysis(findit_client, crash_identifiers, report, historical_metadata=None):

Sharu Jiang 2016/10/12 22:35:03 instead of passing in historical_metadata, we shou

wrengr 2016/10/18 23:13:54 I can add another optional argument for regression

+ analysis = findit_client.GetAnalysis(crash_identifiers)

+ regression_range = DetectRegressionRange(historical_metadata)

if (analysis and not analysis.failed and

regression_range == analysis.regression_range):

logging.info('The analysis of %s has already been done.',

@@ -118,37 +169,36 @@ def _NeedsNewAnalysis(

# Create analysis for findit to run if this is not a rerun.

if not analysis:

- analysis = findit_for_client.CreateAnalysisForClient(crash_identifiers,

- client_id)

+ analysis = findit_client.CreateAnalysis(crash_identifiers)

- findit_for_client.ResetAnalysis(analysis, chrome_version, signature,

- client_id, platform, stack_trace,

- customized_data, regression_range)

+ findit_client.UpdateAnalysis(analysis, report)

return True

-def ScheduleNewAnalysisForCrash(

- crash_identifiers, chrome_version, signature, client_id,

- platform, stack_trace, customized_data,

- queue_name=constants.DEFAULT_QUEUE):

+# TODO(wrengr): this should surely be a method on Findit, rather than

+# a standalone function. To see whether we can actually do so,

+# note that this method is only called by crash.crash_pipeline and handlers.crash.crash_handler

+def ScheduleNewAnalysisForCrash(findit_client, crash_identifiers, report,

Sharu Jiang 2016/10/12 22:35:03 ? In crash_handler, this function is called as bel

wrengr 2016/10/18 23:13:54 Whoops, I missed that callsite when reordering/reo

+ channel, queue_name=constants.DEFAULT_QUEUE):

"""Schedules an analysis."""

- # Check policy and tune arguments if needed.

- pass_policy, updated_analysis_args = findit_for_client.CheckPolicyForClient(

- crash_identifiers, chrome_version, signature,

- client_id, platform, stack_trace,

- customized_data)

+ if not isinstance(findit_client, findit.Findit):

+ raise TypeError('In the first argument to ScheduleNewAnalysisForCrash, '

+ 'expected Findit object, but got %s object instead.' %

+ findit_client.__class__.__name__)

- if not pass_policy:

+ # Check policy and tune arguments if needed.

+ updated_report = findit_client.CheckPolicy(crash_identifiers, report, channel)

+ if not updated_report:

return False

- if _NeedsNewAnalysis(*updated_analysis_args):

- analysis_pipeline = CrashWrapperPipeline(crash_identifiers, client_id)

- # Attribute defined outside __init__ - pylint: disable=W0201

- analysis_pipeline.target = appengine_util.GetTargetNameForModule(

- constants.CRASH_BACKEND[client_id])

- analysis_pipeline.start(queue_name=queue_name)

- logging.info('New %s analysis is scheduled for %s', client_id,

- repr(crash_identifiers))

- return True

+ if not _NeedsNewAnalysis(findit_client, crash_identifiers, updated_report):

+ return False

- return False

+ analysis_pipeline = CrashWrapperPipeline(crash_identifiers, findit_client.client_id)

Sharu Jiang 2016/10/12 22:35:03 Assume we already created findit_client here, why

wrengr 2016/10/18 23:13:53 The general goal is to avoid passing the client_id

+ # Attribute defined outside __init__ - pylint: disable=W0201

+ analysis_pipeline.target = appengine_util.GetTargetNameForModule(

+ constants.CRASH_BACKEND[findit_client.client_id])

+ analysis_pipeline.start(queue_name=queue_name)

+ logging.info('New %s analysis is scheduled for %s', findit_client.client_id,

+ repr(crash_identifiers))

+ return True

« appengine/findit/crash/azalea.py ('K') | « appengine/findit/crash/component_classifier.py ('k') | appengine/findit/crash/crash_report.py » ('j') | appengine/findit/crash/crash_report.py » ('J')