Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(313)

Unified Diff: appengine/findit/crash/crash_pipeline.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)
Patch Set: trying to fix some tests Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: appengine/findit/crash/crash_pipeline.py
diff --git a/appengine/findit/crash/crash_pipeline.py b/appengine/findit/crash/crash_pipeline.py
index 6aa184692e2b6434388bc6de29f846a1b66963d1..ce39fb073dec210b9c3c1ea37477bc45cc334c1c 100644
--- a/appengine/findit/crash/crash_pipeline.py
+++ b/appengine/findit/crash/crash_pipeline.py
@@ -6,7 +6,6 @@ import copy
import json
import logging
-from google.appengine.api import app_identity
from google.appengine.ext import ndb
from common import appengine_util
@@ -15,39 +14,97 @@ from common import pubsub_util
from common import time_util
from common.pipeline_wrapper import BasePipeline
from common.pipeline_wrapper import pipeline
-from crash import findit_for_client
+from crash import findit
+from crash import findit_for_chromecrash
+from crash.type_enums import CrashClient
+from crash.detect_regression_range import DetectRegressionRange
from model import analysis_status
from model.crash.crash_config import CrashConfig
-
+# TODO(katesonia): Move this to fracas config.
+_FINDIT_FRACAS_FEEDBACK_URL_TEMPLATE = '%s/crash/fracas-result-feedback?key=%s'
+
+# TODO(wrengr): rename this. Should be "Pub" or "Publish*ed*" at the very least.
+# TODO(wrengr): this should prolly be a method on CrashAnalysis rather
Sharu Jiang 2016/10/12 22:35:03 I prefer to put this in CrashAnalysis.
wrengr 2016/10/18 23:13:53 Done.
+# than a stand-alone function. Or, we could have the CrashAnalysis be part
+# of the Findit class directly, in order to get rid of this last remaining
+# case switching on the |client_id|. With all the FooAnalysis methods, it
+# basically already is a part of that class, just not done cleanly is all.
+def GetPublishResultFromAnalysis(model, crash_identifiers):
+ """Convert the datastore analysis into a publishable result.
+
+ Args:
+ model (CrashAnalysis): the datastore to be converted.
+ crash_identifiers (??): ??
+
+ Returns:
+ A dict of ??
+ """
+ model_result = copy.deepcopy(model.result)
+ client_id = model.client_id
+
+ if (client_id == CrashClient.FRACAS or
+ client_id == CrashClient.CRACAS):
+ model_result['feedback_url'] = _FINDIT_FRACAS_FEEDBACK_URL_TEMPLATE % (
+ appengine_util.GetDefaultVersionHostname(), model.key.urlsafe())
+ if model_result['found']:
+ for cl in model_result['suspected_cls']:
+ cl['confidence'] = round(cl['confidence'], 2)
+ cl.pop('reason', None)
+ elif client_id == CrashClient.CLUSTERFUZZ: # pragma: no cover.
+ # TODO(katesonia): Post process clusterfuzz model result if needed.
+ pass
+
+ return {
+ 'crash_identifiers': crash_identifiers,
+ 'client_id': client_id,
+ 'result': model_result,
+ }
+
+
+# TODO(wrengr): we'd like to move this to findit.py (or somewhere similar)
+# so it's encapsulated along with the Findit classes; but to do that we'd
+# need to merge findit.py and findit_for_chromecrash.py to avoid cyclic
+# dependencies. Is there a clean way to do what we want?
+def FinditForClientID(client_id):
+ if client_id == CrashClient.FRACAS:
+ return findit_for_chromecrash.FinditForFracas()
+ elif client_id == CrashClient.CRACAS:
+ return findit_for_chromecrash.FinditForCracas()
+ elif client_id == CrashClient.CLUSTERFUZZ:
+ return findit.FinditForClusterfuzz()
Sharu Jiang 2016/10/12 22:35:03 We'd better create a findit_for_clusterfuzz for cl
wrengr 2016/10/18 23:13:54 Done.
+ else:
+ logging.info('Client %s is not supported by findit right now', client_id)
+ raise ValueError()
+
+
+# TODO(wrengr): make this a member of Findit (or, perhaps, merge the
+# two classes), so we don't need to pass the |client_id| around manually.
class CrashBasePipeline(BasePipeline):
def __init__(self, crash_identifiers, client_id):
super(CrashBasePipeline, self).__init__(crash_identifiers, client_id)
self.crash_identifiers = crash_identifiers
self.client_id = client_id
+ self.findit = FinditForClientID(client_id)
+ # TODO(wrengr): why not just use __call__? Or, if giving it this name,
+ # why not specify the right number of arguments so that we don't need
+ # to disable pylint?
stgao 2016/10/12 15:46:58 Unfornately, we can't do __call__, because this is
wrengr 2016/10/12 21:14:51 Acknowledged.
def run(self, *args, **kwargs):
raise NotImplementedError()
class CrashAnalysisPipeline(CrashBasePipeline):
- def _SetErrorIfAborted(self, aborted):
- if not aborted:
- return
-
- logging.error('Aborted analysis for %s', repr(self.crash_identifiers))
- analysis = findit_for_client.GetAnalysisForClient(self.crash_identifiers,
- self.client_id)
- analysis.status = analysis_status.ERROR
- analysis.put()
-
def finalized(self):
- self._SetErrorIfAborted(self.was_aborted)
+ if self.was_aborted:
+ logging.error('Aborted analysis for %s', repr(self.crash_identifiers))
stgao 2016/10/12 15:46:58 For testing purpose, we need to factor this out as
wrengr 2016/10/12 21:14:51 I've been told elsewhere that the main code should
stgao 2016/10/13 06:03:16 Yep, I agreed with this in general.
wrengr 2016/10/18 23:13:54 Done.
+ analysis = self.findit.GetAnalysis(self.crash_identifiers)
+ analysis.status = analysis_status.ERROR
+ analysis.put()
# Arguments number differs from overridden method - pylint: disable=W0221
- def run(self, crash_identifiers, client_id):
- analysis = findit_for_client.GetAnalysisForClient(crash_identifiers,
- client_id)
+ def run(self, crash_identifiers):
+ analysis = self.findit.GetAnalysis(crash_identifiers)
# Update analysis status.
analysis.pipeline_status_path = self.pipeline_status_path()
@@ -57,7 +114,7 @@ class CrashAnalysisPipeline(CrashBasePipeline):
analysis.put()
# Run the analysis.
- result, tags = findit_for_client.FindCulprit(analysis)
+ result, tags = self.findit.FindCulprit(analysis)
# Update analysis status and save the analysis result.
analysis.completed_time = time_util.GetUTCNow()
@@ -76,40 +133,34 @@ class PublishResultPipeline(CrashBasePipeline):
logging.error('Failed to publish %s analysis result for %s',
repr(self.crash_identifiers), self.client_id)
-
# Arguments number differs from overridden method - pylint: disable=W0221
- def run(self, crash_identifiers, client_id):
- analysis = findit_for_client.GetAnalysisForClient(crash_identifiers,
- client_id)
- result = findit_for_client.GetPublishResultFromAnalysis(analysis,
- crash_identifiers,
- client_id)
+ def run(self, crash_identifiers):
+ analysis = self.findit.GetAnalysis(crash_identifiers)
+ result = GetPublishResultFromAnalysis(analysis, crash_identifiers)
messages_data = [json.dumps(result, sort_keys=True)]
- client_config = CrashConfig.Get().GetClientConfig(client_id)
+ client_config = CrashConfig.Get().GetClientConfig(self.client_id)
# TODO(katesonia): Clean string uses in config.
topic = client_config['analysis_result_pubsub_topic']
pubsub_util.PublishMessagesToTopic(messages_data, topic)
- logging.info('Published %s analysis result for %s', client_id,
+ logging.info('Published %s analysis result for %s', self.client_id,
repr(crash_identifiers))
class CrashWrapperPipeline(BasePipeline):
# Arguments number differs from overridden method - pylint: disable=W0221
- def run(self, crash_identifiers, client_id):
- run_analysis = yield CrashAnalysisPipeline(crash_identifiers, client_id)
+ def run(self, crash_identifiers):
+ run_analysis = yield CrashAnalysisPipeline(crash_identifiers, self.client_id)
with pipeline.After(run_analysis):
- yield PublishResultPipeline(crash_identifiers, client_id)
+ yield PublishResultPipeline(crash_identifiers, self.client_id)
+# TODO(wrengr): this should surely be a method on Findit, rather than
+# a standalone function.
@ndb.transactional
-def _NeedsNewAnalysis(
- crash_identifiers, chrome_version, signature, client_id,
- platform, stack_trace, customized_data):
- analysis = findit_for_client.GetAnalysisForClient(crash_identifiers,
- client_id)
- regression_range = findit_for_client.GetRegressionRange(client_id,
- customized_data)
+def _NeedsNewAnalysis(findit_client, crash_identifiers, report, historical_metadata=None):
Sharu Jiang 2016/10/12 22:35:03 instead of passing in historical_metadata, we shou
wrengr 2016/10/18 23:13:54 I can add another optional argument for regression
+ analysis = findit_client.GetAnalysis(crash_identifiers)
+ regression_range = DetectRegressionRange(historical_metadata)
if (analysis and not analysis.failed and
regression_range == analysis.regression_range):
logging.info('The analysis of %s has already been done.',
@@ -118,37 +169,36 @@ def _NeedsNewAnalysis(
# Create analysis for findit to run if this is not a rerun.
if not analysis:
- analysis = findit_for_client.CreateAnalysisForClient(crash_identifiers,
- client_id)
+ analysis = findit_client.CreateAnalysis(crash_identifiers)
- findit_for_client.ResetAnalysis(analysis, chrome_version, signature,
- client_id, platform, stack_trace,
- customized_data, regression_range)
+ findit_client.UpdateAnalysis(analysis, report)
return True
-def ScheduleNewAnalysisForCrash(
- crash_identifiers, chrome_version, signature, client_id,
- platform, stack_trace, customized_data,
- queue_name=constants.DEFAULT_QUEUE):
+# TODO(wrengr): this should surely be a method on Findit, rather than
+# a standalone function. To see whether we can actually do so,
+# note that this method is only called by crash.crash_pipeline and handlers.crash.crash_handler
+def ScheduleNewAnalysisForCrash(findit_client, crash_identifiers, report,
Sharu Jiang 2016/10/12 22:35:03 ? In crash_handler, this function is called as bel
wrengr 2016/10/18 23:13:54 Whoops, I missed that callsite when reordering/reo
+ channel, queue_name=constants.DEFAULT_QUEUE):
"""Schedules an analysis."""
- # Check policy and tune arguments if needed.
- pass_policy, updated_analysis_args = findit_for_client.CheckPolicyForClient(
- crash_identifiers, chrome_version, signature,
- client_id, platform, stack_trace,
- customized_data)
+ if not isinstance(findit_client, findit.Findit):
+ raise TypeError('In the first argument to ScheduleNewAnalysisForCrash, '
+ 'expected Findit object, but got %s object instead.' %
+ findit_client.__class__.__name__)
- if not pass_policy:
+ # Check policy and tune arguments if needed.
+ updated_report = findit_client.CheckPolicy(crash_identifiers, report, channel)
+ if not updated_report:
return False
- if _NeedsNewAnalysis(*updated_analysis_args):
- analysis_pipeline = CrashWrapperPipeline(crash_identifiers, client_id)
- # Attribute defined outside __init__ - pylint: disable=W0201
- analysis_pipeline.target = appengine_util.GetTargetNameForModule(
- constants.CRASH_BACKEND[client_id])
- analysis_pipeline.start(queue_name=queue_name)
- logging.info('New %s analysis is scheduled for %s', client_id,
- repr(crash_identifiers))
- return True
+ if not _NeedsNewAnalysis(findit_client, crash_identifiers, updated_report):
+ return False
- return False
+ analysis_pipeline = CrashWrapperPipeline(crash_identifiers, findit_client.client_id)
Sharu Jiang 2016/10/12 22:35:03 Assume we already created findit_client here, why
wrengr 2016/10/18 23:13:53 The general goal is to avoid passing the client_id
+ # Attribute defined outside __init__ - pylint: disable=W0201
+ analysis_pipeline.target = appengine_util.GetTargetNameForModule(
+ constants.CRASH_BACKEND[findit_client.client_id])
+ analysis_pipeline.start(queue_name=queue_name)
+ logging.info('New %s analysis is scheduled for %s', findit_client.client_id,
+ repr(crash_identifiers))
+ return True

Powered by Google App Engine
This is Rietveld 408576698