Chromium Code Reviews| Index: appengine/findit/crash/crash_pipeline.py |
| diff --git a/appengine/findit/crash/crash_pipeline.py b/appengine/findit/crash/crash_pipeline.py |
| index 6aa184692e2b6434388bc6de29f846a1b66963d1..ce39fb073dec210b9c3c1ea37477bc45cc334c1c 100644 |
| --- a/appengine/findit/crash/crash_pipeline.py |
| +++ b/appengine/findit/crash/crash_pipeline.py |
| @@ -6,7 +6,6 @@ import copy |
| import json |
| import logging |
| -from google.appengine.api import app_identity |
| from google.appengine.ext import ndb |
| from common import appengine_util |
| @@ -15,39 +14,97 @@ from common import pubsub_util |
| from common import time_util |
| from common.pipeline_wrapper import BasePipeline |
| from common.pipeline_wrapper import pipeline |
| -from crash import findit_for_client |
| +from crash import findit |
| +from crash import findit_for_chromecrash |
| +from crash.type_enums import CrashClient |
| +from crash.detect_regression_range import DetectRegressionRange |
| from model import analysis_status |
| from model.crash.crash_config import CrashConfig |
| - |
| +# TODO(katesonia): Move this to fracas config. |
| +_FINDIT_FRACAS_FEEDBACK_URL_TEMPLATE = '%s/crash/fracas-result-feedback?key=%s' |
| + |
| +# TODO(wrengr): rename this. Should be "Pub" or "Publish*ed*" at the very least. |
| +# TODO(wrengr): this should prolly be a method on CrashAnalysis rather |
|
Sharu Jiang
2016/10/12 22:35:03
I prefer to put this in CrashAnalysis.
wrengr
2016/10/18 23:13:53
Done.
|
| +# than a stand-alone function. Or, we could have the CrashAnalysis be part |
| +# of the Findit class directly, in order to get rid of this last remaining |
| +# case switching on the |client_id|. With all the FooAnalysis methods, it |
| +# basically already is a part of that class, just not done cleanly is all. |
| +def GetPublishResultFromAnalysis(model, crash_identifiers): |
| + """Convert the datastore analysis into a publishable result. |
| + |
| + Args: |
| + model (CrashAnalysis): the datastore to be converted. |
| + crash_identifiers (??): ?? |
| + |
| + Returns: |
| + A dict of ?? |
| + """ |
| + model_result = copy.deepcopy(model.result) |
| + client_id = model.client_id |
| + |
| + if (client_id == CrashClient.FRACAS or |
| + client_id == CrashClient.CRACAS): |
| + model_result['feedback_url'] = _FINDIT_FRACAS_FEEDBACK_URL_TEMPLATE % ( |
| + appengine_util.GetDefaultVersionHostname(), model.key.urlsafe()) |
| + if model_result['found']: |
| + for cl in model_result['suspected_cls']: |
| + cl['confidence'] = round(cl['confidence'], 2) |
| + cl.pop('reason', None) |
| + elif client_id == CrashClient.CLUSTERFUZZ: # pragma: no cover. |
| + # TODO(katesonia): Post process clusterfuzz model result if needed. |
| + pass |
| + |
| + return { |
| + 'crash_identifiers': crash_identifiers, |
| + 'client_id': client_id, |
| + 'result': model_result, |
| + } |
| + |
| + |
| +# TODO(wrengr): we'd like to move this to findit.py (or somewhere similar) |
| +# so it's encapsulated along with the Findit classes; but to do that we'd |
| +# need to merge findit.py and findit_for_chromecrash.py to avoid cyclic |
| +# dependencies. Is there a clean way to do what we want? |
| +def FinditForClientID(client_id): |
| + if client_id == CrashClient.FRACAS: |
| + return findit_for_chromecrash.FinditForFracas() |
| + elif client_id == CrashClient.CRACAS: |
| + return findit_for_chromecrash.FinditForCracas() |
| + elif client_id == CrashClient.CLUSTERFUZZ: |
| + return findit.FinditForClusterfuzz() |
|
Sharu Jiang
2016/10/12 22:35:03
We'd better create a findit_for_clusterfuzz for cl
wrengr
2016/10/18 23:13:54
Done.
|
| + else: |
| + logging.info('Client %s is not supported by findit right now', client_id) |
| + raise ValueError() |
| + |
| + |
| +# TODO(wrengr): make this a member of Findit (or, perhaps, merge the |
| +# two classes), so we don't need to pass the |client_id| around manually. |
| class CrashBasePipeline(BasePipeline): |
| def __init__(self, crash_identifiers, client_id): |
| super(CrashBasePipeline, self).__init__(crash_identifiers, client_id) |
| self.crash_identifiers = crash_identifiers |
| self.client_id = client_id |
| + self.findit = FinditForClientID(client_id) |
| + # TODO(wrengr): why not just use __call__? Or, if giving it this name, |
| + # why not specify the right number of arguments so that we don't need |
| + # to disable pylint? |
|
stgao
2016/10/12 15:46:58
Unfornately, we can't do __call__, because this is
wrengr
2016/10/12 21:14:51
Acknowledged.
|
| def run(self, *args, **kwargs): |
| raise NotImplementedError() |
| class CrashAnalysisPipeline(CrashBasePipeline): |
| - def _SetErrorIfAborted(self, aborted): |
| - if not aborted: |
| - return |
| - |
| - logging.error('Aborted analysis for %s', repr(self.crash_identifiers)) |
| - analysis = findit_for_client.GetAnalysisForClient(self.crash_identifiers, |
| - self.client_id) |
| - analysis.status = analysis_status.ERROR |
| - analysis.put() |
| - |
| def finalized(self): |
| - self._SetErrorIfAborted(self.was_aborted) |
| + if self.was_aborted: |
| + logging.error('Aborted analysis for %s', repr(self.crash_identifiers)) |
|
stgao
2016/10/12 15:46:58
For testing purpose, we need to factor this out as
wrengr
2016/10/12 21:14:51
I've been told elsewhere that the main code should
stgao
2016/10/13 06:03:16
Yep, I agreed with this in general.
wrengr
2016/10/18 23:13:54
Done.
|
| + analysis = self.findit.GetAnalysis(self.crash_identifiers) |
| + analysis.status = analysis_status.ERROR |
| + analysis.put() |
| # Arguments number differs from overridden method - pylint: disable=W0221 |
| - def run(self, crash_identifiers, client_id): |
| - analysis = findit_for_client.GetAnalysisForClient(crash_identifiers, |
| - client_id) |
| + def run(self, crash_identifiers): |
| + analysis = self.findit.GetAnalysis(crash_identifiers) |
| # Update analysis status. |
| analysis.pipeline_status_path = self.pipeline_status_path() |
| @@ -57,7 +114,7 @@ class CrashAnalysisPipeline(CrashBasePipeline): |
| analysis.put() |
| # Run the analysis. |
| - result, tags = findit_for_client.FindCulprit(analysis) |
| + result, tags = self.findit.FindCulprit(analysis) |
| # Update analysis status and save the analysis result. |
| analysis.completed_time = time_util.GetUTCNow() |
| @@ -76,40 +133,34 @@ class PublishResultPipeline(CrashBasePipeline): |
| logging.error('Failed to publish %s analysis result for %s', |
| repr(self.crash_identifiers), self.client_id) |
| - |
| # Arguments number differs from overridden method - pylint: disable=W0221 |
| - def run(self, crash_identifiers, client_id): |
| - analysis = findit_for_client.GetAnalysisForClient(crash_identifiers, |
| - client_id) |
| - result = findit_for_client.GetPublishResultFromAnalysis(analysis, |
| - crash_identifiers, |
| - client_id) |
| + def run(self, crash_identifiers): |
| + analysis = self.findit.GetAnalysis(crash_identifiers) |
| + result = GetPublishResultFromAnalysis(analysis, crash_identifiers) |
| messages_data = [json.dumps(result, sort_keys=True)] |
| - client_config = CrashConfig.Get().GetClientConfig(client_id) |
| + client_config = CrashConfig.Get().GetClientConfig(self.client_id) |
| # TODO(katesonia): Clean string uses in config. |
| topic = client_config['analysis_result_pubsub_topic'] |
| pubsub_util.PublishMessagesToTopic(messages_data, topic) |
| - logging.info('Published %s analysis result for %s', client_id, |
| + logging.info('Published %s analysis result for %s', self.client_id, |
| repr(crash_identifiers)) |
| class CrashWrapperPipeline(BasePipeline): |
| # Arguments number differs from overridden method - pylint: disable=W0221 |
| - def run(self, crash_identifiers, client_id): |
| - run_analysis = yield CrashAnalysisPipeline(crash_identifiers, client_id) |
| + def run(self, crash_identifiers): |
| + run_analysis = yield CrashAnalysisPipeline(crash_identifiers, self.client_id) |
| with pipeline.After(run_analysis): |
| - yield PublishResultPipeline(crash_identifiers, client_id) |
| + yield PublishResultPipeline(crash_identifiers, self.client_id) |
| +# TODO(wrengr): this should surely be a method on Findit, rather than |
| +# a standalone function. |
| @ndb.transactional |
| -def _NeedsNewAnalysis( |
| - crash_identifiers, chrome_version, signature, client_id, |
| - platform, stack_trace, customized_data): |
| - analysis = findit_for_client.GetAnalysisForClient(crash_identifiers, |
| - client_id) |
| - regression_range = findit_for_client.GetRegressionRange(client_id, |
| - customized_data) |
| +def _NeedsNewAnalysis(findit_client, crash_identifiers, report, historical_metadata=None): |
|
Sharu Jiang
2016/10/12 22:35:03
instead of passing in historical_metadata, we shou
wrengr
2016/10/18 23:13:54
I can add another optional argument for regression
|
| + analysis = findit_client.GetAnalysis(crash_identifiers) |
| + regression_range = DetectRegressionRange(historical_metadata) |
| if (analysis and not analysis.failed and |
| regression_range == analysis.regression_range): |
| logging.info('The analysis of %s has already been done.', |
| @@ -118,37 +169,36 @@ def _NeedsNewAnalysis( |
| # Create analysis for findit to run if this is not a rerun. |
| if not analysis: |
| - analysis = findit_for_client.CreateAnalysisForClient(crash_identifiers, |
| - client_id) |
| + analysis = findit_client.CreateAnalysis(crash_identifiers) |
| - findit_for_client.ResetAnalysis(analysis, chrome_version, signature, |
| - client_id, platform, stack_trace, |
| - customized_data, regression_range) |
| + findit_client.UpdateAnalysis(analysis, report) |
| return True |
| -def ScheduleNewAnalysisForCrash( |
| - crash_identifiers, chrome_version, signature, client_id, |
| - platform, stack_trace, customized_data, |
| - queue_name=constants.DEFAULT_QUEUE): |
| +# TODO(wrengr): this should surely be a method on Findit, rather than |
| +# a standalone function. To see whether we can actually do so, |
| +# note that this method is only called by crash.crash_pipeline and handlers.crash.crash_handler |
| +def ScheduleNewAnalysisForCrash(findit_client, crash_identifiers, report, |
|
Sharu Jiang
2016/10/12 22:35:03
?
In crash_handler, this function is called as bel
wrengr
2016/10/18 23:13:54
Whoops, I missed that callsite when reordering/reo
|
| + channel, queue_name=constants.DEFAULT_QUEUE): |
| """Schedules an analysis.""" |
| - # Check policy and tune arguments if needed. |
| - pass_policy, updated_analysis_args = findit_for_client.CheckPolicyForClient( |
| - crash_identifiers, chrome_version, signature, |
| - client_id, platform, stack_trace, |
| - customized_data) |
| + if not isinstance(findit_client, findit.Findit): |
| + raise TypeError('In the first argument to ScheduleNewAnalysisForCrash, ' |
| + 'expected Findit object, but got %s object instead.' % |
| + findit_client.__class__.__name__) |
| - if not pass_policy: |
| + # Check policy and tune arguments if needed. |
| + updated_report = findit_client.CheckPolicy(crash_identifiers, report, channel) |
| + if not updated_report: |
| return False |
| - if _NeedsNewAnalysis(*updated_analysis_args): |
| - analysis_pipeline = CrashWrapperPipeline(crash_identifiers, client_id) |
| - # Attribute defined outside __init__ - pylint: disable=W0201 |
| - analysis_pipeline.target = appengine_util.GetTargetNameForModule( |
| - constants.CRASH_BACKEND[client_id]) |
| - analysis_pipeline.start(queue_name=queue_name) |
| - logging.info('New %s analysis is scheduled for %s', client_id, |
| - repr(crash_identifiers)) |
| - return True |
| + if not _NeedsNewAnalysis(findit_client, crash_identifiers, updated_report): |
| + return False |
| - return False |
| + analysis_pipeline = CrashWrapperPipeline(crash_identifiers, findit_client.client_id) |
|
Sharu Jiang
2016/10/12 22:35:03
Assume we already created findit_client here, why
wrengr
2016/10/18 23:13:53
The general goal is to avoid passing the client_id
|
| + # Attribute defined outside __init__ - pylint: disable=W0201 |
| + analysis_pipeline.target = appengine_util.GetTargetNameForModule( |
| + constants.CRASH_BACKEND[findit_client.client_id]) |
| + analysis_pipeline.start(queue_name=queue_name) |
| + logging.info('New %s analysis is scheduled for %s', findit_client.client_id, |
| + repr(crash_identifiers)) |
| + return True |