appengine/findit/crash/findit_for_chromecrash.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Unified Diff: appengine/findit/crash/findit_for_chromecrash.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: trying to fix some tests Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« appengine/findit/crash/findit.py ('K') | « appengine/findit/crash/findit.py ('k') | appengine/findit/crash/findit_for_client.py » ('j') | appengine/findit/crash/test/crash_pipeline_test.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: appengine/findit/crash/findit_for_chromecrash.py

diff --git a/appengine/findit/crash/findit_for_chromecrash.py b/appengine/findit/crash/findit_for_chromecrash.py

index f375bafd35fc8b5dc3a38a2baf86d9e3df3c7197..b1fae8ca9f7265905d0dc6178f45dc1da7b08cdc 100644

--- a/appengine/findit/crash/findit_for_chromecrash.py

+++ b/appengine/findit/crash/findit_for_chromecrash.py

@@ -3,215 +3,134 @@

# found in the LICENSE file.

import logging

-from collections import namedtuple

from common import chromium_deps

-from crash import detect_regression_range

-from crash import findit_for_crash

+from crash.azalea import Azalea

+from crash.changelist_classifier import ChangelistClassifier

from crash.chromecrash_parser import ChromeCrashParser

-from crash.project_classifier import ProjectClassifier

from crash.component_classifier import Component

from crash.component_classifier import ComponentClassifier

+from crash.crash_report import CrashReport

+from crash.culprit import NullCulprit

+from crash import detect_regression_range

+from crash.findit import Findit

+from crash.project_classifier import ProjectClassifier

+from crash.type_enums import CrashClient

+from model.crash.cracas_crash_analysis import CracasCrashAnalysis

from model.crash.crash_config import CrashConfig

+from model.crash.fracas_crash_analysis import FracasCrashAnalysis

# TODO(katesonia): Remove the default value after adding validity check to

# config.

_DEFAULT_TOP_N = 7

-# TODO(wrengr): move this to its own file, so it can be shared. When we do

-# so, we'll need to also pass in the 'solution' argument for the tag_dict.

-class Culprit(namedtuple('Culprit',

- ['project', 'components', 'cls', 'regression_range'])):

- # TODO(wrengr): better name for this method.

- def ToDicts(self):

- """Convert this object to a pair of anonymous dicts for JSON.

- Returns:

- (analysis_result_dict, tag_dict)

- The analysis result is a dict like below:

- {

- # Indicate if Findit found any suspects_cls, project,

- # components or regression_range.

- "found": true,

- "suspected_project": "chromium-v8", # Which project is most suspected.

- "feedback_url": "https://.."

- "suspected_cls": [

- {

- "revision": "commit-hash",

- "url": "https://chromium.googlesource.com/chromium/src/+/...",

- "review_url": "https://codereview.chromium.org/issue-number",

- "project_path": "third_party/pdfium",

- "author": "who@chromium.org",

- "time": "2015-08-17 03:38:16",

- "reason": "a plain string with '\n' as line break to expla..."

- "reason": [('MinDistance', 1, 'minimum distance is 0.'),

- ('TopFrame', 0.9, 'top frame is2nd frame.')],

- "changed_files": [

- {"file": "file_name1.cc",

- "blame_url": "https://...",

- "info": "minimum distance (LOC) 0, frame #2"},

- {"file": "file_name2.cc",

- "blame_url": "https://...",

- "info": "minimum distance (LOC) 20, frame #4"},

- ...

- ],

- "confidence": 0.60

- },

- ...,

- ],

- "regression_range": [ # Detected regression range.

- "53.0.2765.0",

- "53.0.2766.0"

- ],

- "suspected_components": [ # A list of crbug components to file bugs.

- "Blink>JavaScript"

- ]

- }

- The code review url might not always be available, because not all

- commits go through code review. In that case, commit url should

- be used instead.

- The tag dict are allowed key/value pairs to tag the analysis result

- for query and monitoring purpose on Findit side. For allowed keys,

- please refer to crash_analysis.py and fracas_crash_analysis.py:

- For results with normal culprit-finding algorithm: {

- 'found_suspects': True,

- 'has_regression_range': True,

- 'solution': 'core_algorithm',

- }

- For results using git blame without a regression range: {

- 'found_suspects': True,

- 'has_regression_range': False,

- 'solution': 'blame',

- }

- If nothing is found: {

- 'found_suspects': False,

- }

- """

- cls_list = [result.ToDict() for result in self.cls]

- # TODO(wrengr): reformulate the JSON stuff so we can drop fields which

- # are empty; so that, in turn, we can get rid of the NullCulprit class.

- return (

- {

- 'found': (bool(self.project) or

- bool(self.components) or

- bool(cls_list) or

- bool(self.regression_range)),

- 'regression_range': self.regression_range,

- 'suspected_project': self.project,

- 'suspected_components': self.components,

- 'suspected_cls': cls_list,

- },

- {

- 'found_suspects': bool(cls_list),

- 'found_project': bool(self.project),

- 'found_components': bool(self.components),

- 'has_regression_range': bool(self.regression_range),

- 'solution': 'core_algorithm',

- }

- )

-# TODO(wrengr): Either (a) reformulate the unit tests so that FindCulprit

-# can, in fact, return None; or else (b) reformulate the JSON stuff so

-# that the various fields are optional, so that we can just use

-# Culprit('', [], [], None) directly.

-class NullCulprit(object):

- """A helper class so FindCulprit doesn't return None.

- This class is analogous to Culprit(None, [], [], None), except that the

- result of the ToDicts method is more minimalistic."""

- def ToDicts(self):

- return (

- {'found': False},

- {'found_suspects': False,

- 'has_regression_range': False}

- )

-# TODO(wrengr): better name for this class. Given the "findit_for_*.py"

-# file names, one might suspect that each of those files implements

-# something analogous to this file, and hence we could make a superclass

-# to factor out the common bits. However, in truth, all those files

-# are unrelated and do completely different things.

-class FinditForChromeCrash(object):

- """Process crashes from Chrome crash server and find culprits for them.

- Even though this class has only one method, it is helpful because it

- allows us to cache things which should outlive each call to that method.

- For example, we store a single ComponentClassifier object so that we

- only compile the regexes for each Component object once, rather than

- doing so on each call to FindCulprit. In addition, the class lets

- us cache various configuration options so that we need not depend

- on CrashConfig; thereby decoupling the analysis itself from UX concerns

- about deciding how to run those analyses.

- """

+class FinditForChromeCrash(Findit):

+ """Find culprits for crash reports from the Chrome Crash server."""

# TODO(wrengr): remove the dependency on CrashConfig entirely, by

# passing the relevant data as arguments to this constructor.

def __init__(self):

+ super(FinditForChromeCrash, self).__init__()

crash_config = CrashConfig.Get()

component_classifier_config = crash_config.component_classifier

- # TODO(wrengr): why are these two different?

- component_classifier_top_n = component_classifier_config['top_n']

- self._fracas_top_n = crash_config.fracas.get('top_n', _DEFAULT_TOP_N)

- self.component_classifier = ComponentClassifier(

- [Component(component_name, path_regex, function_regex)

- for path_regex, function_regex, component_name

- in component_classifier_config['path_function_component']],

- component_classifier_top_n)

- # TODO(wrengr); fix ProjectClassifier so it doesn't depend on CrashConfig.

- self.project_classifier = ProjectClassifier()

- # TODO(wrengr): since this is the only method of interest, it would

- # be better IMO to rename it to __call__ to reduce verbosity.

- def FindCulprit(self, signature, platform, stack_trace, crashed_version,

- regression_range):

- """Finds culprits for a Chrome crash.

- Args:

- signature (str): The signature of a crash on the Chrome crash server.

- platform (str): The platform name, could be 'win', 'mac', 'linux',

- 'android', 'ios', etc.

- stack_trace (str): A string containing the stack trace of a crash.

- crashed_version (str): The version of Chrome in which the crash occurred.

- regression_range (list or None): [good_version, bad_revision] or None.

- Returns:

- A Culprit object.

- """

- crash_deps = chromium_deps.GetChromeDependency(crashed_version, platform)

- stacktrace = ChromeCrashParser().Parse(stack_trace, crash_deps, signature)

+ self.chromecrash_parser = ChromeCrashParser()

Sharu Jiang 2016/10/12 22:35:03 this can be renamed to more general name like stac

+ # For how these two "top n" differ, see http://crbug.com/644476#c4

+ self.azalea = Azalea(

Sharu Jiang 2016/10/12 22:35:03 If we lift the FulCulprit to findit class, the aza

wrengr 2016/10/18 23:13:54 We can't move this creation of Azalea to the base

+ cl_classifier = ChangelistClassifier(

+ top_n_frames = crash_config.fracas.get('top_n', _DEFAULT_TOP_N)),

+ component_classifier = ComponentClassifier(

+ [Component(component_name, path_regex, function_regex)

+ for path_regex, function_regex, component_name

+ in component_classifier_config['path_function_component']],

+ component_classifier_config['top_n']),

+ project_classifier = ProjectClassifier())

+ # TODO(wrengr): In older versions of the code, this method used to

+ # also set |channel| and |historical_metadata|. However, the values

+ # for those aren't clear from the one callsite for |UpdateAnalysis|

+ # (i.e., in crash_pipeline.py)

+ #def ResetAnalysis(self, model, report):

+ # super(FinditForChromeCrash, self).ResetAnalysis(model, report)

+ def CheckPolicy(self, crash_identifiers, report, channel):

+ # TODO(katesonia): Remove the default value after adding validity check to

+ # config.

+ if report.platform not in self.config.get(

+ 'supported_platform_list_by_channel', {}).get(channel, []):

+ # Bail out if either the channel or platform is not supported yet.

+ logging.info('Analysis of channel %s, platform %s is not supported. '

+ 'No analysis is scheduled for %s',

+ channel, report.platform, repr(crash_identifiers))

+ return None

+ # TODO(katesonia): Remove the default value after adding validity check to

+ # config.

+ for blacklist_marker in self.config.get('signature_blacklist_markers', []):

+ if blacklist_marker in report.signature:

+ logging.info('%s signature is not supported. '

+ 'No analysis is scheduled for %s', blacklist_marker,

+ repr(crash_identifiers))

+ return None

+ return report._replace(platform=self.RenamePlatform(report.platform))

+ # TODO(wrengr): can this implementation be lifted to the Findit

+ # superclass? (perhaps by abstracting out the stacktrace parsing into

Sharu Jiang 2016/10/12 17:46:29 This part can be lifted, since it can be shared by

wrengr 2016/10/12 21:14:51 Acknowledged.

+ # a separate method)

+ # TODO(wrengr): is the historical metadata stored in the model somewhere?

+ def FindCulprit(self, model, **kwargs):

+ crashed_version = model.crashed_version

+ signature = model.signature

+ platform = model.platform

+ stacktrace = self.chromecrash_parser.Parse(model.stack_trace,

+ chromium_deps.GetChromeDependency(crashed_version, platform),

+ signature)

if not stacktrace:

- logging.warning('Failed to parse the stacktrace %s', stack_trace)

+ logging.warning('Failed to parse the stacktrace %s', model.stack_trace)

# TODO(wrengr): refactor things so we don't need the NullCulprit class.

return NullCulprit()

- # Get regression deps and crash deps.

- regression_deps_rolls = {}

- if regression_range:

- last_good_version, first_bad_version = regression_range

- logging.info('Find regression range %s:%s', last_good_version,

- first_bad_version)

- regression_deps_rolls = chromium_deps.GetDEPSRollsDict(

- last_good_version, first_bad_version, platform)

+ regression_range = None

+ historical_metadata = kwargs.get('historical_metadata', None)

Sharu Jiang 2016/10/12 17:46:29 We already got regression range before running ana

wrengr 2016/10/12 21:14:51 Is the regression_range stored in the CrashAnalysi

Sharu Jiang 2016/10/12 22:35:03 It should be stored in CrashAnalysis model before

wrengr 2016/10/18 23:13:54 Done.

+ if historical_metadata is not None:

+ # N.B., because of the way we do mock testing, we must call

+ # DetectRegressionRange indirectly through the module, and must not

+ # call it directly (causing the mock not to be used).

+ regression_range = detect_regression_range.DetectRegressionRange(historical_metadata)

+ return self.azalea.FindCulprit(CrashReport(

+ crashed_version = crashed_version,

+ signature = signature,

+ platform = platform,

+ stacktrace = stacktrace,

+ regression_range = regression_range))

+class FinditForCracas(FinditForChromeCrash):

+ @classmethod

+ def ClientID(cls):

+ return CrashClient.CRACAS

+ def CreateAnalysis(self, crash_identifiers):

+ # TODO: inline CracasCrashAnalysis.Create stuff here.

+ return CracasCrashAnalysis.Create(crash_identifiers)

+ def GetAnalysis(self, crash_identifiers):

+ # TODO: inline CracasCrashAnalysis.Get stuff here.

+ return CracasCrashAnalysis.Get(crash_identifiers)

- suspected_cls = findit_for_crash.FindItForCrash(

- stacktrace, regression_deps_rolls, crash_deps, self._fracas_top_n)

- crash_stack = stacktrace.crash_stack

- suspected_project = self.project_classifier.Classify(

- suspected_cls, crash_stack)

+class FinditForFracas(FinditForChromeCrash):

+ @classmethod

+ def ClientID(cls):

+ return CrashClient.FRACAS

- suspected_components = self.component_classifier.Classify(

- suspected_cls, crash_stack)

+ def CreateAnalysis(self, crash_identifiers):

+ # TODO: inline FracasCrashAnalysis.Create stuff here.

+ return FracasCrashAnalysis.Create(crash_identifiers)

- return Culprit(suspected_project, suspected_components, suspected_cls,

- regression_range)

+ def GetAnalysis(self, crash_identifiers):

+ # TODO: inline FracasCrashAnalysis.Get stuff here.

+ return FracasCrashAnalysis.Get(crash_identifiers)