Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(68)

Unified Diff: appengine/findit/crash/findit_for_chromecrash.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)
Patch Set: Finally fixed the mock tests! Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « appengine/findit/crash/findit.py ('k') | appengine/findit/crash/findit_for_client.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: appengine/findit/crash/findit_for_chromecrash.py
diff --git a/appengine/findit/crash/findit_for_chromecrash.py b/appengine/findit/crash/findit_for_chromecrash.py
index daeb7535cc42a4532a3d9cc12fe4c203a6b2652e..4fe4c12779012ff6e58649f6e9f62d2d348c9f99 100644
--- a/appengine/findit/crash/findit_for_chromecrash.py
+++ b/appengine/findit/crash/findit_for_chromecrash.py
@@ -3,225 +3,160 @@
# found in the LICENSE file.
import logging
-from collections import namedtuple
+
+from google.appengine.ext import ndb
from common import chrome_dependency_fetcher
from crash import detect_regression_range
-from crash import findit_for_crash
+from crash.azalea import Azalea
+from crash.changelist_classifier import ChangelistClassifier
from crash.chromecrash_parser import ChromeCrashParser
-from crash.project_classifier import ProjectClassifier
from crash.component_classifier import Component
from crash.component_classifier import ComponentClassifier
+from crash.crash_report import CrashReport
+from crash.culprit import NullCulprit
+from crash.findit import Findit
+from crash.project_classifier import ProjectClassifier
+from crash.type_enums import CrashClient
+from model.crash.cracas_crash_analysis import CracasCrashAnalysis
from model.crash.crash_config import CrashConfig
+from model.crash.fracas_crash_analysis import FracasCrashAnalysis
# TODO(katesonia): Remove the default value after adding validity check to
# config.
_DEFAULT_TOP_N = 7
-
-# TODO(wrengr): move this to its own file, so it can be shared. When we do
-# so, we'll need to also pass in the 'solution' argument for the tag_dict.
-class Culprit(namedtuple('Culprit',
- ['project', 'components', 'cls', 'regression_range'])):
-
- @property
- def fields(self):
- return self._fields
-
- # TODO(wrengr): better name for this method.
- def ToDicts(self):
- """Convert this object to a pair of anonymous dicts for JSON.
-
- Returns:
- (analysis_result_dict, tag_dict)
- The analysis result is a dict like below:
- {
- # Indicate if Findit found any suspects_cls, project,
- # components or regression_range.
- "found": true,
- "suspected_project": "chromium-v8", # Which project is most suspected.
- "feedback_url": "https://.."
- "suspected_cls": [
- {
- "revision": "commit-hash",
- "url": "https://chromium.googlesource.com/chromium/src/+/...",
- "review_url": "https://codereview.chromium.org/issue-number",
- "project_path": "third_party/pdfium",
- "author": "who@chromium.org",
- "time": "2015-08-17 03:38:16",
- "reason": "a plain string with '\n' as line break to expla..."
- "reason": [('MinDistance', 1, 'minimum distance is 0.'),
- ('TopFrame', 0.9, 'top frame is2nd frame.')],
- "changed_files": [
- {"file": "file_name1.cc",
- "blame_url": "https://...",
- "info": "minimum distance (LOC) 0, frame #2"},
- {"file": "file_name2.cc",
- "blame_url": "https://...",
- "info": "minimum distance (LOC) 20, frame #4"},
- ...
- ],
- "confidence": 0.60
- },
- ...,
- ],
- "regression_range": [ # Detected regression range.
- "53.0.2765.0",
- "53.0.2766.0"
- ],
- "suspected_components": [ # A list of crbug components to file bugs.
- "Blink>JavaScript"
- ]
- }
-
- The code review url might not always be available, because not all
- commits go through code review. In that case, commit url should
- be used instead.
-
- The tag dict are allowed key/value pairs to tag the analysis result
- for query and monitoring purpose on Findit side. For allowed keys,
- please refer to crash_analysis.py and fracas_crash_analysis.py:
- For results with normal culprit-finding algorithm: {
- 'found_suspects': True,
- 'has_regression_range': True,
- 'solution': 'core_algorithm',
- }
- For results using git blame without a regression range: {
- 'found_suspects': True,
- 'has_regression_range': False,
- 'solution': 'blame',
- }
- If nothing is found: {
- 'found_suspects': False,
- }
- """
- cls_list = [result.ToDict() for result in self.cls]
-
- # TODO(wrengr): reformulate the JSON stuff so we can drop fields which
- # are empty; so that, in turn, we can get rid of the NullCulprit class.
- return (
- {
- 'found': (bool(self.project) or
- bool(self.components) or
- bool(cls_list) or
- bool(self.regression_range)),
- 'regression_range': self.regression_range,
- 'suspected_project': self.project,
- 'suspected_components': self.components,
- 'suspected_cls': cls_list,
- },
- {
- 'found_suspects': bool(cls_list),
- 'found_project': bool(self.project),
- 'found_components': bool(self.components),
- 'has_regression_range': bool(self.regression_range),
- 'solution': 'core_algorithm',
- }
- )
-
-
-# TODO(wrengr): Either (a) reformulate the unit tests so that FindCulprit
-# can, in fact, return None; or else (b) reformulate the JSON stuff so
-# that the various fields are optional, so that we can just use
-# Culprit('', [], [], None) directly.
-class NullCulprit(object):
- """A helper class so FindCulprit doesn't return None.
-
- This class is analogous to Culprit(None, [], [], None), except that the
- result of the ToDicts method is more minimalistic."""
-
- def ToDicts(self):
- return (
- {'found': False},
- {'found_suspects': False,
- 'has_regression_range': False}
- )
-
-
-# TODO(wrengr): better name for this class. Given the "findit_for_*.py"
-# file names, one might suspect that each of those files implements
-# something analogous to this file, and hence we could make a superclass
-# to factor out the common bits. However, in truth, all those files
-# are unrelated and do completely different things.
-class FinditForChromeCrash(object):
- """Process crashes from Chrome crash server and find culprits for them.
-
- Even though this class has only one method, it is helpful because it
- allows us to cache things which should outlive each call to that method.
- For example, we store a single ComponentClassifier object so that we
- only compile the regexes for each Component object once, rather than
- doing so on each call to FindCulprit. In addition, the class lets
- us cache various configuration options so that we need not depend
- on CrashConfig; thereby decoupling the analysis itself from UX concerns
- about deciding how to run those analyses.
- """
- # TODO(wrengr): remove the dependency on CrashConfig entirely, by
- # passing the relevant data as arguments to this constructor.
- def __init__(self, repository):
- crash_config = CrashConfig.Get()
- component_classifier_config = crash_config.component_classifier
-
- # TODO(wrengr): why are these two different?
- component_classifier_top_n = component_classifier_config['top_n']
- self._fracas_top_n = crash_config.fracas.get('top_n', _DEFAULT_TOP_N)
-
- self.component_classifier = ComponentClassifier(
- [Component(component_name, path_regex, function_regex)
- for path_regex, function_regex, component_name
- in component_classifier_config['path_function_component']],
- component_classifier_top_n)
-
- # TODO(wrengr); fix ProjectClassifier so it doesn't depend on CrashConfig.
- self.project_classifier = ProjectClassifier()
- # TODO(katesonia): Lift this to Findit superclass after refatoring cl
- # committed.
- self.repository = repository
- self.dep_fetcher = chrome_dependency_fetcher.ChromeDependencyFetcher(
- repository)
-
- # TODO(wrengr): since this is the only method of interest, it would
- # be better IMO to rename it to __call__ to reduce verbosity.
- def FindCulprit(self, signature, platform, stack_trace, crashed_version,
- regression_range):
- """Finds culprits for a Chrome crash.
-
- Args:
- signature (str): The signature of a crash on the Chrome crash server.
- platform (str): The platform name, could be 'win', 'mac', 'linux',
- 'android', 'ios', etc.
- stack_trace (str): A string containing the stack trace of a crash.
- crashed_version (str): The version of Chrome in which the crash occurred.
- regression_range (list or None): [good_version, bad_revision] or None.
-
- Returns:
- A Culprit object.
- """
- crash_deps = self.dep_fetcher.GetDependency(crashed_version, platform)
- stacktrace = ChromeCrashParser().Parse(stack_trace, crash_deps, signature)
- if not stacktrace:
- logging.warning('Failed to parse the stacktrace %s', stack_trace)
- # TODO(wrengr): refactor things so we don't need the NullCulprit class.
- return NullCulprit()
-
- # Get regression deps and crash deps.
- regression_deps_rolls = {}
- if regression_range:
- last_good_version, first_bad_version = regression_range
- logging.info('Find regression range %s:%s', last_good_version,
- first_bad_version)
- regression_deps_rolls = self.dep_fetcher.GetDependencyRollsDict(
- last_good_version, first_bad_version, platform)
-
- suspected_cls = findit_for_crash.FindItForCrash(
- stacktrace, regression_deps_rolls, crash_deps, self._fracas_top_n,
- self.repository)
-
- crash_stack = stacktrace.crash_stack
- suspected_project = self.project_classifier.Classify(
- suspected_cls, crash_stack)
-
- suspected_components = self.component_classifier.Classify(
- suspected_cls, crash_stack)
-
- return Culprit(suspected_project, suspected_components, suspected_cls,
- regression_range)
+# TODO(wrengr): [Note#1] in many places below we have to do some ugly
+# defaulting in case crash_data is missing certain keys. If we had
+# crash_data be a proper class, rather than an anonymous dict, then we
+# could clean all this up by having the properties themselves do the check
+# and return the default whenever keys are missing. This would also
+# let us do things like have regression_range be automatically computed
+# from historical_metadata (when historical_metadata is provided and
+# regression_range is not).
+
+class FinditForChromeCrash(Findit):
+ """Find culprits for crash reports from the Chrome Crash server."""
+
+ @classmethod
+ def _ClientID(cls): # pragma: no cover
+ if cls is FinditForChromeCrash:
+ logging.warning('FinditForChromeCrash is abstract, '
+ 'but someone constructed an instance and called _ClientID')
+ else:
+ logging.warning(
+ 'FinditForChromeCrash subclass %s forgot to implement _ClientID',
+ cls.__name__)
+ raise NotImplementedError()
+
+ # TODO(http://crbug.com/659354): remove the dependency on CrashConfig
+ # entirely, by passing the relevant data as arguments to this constructor.
+ def __init__(self, repository, pipeline_cls):
+ super(FinditForChromeCrash, self).__init__(repository, pipeline_cls)
+ component_classifier_config = CrashConfig.Get().component_classifier
+
+ self._stacktrace_parser = ChromeCrashParser()
+
+ # For how these two "top n" differ, see http://crbug.com/644476#c4
+ self._azalea = Azalea(
+ cl_classifier = ChangelistClassifier(
+ repository = repository,
+ top_n_frames = self.config.get('top_n', _DEFAULT_TOP_N)),
+ component_classifier = ComponentClassifier(
+ [Component(component_name, path_regex, function_regex)
+ for path_regex, function_regex, component_name
+ in component_classifier_config['path_function_component']],
+ component_classifier_config['top_n']),
+ project_classifier = ProjectClassifier())
+
+ def _InitializeAnalysis(self, model, crash_data):
+ super(FinditForChromeCrash, self)._InitializeAnalysis(model, crash_data)
+ # TODO(wrengr): see Note#1
+ model.regression_range = crash_data.get('regression_range', None)
+ customized_data = crash_data.get('customized_data', {})
+ model.channel = customized_data.get('channel', None)
+ model.historical_metadata = customized_data.get('historical_metadata', [])
+
+ # TODO(wrengr): see Note#1, which would allow us to lift this
+ # implementation to the Findit base class.
+ @ndb.transactional
+ def _NeedsNewAnalysis(self, crash_data):
+ crash_identifiers = crash_data['crash_identifiers']
+ historical_metadata = crash_data['customized_data']['historical_metadata']
+ model = self.GetAnalysis(crash_identifiers)
+ # N.B., for mocking reasons, we must not call DetectRegressionRange
+ # directly, but rather must access it indirectly through the module.
+ new_regression_range = detect_regression_range.DetectRegressionRange(
+ historical_metadata)
+ if (model and not model.failed and
+ new_regression_range == model.regression_range):
+ logging.info('The analysis of %s has already been done.',
+ repr(crash_identifiers))
+ return False
+
+ if not model:
+ model = self.CreateAnalysis(crash_identifiers)
+
+ crash_data['regression_range'] = new_regression_range
+ self._InitializeAnalysis(model, crash_data)
+ model.put()
+ return True
+
+ def CheckPolicy(self, crash_data):
+ crash_identifiers = crash_data['crash_identifiers']
+ platform = crash_data['platform']
+ # TODO(wrengr): see Note#1
+ channel = crash_data.get('customized_data', {}).get('channel', None)
+ # TODO(katesonia): Remove the default value after adding validity check to
+ # config.
+ if platform not in self.config.get(
+ 'supported_platform_list_by_channel', {}).get(channel, []):
+ # Bail out if either the channel or platform is not supported yet.
+ logging.info('Analysis of channel %s, platform %s is not supported. '
+ 'No analysis is scheduled for %s',
+ channel, platform, repr(crash_identifiers))
+ return None
+
+ signature = crash_data['signature']
+ # TODO(wrengr): can this blacklist stuff be lifted to the base class?
+ # TODO(katesonia): Remove the default value after adding validity check to
+ # config.
+ for blacklist_marker in self.config.get('signature_blacklist_markers', []):
+ if blacklist_marker in signature:
+ logging.info('%s signature is not supported. '
+ 'No analysis is scheduled for %s', blacklist_marker,
+ repr(crash_identifiers))
+ return None
+
+ # TODO(wrengr): should we clone |crash_data| rather than mutating it?
+ crash_data['platform'] = self.RenamePlatform(platform)
+ return crash_data
+
+
+# TODO(http://crbug.com/659346): we misplaced the coverage tests; find them!
+class FinditForCracas(FinditForChromeCrash): # pragma: no cover
+ @classmethod
+ def _ClientID(cls):
+ return CrashClient.CRACAS
+
+ def CreateAnalysis(self, crash_identifiers):
+ # TODO: inline CracasCrashAnalysis.Create stuff here.
+ return CracasCrashAnalysis.Create(crash_identifiers)
+
+ def GetAnalysis(self, crash_identifiers):
+ # TODO: inline CracasCrashAnalysis.Get stuff here.
+ return CracasCrashAnalysis.Get(crash_identifiers)
+
+
+class FinditForFracas(FinditForChromeCrash):
+ @classmethod
+ def _ClientID(cls):
+ return CrashClient.FRACAS
+
+ def CreateAnalysis(self, crash_identifiers):
+ # TODO: inline FracasCrashAnalysis.Create stuff here.
+ return FracasCrashAnalysis.Create(crash_identifiers)
+
+ def GetAnalysis(self, crash_identifiers):
+ # TODO: inline FracasCrashAnalysis.Get stuff here.
+ return FracasCrashAnalysis.Get(crash_identifiers)
« no previous file with comments | « appengine/findit/crash/findit.py ('k') | appengine/findit/crash/findit_for_client.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698