Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(51)

Side by Side Diff: appengine/findit/crash/findit_for_chromecrash.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)
Patch Set: Fixing call to ScheduleNewAnalysis in handlers/crash/crash_handler.py Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging 5 import logging
6 from collections import namedtuple 6
7 from google.appengine.ext import ndb
7 8
8 from common import chromium_deps 9 from common import chromium_deps
9 from crash import detect_regression_range 10 from crash.azalea import Azalea
10 from crash import findit_for_crash 11 from crash.changelist_classifier import ChangelistClassifier
11 from crash.chromecrash_parser import ChromeCrashParser 12 from crash.chromecrash_parser import ChromeCrashParser
12 from crash.project_classifier import ProjectClassifier
13 from crash.component_classifier import Component 13 from crash.component_classifier import Component
14 from crash.component_classifier import ComponentClassifier 14 from crash.component_classifier import ComponentClassifier
15 from crash.crash_report import CrashReport
16 from crash.culprit import NullCulprit
17 from crash import detect_regression_range
18 from crash.findit import Findit
19 from crash.project_classifier import ProjectClassifier
20 from crash.type_enums import CrashClient
21 from model.crash.cracas_crash_analysis import CracasCrashAnalysis
15 from model.crash.crash_config import CrashConfig 22 from model.crash.crash_config import CrashConfig
23 from model.crash.fracas_crash_analysis import FracasCrashAnalysis
16 24
17 # TODO(katesonia): Remove the default value after adding validity check to 25 # TODO(katesonia): Remove the default value after adding validity check to
18 # config. 26 # config.
19 _DEFAULT_TOP_N = 7 27 _DEFAULT_TOP_N = 7
20 28
29 class FinditForChromeCrash(Findit):
30 """Find culprits for crash reports from the Chrome Crash server."""
21 31
22 # TODO(wrengr): move this to its own file, so it can be shared. When we do 32 @classmethod
23 # so, we'll need to also pass in the 'solution' argument for the tag_dict. 33 def ClientID(cls):
24 class Culprit(namedtuple('Culprit', 34 logging.info('FinditForChromeCrash.ClientID: '
25 ['project', 'components', 'cls', 'regression_range'])): 35 ' the subclass %s forgot to override this method', cls.__name__)
36 raise NotImplementedError()
26 37
27 # TODO(wrengr): better name for this method.
28 def ToDicts(self):
29 """Convert this object to a pair of anonymous dicts for JSON.
30
31 Returns:
32 (analysis_result_dict, tag_dict)
33 The analysis result is a dict like below:
34 {
35 # Indicate if Findit found any suspects_cls, project,
36 # components or regression_range.
37 "found": true,
38 "suspected_project": "chromium-v8", # Which project is most suspected.
39 "feedback_url": "https://.."
40 "suspected_cls": [
41 {
42 "revision": "commit-hash",
43 "url": "https://chromium.googlesource.com/chromium/src/+/...",
44 "review_url": "https://codereview.chromium.org/issue-number",
45 "project_path": "third_party/pdfium",
46 "author": "who@chromium.org",
47 "time": "2015-08-17 03:38:16",
48 "reason": "a plain string with '\n' as line break to expla..."
49 "reason": [('MinDistance', 1, 'minimum distance is 0.'),
50 ('TopFrame', 0.9, 'top frame is2nd frame.')],
51 "changed_files": [
52 {"file": "file_name1.cc",
53 "blame_url": "https://...",
54 "info": "minimum distance (LOC) 0, frame #2"},
55 {"file": "file_name2.cc",
56 "blame_url": "https://...",
57 "info": "minimum distance (LOC) 20, frame #4"},
58 ...
59 ],
60 "confidence": 0.60
61 },
62 ...,
63 ],
64 "regression_range": [ # Detected regression range.
65 "53.0.2765.0",
66 "53.0.2766.0"
67 ],
68 "suspected_components": [ # A list of crbug components to file bugs.
69 "Blink>JavaScript"
70 ]
71 }
72
73 The code review url might not always be available, because not all
74 commits go through code review. In that case, commit url should
75 be used instead.
76
77 The tag dict are allowed key/value pairs to tag the analysis result
78 for query and monitoring purpose on Findit side. For allowed keys,
79 please refer to crash_analysis.py and fracas_crash_analysis.py:
80 For results with normal culprit-finding algorithm: {
81 'found_suspects': True,
82 'has_regression_range': True,
83 'solution': 'core_algorithm',
84 }
85 For results using git blame without a regression range: {
86 'found_suspects': True,
87 'has_regression_range': False,
88 'solution': 'blame',
89 }
90 If nothing is found: {
91 'found_suspects': False,
92 }
93 """
94 cls_list = [result.ToDict() for result in self.cls]
95
96 # TODO(wrengr): reformulate the JSON stuff so we can drop fields which
97 # are empty; so that, in turn, we can get rid of the NullCulprit class.
98 return (
99 {
100 'found': (bool(self.project) or
101 bool(self.components) or
102 bool(cls_list) or
103 bool(self.regression_range)),
104 'regression_range': self.regression_range,
105 'suspected_project': self.project,
106 'suspected_components': self.components,
107 'suspected_cls': cls_list,
108 },
109 {
110 'found_suspects': bool(cls_list),
111 'found_project': bool(self.project),
112 'found_components': bool(self.components),
113 'has_regression_range': bool(self.regression_range),
114 'solution': 'core_algorithm',
115 }
116 )
117
118
119 # TODO(wrengr): Either (a) reformulate the unit tests so that FindCulprit
120 # can, in fact, return None; or else (b) reformulate the JSON stuff so
121 # that the various fields are optional, so that we can just use
122 # Culprit('', [], [], None) directly.
123 class NullCulprit(object):
124 """A helper class so FindCulprit doesn't return None.
125
126 This class is analogous to Culprit(None, [], [], None), except that the
127 result of the ToDicts method is more minimalistic."""
128
129 def ToDicts(self):
130 return (
131 {'found': False},
132 {'found_suspects': False,
133 'has_regression_range': False}
134 )
135
136
137 # TODO(wrengr): better name for this class. Given the "findit_for_*.py"
138 # file names, one might suspect that each of those files implements
139 # something analogous to this file, and hence we could make a superclass
140 # to factor out the common bits. However, in truth, all those files
141 # are unrelated and do completely different things.
142 class FinditForChromeCrash(object):
143 """Process crashes from Chrome crash server and find culprits for them.
144
145 Even though this class has only one method, it is helpful because it
146 allows us to cache things which should outlive each call to that method.
147 For example, we store a single ComponentClassifier object so that we
148 only compile the regexes for each Component object once, rather than
149 doing so on each call to FindCulprit. In addition, the class lets
150 us cache various configuration options so that we need not depend
151 on CrashConfig; thereby decoupling the analysis itself from UX concerns
152 about deciding how to run those analyses.
153 """
154 # TODO(wrengr): remove the dependency on CrashConfig entirely, by 38 # TODO(wrengr): remove the dependency on CrashConfig entirely, by
155 # passing the relevant data as arguments to this constructor. 39 # passing the relevant data as arguments to this constructor.
156 def __init__(self): 40 def __init__(self):
41 super(FinditForChromeCrash, self).__init__()
157 crash_config = CrashConfig.Get() 42 crash_config = CrashConfig.Get()
158 component_classifier_config = crash_config.component_classifier 43 component_classifier_config = crash_config.component_classifier
159 44
160 # TODO(wrengr): why are these two different? 45 self.stacktrace_parser = ChromeCrashParser()
161 component_classifier_top_n = component_classifier_config['top_n']
162 self._fracas_top_n = crash_config.fracas.get('top_n', _DEFAULT_TOP_N)
163 46
164 self.component_classifier = ComponentClassifier( 47 # For how these two "top n" differ, see http://crbug.com/644476#c4
165 [Component(component_name, path_regex, function_regex) 48 self.azalea = Azalea(
166 for path_regex, function_regex, component_name 49 cl_classifier = ChangelistClassifier(
167 in component_classifier_config['path_function_component']], 50 top_n_frames = crash_config.fracas.get('top_n', _DEFAULT_TOP_N)),
168 component_classifier_top_n) 51 component_classifier = ComponentClassifier(
52 [Component(component_name, path_regex, function_regex)
53 for path_regex, function_regex, component_name
54 in component_classifier_config['path_function_component']],
55 component_classifier_config['top_n']),
56 project_classifier = ProjectClassifier())
169 57
170 # TODO(wrengr); fix ProjectClassifier so it doesn't depend on CrashConfig. 58 def _InitializeAnalysis(self, model, crash_data):
171 self.project_classifier = ProjectClassifier() 59 super(FinditForChromeCrash, self)._InitializeAnalysis(model, crash_data)
60 # N.B., these are attributes of ChromeCrashAnalysis, not CrashAnalysis.
61 # TODO(wrengr): clean up this defaulting ugliness.
62 model.channel = crash_data.get('customized_data', {}).get('channel', None)
63 model.historical_metadata = (
64 crash_data.get('customized_data', {}).get('historical_metadata', []))
Sharu Jiang 2016/10/19 00:20:06 Should also set model.regression_range here or in
wrengr 2016/10/19 18:05:16 IIRC, the regression range isn't available from th
Sharu Jiang 2016/10/19 21:03:09 Conceptually, it would be better to let it be top-
wrengr 2016/10/22 00:18:49 I couldn't remember where it was before I started
172 65
173 # TODO(wrengr): since this is the only method of interest, it would 66 # TODO(wrengr); if crash_data were a proper class rather than an
174 # be better IMO to rename it to __call__ to reduce verbosity. 67 # anonymous dict, then we could have regression_range be a property
175 def FindCulprit(self, signature, platform, stack_trace, crashed_version, 68 # that's automatically computed from historical_metadata; and then we
176 regression_range): 69 # could lift this implementation to the Findit base class.
177 """Finds culprits for a Chrome crash. 70 @ndb.transactional
71 def _NeedsNewAnalysis(self, crash_data):
72 crash_identifiers = crash_data['crash_identifiers']
73 historical_metadata = crash_data['customized_data']['historical_metadata']
74 model = self.GetAnalysis(crash_identifiers)
75 # N.B., for mocking reasons, we must not call DetectRegressionRange
76 # directly, but rather must access it indirectly through the module.
77 new_regression_range = detect_regression_range.DetectRegressionRange(
78 historical_metadata)
79 if (model and not model.failed and
80 new_regression_range == model.regression_range):
81 logging.info('The analysis of %s has already been done.',
82 repr(crash_identifiers))
83 return False
178 84
179 Args: 85 if not model:
180 signature (str): The signature of a crash on the Chrome crash server. 86 model = self.CreateAnalysis(crash_identifiers)
181 platform (str): The platform name, could be 'win', 'mac', 'linux',
182 'android', 'ios', etc.
183 stack_trace (str): A string containing the stack trace of a crash.
184 crashed_version (str): The version of Chrome in which the crash occurred.
185 regression_range (list or None): [good_version, bad_revision] or None.
186 87
187 Returns: 88 crash_data['customized_data']['regression_range'] = new_regression_range
188 A Culprit object. 89 self._InitializeAnalysis(model, crash_data)
189 """ 90 model.put()
190 crash_deps = chromium_deps.GetChromeDependency(crashed_version, platform) 91 return True
191 stacktrace = ChromeCrashParser().Parse(stack_trace, crash_deps, signature)
192 if not stacktrace:
193 logging.warning('Failed to parse the stacktrace %s', stack_trace)
194 # TODO(wrengr): refactor things so we don't need the NullCulprit class.
195 return NullCulprit()
196 92
197 # Get regression deps and crash deps. 93 def CheckPolicy(self, crash_data):
198 regression_deps_rolls = {} 94 # TODO(wrengr): Should we use brackets and let exceptions be thrown?
199 if regression_range: 95 crash_identifiers = crash_data['crash_identifiers']
200 last_good_version, first_bad_version = regression_range 96 platform = crash_data['platform']
201 logging.info('Find regression range %s:%s', last_good_version, 97 signature = crash_data['signature']
202 first_bad_version) 98 # TODO(wrengr): clean up this defaulting ugliness.
203 regression_deps_rolls = chromium_deps.GetDEPSRollsDict( 99 channel = crash_data.get('customized_data', {}).get('channel', None)
204 last_good_version, first_bad_version, platform) 100 # TODO(katesonia): Remove the default value after adding validity check to
101 # config.
102 if platform not in self.config.get(
103 'supported_platform_list_by_channel', {}).get(channel, []):
104 # Bail out if either the channel or platform is not supported yet.
105 logging.info('Analysis of channel %s, platform %s is not supported. '
106 'No analysis is scheduled for %s',
107 channel, platform, repr(crash_identifiers))
108 return None
205 109
206 suspected_cls = findit_for_crash.FindItForCrash( 110 # TODO(katesonia): Remove the default value after adding validity check to
207 stacktrace, regression_deps_rolls, crash_deps, self._fracas_top_n) 111 # config.
112 for blacklist_marker in self.config.get('signature_blacklist_markers', []):
113 if blacklist_marker in signature:
114 logging.info('%s signature is not supported. '
115 'No analysis is scheduled for %s', blacklist_marker,
116 repr(crash_identifiers))
117 return None
208 118
209 crash_stack = stacktrace.crash_stack 119 # TODO(wrengr): shoudl we clone |crash_data| rather than mutating it?
210 suspected_project = self.project_classifier.Classify( 120 crash_data['platform'] = self.RenamePlatform(platform)
211 suspected_cls, crash_stack) 121 return crash_data
212 122
213 suspected_components = self.component_classifier.Classify(
214 suspected_cls, crash_stack)
215 123
216 return Culprit(suspected_project, suspected_components, suspected_cls, 124 class FinditForCracas(FinditForChromeCrash):
217 regression_range) 125 @classmethod
126 def ClientID(cls):
127 return CrashClient.CRACAS
128
129 def CreateAnalysis(self, crash_identifiers):
130 # TODO: inline CracasCrashAnalysis.Create stuff here.
131 return CracasCrashAnalysis.Create(crash_identifiers)
132
133 def GetAnalysis(self, crash_identifiers):
134 # TODO: inline CracasCrashAnalysis.Get stuff here.
135 return CracasCrashAnalysis.Get(crash_identifiers)
136
137
138 class FinditForFracas(FinditForChromeCrash):
139 @classmethod
140 def ClientID(cls):
141 return CrashClient.FRACAS
142
143 def CreateAnalysis(self, crash_identifiers):
144 # TODO: inline FracasCrashAnalysis.Create stuff here.
145 return FracasCrashAnalysis.Create(crash_identifiers)
146
147 def GetAnalysis(self, crash_identifiers):
148 # TODO: inline FracasCrashAnalysis.Get stuff here.
149 return FracasCrashAnalysis.Get(crash_identifiers)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698