appengine/findit/crash/findit_for_chromecrash.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Side by Side Diff: appengine/findit/crash/findit_for_chromecrash.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: Addressing the crash_config.fracas issue Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import logging	5 import logging

6 from collections import namedtuple	6

	7 from google.appengine.ext import ndb

7	8

8 from common import chromium_deps	9 from common import chromium_deps

9 from crash import detect_regression_range	10 from crash.azalea import Azalea

10 from crash import findit_for_crash	11 from crash.changelist_classifier import ChangelistClassifier

11 from crash.chromecrash_parser import ChromeCrashParser	12 from crash.chromecrash_parser import ChromeCrashParser

12 from crash.project_classifier import ProjectClassifier

13 from crash.component_classifier import Component	13 from crash.component_classifier import Component

14 from crash.component_classifier import ComponentClassifier	14 from crash.component_classifier import ComponentClassifier

	15 from crash.crash_report import CrashReport

	16 from crash.culprit import NullCulprit

	17 from crash import detect_regression_range

	18 from crash.findit import Findit

	19 from crash.project_classifier import ProjectClassifier

	20 from crash.type_enums import CrashClient

	21 from model.crash.cracas_crash_analysis import CracasCrashAnalysis

15 from model.crash.crash_config import CrashConfig	22 from model.crash.crash_config import CrashConfig

	23 from model.crash.fracas_crash_analysis import FracasCrashAnalysis

16	24

17 # TODO(katesonia): Remove the default value after adding validity check to	25 # TODO(katesonia): Remove the default value after adding validity check to

18 # config.	26 # config.

19 _DEFAULT_TOP_N = 7	27 _DEFAULT_TOP_N = 7

20	28

	29 # TODO(wrengr): [Note#1] in many places below we have to do some ugly

	30 # defaulting in case crash_data is missing certain keys. If we had

	31 # crash_data be a proper class, rather than an anonymous dict, then we

	32 # could clean all this up by having the properties themselves do the check

	33 # and return the default whenever keys are missing. This would also

	34 # let us do things like have regression_range be automatically computed

	35 # from historical_metadata (when historical_metadata is provided and

	36 # regression_range is not).

21	37

22 # TODO(wrengr): move this to its own file, so it can be shared. When we do	38 class FinditForChromeCrash(Findit):

23 # so, we'll need to also pass in the 'solution' argument for the tag_dict.	39 """Find culprits for crash reports from the Chrome Crash server."""

24 class Culprit(namedtuple('Culprit',

25 ['project', 'components', 'cls', 'regression_range'])):

26	40

27 @property	41 @classmethod

28 def fields(self):	42 def _ClientID(cls):

29 return self._fields	43 if cls is FinditForChromeCrash:

	44 logging.warning('FinditForChromeCrash is abstract, '

	45 'but someone constructed an instance and called _ClientID')

	46 else:

	47 logging.warning(

	48 'FinditForChromeCrash subclass %s forgot to implement _ClientID',

	49 cls.__name__)

	50 raise NotImplementedError()

30	51

31 # TODO(wrengr): better name for this method.	52 # TODO(wrengr): remove the dependency on CrashConfig entirely, by

32 def ToDicts(self):	53 # passing the relevant data as arguments to this constructor.

33 """Convert this object to a pair of anonymous dicts for JSON.	54 def __init__(self, pipeline_cls):

	55 super(FinditForChromeCrash, self).__init__(pipeline_cls)

	56 component_classifier_config = CrashConfig.Get().component_classifier

34	57

35 Returns:	58 self._stacktrace_parser = ChromeCrashParser()

36 (analysis_result_dict, tag_dict)

37 The analysis result is a dict like below:

38 {

39 # Indicate if Findit found any suspects_cls, project,

40 # components or regression_range.

41 "found": true,

42 "suspected_project": "chromium-v8", # Which project is most suspected.

43 "feedback_url": "https://.."

44 "suspected_cls": [

45 {

46 "revision": "commit-hash",

47 "url": "https://chromium.googlesource.com/chromium/src/+/...",

48 "review_url": "https://codereview.chromium.org/issue-number",

49 "project_path": "third_party/pdfium",

50 "author": "who@chromium.org",

51 "time": "2015-08-17 03:38:16",

52 "reason": "a plain string with '\n' as line break to expla..."

53 "reason": [('MinDistance', 1, 'minimum distance is 0.'),

54 ('TopFrame', 0.9, 'top frame is2nd frame.')],

55 "changed_files": [

56 {"file": "file_name1.cc",

57 "blame_url": "https://...",

58 "info": "minimum distance (LOC) 0, frame #2"},

59 {"file": "file_name2.cc",

60 "blame_url": "https://...",

61 "info": "minimum distance (LOC) 20, frame #4"},

62 ...

63 ],

64 "confidence": 0.60

65 },

66 ...,

67 ],

68 "regression_range": [ # Detected regression range.

69 "53.0.2765.0",

70 "53.0.2766.0"

71 ],

72 "suspected_components": [ # A list of crbug components to file bugs.

73 "Blink>JavaScript"

74 ]

75 }

76	59

77 The code review url might not always be available, because not all	60 # For how these two "top n" differ, see http://crbug.com/644476#c4

78 commits go through code review. In that case, commit url should	61 self._azalea = Azalea(

79 be used instead.	62 cl_classifier = ChangelistClassifier(

	63 top_n_frames = self.config.get('top_n', _DEFAULT_TOP_N)),

	64 component_classifier = ComponentClassifier(

	65 [Component(component_name, path_regex, function_regex)

	66 for path_regex, function_regex, component_name

	67 in component_classifier_config['path_function_component']],

	68 component_classifier_config['top_n']),

	69 project_classifier = ProjectClassifier())

80	70

81 The tag dict are allowed key/value pairs to tag the analysis result	71 def _InitializeAnalysis(self, model, crash_data):

82 for query and monitoring purpose on Findit side. For allowed keys,	72 super(FinditForChromeCrash, self)._InitializeAnalysis(model, crash_data)

83 please refer to crash_analysis.py and fracas_crash_analysis.py:	73 # TODO(wrengr): see Note#1

84 For results with normal culprit-finding algorithm: {	74 model.regression_range = crash_data.get('regression_range', None)
	Sharu Jiang 2016/10/25 18:31:44 This should be lifted to findit superclass, since This should be lifted to findit superclass, since the regression_range is the top-level field that all clients(user) should have. The _InitializeAnalysis here should just handle data in customized_data wrengr 2016/10/25 19:49:54 Acknowledged. Show quoted text On 2016/10/25 18:31:44, sharu jiang wrote: > This should be lifted to findit superclass, since the regression_range is the > top-level field that all clients(user) should have. > > The _InitializeAnalysis here should just handle data in customized_data Acknowledged.
85 'found_suspects': True,	75 customized_data = crash_data.get('customized_data', {})

86 'has_regression_range': True,	76 model.channel = customized_data.get('channel', None)

87 'solution': 'core_algorithm',	77 model.historical_metadata = customized_data.get('historical_metadata', [])

88 }

89 For results using git blame without a regression range: {

90 'found_suspects': True,

91 'has_regression_range': False,

92 'solution': 'blame',

93 }

94 If nothing is found: {

95 'found_suspects': False,

96 }

97 """

98 cls_list = [result.ToDict() for result in self.cls]

99	78

100 # TODO(wrengr): reformulate the JSON stuff so we can drop fields which	79 # TODO(wrengr): see Note#1, which would allow us to lift this

101 # are empty; so that, in turn, we can get rid of the NullCulprit class.	80 # implementation to the Findit base class.

102 return (	81 @ndb.transactional

103 {	82 def _NeedsNewAnalysis(self, crash_data):

104 'found': (bool(self.project) or	83 crash_identifiers = crash_data['crash_identifiers']

105 bool(self.components) or	84 historical_metadata = crash_data['customized_data']['historical_metadata']

106 bool(cls_list) or	85 model = self.GetAnalysis(crash_identifiers)

107 bool(self.regression_range)),	86 # N.B., for mocking reasons, we must not call DetectRegressionRange

108 'regression_range': self.regression_range,	87 # directly, but rather must access it indirectly through the module.

109 'suspected_project': self.project,	88 new_regression_range = detect_regression_range.DetectRegressionRange(

110 'suspected_components': self.components,	89 historical_metadata)

111 'suspected_cls': cls_list,	90 if (model and not model.failed and

112 },	91 new_regression_range == model.regression_range):

113 {	92 logging.info('The analysis of %s has already been done.',

114 'found_suspects': bool(cls_list),	93 repr(crash_identifiers))

115 'found_project': bool(self.project),	94 return False

116 'found_components': bool(self.components),	95

117 'has_regression_range': bool(self.regression_range),	96 if not model:

118 'solution': 'core_algorithm',	97 model = self.CreateAnalysis(crash_identifiers)

119 }	98

120 )	99 crash_data['regression_range'] = new_regression_range

	100 self._InitializeAnalysis(model, crash_data)

	101 model.put()

	102 return True

	103

	104 def CheckPolicy(self, crash_data):

	105 crash_identifiers = crash_data['crash_identifiers']

	106 platform = crash_data['platform']

	107 signature = crash_data['signature']

	108 # TODO(wrengr): see Note#1

	109 channel = crash_data.get('customized_data', {}).get('channel', None)

	110 # TODO(katesonia): Remove the default value after adding validity check to

	111 # config.

	112 if platform not in self.config.get(

	113 'supported_platform_list_by_channel', {}).get(channel, []):

	114 # Bail out if either the channel or platform is not supported yet.

	115 logging.info('Analysis of channel %s, platform %s is not supported. '

	116 'No analysis is scheduled for %s',

	117 channel, platform, repr(crash_identifiers))

	118 return None

	119

	120 # TODO(katesonia): Remove the default value after adding validity check to

	121 # config.

	122 for blacklist_marker in self.config.get('signature_blacklist_markers', []):

	123 if blacklist_marker in signature:

	124 logging.info('%s signature is not supported. '

	125 'No analysis is scheduled for %s', blacklist_marker,

	126 repr(crash_identifiers))

	127 return None

	128

	129 # TODO(wrengr): should we clone \|crash_data\| rather than mutating it?

	130 crash_data['platform'] = self.RenamePlatform(platform)

	131 return crash_data

121	132

122	133

123 # TODO(wrengr): Either (a) reformulate the unit tests so that FindCulprit	134 # TODO(wrengr): we misplaced the coverage tests; find them!

124 # can, in fact, return None; or else (b) reformulate the JSON stuff so	135 class FinditForCracas(FinditForChromeCrash):

125 # that the various fields are optional, so that we can just use	136 @classmethod

126 # Culprit('', [], [], None) directly.	137 def _ClientID(cls):

127 class NullCulprit(object):	138 return CrashClient.CRACAS

128 """A helper class so FindCulprit doesn't return None.

129	139

130 This class is analogous to Culprit(None, [], [], None), except that the	140 def CreateAnalysis(self, crash_identifiers):

131 result of the ToDicts method is more minimalistic."""	141 # TODO: inline CracasCrashAnalysis.Create stuff here.

	142 return CracasCrashAnalysis.Create(crash_identifiers)

132	143

133 def ToDicts(self):	144 def GetAnalysis(self, crash_identifiers):

134 return (	145 # TODO: inline CracasCrashAnalysis.Get stuff here.

135 {'found': False},	146 return CracasCrashAnalysis.Get(crash_identifiers)

136 {'found_suspects': False,

137 'has_regression_range': False}

138 )

139	147

140	148

141 # TODO(wrengr): better name for this class. Given the "findit_for_*.py"	149 class FinditForFracas(FinditForChromeCrash):

142 # file names, one might suspect that each of those files implements	150 @classmethod

143 # something analogous to this file, and hence we could make a superclass	151 def _ClientID(cls):

144 # to factor out the common bits. However, in truth, all those files	152 return CrashClient.FRACAS

145 # are unrelated and do completely different things.

146 class FinditForChromeCrash(object):

147 """Process crashes from Chrome crash server and find culprits for them.

148	153

149 Even though this class has only one method, it is helpful because it	154 def CreateAnalysis(self, crash_identifiers):

150 allows us to cache things which should outlive each call to that method.	155 # TODO: inline FracasCrashAnalysis.Create stuff here.

151 For example, we store a single ComponentClassifier object so that we	156 return FracasCrashAnalysis.Create(crash_identifiers)

152 only compile the regexes for each Component object once, rather than

153 doing so on each call to FindCulprit. In addition, the class lets

154 us cache various configuration options so that we need not depend

155 on CrashConfig; thereby decoupling the analysis itself from UX concerns

156 about deciding how to run those analyses.

157 """

158 # TODO(wrengr): remove the dependency on CrashConfig entirely, by

159 # passing the relevant data as arguments to this constructor.

160 def __init__(self):

161 crash_config = CrashConfig.Get()

162 component_classifier_config = crash_config.component_classifier

163	157

164 # TODO(wrengr): why are these two different?	158 def GetAnalysis(self, crash_identifiers):

165 component_classifier_top_n = component_classifier_config['top_n']	159 # TODO: inline FracasCrashAnalysis.Get stuff here.

166 self._fracas_top_n = crash_config.fracas.get('top_n', _DEFAULT_TOP_N)	160 return FracasCrashAnalysis.Get(crash_identifiers)

167

168 self.component_classifier = ComponentClassifier(

169 [Component(component_name, path_regex, function_regex)

170 for path_regex, function_regex, component_name

171 in component_classifier_config['path_function_component']],

172 component_classifier_top_n)

173

174 # TODO(wrengr); fix ProjectClassifier so it doesn't depend on CrashConfig.

175 self.project_classifier = ProjectClassifier()

176

177 # TODO(wrengr): since this is the only method of interest, it would

178 # be better IMO to rename it to __call__ to reduce verbosity.

179 def FindCulprit(self, signature, platform, stack_trace, crashed_version,

180 regression_range):

181 """Finds culprits for a Chrome crash.

182

183 Args:

184 signature (str): The signature of a crash on the Chrome crash server.

185 platform (str): The platform name, could be 'win', 'mac', 'linux',

186 'android', 'ios', etc.

187 stack_trace (str): A string containing the stack trace of a crash.

188 crashed_version (str): The version of Chrome in which the crash occurred.

189 regression_range (list or None): [good_version, bad_revision] or None.

190

191 Returns:

192 A Culprit object.

193 """

194 crash_deps = chromium_deps.GetChromeDependency(crashed_version, platform)

195 stacktrace = ChromeCrashParser().Parse(stack_trace, crash_deps, signature)

196 if not stacktrace:

197 logging.warning('Failed to parse the stacktrace %s', stack_trace)

198 # TODO(wrengr): refactor things so we don't need the NullCulprit class.

199 return NullCulprit()

200

201 # Get regression deps and crash deps.

202 regression_deps_rolls = {}

203 if regression_range:

204 last_good_version, first_bad_version = regression_range

205 logging.info('Find regression range %s:%s', last_good_version,

206 first_bad_version)

207 regression_deps_rolls = chromium_deps.GetDEPSRollsDict(

208 last_good_version, first_bad_version, platform)

209

210 suspected_cls = findit_for_crash.FindItForCrash(

211 stacktrace, regression_deps_rolls, crash_deps, self._fracas_top_n)

212

213 crash_stack = stacktrace.crash_stack

214 suspected_project = self.project_classifier.Classify(

215 suspected_cls, crash_stack)

216

217 suspected_components = self.component_classifier.Classify(

218 suspected_cls, crash_stack)

219

220 return Culprit(suspected_project, suspected_components, suspected_cls,

221 regression_range)

OLD	NEW