appengine/findit/crash/findit_for_chromecrash.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Side by Side Diff: appengine/findit/crash/findit_for_chromecrash.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: linting for coverage Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import logging	5 import logging

6 from collections import namedtuple	6

	7 from google.appengine.ext import ndb

7	8

8 from common import chromium_deps	9 from common import chromium_deps

9 from crash import detect_regression_range	10 from crash.azalea import Azalea

10 from crash import findit_for_crash	11 from crash.changelist_classifier import ChangelistClassifier

11 from crash.chromecrash_parser import ChromeCrashParser	12 from crash.chromecrash_parser import ChromeCrashParser

12 from crash.project_classifier import ProjectClassifier

13 from crash.component_classifier import Component	13 from crash.component_classifier import Component

14 from crash.component_classifier import ComponentClassifier	14 from crash.component_classifier import ComponentClassifier

	15 from crash.crash_report import CrashReport

	16 from crash.culprit import NullCulprit

	17 from crash import detect_regression_range

	18 from crash.findit import Findit

	19 from crash.project_classifier import ProjectClassifier

	20 from crash.type_enums import CrashClient

	21 from model.crash.cracas_crash_analysis import CracasCrashAnalysis

15 from model.crash.crash_config import CrashConfig	22 from model.crash.crash_config import CrashConfig

	23 from model.crash.fracas_crash_analysis import FracasCrashAnalysis

16	24

17 # TODO(katesonia): Remove the default value after adding validity check to	25 # TODO(katesonia): Remove the default value after adding validity check to

18 # config.	26 # config.

19 _DEFAULT_TOP_N = 7	27 _DEFAULT_TOP_N = 7

20	28

	29 class FinditForChromeCrash(Findit):

	30 """Find culprits for crash reports from the Chrome Crash server."""

21	31

22 # TODO(wrengr): move this to its own file, so it can be shared. When we do	32 @classmethod

23 # so, we'll need to also pass in the 'solution' argument for the tag_dict.	33 def ClientID(cls): # pragma: no cover

24 class Culprit(namedtuple('Culprit',	34 logging.info('FinditForChromeCrash.ClientID: '

25 ['project', 'components', 'cls', 'regression_range'])):	35 ' the subclass %s forgot to override this method', cls.__name__)

	36 raise NotImplementedError()

26	37

27 # TODO(wrengr): better name for this method.

28 def ToDicts(self):

29 """Convert this object to a pair of anonymous dicts for JSON.

30

31 Returns:

32 (analysis_result_dict, tag_dict)

33 The analysis result is a dict like below:

34 {

35 # Indicate if Findit found any suspects_cls, project,

36 # components or regression_range.

37 "found": true,

38 "suspected_project": "chromium-v8", # Which project is most suspected.

39 "feedback_url": "https://.."

40 "suspected_cls": [

41 {

42 "revision": "commit-hash",

43 "url": "https://chromium.googlesource.com/chromium/src/+/...",

44 "review_url": "https://codereview.chromium.org/issue-number",

45 "project_path": "third_party/pdfium",

46 "author": "who@chromium.org",

47 "time": "2015-08-17 03:38:16",

48 "reason": "a plain string with '\n' as line break to expla..."

49 "reason": [('MinDistance', 1, 'minimum distance is 0.'),

50 ('TopFrame', 0.9, 'top frame is2nd frame.')],

51 "changed_files": [

52 {"file": "file_name1.cc",

53 "blame_url": "https://...",

54 "info": "minimum distance (LOC) 0, frame #2"},

55 {"file": "file_name2.cc",

56 "blame_url": "https://...",

57 "info": "minimum distance (LOC) 20, frame #4"},

58 ...

59 ],

60 "confidence": 0.60

61 },

62 ...,

63 ],

64 "regression_range": [ # Detected regression range.

65 "53.0.2765.0",

66 "53.0.2766.0"

67 ],

68 "suspected_components": [ # A list of crbug components to file bugs.

69 "Blink>JavaScript"

70 ]

71 }

72

73 The code review url might not always be available, because not all

74 commits go through code review. In that case, commit url should

75 be used instead.

76

77 The tag dict are allowed key/value pairs to tag the analysis result

78 for query and monitoring purpose on Findit side. For allowed keys,

79 please refer to crash_analysis.py and fracas_crash_analysis.py:

80 For results with normal culprit-finding algorithm: {

81 'found_suspects': True,

82 'has_regression_range': True,

83 'solution': 'core_algorithm',

84 }

85 For results using git blame without a regression range: {

86 'found_suspects': True,

87 'has_regression_range': False,

88 'solution': 'blame',

89 }

90 If nothing is found: {

91 'found_suspects': False,

92 }

93 """

94 cls_list = [result.ToDict() for result in self.cls]

95

96 # TODO(wrengr): reformulate the JSON stuff so we can drop fields which

97 # are empty; so that, in turn, we can get rid of the NullCulprit class.

98 return (

99 {

100 'found': (bool(self.project) or

101 bool(self.components) or

102 bool(cls_list) or

103 bool(self.regression_range)),

104 'regression_range': self.regression_range,

105 'suspected_project': self.project,

106 'suspected_components': self.components,

107 'suspected_cls': cls_list,

108 },

109 {

110 'found_suspects': bool(cls_list),

111 'found_project': bool(self.project),

112 'found_components': bool(self.components),

113 'has_regression_range': bool(self.regression_range),

114 'solution': 'core_algorithm',

115 }

116 )

117

118

119 # TODO(wrengr): Either (a) reformulate the unit tests so that FindCulprit

120 # can, in fact, return None; or else (b) reformulate the JSON stuff so

121 # that the various fields are optional, so that we can just use

122 # Culprit('', [], [], None) directly.

123 class NullCulprit(object):

124 """A helper class so FindCulprit doesn't return None.

125

126 This class is analogous to Culprit(None, [], [], None), except that the

127 result of the ToDicts method is more minimalistic."""

128

129 def ToDicts(self):

130 return (

131 {'found': False},

132 {'found_suspects': False,

133 'has_regression_range': False}

134 )

135

136

137 # TODO(wrengr): better name for this class. Given the "findit_for_*.py"

138 # file names, one might suspect that each of those files implements

139 # something analogous to this file, and hence we could make a superclass

140 # to factor out the common bits. However, in truth, all those files

141 # are unrelated and do completely different things.

142 class FinditForChromeCrash(object):

143 """Process crashes from Chrome crash server and find culprits for them.

144

145 Even though this class has only one method, it is helpful because it

146 allows us to cache things which should outlive each call to that method.

147 For example, we store a single ComponentClassifier object so that we

148 only compile the regexes for each Component object once, rather than

149 doing so on each call to FindCulprit. In addition, the class lets

150 us cache various configuration options so that we need not depend

151 on CrashConfig; thereby decoupling the analysis itself from UX concerns

152 about deciding how to run those analyses.

153 """

154 # TODO(wrengr): remove the dependency on CrashConfig entirely, by	38 # TODO(wrengr): remove the dependency on CrashConfig entirely, by

155 # passing the relevant data as arguments to this constructor.	39 # passing the relevant data as arguments to this constructor.

156 def __init__(self):	40 def __init__(self, pipeline_cls):

	41 super(FinditForChromeCrash, self).__init__(pipeline_cls)

157 crash_config = CrashConfig.Get()	42 crash_config = CrashConfig.Get()
	Sharu Jiang 2016/10/21 22:06:33 There is already a config property in Findit super There is already a config property in Findit superclass, should this be updated? Sharu Jiang 2016/10/24 19:38:23 ping :) Show quoted text On 2016/10/21 22:06:33, sharu jiang wrote: > There is already a config property in Findit superclass, should this be updated? ping :) wrengr 2016/10/24 22:39:00 The Findit.config property returns CrashConfig.Get Show quoted text On 2016/10/21 22:06:33, sharu jiang wrote: > There is already a config property in Findit superclass, should this be updated? The Findit.config property returns CrashConfig.Get().GetClientConfig(self.client_id). AFAIA, that only contains parameters for the ChangelistClassifier (for Cracas, Fracas, or Clusterfuzz); whereas here we need the parameters for the ComponentClassifier (i.e., the various names and regexes for each Component). If GetClientConfig actually does return parameters for the ComponentClassifier too, just let me know what to change lines 54--55 to (i.e., where this variable is used). (FWIW, stuff like this is why I'd like to decouple everything from the CrashConfig. It's all too tricky to keep everything in sync :)
158 component_classifier_config = crash_config.component_classifier	43 component_classifier_config = crash_config.component_classifier

159	44

160 # TODO(wrengr): why are these two different?	45 self._stacktrace_parser = ChromeCrashParser()

161 component_classifier_top_n = component_classifier_config['top_n']

162 self._fracas_top_n = crash_config.fracas.get('top_n', _DEFAULT_TOP_N)

163	46

164 self.component_classifier = ComponentClassifier(	47 # For how these two "top n" differ, see http://crbug.com/644476#c4

165 [Component(component_name, path_regex, function_regex)	48 self._azalea = Azalea(

166 for path_regex, function_regex, component_name	49 cl_classifier = ChangelistClassifier(

167 in component_classifier_config['path_function_component']],	50 top_n_frames = crash_config.fracas.get('top_n', _DEFAULT_TOP_N)),
	Sharu Jiang 2016/10/21 22:06:33 Should be updated. Should be updated. wrengr 2016/10/22 00:18:49 To... what? Show quoted text On 2016/10/21 22:06:33, sharu jiang wrote: > Should be updated. To... what? Sharu Jiang 2016/10/24 19:38:23 We shouldn't use crash_config.fracas here, the con Show quoted text On 2016/10/22 00:18:49, wrengr wrote: > On 2016/10/21 22:06:33, sharu jiang wrote: > > Should be updated. > > To... what? We shouldn't use crash_config.fracas here, the config of cracas and fracas can be different, the crash_config.fracas should be updated to self.config. wrengr 2016/10/24 22:29:48 Done. Show quoted text On 2016/10/24 19:38:23, sharu jiang wrote: > On 2016/10/22 00:18:49, wrengr wrote: > > On 2016/10/21 22:06:33, sharu jiang wrote: > > > Should be updated. > > > > To... what? > > We shouldn't use crash_config.fracas here, the config of cracas and fracas can > be different, the crash_config.fracas should be updated to self.config. Done.
168 component_classifier_top_n)	51 component_classifier = ComponentClassifier(

	52 [Component(component_name, path_regex, function_regex)

	53 for path_regex, function_regex, component_name

	54 in component_classifier_config['path_function_component']],

	55 component_classifier_config['top_n']),

	56 project_classifier = ProjectClassifier())

169	57

170 # TODO(wrengr); fix ProjectClassifier so it doesn't depend on CrashConfig.	58 # TODO(wrengr): we misplaced the coverage test; find it!

171 self.project_classifier = ProjectClassifier()	59 def _InitializeAnalysis(self, model, crash_data):

	60 super(FinditForChromeCrash, self)._InitializeAnalysis(model, crash_data)

	61 # N.B., these are attributes of ChromeCrashAnalysis, not CrashAnalysis.

	62 # TODO(wrengr): clean up this defaulting ugliness.

	63 customized_data = crash_data.get('customized_data', {})

	64 model.regression_range = customized_data.get('regression_range', None)

	65 model.channel = customized_data.get('channel', None)

	66 model.historical_metadata = customized_data.get('historical_metadata', [])

172	67

173 # TODO(wrengr): since this is the only method of interest, it would	68 # TODO(wrengr); if crash_data were a proper class rather than an

174 # be better IMO to rename it to __call__ to reduce verbosity.	69 # anonymous dict, then we could have regression_range be a property

175 def FindCulprit(self, signature, platform, stack_trace, crashed_version,	70 # that's automatically computed from historical_metadata; and then we

176 regression_range):	71 # could lift this implementation to the Findit base class.

177 """Finds culprits for a Chrome crash.	72 # TODO(wrengr): we misplaced the coverage test; find it!

	73 @ndb.transactional

	74 def _NeedsNewAnalysis(self, crash_data):

	75 crash_identifiers = crash_data['crash_identifiers']

	76 historical_metadata = crash_data['customized_data']['historical_metadata']

	77 model = self.GetAnalysis(crash_identifiers)

	78 # N.B., for mocking reasons, we must not call DetectRegressionRange

	79 # directly, but rather must access it indirectly through the module.

	80 new_regression_range = detect_regression_range.DetectRegressionRange(

	81 historical_metadata)

	82 if (model and not model.failed and

	83 new_regression_range == model.regression_range):

	84 logging.info('The analysis of %s has already been done.',

	85 repr(crash_identifiers))

	86 return False

178	87

179 Args:	88 if not model:

180 signature (str): The signature of a crash on the Chrome crash server.	89 model = self.CreateAnalysis(crash_identifiers)

181 platform (str): The platform name, could be 'win', 'mac', 'linux',

182 'android', 'ios', etc.

183 stack_trace (str): A string containing the stack trace of a crash.

184 crashed_version (str): The version of Chrome in which the crash occurred.

185 regression_range (list or None): [good_version, bad_revision] or None.

186	90

187 Returns:	91 crash_data['customized_data']['regression_range'] = new_regression_range

188 A Culprit object.	92 self._InitializeAnalysis(model, crash_data)

189 """	93 model.put()

190 crash_deps = chromium_deps.GetChromeDependency(crashed_version, platform)	94 return True

191 stacktrace = ChromeCrashParser().Parse(stack_trace, crash_deps, signature)

192 if not stacktrace:

193 logging.warning('Failed to parse the stacktrace %s', stack_trace)

194 # TODO(wrengr): refactor things so we don't need the NullCulprit class.

195 return NullCulprit()

196	95

197 # Get regression deps and crash deps.	96 # TODO(wrengr): we misplaced the coverage test; find it!

198 regression_deps_rolls = {}	97 def CheckPolicy(self, crash_data):

199 if regression_range:	98 # TODO(wrengr): Should we use brackets and let exceptions be thrown?

200 last_good_version, first_bad_version = regression_range	99 crash_identifiers = crash_data['crash_identifiers']

201 logging.info('Find regression range %s:%s', last_good_version,	100 platform = crash_data['platform']

202 first_bad_version)	101 signature = crash_data['signature']

203 regression_deps_rolls = chromium_deps.GetDEPSRollsDict(	102 # TODO(wrengr): clean up this defaulting ugliness.

204 last_good_version, first_bad_version, platform)	103 channel = crash_data.get('customized_data', {}).get('channel', None)

	104 # TODO(katesonia): Remove the default value after adding validity check to

	105 # config.

	106 if platform not in self.config.get(

	107 'supported_platform_list_by_channel', {}).get(channel, []):

	108 # Bail out if either the channel or platform is not supported yet.

	109 logging.info('Analysis of channel %s, platform %s is not supported. '

	110 'No analysis is scheduled for %s',

	111 channel, platform, repr(crash_identifiers))

	112 return None

205	113

206 suspected_cls = findit_for_crash.FindItForCrash(	114 # TODO(katesonia): Remove the default value after adding validity check to

207 stacktrace, regression_deps_rolls, crash_deps, self._fracas_top_n)	115 # config.

	116 for blacklist_marker in self.config.get('signature_blacklist_markers', []):

	117 if blacklist_marker in signature:

	118 logging.info('%s signature is not supported. '

	119 'No analysis is scheduled for %s', blacklist_marker,

	120 repr(crash_identifiers))

	121 return None

208	122

209 crash_stack = stacktrace.crash_stack	123 # TODO(wrengr): should we clone \|crash_data\| rather than mutating it?

210 suspected_project = self.project_classifier.Classify(	124 crash_data['platform'] = self.RenamePlatform(platform)

211 suspected_cls, crash_stack)	125 return crash_data

212	126

213 suspected_components = self.component_classifier.Classify(

214 suspected_cls, crash_stack)

215	127

216 return Culprit(suspected_project, suspected_components, suspected_cls,	128 # TODO(wrengr): we misplaced the coverage test; find it!

217 regression_range)	129 class FinditForCracas(FinditForChromeCrash):

	130 @classmethod

	131 def ClientID(cls):

	132 return CrashClient.CRACAS

	133

	134 def CreateAnalysis(self, crash_identifiers):

	135 # TODO: inline CracasCrashAnalysis.Create stuff here.

	136 return CracasCrashAnalysis.Create(crash_identifiers)

	137

	138 def GetAnalysis(self, crash_identifiers):

	139 # TODO: inline CracasCrashAnalysis.Get stuff here.

	140 return CracasCrashAnalysis.Get(crash_identifiers)

	141

	142

	143 # TODO(wrengr): we misplaced the coverage test; find it!

	144 class FinditForFracas(FinditForChromeCrash):

	145 @classmethod

	146 def ClientID(cls):

	147 return CrashClient.FRACAS

	148

	149 def CreateAnalysis(self, crash_identifiers):

	150 # TODO: inline FracasCrashAnalysis.Create stuff here.

	151 return FracasCrashAnalysis.Create(crash_identifiers)

	152

	153 def GetAnalysis(self, crash_identifiers):

	154 # TODO: inline FracasCrashAnalysis.Get stuff here.

	155 return FracasCrashAnalysis.Get(crash_identifiers)

OLD	NEW

« appengine/findit/crash/findit.py ('K') | « appengine/findit/crash/findit.py ('k') | appengine/findit/crash/findit_for_client.py » ('j') | no next file with comments »