appengine/findit/crash/findit_for_chromecrash.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Side by Side Diff: appengine/findit/crash/findit_for_chromecrash.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: trying to fix some tests Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import logging	5 import logging

6 from collections import namedtuple

7	6

8 from common import chromium_deps	7 from common import chromium_deps

9 from crash import detect_regression_range	8 from crash.azalea import Azalea

10 from crash import findit_for_crash	9 from crash.changelist_classifier import ChangelistClassifier

11 from crash.chromecrash_parser import ChromeCrashParser	10 from crash.chromecrash_parser import ChromeCrashParser

12 from crash.project_classifier import ProjectClassifier

13 from crash.component_classifier import Component	11 from crash.component_classifier import Component

14 from crash.component_classifier import ComponentClassifier	12 from crash.component_classifier import ComponentClassifier

	13 from crash.crash_report import CrashReport

	14 from crash.culprit import NullCulprit

	15 from crash import detect_regression_range

	16 from crash.findit import Findit

	17 from crash.project_classifier import ProjectClassifier

	18 from crash.type_enums import CrashClient

	19 from model.crash.cracas_crash_analysis import CracasCrashAnalysis

15 from model.crash.crash_config import CrashConfig	20 from model.crash.crash_config import CrashConfig

	21 from model.crash.fracas_crash_analysis import FracasCrashAnalysis

16	22

17 # TODO(katesonia): Remove the default value after adding validity check to	23 # TODO(katesonia): Remove the default value after adding validity check to

18 # config.	24 # config.

19 _DEFAULT_TOP_N = 7	25 _DEFAULT_TOP_N = 7

20	26

21	27 class FinditForChromeCrash(Findit):

22 # TODO(wrengr): move this to its own file, so it can be shared. When we do	28 """Find culprits for crash reports from the Chrome Crash server."""

23 # so, we'll need to also pass in the 'solution' argument for the tag_dict.

24 class Culprit(namedtuple('Culprit',

25 ['project', 'components', 'cls', 'regression_range'])):

26

27 # TODO(wrengr): better name for this method.

28 def ToDicts(self):

29 """Convert this object to a pair of anonymous dicts for JSON.

30

31 Returns:

32 (analysis_result_dict, tag_dict)

33 The analysis result is a dict like below:

34 {

35 # Indicate if Findit found any suspects_cls, project,

36 # components or regression_range.

37 "found": true,

38 "suspected_project": "chromium-v8", # Which project is most suspected.

39 "feedback_url": "https://.."

40 "suspected_cls": [

41 {

42 "revision": "commit-hash",

43 "url": "https://chromium.googlesource.com/chromium/src/+/...",

44 "review_url": "https://codereview.chromium.org/issue-number",

45 "project_path": "third_party/pdfium",

46 "author": "who@chromium.org",

47 "time": "2015-08-17 03:38:16",

48 "reason": "a plain string with '\n' as line break to expla..."

49 "reason": [('MinDistance', 1, 'minimum distance is 0.'),

50 ('TopFrame', 0.9, 'top frame is2nd frame.')],

51 "changed_files": [

52 {"file": "file_name1.cc",

53 "blame_url": "https://...",

54 "info": "minimum distance (LOC) 0, frame #2"},

55 {"file": "file_name2.cc",

56 "blame_url": "https://...",

57 "info": "minimum distance (LOC) 20, frame #4"},

58 ...

59 ],

60 "confidence": 0.60

61 },

62 ...,

63 ],

64 "regression_range": [ # Detected regression range.

65 "53.0.2765.0",

66 "53.0.2766.0"

67 ],

68 "suspected_components": [ # A list of crbug components to file bugs.

69 "Blink>JavaScript"

70 ]

71 }

72

73 The code review url might not always be available, because not all

74 commits go through code review. In that case, commit url should

75 be used instead.

76

77 The tag dict are allowed key/value pairs to tag the analysis result

78 for query and monitoring purpose on Findit side. For allowed keys,

79 please refer to crash_analysis.py and fracas_crash_analysis.py:

80 For results with normal culprit-finding algorithm: {

81 'found_suspects': True,

82 'has_regression_range': True,

83 'solution': 'core_algorithm',

84 }

85 For results using git blame without a regression range: {

86 'found_suspects': True,

87 'has_regression_range': False,

88 'solution': 'blame',

89 }

90 If nothing is found: {

91 'found_suspects': False,

92 }

93 """

94 cls_list = [result.ToDict() for result in self.cls]

95

96 # TODO(wrengr): reformulate the JSON stuff so we can drop fields which

97 # are empty; so that, in turn, we can get rid of the NullCulprit class.

98 return (

99 {

100 'found': (bool(self.project) or

101 bool(self.components) or

102 bool(cls_list) or

103 bool(self.regression_range)),

104 'regression_range': self.regression_range,

105 'suspected_project': self.project,

106 'suspected_components': self.components,

107 'suspected_cls': cls_list,

108 },

109 {

110 'found_suspects': bool(cls_list),

111 'found_project': bool(self.project),

112 'found_components': bool(self.components),

113 'has_regression_range': bool(self.regression_range),

114 'solution': 'core_algorithm',

115 }

116 )

117

118

119 # TODO(wrengr): Either (a) reformulate the unit tests so that FindCulprit

120 # can, in fact, return None; or else (b) reformulate the JSON stuff so

121 # that the various fields are optional, so that we can just use

122 # Culprit('', [], [], None) directly.

123 class NullCulprit(object):

124 """A helper class so FindCulprit doesn't return None.

125

126 This class is analogous to Culprit(None, [], [], None), except that the

127 result of the ToDicts method is more minimalistic."""

128

129 def ToDicts(self):

130 return (

131 {'found': False},

132 {'found_suspects': False,

133 'has_regression_range': False}

134 )

135

136

137 # TODO(wrengr): better name for this class. Given the "findit_for_*.py"

138 # file names, one might suspect that each of those files implements

139 # something analogous to this file, and hence we could make a superclass

140 # to factor out the common bits. However, in truth, all those files

141 # are unrelated and do completely different things.

142 class FinditForChromeCrash(object):

143 """Process crashes from Chrome crash server and find culprits for them.

144

145 Even though this class has only one method, it is helpful because it

146 allows us to cache things which should outlive each call to that method.

147 For example, we store a single ComponentClassifier object so that we

148 only compile the regexes for each Component object once, rather than

149 doing so on each call to FindCulprit. In addition, the class lets

150 us cache various configuration options so that we need not depend

151 on CrashConfig; thereby decoupling the analysis itself from UX concerns

152 about deciding how to run those analyses.

153 """

154 # TODO(wrengr): remove the dependency on CrashConfig entirely, by	29 # TODO(wrengr): remove the dependency on CrashConfig entirely, by

155 # passing the relevant data as arguments to this constructor.	30 # passing the relevant data as arguments to this constructor.

156 def __init__(self):	31 def __init__(self):

	32 super(FinditForChromeCrash, self).__init__()

157 crash_config = CrashConfig.Get()	33 crash_config = CrashConfig.Get()

158 component_classifier_config = crash_config.component_classifier	34 component_classifier_config = crash_config.component_classifier

159	35

160 # TODO(wrengr): why are these two different?	36 self.chromecrash_parser = ChromeCrashParser()
	Sharu Jiang 2016/10/12 22:35:03 this can be renamed to more general name like stac this can be renamed to more general name like stacktrace_parser, since it's already inside findit_for_chromecrash.
161 component_classifier_top_n = component_classifier_config['top_n']

162 self._fracas_top_n = crash_config.fracas.get('top_n', _DEFAULT_TOP_N)

163	37

164 self.component_classifier = ComponentClassifier(	38 # For how these two "top n" differ, see http://crbug.com/644476#c4

165 [Component(component_name, path_regex, function_regex)	39 self.azalea = Azalea(
	Sharu Jiang 2016/10/12 22:35:03 If we lift the FulCulprit to findit class, the aza If we lift the FulCulprit to findit class, the azalea can be lifted too. wrengr 2016/10/18 23:13:54 We can't move this creation of Azalea to the base Show quoted text On 2016/10/12 22:35:03, sharu jiang wrote: > If we lift the FulCulprit to findit class, the azalea can be lifted too. We can't move this creation of Azalea to the base Findit class, since it depends on client-specific details for constructing the different classifier objects. I could break the creation of the ComponentClassifier out into a private method, since that part seems fairly client agnostic. Dunno if it's worth it though.
166 for path_regex, function_regex, component_name	40 cl_classifier = ChangelistClassifier(

167 in component_classifier_config['path_function_component']],	41 top_n_frames = crash_config.fracas.get('top_n', _DEFAULT_TOP_N)),

168 component_classifier_top_n)	42 component_classifier = ComponentClassifier(

	43 [Component(component_name, path_regex, function_regex)

	44 for path_regex, function_regex, component_name

	45 in component_classifier_config['path_function_component']],

	46 component_classifier_config['top_n']),

	47 project_classifier = ProjectClassifier())

169	48

170 # TODO(wrengr); fix ProjectClassifier so it doesn't depend on CrashConfig.	49 # TODO(wrengr): In older versions of the code, this method used to

171 self.project_classifier = ProjectClassifier()	50 # also set \|channel\| and \|historical_metadata\|. However, the values

	51 # for those aren't clear from the one callsite for \|UpdateAnalysis\|

	52 # (i.e., in crash_pipeline.py)

	53 #def ResetAnalysis(self, model, report):

	54 # super(FinditForChromeCrash, self).ResetAnalysis(model, report)

172	55

173 # TODO(wrengr): since this is the only method of interest, it would	56 def CheckPolicy(self, crash_identifiers, report, channel):

174 # be better IMO to rename it to __call__ to reduce verbosity.	57 # TODO(katesonia): Remove the default value after adding validity check to

175 def FindCulprit(self, signature, platform, stack_trace, crashed_version,	58 # config.

176 regression_range):	59 if report.platform not in self.config.get(

177 """Finds culprits for a Chrome crash.	60 'supported_platform_list_by_channel', {}).get(channel, []):

	61 # Bail out if either the channel or platform is not supported yet.

	62 logging.info('Analysis of channel %s, platform %s is not supported. '

	63 'No analysis is scheduled for %s',

	64 channel, report.platform, repr(crash_identifiers))

	65 return None

178	66

179 Args:	67 # TODO(katesonia): Remove the default value after adding validity check to

180 signature (str): The signature of a crash on the Chrome crash server.	68 # config.

181 platform (str): The platform name, could be 'win', 'mac', 'linux',	69 for blacklist_marker in self.config.get('signature_blacklist_markers', []):

182 'android', 'ios', etc.	70 if blacklist_marker in report.signature:

183 stack_trace (str): A string containing the stack trace of a crash.	71 logging.info('%s signature is not supported. '

184 crashed_version (str): The version of Chrome in which the crash occurred.	72 'No analysis is scheduled for %s', blacklist_marker,

185 regression_range (list or None): [good_version, bad_revision] or None.	73 repr(crash_identifiers))

	74 return None

186	75

187 Returns:	76 return report._replace(platform=self.RenamePlatform(report.platform))

188 A Culprit object.	77

189 """	78 # TODO(wrengr): can this implementation be lifted to the Findit

190 crash_deps = chromium_deps.GetChromeDependency(crashed_version, platform)	79 # superclass? (perhaps by abstracting out the stacktrace parsing into
	Sharu Jiang 2016/10/12 17:46:29 This part can be lifted, since it can be shared by This part can be lifted, since it can be shared by fracas/cracas and clusterfuzz. wrengr 2016/10/12 21:14:51 Acknowledged. Show quoted text On 2016/10/12 17:46:29, sharu jiang wrote: > This part can be lifted, since it can be shared by fracas/cracas and > clusterfuzz. Acknowledged.
191 stacktrace = ChromeCrashParser().Parse(stack_trace, crash_deps, signature)	80 # a separate method)

	81 # TODO(wrengr): is the historical metadata stored in the model somewhere?

	82 def FindCulprit(self, model, **kwargs):

	83 crashed_version = model.crashed_version

	84 signature = model.signature

	85 platform = model.platform

	86

	87 stacktrace = self.chromecrash_parser.Parse(model.stack_trace,

	88 chromium_deps.GetChromeDependency(crashed_version, platform),

	89 signature)

192 if not stacktrace:	90 if not stacktrace:

193 logging.warning('Failed to parse the stacktrace %s', stack_trace)	91 logging.warning('Failed to parse the stacktrace %s', model.stack_trace)

194 # TODO(wrengr): refactor things so we don't need the NullCulprit class.	92 # TODO(wrengr): refactor things so we don't need the NullCulprit class.

195 return NullCulprit()	93 return NullCulprit()

196	94

197 # Get regression deps and crash deps.	95 regression_range = None

198 regression_deps_rolls = {}	96 historical_metadata = kwargs.get('historical_metadata', None)
	Sharu Jiang 2016/10/12 17:46:29 We already got regression range before running ana We already got regression range before running analysis while determining whether to rerun or not. wrengr 2016/10/12 21:14:51 Is the regression_range stored in the CrashAnalysi Show quoted text On 2016/10/12 17:46:29, sharu jiang wrote: > We already got regression range before running analysis while determining > whether to rerun or not. Is the regression_range stored in the CrashAnalysis model? Or should it be passed in as a separate argument? Sharu Jiang 2016/10/12 22:35:03 It should be stored in CrashAnalysis model before Show quoted text On 2016/10/12 21:14:51, wrengr wrote: > On 2016/10/12 17:46:29, sharu jiang wrote: > > We already got regression range before running analysis while determining > > whether to rerun or not. > > Is the regression_range stored in the CrashAnalysis model? Or should it be > passed in as a separate argument? It should be stored in CrashAnalysis model before running analysis. wrengr 2016/10/18 23:13:54 Done. Show quoted text On 2016/10/12 22:35:03, sharu jiang wrote: > On 2016/10/12 21:14:51, wrengr wrote: > > On 2016/10/12 17:46:29, sharu jiang wrote: > > > We already got regression range before running analysis while determining > > > whether to rerun or not. > > > > Is the regression_range stored in the CrashAnalysis model? Or should it be > > passed in as a separate argument? > > It should be stored in CrashAnalysis model before running analysis. Done.
199 if regression_range:	97 if historical_metadata is not None:

200 last_good_version, first_bad_version = regression_range	98 # N.B., because of the way we do mock testing, we must call

201 logging.info('Find regression range %s:%s', last_good_version,	99 # DetectRegressionRange indirectly through the module, and must not

202 first_bad_version)	100 # call it directly (causing the mock not to be used).

203 regression_deps_rolls = chromium_deps.GetDEPSRollsDict(	101 regression_range = detect_regression_range.DetectRegressionRange(historica l_metadata)

204 last_good_version, first_bad_version, platform)

205	102

206 suspected_cls = findit_for_crash.FindItForCrash(	103 return self.azalea.FindCulprit(CrashReport(

207 stacktrace, regression_deps_rolls, crash_deps, self._fracas_top_n)	104 crashed_version = crashed_version,

	105 signature = signature,

	106 platform = platform,

	107 stacktrace = stacktrace,

	108 regression_range = regression_range))

208	109

209 crash_stack = stacktrace.crash_stack

210 suspected_project = self.project_classifier.Classify(

211 suspected_cls, crash_stack)

212	110

213 suspected_components = self.component_classifier.Classify(	111 class FinditForCracas(FinditForChromeCrash):

214 suspected_cls, crash_stack)	112 @classmethod

	113 def ClientID(cls):

	114 return CrashClient.CRACAS

215	115

216 return Culprit(suspected_project, suspected_components, suspected_cls,	116 def CreateAnalysis(self, crash_identifiers):

217 regression_range)	117 # TODO: inline CracasCrashAnalysis.Create stuff here.

	118 return CracasCrashAnalysis.Create(crash_identifiers)

	119

	120 def GetAnalysis(self, crash_identifiers):

	121 # TODO: inline CracasCrashAnalysis.Get stuff here.

	122 return CracasCrashAnalysis.Get(crash_identifiers)

	123

	124

	125 class FinditForFracas(FinditForChromeCrash):

	126 @classmethod

	127 def ClientID(cls):

	128 return CrashClient.FRACAS

	129

	130 def CreateAnalysis(self, crash_identifiers):

	131 # TODO: inline FracasCrashAnalysis.Create stuff here.

	132 return FracasCrashAnalysis.Create(crash_identifiers)

	133

	134 def GetAnalysis(self, crash_identifiers):

	135 # TODO: inline FracasCrashAnalysis.Get stuff here.

	136 return FracasCrashAnalysis.Get(crash_identifiers)

OLD	NEW