appengine/findit/crash/findit.py - Issue 2663063007: [Predator] Switch from anonymous dict to CrashData.

Side by Side Diff: appengine/findit/crash/findit.py

Issue 2663063007: [Predator] Switch from anonymous dict to CrashData. (Closed)

Patch Set: Fix nits. Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « appengine/findit/crash/crash_report_with_dependencies.py ('k') | appengine/findit/crash/findit_for_chromecrash.py » ('j') | appengine/findit/crash/loglinear/changelist_features/test/min_distance_test.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import copy	5 import copy

6 import json	6 import json

7 import logging	7 import logging

8	8

9 from google.appengine.ext import ndb	9 from google.appengine.ext import ndb

10	10

11 from common import appengine_util	11 from common import appengine_util

12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher	12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher

13 from common import constants	13 from common import constants

14 from crash.component import Component	14 from crash.component import Component

15 from crash.component_classifier import ComponentClassifier	15 from crash.component_classifier import ComponentClassifier

16 from crash.crash_report import CrashReport

17 from crash.project import Project	16 from crash.project import Project

18 from crash.project_classifier import ProjectClassifier	17 from crash.project_classifier import ProjectClassifier

19 from libs import time_util	18 from libs import time_util

20 from model import analysis_status	19 from model import analysis_status

21 from model.crash.crash_config import CrashConfig

22	20

23	21

24 # TODO(http://crbug.com/659346): since most of our unit tests are	22 # TODO(http://crbug.com/659346): since most of our unit tests are

25 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py.	23 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py.

26 # However, now we're missing coverage for most of this file (due to the	24 # However, now we're missing coverage for most of this file (due to the

27 # buggy way coverage is computed). Need to add a bunch of new unittests	25 # buggy way coverage is computed). Need to add a bunch of new unittests

28 # to get coverage back up.	26 # to get coverage back up.

29	27

30 # TODO: this class depends on ndb stuff, and should therefore move to	28 # TODO: this class depends on ndb stuff, and should therefore move to

31 # cr-culprit-finder/service/predator as part of the big reorganization.	29 # cr-culprit-finder/service/predator as part of the big reorganization.

32 # This class should be renamed to avoid confustion between Findit and Predator.	30 # This class should be renamed to avoid confustion between Findit and Predator.

33 # Think of a good name (e.g.'PredatorApp') for this class.	31 # Think of a good name (e.g.'PredatorApp') for this class.

34 class Findit(object):	32 class Findit(object):

35	33

36 def __init__(self, get_repository, config):	34 def __init__(self, get_repository, config):

37 """	35 """

38 Args:	36 Args:

39 get_repository (callable): a function from DEP urls to ``Repository``	37 get_repository (callable): a function from DEP urls to ``Repository``

40 objects, so we can get changelogs and blame for each dep. Notably,	38 objects, so we can get changelogs and blame for each dep. Notably,

41 to keep the code here generic, we make no assumptions about	39 to keep the code here generic, we make no assumptions about

42 which subclass of ``Repository`` this function returns. Thus,	40 which subclass of ``Repository`` this function returns. Thus,

43 it is up to the caller to decide what class to return and handle	41 it is up to the caller to decide what class to return and handle

44 any other arguments that class may require (e.g., an http client	42 any other arguments that class may require (e.g., an http client

45 for ``GitilesRepository``).	43 for ``GitilesRepository``).

46 config (ndb.CrashConfig): Config for clients and project and component	44 config (ndb.CrashConfig): Config for clients and project and component

47 classifiers.	45 classifiers.

48 """	46 """

49 self._get_repository = get_repository	47 self._get_repository = get_repository

50 self._dep_fetcher = ChromeDependencyFetcher(get_repository)

51	48

52 # The top_n is the number of frames we want to check to get project	49 # The top_n is the number of frames we want to check to get project

53 # classifications.	50 # classifications.

54 projects = [Project(name, path_regexs, function_regexs, host_directories)	51 projects = [Project(name, path_regexs, function_regexs, host_directories)

55 for name, path_regexs, function_regexs, host_directories	52 for name, path_regexs, function_regexs, host_directories

56 in config.project_classifier['project_path_function_hosts']]	53 in config.project_classifier['project_path_function_hosts']]

57 self._project_classifier = ProjectClassifier(	54 self._project_classifier = ProjectClassifier(

58 projects, config.project_classifier['top_n'],	55 projects, config.project_classifier['top_n'],

59 config.project_classifier['non_chromium_project_rank_priority'])	56 config.project_classifier['non_chromium_project_rank_priority'])

60	57

	58 # The top_n is the number of frames we want to check to get component

	59 # classifications.

61 components = [Component(component_name, path_regex, function_regex)	60 components = [Component(component_name, path_regex, function_regex)

62 for path_regex, function_regex, component_name	61 for path_regex, function_regex, component_name

63 in config.component_classifier['path_function_component']]	62 in config.component_classifier['path_function_component']]

64 self._component_classifier = ComponentClassifier(	63 self._component_classifier = ComponentClassifier(

65 components, config.component_classifier['top_n'])	64 components, config.component_classifier['top_n'])

66	65

67 self._config = config	66 self._config = config

68	67

69 # TODO(http://crbug.com/659354): because self.client is volatile,

70 # we need some way of updating the Predator instance whenever the

71 # config changes. How to do that cleanly?

72 self._predator = None

73 self._stacktrace_parser = None

74

75 # This is a class method because it should be the same for all	68 # This is a class method because it should be the same for all

76 # instances of this class. We can in fact call class methods on	69 # instances of this class. We can in fact call class methods on

77 # instances (not just on the class itself), so we could in principle	70 # instances (not just on the class itself), so we could in principle

78 # get by with just this method. However, a @classmethod is treated	71 # get by with just this method. However, a @classmethod is treated

79 # syntactically like a method, thus we'd need to have the ``()`` at the	72 # syntactically like a method, thus we'd need to have the ``()`` at the

80 # end, unlike for a @property. Thus we have both the class method and	73 # end, unlike for a @property. Thus we have both the class method and

81 # the property, in order to simulate a class property.	74 # the property, in order to simulate a class property.

82 @classmethod	75 @classmethod

83 def _ClientID(cls): # pragma: no cover	76 def _ClientID(cls): # pragma: no cover

84 """Get the client id for this class.	77 """Get the client id for this class.

85	78

86 This class method is private. Unless you really need to access	79 This class method is private. Unless you really need to access

87 this method directly for some reason, you should use the ``client_id``	80 this method directly for some reason, you should use the ``client_id``

88 property instead.	81 property instead.

89	82

90 Returns:	83 Returns:

91 A string which is member of the CrashClient enumeration.	84 A string which is member of the CrashClient enumeration.

92 """	85 """

93 if cls is Findit:	86 if cls is Findit:

94 logging.warning('Findit is abstract, '	87 logging.warning('Findit is abstract, '

95 'but someone constructed an instance and called _ClientID')	88 'but someone constructed an instance and called _ClientID')

96 else:	89 else:

97 logging.warning('Findit subclass %s forgot to implement _ClientID',	90 logging.warning('Findit subclass %s forgot to implement _ClientID',

98 cls.__name__)	91 cls.__name__)

99 raise NotImplementedError()	92 raise NotImplementedError()

100	93

	94 def _Predator(self):

	95 raise NotImplementedError()

	96

101 @property	97 @property

102 def client_id(self):	98 def client_id(self):

103 """Get the client id from the class of this object.	99 """Get the client id from the class of this object.

104	100

105 N.B., this property is static and should not be overridden."""	101 N.B., this property is static and should not be overridden."""

106 return self._ClientID()	102 return self._ClientID()

107	103

108 # TODO(http://crbug.com/659354): can we remove the dependency on

109 # CrashConfig entirely? It'd be better to receive method calls

110 # whenever things change, so that we know the change happened (and

111 # what in particular changed) so that we can update our internal state

112 # as appropriate.

113 @property	104 @property

114 def client_config(self):	105 def client_config(self):

115 """Get the current value of the client config for the class of this object.	106 """Get the current value of the client config for the class of this object.

116	107

117 N.B., this property is volatile and may change asynchronously.	108 N.B., this property is volatile and may change asynchronously.

118	109

119 If the event of an error this method will return ``None``. That we do	110 If the event of an error this method will return ``None``. That we do

120 not return the empty dict is intentional. All of the circumstances	111 not return the empty dict is intentional. All of the circumstances

121 which would lead to this method returning ``None`` indicate	112 which would lead to this method returning ``None`` indicate

122 underlying bugs in code elsewhere. (E.g., creating instances of	113 underlying bugs in code elsewhere. (E.g., creating instances of

123 abstract classes, implementing a subclass which is not suppported by	114 abstract classes, implementing a subclass which is not suppported by

124 ``CrashConfig``, etc.) Because we return ``None``, any subsequent	115 ``CrashConfig``, etc.) Because we return ``None``, any subsequent

125 calls to ``__getitem__`` or ``get`` will raise method-missing errors;	116 calls to ``__getitem__`` or ``get`` will raise method-missing errors;

126 which serve to highlight the underlying bug. Whereas, if we silently	117 which serve to highlight the underlying bug. Whereas, if we silently

127 returned the empty dict, then calls to ``get`` would "work" just	118 returned the empty dict, then calls to ``get`` would "work" just

128 fine; thereby masking the underlying bug!	119 fine; thereby masking the underlying bug!

129 """	120 """

130 return self._config.GetClientConfig(self.client_id)	121 return self._config.GetClientConfig(self.client_id)

131	122

132 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or	123 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or

133 # something like that.	124 # something like that.

134 def RenamePlatform(self, platform):	125 def RenamePlatform(self, platform):

135 """Remap the platform to a different one, based on the config."""	126 """Remap the platform to a different one, based on the config."""

136 # TODO(katesonia): Remove the default value after adding validity check to	127 return self.client_config['platform_rename'].get(platform, platform)

137 # config.

138 return self.client_config.get('platform_rename', {}).get(platform, platform)

139

140 def CheckPolicy(self, crash_data): # pylint: disable=W0613

141 """Check whether this client supports analyzing the given report.

142

143 Some clients only support analysis for crashes on certain platforms

144 or channels, etc. This method checks to see whether this client can

145 analyze the given crash. The default implementation on the Findit

146 base class returns None for everything, so that unsupported clients

147 reject everything, as expected.

148

149 Args:

150 crash_data (JSON): ??

151

152 Returns:

153 If satisfied, we return the ``crash_data`` which may have had some

154 fields modified. Otherwise returns None.

155 """

156 return None

157	128

158 # TODO(http://crbug.com/644476): rename this to something like	129 # TODO(http://crbug.com/644476): rename this to something like

159 # _NewAnalysis, since it only does the "allocation" and needs to/will	130 # _NewAnalysis, since it only does the "allocation" and needs to/will

160 # be followed up with _InitializeAnalysis anyways.	131 # be followed up with _InitializeAnalysis anyways.

161 def CreateAnalysis(self, crash_identifiers): # pylint: disable=W0613	132 def CreateAnalysis(self, crash_identifiers): # pragma: no cover

162 return None	133 raise NotImplementedError()

163	134

164 def GetAnalysis(self, crash_identifiers): # pylint: disable=W0613	135 def GetAnalysis(self, crash_identifiers): # pragma: no cover

165 """Returns the CrashAnalysis for the ``crash_identifiers``, if one exists.	136 """Returns the CrashAnalysis for the ``crash_identifiers``, if one exists.

166	137

167 Args:	138 Args:

168 crash_identifiers (JSON): ??	139 crash_identifiers (JSON): ??

169	140

170 Returns:	141 Returns:

171 If a CrashAnalysis ndb.Model already exists for the	142 If a CrashAnalysis ndb.Model already exists for the

172 ``crash_identifiers``, then we return it. Otherwise, returns None.	143 ``crash_identifiers``, then we return it. Otherwise, returns None.

173 """	144 """

174 return None	145 raise NotImplementedError()

175	146

176 # TODO(wrengr): this should be a method on CrashAnalysis, not on Findit.	147 def GetCrashData(self, raw_crash_data): # pragma: no cover

177 # TODO(http://crbug.com/659346): coverage tests for this class, not	148 raise NotImplementedError()

178 # just for FinditForFracas.

179 def _InitializeAnalysis(self, model, crash_data): # pragma: no cover

180 """(Re)Initialize a CrashAnalysis ndb.Model, but do not ``put()`` it yet.

181	149

182 This method is only ever called from _NeedsNewAnalysis which is only	150 def _CheckPolicy(self, crash_data): # pylint: disable=W0613

183 ever called from ScheduleNewAnalysis. It is used for filling in the	151 """Check whether this client supports analyzing the given report.

184 fields of a CrashAnalysis ndb.Model for the first time (though it

185 can also be used to re-initialize a given CrashAnalysis). Subclasses

186 should extend (not override) this to (re)initialize any

187 client-specific fields they may have."""

188 # Get rid of any previous values there may have been.

189 model.Reset()

190	152

191 # Set the version.	153 Some clients only support analysis for crashes on certain platforms

192 # ``handlers.crash.test.crash_handler_test.testAnalysisScheduled``	154 or channels, etc. This method checks to see whether this client can

193 # provides and expects this field to be called 'chrome_version',	155 analyze the given crash. The default implementation on the Findit

194 # whereas everyone else (e.g., in ``crash.test.crash_pipeline_test``	156 base class returns None for everything, so that unsupported clients

195 # the tests ``testAnalysisNeededIfNoAnalysisYet``,	157 reject everything, as expected.

196 # ``testRunningAnalysisNoSuspectsFound``, ``testRunningAnalysis``,

197 # ``testAnalysisNeededIfLastOneFailed``,

198 # ``testRunningAnalysisWithSuspectsCls``) expects it to be called

199 # 'crashed_version'. The latter is the better/more general name,

200 # so the former needs to be changed in order to get rid of this

201 # defaulting ugliness.

202 model.crashed_version = crash_data.get('crashed_version',

203 crash_data.get('chrome_version', None))

204	158

205 # Set (other) common properties.	159 Args:

206 model.stack_trace = crash_data['stack_trace']	160 raw_crash_data (JSON): ??

207 model.signature = crash_data['signature']

208 model.platform = crash_data['platform']

209 # TODO(wrengr): The only reason to have _InitializeAnalysis as a

210 # method of the Findit class rather than as a method on CrashAnalysis

211 # is so we can assert that crash_data['client_id'] == self.client_id.

212 # So, either we should do that, or else we should move this to be

213 # a method on CrashAnalysis.

214 model.client_id = self.client_id

215 model.regression_range = crash_data.get('regression_range', None)

216	161

217 # Set progress properties.	162 Returns:

218 model.status = analysis_status.PENDING	163 Boolean to indicate whether the crash passed the policy check.

219 model.requested_time = time_util.GetUTCNow()	164 """

	165 if not self.client_config:

	166 logging.info('No configuration of client %s, analysis is not supported',

	167 self.client_id)

	168 return True

	169

	170 for blacklist_marker in self.client_config['signature_blacklist_markers']:

	171 if blacklist_marker in crash_data.signature:

	172 logging.info('%s signature is not supported.', blacklist_marker)

	173 return False

	174

	175 return True

220	176

221 @ndb.transactional	177 @ndb.transactional

222 def _NeedsNewAnalysis(self, crash_data):	178 def NeedsNewAnalysis(self, crash_data):

223 raise NotImplementedError()	179 """Checks if the crash needs new anlysis.

224	180

225 # TODO(wrengr): does the parser actually need the version, signature,	181 If a new analysis needs to be scheduled, initialize a ``CrashAnalysis``

226 # and platform? If not, then we should be able to just pass the string	182 model using crash_data and put it in datastore for later culprit

227 # to be parsed (which would make a lot more sense than passing the	183 analyzing.

228 # whole model).

229 # TODO(http://crbug.com/659346): coverage tests for this class, not

230 # just for FinditForFracas.

231 def ParseStacktrace(self, model): # pragma: no cover

232 """Parse a CrashAnalysis's ``stack_trace`` string into a Stacktrace object.

233	184

234 Args:	185 Args:

235 model (CrashAnalysis): The model containing the stack_trace string	186 crash_data (CrashData): Crash buffer that contains all the information

236 to be parsed.	187 about the crash.

237	188

238 Returns:	189 Returns:

239 On success, returns a Stacktrace object; on failure, returns None.	190 Boolean of whether a new analysis needs to be scheduled.

240 """	191 """

241 # Use up-to-date ``top_n`` in self.client_config to filter top n frames.	192 # Check policy and modify the raw_crash_data as needed.

242 stacktrace = self._stacktrace_parser.Parse(	193 if not self._CheckPolicy(crash_data):

243 model.stack_trace,	194 logging.info('The analysis of %s is not supported, task will not be '

244 self._dep_fetcher.GetDependency(model.crashed_version, model.platform),	195 'scheduled.', str(crash_data.identifiers))

245 model.signature,	196 return False

246 self.client_config.get('top_n'))

247 if not stacktrace:

248 logging.warning('Failed to parse the stacktrace %s', model.stack_trace)

249 return None

250	197

251 return stacktrace	198 # Rename platform if configured.

	199 crash_data.platform = self.RenamePlatform(crash_data.platform)

252	200

253 def ProcessResultForPublishing(self, result, key):	201 model = self.GetAnalysis(crash_data.identifiers)

	202 # N.B., for mocking reasons, we must not call DetectRegressionRange

	203 # directly, but rather must access it indirectly through the module.
	wrengr (wrong one) 2017/02/11 00:11:34 This comment is no longer relevant, since we don't This comment is no longer relevant, since we don't call DetectRegressionRange here anymore Sharu Jiang 2017/02/11 00:41:20 Done. Show quoted text On 2017/02/11 00:11:34, wrengr (wrong one) wrote: > This comment is no longer relevant, since we don't call DetectRegressionRange > here anymore Done.
	204 if (model and not model.failed and

	205 crash_data.regression_range == (tuple(model.regression_range)

	206 if model.regression_range else

	207 model.regression_range)):
	wrengr (wrong one) 2017/02/11 00:11:34 this seems a bit strange/convoluted. Why not do so this seems a bit strange/convoluted. Why not do something like ``tuple(model.regression_range or [])``? Sharu Jiang 2017/02/11 00:41:20 If a crash doesn't have regression range, it shoul Show quoted text On 2017/02/11 00:11:34, wrengr (wrong one) wrote: > this seems a bit strange/convoluted. Why not do something like > ``tuple(model.regression_range or [])``? If a crash doesn't have regression range, it should be None instead of empty tuple. wrengr (wrong one) 2017/02/11 01:14:54 ah. Maybe make the else-branch explicitly return Show quoted text On 2017/02/11 00:41:20, Sharu Jiang wrote: > On 2017/02/11 00:11:34, wrengr (wrong one) wrote: > > this seems a bit strange/convoluted. Why not do something like > > ``tuple(model.regression_range or [])``? > > If a crash doesn't have regression range, it should be None instead of empty > tuple. ah. Maybe make the else-branch explicitly return None rather than returning model.regression_range? Or maybe make the regression_range property so that it's already a tuple-or-None so we don't need to do anything here? Sharu Jiang 2017/02/13 18:40:05 Done. Show quoted text On 2017/02/11 01:14:54, wrengr (wrong one) wrote: > On 2017/02/11 00:41:20, Sharu Jiang wrote: > > On 2017/02/11 00:11:34, wrengr (wrong one) wrote: > > > this seems a bit strange/convoluted. Why not do something like > > > ``tuple(model.regression_range or [])``? > > > > If a crash doesn't have regression range, it should be None instead of empty > > tuple. > > ah. > > Maybe make the else-branch explicitly return None rather than returning > model.regression_range? Or maybe make the regression_range property so that it's > already a tuple-or-None so we don't need to do anything here? Done.
	208 logging.info('The analysis of %s has already been done.',

	209 repr(crash_data.identifiers))

	210 return False

	211

	212 model = model or self.CreateAnalysis(crash_data.identifiers)

	213 model.Initialize(crash_data)

	214 model.put()

	215 return True

	216

	217 def ProcessResultForPublishing(self, result, key): # pragma: no cover

254 """Client specific processing of result data for publishing."""	218 """Client specific processing of result data for publishing."""

255 raise NotImplementedError()	219 raise NotImplementedError()

256	220

257 def GetPublishableResult(self, crash_identifiers, analysis):	221 def GetPublishableResult(self, crash_identifiers, analysis):

258 """Convert a culprit result into a publishable result for client.	222 """Converts a culprit result into a publishable result for client.

259	223

260 Note, this function must be called by a concrete subclass of CrashAnalysis	224 Note, this function must be called by a concrete subclass of CrashAnalysis

261 which implements the ProcessResultForPublishing method.	225 which implements the ProcessResultForPublishing method.

262	226

263 Args:	227 Args:

264 crash_identifiers (dict): Dict containing identifiers that can uniquely	228 crash_identifiers (dict): Dict containing identifiers that can uniquely

265 identify CrashAnalysis entity.	229 identify CrashAnalysis entity.

266 analysis (CrashAnalysis model): Model containing culprit result and other	230 analysis (CrashAnalysis model): Model containing culprit result and other

267 analysis information.	231 analysis information.

268	232

(...skipping 15 matching lines...) Expand all Loading...
284 'client_id': self.client_id,	248 'client_id': self.client_id,

285 'result': result,	249 'result': result,

286 }	250 }

287	251

288 # TODO(wrengr): This is only called by ``CrashAnalysisPipeline.run``;	252 # TODO(wrengr): This is only called by ``CrashAnalysisPipeline.run``;

289 # we should be able to adjust things so that we only need to take in	253 # we should be able to adjust things so that we only need to take in

290 # ``crash_identifiers``, or a CrashReport, rather than taking in the	254 # ``crash_identifiers``, or a CrashReport, rather than taking in the

291 # whole model. And/or, we should just inline this there.	255 # whole model. And/or, we should just inline this there.

292 # TODO(http://crbug.com/659346): coverage tests for this class, not	256 # TODO(http://crbug.com/659346): coverage tests for this class, not

293 # just for FinditForFracas.	257 # just for FinditForFracas.

294 def FindCulprit(self, model): # pragma: no cover	258 def FindCulprit(self, crash_report): # pragma: no cover

295 """Given a CrashAnalysis ndb.Model, return a Culprit."""	259 """Given a CrashAnalysis ndb.Model, returns a Culprit."""

296 stacktrace = self.ParseStacktrace(model)	260 return self._Predator().FindCulprit(crash_report)

297 if stacktrace is None:

298 return None

299

300 return self._predator.FindCulprit(CrashReport(

301 crashed_version = model.crashed_version,

302 signature = model.signature,

303 platform = model.platform,

304 stacktrace = stacktrace,

305 regression_range = model.regression_range))

OLD	NEW