appengine/findit/crash/findit.py - Issue 2663063007: [Predator] Switch from anonymous dict to CrashData.

Side by Side Diff: appengine/findit/crash/findit.py

Issue 2663063007: [Predator] Switch from anonymous dict to CrashData. (Closed)

Patch Set: Rebase. Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import copy	5 import copy

6 import json	6 import json

7 import logging	7 import logging

8	8

9 from google.appengine.ext import ndb	9 from google.appengine.ext import ndb

10	10

11 from common import appengine_util	11 from common import appengine_util

12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher	12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher

13 from common import constants	13 from common import constants

14 from crash.crash_report import CrashReport	14 from crash.component import Component

	15 from crash.component_classifier import ComponentClassifier

	16 from crash.project import Project

	17 from crash.project_classifier import ProjectClassifier

15 from libs import time_util	18 from libs import time_util

16 from model import analysis_status	19 from model import analysis_status

17 from model.crash.crash_config import CrashConfig

18	20

19	21

20 # TODO(http://crbug.com/659346): since most of our unit tests are	22 # TODO(http://crbug.com/659346): since most of our unit tests are

21 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py.	23 # FinditForFracas-specific, wrengr moved them to findit_for_chromecrash_test.py.

22 # However, now we're missing coverage for most of this file (due to the	24 # However, now we're missing coverage for most of this file (due to the

23 # buggy way coverage is computed). Need to add a bunch of new unittests	25 # buggy way coverage is computed). Need to add a bunch of new unittests

24 # to get coverage back up.	26 # to get coverage back up.

25	27

26 # TODO: this class depends on ndb stuff, and should therefore move to	28 # TODO: this class depends on ndb stuff, and should therefore move to

27 # cr-culprit-finder/service/predator as part of the big reorganization.	29 # cr-culprit-finder/service/predator as part of the big reorganization.

28 # This class should be renamed to avoid confustion between Findit and Predator.	30 # This class should be renamed to avoid confustion between Findit and Predator.

29 # Think of a good name (e.g.'PredatorApp') for this class.	31 # Think of a good name (e.g.'PredatorApp') for this class.

30 class Findit(object):	32 class Findit(object):

31	33

32 def __init__(self, get_repository):	34 def __init__(self, get_repository, config):

33 """	35 """

34 Args:	36 Args:

35 get_repository (callable): a function from DEP urls to ``Repository``	37 get_repository (callable): a function from DEP urls to ``Repository``

36 objects, so we can get changelogs and blame for each dep. Notably,	38 objects, so we can get changelogs and blame for each dep. Notably,

37 to keep the code here generic, we make no assumptions about	39 to keep the code here generic, we make no assumptions about

38 which subclass of ``Repository`` this function returns. Thus,	40 which subclass of ``Repository`` this function returns. Thus,

39 it is up to the caller to decide what class to return and handle	41 it is up to the caller to decide what class to return and handle

40 any other arguments that class may require (e.g., an http client	42 any other arguments that class may require (e.g., an http client

41 for ``GitilesRepository``).	43 for ``GitilesRepository``).

	44 config (ndb.CrashConfig): Config for clients and project and component

	45 classifiers.

42 """	46 """

43 self._get_repository = get_repository	47 self._get_repository = get_repository

44 self._dep_fetcher = ChromeDependencyFetcher(get_repository)	48

45 # TODO(http://crbug.com/659354): because self.client is volatile,	49 # The top_n is the number of frames we want to check to get project

46 # we need some way of updating the Predator instance whenever the	50 # classifications.

47 # config changes. How to do that cleanly?	51 projects = [Project(name, path_regexs, function_regexs, host_directories)

48 self._predator = None	52 for name, path_regexs, function_regexs, host_directories

49 self._stacktrace_parser = None	53 in config.project_classifier['project_path_function_hosts']]

	54 self._project_classifier = ProjectClassifier(

	55 projects, config.project_classifier['top_n'],

	56 config.project_classifier['non_chromium_project_rank_priority'])
	wrengr 2017/02/03 18:34:47 newline between 56 and 57? newline between 56 and 57? Sharu Jiang 2017/02/03 23:13:38 Done. Show quoted text On 2017/02/03 18:34:47, wrengr wrote: > newline between 56 and 57? Done.
	57 # The top_n is the number of frames we want to check to get component

	58 # classifications.

	59 components = [Component(component_name, path_regex, function_regex)

	60 for path_regex, function_regex, component_name

	61 in config.component_classifier['path_function_component']]

	62 self._component_classifier = ComponentClassifier(

	63 components, config.component_classifier['top_n'])

	64

	65 # Get client-specific config.

	66 self._client_config = config.GetClientConfig(self.client_id)

50	67

51 # This is a class method because it should be the same for all	68 # This is a class method because it should be the same for all

52 # instances of this class. We can in fact call class methods on	69 # instances of this class. We can in fact call class methods on

53 # instances (not just on the class itself), so we could in principle	70 # instances (not just on the class itself), so we could in principle

54 # get by with just this method. However, a @classmethod is treated	71 # get by with just this method. However, a @classmethod is treated

55 # syntactically like a method, thus we'd need to have the ``()`` at the	72 # syntactically like a method, thus we'd need to have the ``()`` at the

56 # end, unlike for a @property. Thus we have both the class method and	73 # end, unlike for a @property. Thus we have both the class method and

57 # the property, in order to simulate a class property.	74 # the property, in order to simulate a class property.

58 @classmethod	75 @classmethod

59 def _ClientID(cls): # pragma: no cover	76 def _ClientID(cls): # pragma: no cover

60 """Get the client id for this class.	77 """Get the client id for this class.

61	78

62 This class method is private. Unless you really need to access	79 This class method is private. Unless you really need to access

63 this method directly for some reason, you should use the ``client_id``	80 this method directly for some reason, you should use the ``client_id``

64 property instead.	81 property instead.

65	82

66 Returns:	83 Returns:

67 A string which is member of the CrashClient enumeration.	84 A string which is member of the CrashClient enumeration.

68 """	85 """

69 if cls is Findit:	86 if cls is Findit:

70 logging.warning('Findit is abstract, '	87 logging.warning('Findit is abstract, '

71 'but someone constructed an instance and called _ClientID')	88 'but someone constructed an instance and called _ClientID')

72 else:	89 else:

73 logging.warning('Findit subclass %s forgot to implement _ClientID',	90 logging.warning('Findit subclass %s forgot to implement _ClientID',

74 cls.__name__)	91 cls.__name__)

75 raise NotImplementedError()	92 raise NotImplementedError()

76	93

	94 def _Predator(self):

	95 raise NotImplementedError()
	wrengr 2017/02/03 18:34:46 What's this for? What's this for? Sharu Jiang 2017/02/03 23:13:37 Since the ``FindCulprit`` will use self._predator, Show quoted text On 2017/02/03 18:34:46, wrengr wrote: > What's this for? Since the ``FindCulprit`` will use self._predator, so I want to force people to implement it in subclasses and not call it on this abstract class.
	96

77 @property	97 @property

78 def client_id(self):	98 def client_id(self):

79 """Get the client id from the class of this object.	99 """Get the client id from the class of this object.

80	100

81 N.B., this property is static and should not be overridden."""	101 N.B., this property is static and should not be overridden."""

82 return self._ClientID()	102 return self._ClientID()

83	103

84 # TODO(http://crbug.com/659354): can we remove the dependency on

85 # CrashConfig entirely? It'd be better to receive method calls

86 # whenever things change, so that we know the change happened (and

87 # what in particular changed) so that we can update our internal state

88 # as appropriate.

89 @property	104 @property

90 def config(self):	105 def client_config(self):

91 """Get the current value of the client config for the class of this object.	106 """Get the current value of the client config for the class of this object.

92	107

93 N.B., this property is volatile and may change asynchronously.	108 N.B., this property is volatile and may change asynchronously.

94	109

95 If the event of an error this method will return ``None``. That we do	110 If the event of an error this method will return ``None``. That we do

96 not return the empty dict is intentional. All of the circumstances	111 not return the empty dict is intentional. All of the circumstances

97 which would lead to this method returning ``None`` indicate	112 which would lead to this method returning ``None`` indicate

98 underlying bugs in code elsewhere. (E.g., creating instances of	113 underlying bugs in code elsewhere. (E.g., creating instances of

99 abstract classes, implementing a subclass which is not suppported by	114 abstract classes, implementing a subclass which is not suppported by

100 ``CrashConfig``, etc.) Because we return ``None``, any subsequent	115 ``CrashConfig``, etc.) Because we return ``None``, any subsequent

101 calls to ``__getitem__`` or ``get`` will raise method-missing errors;	116 calls to ``__getitem__`` or ``get`` will raise method-missing errors;

102 which serve to highlight the underlying bug. Whereas, if we silently	117 which serve to highlight the underlying bug. Whereas, if we silently

103 returned the empty dict, then calls to ``get`` would "work" just	118 returned the empty dict, then calls to ``get`` would "work" just

104 fine; thereby masking the underlying bug!	119 fine; thereby masking the underlying bug!

105 """	120 """

106 return CrashConfig.Get().GetClientConfig(self.client_id)	121 return self._client_config

107	122

108 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or	123 # TODO(http://crbug.com/644476): rename to CanonicalizePlatform or

109 # something like that.	124 # something like that.

110 def RenamePlatform(self, platform):	125 def RenamePlatform(self, platform):

111 """Remap the platform to a different one, based on the config."""	126 """Remap the platform to a different one, based on the config."""

112 # TODO(katesonia): Remove the default value after adding validity check to	127 return self.client_config['platform_rename'].get(platform, platform)
	wrengr 2017/02/03 18:34:46 would be nice if ``platform_rename`` were a proper would be nice if ``platform_rename`` were a property of the config rather than a key Sharu Jiang 2017/02/03 23:13:38 Now the client config are all dicts, later we may Show quoted text On 2017/02/03 18:34:46, wrengr wrote: > would be nice if ``platform_rename`` were a property of the config rather than a > key Now the client config are all dicts, later we may refactor it.
113 # config.

114 return self.config.get('platform_rename', {}).get(platform, platform)

115	128

116 def CheckPolicy(self, crash_data): # pylint: disable=W0613	129 # TODO(http://crbug.com/644476): rename this to something like

	130 # _NewAnalysis, since it only does the "allocation" and needs to/will

	131 # be followed up with _InitializeAnalysis anyways.

	132 def CreateAnalysis(self, crash_identifiers): # pragma: no cover

	133 raise NotImplementedError()

	134

	135 def GetAnalysis(self, crash_identifiers): # pragma: no cover

	136 """Returns the CrashAnalysis for the ``crash_identifiers``, if one exists.

	137

	138 Args:

	139 crash_identifiers (JSON): ??

	140

	141 Returns:

	142 If a CrashAnalysis ndb.Model already exists for the

	143 ``crash_identifiers``, then we return it. Otherwise, returns None.

	144 """

	145 raise NotImplementedError()

	146

	147 def GetCrashBuffer(self, crash_data): # pragma: no cover

	148 raise NotImplementedError()

	149

	150 def _CheckPolicy(self, crash_buffer): # pylint: disable=W0613

117 """Check whether this client supports analyzing the given report.	151 """Check whether this client supports analyzing the given report.

118	152

119 Some clients only support analysis for crashes on certain platforms	153 Some clients only support analysis for crashes on certain platforms

120 or channels, etc. This method checks to see whether this client can	154 or channels, etc. This method checks to see whether this client can

121 analyze the given crash. The default implementation on the Findit	155 analyze the given crash. The default implementation on the Findit

122 base class returns None for everything, so that unsupported clients	156 base class returns None for everything, so that unsupported clients

123 reject everything, as expected.	157 reject everything, as expected.

124	158

125 Args:	159 Args:

126 crash_data (JSON): ??	160 crash_data (JSON): ??

127	161

128 Returns:	162 Returns:

129 If satisfied, we return the ``crash_data`` which may have had some	163 If satisfied, we return the ``crash_data`` which may have had some

130 fields modified. Otherwise returns None.	164 fields modified. Otherwise returns None.

131 """	165 """

132 return None	166 if not self.client_config:

	167 logging.info('No configuration of client %s, analysis is not supported',

	168 self.client_id)

	169 return False

133	170

134 # TODO(http://crbug.com/644476): rename this to something like	171 for blacklist_marker in self.client_config['signature_blacklist_markers']:

135 # _NewAnalysis, since it only does the "allocation" and needs to/will	172 if blacklist_marker in crash_buffer.signature:

136 # be followed up with _InitializeAnalysis anyways.	173 logging.info('%s signature is not supported.', blacklist_marker)

137 def CreateAnalysis(self, crash_identifiers): # pylint: disable=W0613	174 return False

138 return None

139	175

140 def GetAnalysis(self, crash_identifiers): # pylint: disable=W0613	176 return True

141 """Returns the CrashAnalysis for the ``crash_identifiers``, if one exists.	177

	178 @ndb.transactional

	179 def NeedsNewAnalysis(self, crash_buffer):

	180 """Checks if the crash needs new anlysis.

	181

	182 If a new analysis needs to be scheduled, initialize a ``CrashAnalysis``

	183 model using crash_buffer and put it in datastore for later culprit

	184 analyzing.

142	185

143 Args:	186 Args:

144 crash_identifiers (JSON): ??	187 crash_buffer (CrashBuffer): Crash buffer that contains all the information

	188 about the crash.

145	189

146 Returns:	190 Returns:

147 If a CrashAnalysis ndb.Model already exists for the	191 Boolean of whether a new analysis needs to be scheduled.

148 ``crash_identifiers``, then we return it. Otherwise, returns None.

149 """	192 """

150 return None	193 # Check policy and modify the crash_data as needed.

	194 if not self._CheckPolicy(crash_buffer):

	195 logging.info('The analysis of %s is not supported, task will not be '

	196 'scheduled.', str(crash_buffer.identifiers))

	197 return False

151	198

152 # TODO(wrengr): this should be a method on CrashAnalysis, not on Findit.	199 # Rename platform if configured.

153 # TODO(http://crbug.com/659346): coverage tests for this class, not	200 crash_buffer.platform = self.RenamePlatform(crash_buffer.platform)

154 # just for FinditForFracas.

155 def _InitializeAnalysis(self, model, crash_data): # pragma: no cover

156 """(Re)Initialize a CrashAnalysis ndb.Model, but do not ``put()`` it yet.

157	201

158 This method is only ever called from _NeedsNewAnalysis which is only	202 model = self.GetAnalysis(crash_buffer.identifiers)

159 ever called from ScheduleNewAnalysis. It is used for filling in the	203 # N.B., for mocking reasons, we must not call DetectRegressionRange

160 fields of a CrashAnalysis ndb.Model for the first time (though it	204 # directly, but rather must access it indirectly through the module.

161 can also be used to re-initialize a given CrashAnalysis). Subclasses	205 if (model and not model.failed and

162 should extend (not override) this to (re)initialize any	206 crash_buffer.regression_range == (tuple(model.regression_range)

163 client-specific fields they may have."""	207 if model.regression_range else

164 # Get rid of any previous values there may have been.	208 model.regression_range)):

165 model.Reset()	209 logging.info('The analysis of %s has already been done.',

	210 repr(crash_buffer.identifiers))

	211 return False

166	212

167 # Set the version.	213 model = model or self.CreateAnalysis(crash_buffer.identifiers)

168 # ``handlers.crash.test.crash_handler_test.testAnalysisScheduled``	214 model.Initialize(crash_buffer)

169 # provides and expects this field to be called 'chrome_version',	215 model.put()

170 # whereas everyone else (e.g., in ``crash.test.crash_pipeline_test``	216 return True

171 # the tests ``testAnalysisNeededIfNoAnalysisYet``,

172 # ``testRunningAnalysisNoSuspectsFound``, ``testRunningAnalysis``,

173 # ``testAnalysisNeededIfLastOneFailed``,

174 # ``testRunningAnalysisWithSuspectsCls``) expects it to be called

175 # 'crashed_version'. The latter is the better/more general name,

176 # so the former needs to be changed in order to get rid of this

177 # defaulting ugliness.

178 model.crashed_version = crash_data.get('crashed_version',

179 crash_data.get('chrome_version', None))

180	217

181 # Set (other) common properties.	218 def ProcessResultForPublishing(self, result, key): # pragma: no cover

182 model.stack_trace = crash_data['stack_trace']

183 model.signature = crash_data['signature']

184 model.platform = crash_data['platform']

185 # TODO(wrengr): The only reason to have _InitializeAnalysis as a

186 # method of the Findit class rather than as a method on CrashAnalysis

187 # is so we can assert that crash_data['client_id'] == self.client_id.

188 # So, either we should do that, or else we should move this to be

189 # a method on CrashAnalysis.

190 model.client_id = self.client_id

191 model.regression_range = crash_data.get('regression_range', None)

192

193 # Set progress properties.

194 model.status = analysis_status.PENDING

195 model.requested_time = time_util.GetUTCNow()

196

197 @ndb.transactional

198 def _NeedsNewAnalysis(self, crash_data):

199 raise NotImplementedError()

200

201 # TODO(wrengr): does the parser actually need the version, signature,

202 # and platform? If not, then we should be able to just pass the string

203 # to be parsed (which would make a lot more sense than passing the

204 # whole model).

205 # TODO(http://crbug.com/659346): coverage tests for this class, not

206 # just for FinditForFracas.

207 def ParseStacktrace(self, model): # pragma: no cover

208 """Parse a CrashAnalysis's ``stack_trace`` string into a Stacktrace object.

209

210 Args:

211 model (CrashAnalysis): The model containing the stack_trace string

212 to be parsed.

213

214 Returns:

215 On success, returns a Stacktrace object; on failure, returns None.

216 """

217 # Use up-to-date ``top_n`` in self.config to filter top n frames.

218 stacktrace = self._stacktrace_parser.Parse(

219 model.stack_trace,

220 self._dep_fetcher.GetDependency(model.crashed_version, model.platform),

221 model.signature,

222 self.config.get('top_n'))

223 if not stacktrace:

224 logging.warning('Failed to parse the stacktrace %s', model.stack_trace)

225 return None

226

227 return stacktrace

228

229 def ProcessResultForPublishing(self, result, key):

230 """Client specific processing of result data for publishing."""	219 """Client specific processing of result data for publishing."""

231 raise NotImplementedError()	220 raise NotImplementedError()

232	221

233 def GetPublishableResult(self, crash_identifiers, analysis):	222 def GetPublishableResult(self, crash_identifiers, analysis):

234 """Convert a culprit result into a publishable result for client.	223 """Converts a culprit result into a publishable result for client.

235	224

236 Note, this function must be called by a concrete subclass of CrashAnalysis	225 Note, this function must be called by a concrete subclass of CrashAnalysis

237 which implements the ProcessResultForPublishing method.	226 which implements the ProcessResultForPublishing method.

238	227

239 Args:	228 Args:

240 crash_identifiers (dict): Dict containing identifiers that can uniquely	229 crash_identifiers (dict): Dict containing identifiers that can uniquely

241 identify CrashAnalysis entity.	230 identify CrashAnalysis entity.

242 analysis (CrashAnalysis model): Model containing culprit result and other	231 analysis (CrashAnalysis model): Model containing culprit result and other

243 analysis information.	232 analysis information.

244	233

(...skipping 15 matching lines...) Expand all Loading...
260 'client_id': self.client_id,	249 'client_id': self.client_id,

261 'result': result,	250 'result': result,

262 }	251 }

263	252

264 # TODO(wrengr): This is only called by ``CrashAnalysisPipeline.run``;	253 # TODO(wrengr): This is only called by ``CrashAnalysisPipeline.run``;

265 # we should be able to adjust things so that we only need to take in	254 # we should be able to adjust things so that we only need to take in

266 # ``crash_identifiers``, or a CrashReport, rather than taking in the	255 # ``crash_identifiers``, or a CrashReport, rather than taking in the

267 # whole model. And/or, we should just inline this there.	256 # whole model. And/or, we should just inline this there.

268 # TODO(http://crbug.com/659346): coverage tests for this class, not	257 # TODO(http://crbug.com/659346): coverage tests for this class, not

269 # just for FinditForFracas.	258 # just for FinditForFracas.

270 def FindCulprit(self, model): # pragma: no cover	259 def FindCulprit(self, crash_report): # pragma: no cover

271 """Given a CrashAnalysis ndb.Model, return a Culprit."""	260 """Given a CrashAnalysis ndb.Model, returns a Culprit."""

272 stacktrace = self.ParseStacktrace(model)	261 return self._Predator().FindCulprit(crash_report)

273 if stacktrace is None:

274 return None

275

276 return self._predator.FindCulprit(CrashReport(

277 crashed_version = model.crashed_version,

278 signature = model.signature,

279 platform = model.platform,

280 stacktrace = stacktrace,

281 regression_range = model.regression_range))

OLD	NEW

« no previous file with comments | « appengine/findit/crash/crash_report_with_dependencies.py ('k') | appengine/findit/crash/findit_for_chromecrash.py » ('j') | no next file with comments »