Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(459)

Side by Side Diff: appengine/findit/crash/loglinear/test/changelist_classifier_test.py

Issue 2613153006: [Predator] Add TouchCrashedFileMetaFeature. (Closed)
Patch Set: Add comments. Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import copy 5 import copy
6 import logging 6 import logging
7 import math 7 import math
8 import pprint 8 import pprint
9 9
10 from common.dependency import Dependency 10 from common.dependency import Dependency
11 from common.dependency import DependencyRoll 11 from common.dependency import DependencyRoll
12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher 12 from common.chrome_dependency_fetcher import ChromeDependencyFetcher
13 import crash.changelist_classifier as scorer_changelist_classifier 13 import crash.changelist_classifier as scorer_changelist_classifier
14 from crash.crash_report import CrashReport 14 from crash.crash_report import CrashReport
15 from crash.loglinear.changelist_classifier import LogLinearChangelistClassifier 15 from crash.loglinear.changelist_classifier import LogLinearChangelistClassifier
16 from crash.loglinear.changelist_features.min_distance import MinDistanceFeature 16 from crash.loglinear.changelist_features.touch_crashed_file_meta import (
17 from crash.loglinear.changelist_features.top_frame_index import ( 17 TouchCrashedFileMetaFeature)
18 TopFrameIndexFeature)
19 from crash.loglinear.feature import WrapperMetaFeature 18 from crash.loglinear.feature import WrapperMetaFeature
20 from crash.loglinear.weight import Weight 19 from crash.loglinear.weight import Weight
21 from crash.loglinear.weight import MetaWeight 20 from crash.loglinear.weight import MetaWeight
22 from crash.suspect import AnalysisInfo 21 from crash.suspect import AnalysisInfo
23 from crash.suspect import Suspect 22 from crash.suspect import Suspect
24 from crash.suspect import StackInfo 23 from crash.suspect import StackInfo
25 from crash.stacktrace import CallStack 24 from crash.stacktrace import CallStack
26 from crash.stacktrace import StackFrame 25 from crash.stacktrace import StackFrame
27 from crash.stacktrace import Stacktrace 26 from crash.stacktrace import Stacktrace
28 from crash.test.crash_test_suite import CrashTestSuite 27 from crash.test.crash_test_suite import CrashTestSuite
29 from crash.type_enums import CallStackFormatType 28 from crash.type_enums import CallStackFormatType
30 from crash.type_enums import LanguageType 29 from crash.type_enums import LanguageType
30 from libs.gitiles.blame import Blame
31 from libs.gitiles.blame import Region
31 from libs.gitiles.change_log import ChangeLog 32 from libs.gitiles.change_log import ChangeLog
32 from libs.gitiles.gitiles_repository import GitilesRepository 33 from libs.gitiles.gitiles_repository import GitilesRepository
33 34
34 DUMMY_CHANGELOG1 = ChangeLog.FromDict({ 35 DUMMY_CHANGELOG1 = ChangeLog.FromDict({
35 'author': { 36 'author': {
36 'name': 'r@chromium.org', 37 'name': 'r@chromium.org',
37 'email': 'r@chromium.org', 38 'email': 'r@chromium.org',
38 'time': 'Thu Mar 31 21:24:43 2016', 39 'time': 'Thu Mar 31 21:24:43 2016',
39 }, 40 },
40 'committer': { 41 'committer': {
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
120 DUMMY_REPORT = CrashReport( 121 DUMMY_REPORT = CrashReport(
121 None, None, None, Stacktrace(DUMMY_CALLSTACKS, DUMMY_CALLSTACKS[0]), 122 None, None, None, Stacktrace(DUMMY_CALLSTACKS, DUMMY_CALLSTACKS[0]),
122 (None, None)) 123 (None, None))
123 124
124 125
125 class LogLinearChangelistClassifierTest(CrashTestSuite): 126 class LogLinearChangelistClassifierTest(CrashTestSuite):
126 127
127 def setUp(self): 128 def setUp(self):
128 super(LogLinearChangelistClassifierTest, self).setUp() 129 super(LogLinearChangelistClassifierTest, self).setUp()
129 meta_weight = MetaWeight({ 130 meta_weight = MetaWeight({
130 'MinDistance': Weight(1.), 131 'TouchCrashedFileMeta': MetaWeight({
131 'TopFrameIndex': Weight(1.), 132 'MinDistance': Weight(1.),
133 'TopFrameIndex': Weight(1.),
134 'TouchCrashedFile': Weight(1.),
135 })
132 }) 136 })
133 meta_feature = WrapperMetaFeature([MinDistanceFeature(), 137 get_repository = GitilesRepository.Factory(self.GetMockHttpClient())
134 TopFrameIndexFeature()]) 138 meta_feature = WrapperMetaFeature(
139 [TouchCrashedFileMetaFeature(get_repository)])
135 140
136 self.changelist_classifier = LogLinearChangelistClassifier( 141 self.changelist_classifier = LogLinearChangelistClassifier(
137 GitilesRepository.Factory(self.GetMockHttpClient()), 142 get_repository, meta_feature, meta_weight)
138 meta_feature, meta_weight)
139 143
140 # TODO(http://crbug.com/659346): why do these mocks give coverage 144 # TODO(http://crbug.com/659346): why do these mocks give coverage
141 # failures? That's almost surely hiding a bug in the tests themselves. 145 # failures? That's almost surely hiding a bug in the tests themselves.
142 def testFindItForCrashNoRegressionRange(self): # pragma: no cover 146 def testFindItForCrashNoRegressionRange(self): # pragma: no cover
143 self.mock(ChromeDependencyFetcher, 'GetDependencyRollsDict', lambda *_: {}) 147 self.mock(ChromeDependencyFetcher, 'GetDependencyRollsDict', lambda *_: {})
144 self.mock(ChromeDependencyFetcher, 'GetDependency', lambda *_: {}) 148 self.mock(ChromeDependencyFetcher, 'GetDependency', lambda *_: {})
145 # N.B., for this one test we really do want regression_range=None. 149 # N.B., for this one test we really do want regression_range=None.
146 report = DUMMY_REPORT._replace(regression_range=None) 150 report = CrashReport(None, None, None, Stacktrace(DUMMY_CALLSTACKS,
151 DUMMY_CALLSTACKS[0]),
152 None)
147 self.assertListEqual(self.changelist_classifier(report), []) 153 self.assertListEqual(self.changelist_classifier(report), [])
148 154
149 def testFindItForCrashNoMatchFound(self): 155 def testFindItForCrashNoMatchFound(self):
150 self.mock(scorer_changelist_classifier, 'FindSuspects', lambda *_: []) 156 self.mock(scorer_changelist_classifier, 'FindSuspects', lambda *_: [])
151 self.assertListEqual(self.changelist_classifier(DUMMY_REPORT), []) 157 self.assertListEqual(self.changelist_classifier(DUMMY_REPORT), [])
152 158
153 self.mock(scorer_changelist_classifier, 'FindSuspects', lambda *_: None) 159 self.mock(scorer_changelist_classifier, 'FindSuspects', lambda *_: None)
154 self.assertListEqual(self.changelist_classifier(DUMMY_REPORT), []) 160 self.assertListEqual(self.changelist_classifier(DUMMY_REPORT), [])
155 161
156 def testFindItForCrash(self): 162 def testFindItForCrash(self):
157 suspect1 = Suspect(DUMMY_CHANGELOG1, 'src/') 163 suspect1 = Suspect(DUMMY_CHANGELOG1, 'src/')
164 suspect2 = Suspect(DUMMY_CHANGELOG3, 'src/')
165
166 a_cc_blame = Blame('6', 'src/')
167 a_cc_blame.AddRegions([Region(0, 10, suspect1.changelog.revision,
168 suspect1.changelog.author.name,
169 suspect1.changelog.author.email,
170 suspect1.changelog.author.time)])
171 f_cc_blame = Blame('6', 'src/')
172 f_cc_blame.AddRegions([Region(21, 10, suspect2.changelog.revision,
173 suspect2.changelog.author.name,
174 suspect2.changelog.author.email,
175 suspect2.changelog.author.time)])
176 url_to_blame = {'6/a.cc': a_cc_blame,
177 '6/f.cc': f_cc_blame}
178
179 def _MockGetBlame(_, path, revision):
180 revision_path = '%s/%s' % (revision, path)
181 return url_to_blame.get(revision_path)
182
183 self.mock(GitilesRepository, 'GetBlame', _MockGetBlame)
184 self.mock(scorer_changelist_classifier,
185 'GetChangeLogsForFilesGroupedByDeps',
186 lambda *_: (None, None))
187 self.mock(scorer_changelist_classifier, 'FindSuspects',
188 lambda *_: [suspect1, suspect2])
158 frame1 = StackFrame(0, 'src/', 'func', 'a.cc', 'src/a.cc', [1]) 189 frame1 = StackFrame(0, 'src/', 'func', 'a.cc', 'src/a.cc', [1])
159 frame2 = StackFrame(1, 'src/', 'func', 'a.cc', 'src/a.cc', [7]) 190 frame2 = StackFrame(1, 'src/', 'func', 'a.cc', 'src/a.cc', [7])
160 suspect1.file_to_stack_infos = { 191 frame3 = StackFrame(15, 'src/', 'func', 'f.cc', 'src/f.cc', [1])
161 'a.cc': [StackInfo(frame1, 0), StackInfo(frame2, 0)]
162 }
163 suspect1.file_to_analysis_info = {
164 'a.cc': AnalysisInfo(min_distance=0, min_distance_frame=frame1)
165 }
166
167 suspect2 = Suspect(DUMMY_CHANGELOG3, 'src/')
168 frame3 = StackFrame(5, 'src/', 'func', 'f.cc', 'src/f.cc', [1])
169 suspect2.file_to_stack_infos = {
170 'f.cc': [StackInfo(frame3, 0)]
171 }
172 suspect2.file_to_analysis_info = {
173 'a.cc': AnalysisInfo(min_distance=20, min_distance_frame=frame3)
174 }
175
176 self.mock(scorer_changelist_classifier, 'FindSuspects', lambda *_:
177 [suspect1, suspect2])
178 self.mock(ChromeDependencyFetcher, 'GetDependencyRollsDict', lambda *_:
179 {'src/': DependencyRoll('src/', 'https://repo', '1', '2')})
180 self.mock(ChromeDependencyFetcher, 'GetDependency', lambda *_:
181 {'src/': Dependency('src/', 'https://repo', '2')})
182
183 # N.B., In order to get complete coverage for the code computing
184 # ``dep_to_file_to_stack_infos`` we must (a) have frames on at
185 # least one stack, (b) have frames with dependencies in the
186 # CrashReportWithDependency's ``dependencies``, and (c) have frames
187 # with dependencies *not* in CrashReportWithDependency.
188 frame4 = StackFrame(3, 'src/dep1', 'func', 'f.cc', 'src/dep1/f.cc', [1]) 192 frame4 = StackFrame(3, 'src/dep1', 'func', 'f.cc', 'src/dep1/f.cc', [1])
189 stack0 = CallStack(0, [frame1, frame2]) 193 stacks = [CallStack(0, frame_list=[frame1, frame2, frame3, frame4])]
190 stack1 = CallStack(1, [frame3, frame4]) 194 stacktrace = Stacktrace(stacks, stacks[0])
191 report = DUMMY_REPORT._replace( 195 report = CrashReport('6', 'sig', 'win', stacktrace, ('0', '4'))
192 stacktrace=Stacktrace([stack0, stack1], stack0)) 196 self.mock(ChromeDependencyFetcher, 'GetDependency',
193 197 lambda *_: {'src/': Dependency('src/', 'https://repo', '6')})
194 # TODO(katesonia): this mocking is to cover up a bug where 198 self.mock(ChromeDependencyFetcher, 'GetDependencyRollsDict',
195 # ``_SendRequestForJsonResponse`` returns None (due to MockHttpClient 199 lambda *_: {'src/': DependencyRoll('src/', 'https://repo',
196 # returning 404), which in turn causes ``GitilesRepository.GetChangeLogs`` 200 '0', '4')})
197 # to raise an exception. Really we should fix the bug rather than
198 # hiding it.
199 self.mock(
200 scorer_changelist_classifier,
201 'GetChangeLogsForFilesGroupedByDeps',
202 lambda *_: ({}, []))
203 201
204 suspects = self.changelist_classifier(report) 202 suspects = self.changelist_classifier(report)
205 self.assertTrue(suspects, 203 self.assertTrue(suspects,
206 "Expected suspects, but the classifier didn't return any") 204 'Expected suspects, but the classifier didn\'t return any')
207 205
208 expected_suspects = [ 206 expected_suspects = [
209 { 207 {
210 'review_url': 'https://codereview.chromium.org/3281', 208 'author': 'r@chromium.org',
211 'url': 'https://repo.test/+/3',
212 'author': 'e@chromium.org',
213 'time': 'Thu Apr 1 21:24:43 2016',
214 'project_path': 'src/',
215 'revision': '3',
216 'confidence': math.log(0.2857142857142857 * 0.6),
217 'reasons': ('MinDistance: -0.510826 -- Minimum distance is 20\n'
218 'TopFrameIndex: -1.252763 -- Top frame is #5'),
219 'changed_files': [ 209 'changed_files': [
220 { 210 {
211 'blame_url': None,
221 'file': 'a.cc', 212 'file': 'a.cc',
222 'blame_url': None, 213 'info': ('Distance from touched lines and crashed lines is '
223 'info': 'Minimum distance (LOC) 20, frame #5', 214 '0, in frame #0')
224 }], 215 }
225 }, { 216 ],
217 'confidence': 0.,
218 'project_path': 'src/',
219 'reasons': ('MinDistance: 0.000000 -- Minimum distance is '
220 '0\nTopFrameIndex: 0.000000 -- Top frame is #0\n'
221 'TouchCrashedFile: 0.000000 -- Touched files - a.cc'),
226 'review_url': 'https://codereview.chromium.org/3281', 222 'review_url': 'https://codereview.chromium.org/3281',
227 'url': 'https://repo.test/+/1', 223 'revision': '1',
228 'author': 'r@chromium.org',
229 'time': 'Thu Mar 31 21:24:43 2016', 224 'time': 'Thu Mar 31 21:24:43 2016',
230 'project_path': 'src/', 225 'url': 'https://repo.test/+/1'
231 'revision': '1',
232 'confidence': 0.,
233 'reasons': ('MinDistance: 0.000000 -- Minimum distance is 0\n'
234 'TopFrameIndex: 0.000000 -- Top frame is #0'),
235 'changed_files': [
236 {
237 'file': 'a.cc',
238 'blame_url': None,
239 'info': 'Minimum distance (LOC) 0, frame #0',
240 }],
241 }, 226 },
242 ] 227 ]
243 self.assertListEqual([suspect.ToDict() for suspect in suspects], 228 self.assertListEqual([suspect.ToDict() for suspect in suspects],
244 expected_suspects) 229 expected_suspects)
245 230
246 def testFinditForCrashFilterZeroConfidenceSuspects(self): 231 def testFinditForCrashFilterZeroConfidenceSuspects(self):
247 def _MockFindSuspects(*_): 232 suspect1 = Suspect(DUMMY_CHANGELOG1, 'src/')
248 suspect1 = Suspect(DUMMY_CHANGELOG1, 'src/') 233 suspect2 = Suspect(DUMMY_CHANGELOG3, 'src/')
249 frame1 = StackFrame(0, 'src/', 'func', 'a.cc', 'src/a.cc', [1])
250 frame2 = StackFrame(1, 'src/', 'func', 'a.cc', 'src/a.cc', [7])
251 suspect1.file_to_stack_infos = {
252 'a.cc': [StackInfo(frame1, 0), StackInfo(frame2, 0)]
253 }
254 suspect1.file_to_analysis_info = {
255 'a.cc': AnalysisInfo(min_distance=1, min_distance_frame=frame1)
256 }
257 234
258 suspect2 = Suspect(DUMMY_CHANGELOG3, 'src/') 235 a_cc_blame = Blame('6', 'src/')
259 frame3 = StackFrame(15, 'src/', 'func', 'f.cc', 'src/f.cc', [1]) 236 a_cc_blame.AddRegions([Region(0, 10, suspect1.changelog.revision,
260 suspect2.file_to_stack_infos = { 237 suspect1.changelog.author.name,
261 'f.cc': [StackInfo(frame3, 0)] 238 suspect1.changelog.author.email,
262 } 239 suspect1.changelog.author.time)])
263 suspect2.file_to_analysis_info = { 240 f_cc_blame = Blame('6', 'src/')
264 'f.cc': AnalysisInfo(min_distance=20, min_distance_frame=frame3) 241 f_cc_blame.AddRegions([Region(21, 10, suspect2.changelog.revision,
265 } 242 suspect2.changelog.author.name,
243 suspect2.changelog.author.email,
244 suspect2.changelog.author.time)])
245 url_to_blame = {'6/a.cc': a_cc_blame,
246 '6/f.cc': f_cc_blame}
266 247
267 suspect3 = Suspect(DUMMY_CHANGELOG3, 'src/') 248 def _MockGetBlame(_, path, revision):
268 frame4 = StackFrame(3, 'src/', 'func', 'ff.cc', 'src/ff.cc', [1]) 249 revision_path = '%s/%s' % (revision, path)
269 suspect3.file_to_stack_infos = { 250 return url_to_blame.get(revision_path)
270 'f.cc': [StackInfo(frame4, 0)]
271 }
272 suspect3.file_to_analysis_info = {
273 'f.cc': AnalysisInfo(min_distance=60, min_distance_frame=frame4)
274 }
275 251
276 return [suspect1, suspect2, suspect3] 252 self.mock(GitilesRepository, 'GetBlame', _MockGetBlame)
253 self.mock(scorer_changelist_classifier,
254 'GetChangeLogsForFilesGroupedByDeps',
255 lambda *_: (None, None))
256 self.mock(scorer_changelist_classifier, 'FindSuspects',
257 lambda *_: [suspect1, suspect2])
258 frame1 = StackFrame(0, 'src/', 'func', 'a.cc', 'src/a.cc', [1])
259 frame2 = StackFrame(1, 'src/', 'func', 'a.cc', 'src/a.cc', [7])
260 frame3 = StackFrame(15, 'src/', 'func', 'f.cc', 'src/f.cc', [1])
261 stacks = [CallStack(0, frame_list=[frame1, frame2, frame3])]
262 stacktrace = Stacktrace(stacks, stacks[0])
263 report = CrashReport('6', 'sig', 'win', stacktrace, ('0', '4'))
264 self.mock(ChromeDependencyFetcher, 'GetDependency',
265 lambda *_: {'src/': Dependency('src/', 'https://repo', '6')})
266 self.mock(ChromeDependencyFetcher, 'GetDependencyRollsDict',
267 lambda *_: {'src/': DependencyRoll('src/', 'https://repo',
268 '0', '4')})
277 269
278 self.mock(scorer_changelist_classifier, 'FindSuspects', _MockFindSuspects) 270 suspects = self.changelist_classifier(report)
279
280 suspects = self.changelist_classifier(DUMMY_REPORT)
281 self.assertTrue(suspects, 271 self.assertTrue(suspects,
282 "Expected suspects, but the classifier didn't return any") 272 'Expected suspects, but the classifier didn\'t return any')
283 273
284 expected_suspects = [ 274 expected_suspects = [
285 { 275 {
286 'author': 'r@chromium.org', 276 'author': 'r@chromium.org',
287 'changed_files': [ 277 'changed_files': [
288 { 278 {
289 'blame_url': None, 279 'blame_url': None,
290 'file': 'a.cc', 280 'file': 'a.cc',
291 'info': 'Minimum distance (LOC) 1, frame #0' 281 'info': ('Distance from touched lines and crashed lines is '
282 '0, in frame #0')
292 } 283 }
293 ], 284 ],
294 'confidence': math.log(0.98), 285 'confidence': 0.,
295 'project_path': 'src/', 286 'project_path': 'src/',
296 'reasons': ('MinDistance: -0.020203 -- Minimum distance is 1\n' 287 'reasons': ('MinDistance: 0.000000 -- Minimum distance is '
297 'TopFrameIndex: 0.000000 -- Top frame is #0'), 288 '0\nTopFrameIndex: 0.000000 -- Top frame is #0\n'
289 'TouchCrashedFile: 0.000000 -- Touched files - a.cc'),
298 'review_url': 'https://codereview.chromium.org/3281', 290 'review_url': 'https://codereview.chromium.org/3281',
299 'revision': '1', 291 'revision': '1',
300 'time': 'Thu Mar 31 21:24:43 2016', 292 'time': 'Thu Mar 31 21:24:43 2016',
301 'url': 'https://repo.test/+/1' 293 'url': 'https://repo.test/+/1'
302 }, 294 },
303 ] 295 ]
304 self.assertListEqual([suspect.ToDict() for suspect in suspects], 296 self.assertListEqual([suspect.ToDict() for suspect in suspects],
305 expected_suspects) 297 expected_suspects)
306
307 def testFinditForCrashAllSuspectsWithZeroConfidences(self):
308 """Test that we filter out suspects with too-large frame indices.
309
310 In the mock suspects below we return frames with indices
311 15, 20, 21 which are all larger than the ``max_top_n`` of
312 ``TopFrameIndexFeature``. Therefore we should get a score of zero
313 for that feature, which should cause the suspects to be filtered out.
314 """
315 def _MockFindSuspects(*_):
316 suspect1 = Suspect(DUMMY_CHANGELOG1, 'src/')
317 frame1 = StackFrame(20, 'src/', '', 'func', 'a.cc', [1])
318 frame2 = StackFrame(21, 'src/', '', 'func', 'a.cc', [7])
319 suspect1.file_to_stack_infos = {
320 'a.cc': [StackInfo(frame1, 0), StackInfo(frame2, 0)]
321 }
322 suspect1.file_to_analysis_info = {
323 'a.cc': AnalysisInfo(min_distance=1, min_distance_frame=frame1)
324 }
325
326 suspect2 = Suspect(DUMMY_CHANGELOG3, 'src/')
327 frame3 = StackFrame(15, 'src/', '', 'func', 'f.cc', [1])
328 suspect2.file_to_stack_infos = {
329 'f.cc': [StackInfo(frame3, 0)]
330 }
331 suspect2.min_distance = 20
332 suspect2.file_to_analysis_info = {
333 'f.cc': AnalysisInfo(min_distance=20, min_distance_frame=frame3)
334 }
335
336 return [suspect1, suspect2]
337
338 self.mock(scorer_changelist_classifier, 'FindSuspects', _MockFindSuspects)
339
340 suspects = self.changelist_classifier(DUMMY_REPORT)
341 self.assertFalse(suspects, 'Expected zero suspects, but found some:\n%s'
342 % pprint.pformat([suspect.ToDict() for suspect in suspects]))
OLDNEW
« no previous file with comments | « appengine/findit/crash/loglinear/feature.py ('k') | appengine/findit/crash/loglinear/test/feature_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698