appengine/findit/crash/changelist_classifier.py - Issue 2588513002: [Predator] renamed "Result" to "Suspect"

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2588513002: [Predator] renamed "Result" to "Suspect" (Closed)

Patch Set: Removing redundant import Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import logging	5 import logging

6 from collections import defaultdict	6 from collections import defaultdict

7 from collections import namedtuple	7 from collections import namedtuple

8	8

9 from common import chrome_dependency_fetcher	9 from common import chrome_dependency_fetcher

10 from crash import crash_util	10 from crash import crash_util

11 from crash.results import MatchResults	11 from crash.suspect import Suspect

	12 from crash.suspect import Suspects

12 from crash.scorers.aggregated_scorer import AggregatedScorer	13 from crash.scorers.aggregated_scorer import AggregatedScorer

13 from crash.scorers.min_distance import MinDistance	14 from crash.scorers.min_distance import MinDistance

14 from crash.scorers.top_frame_index import TopFrameIndex	15 from crash.scorers.top_frame_index import TopFrameIndex

15 from crash.stacktrace import CallStack	16 from crash.stacktrace import CallStack

16 from crash.stacktrace import Stacktrace	17 from crash.stacktrace import Stacktrace

17 from libs.gitiles.diff import ChangeType	18 from libs.gitiles.diff import ChangeType

18	19

19	20

20 class ChangelistClassifier(namedtuple('ChangelistClassifier',	21 class ChangelistClassifier(namedtuple('ChangelistClassifier',

21 ['repository', 'top_n_frames', 'top_n_results', 'confidence_threshold'])):	22 ['repository', 'top_n_frames', 'top_n_results', 'confidence_threshold'])):

22 __slots__ = ()	23 __slots__ = ()

23	24

24 def __new__(cls, repository,	25 def __new__(cls, repository,

25 top_n_frames, top_n_results=3, confidence_threshold=0.999):	26 top_n_frames, top_n_results=3, confidence_threshold=0.999):

26 """Args:	27 """Args:

27 repository (Repository): the Git repository for getting CLs to classify.	28 repository (Repository): the Git repository for getting CLs to classify.

28 top_n_frames (int): how many frames of each callstack to look at.	29 top_n_frames (int): how many frames of each callstack to look at.

29 top_n_results (int): maximum number of results to return.	30 top_n_results (int): maximum number of suspects to return.

30 confidence_threshold (float): In [0,1], above which we only return	31 confidence_threshold (float): In [0,1], above which we only return

31 the first result.	32 the first suspect.

32 """	33 """

33 return super(cls, ChangelistClassifier).__new__(cls,	34 return super(cls, ChangelistClassifier).__new__(cls,

34 repository, top_n_frames, top_n_results, confidence_threshold)	35 repository, top_n_frames, top_n_results, confidence_threshold)

35	36

36 def __str__(self): # pragma: no cover	37 def __str__(self): # pragma: no cover

37 return ('%s(top_n_frames=%d, top_n_results=%d, confidence_threshold=%g)'	38 return ('%s(top_n_frames=%d, top_n_results=%d, confidence_threshold=%g)'

38 % (self.__class__.__name__,	39 % (self.__class__.__name__,

39 self.top_n_frames,	40 self.top_n_frames,

40 self.top_n_results,	41 self.top_n_results,

41 self.confidence_threshold))	42 self.confidence_threshold))

42	43

43 def __call__(self, report):	44 def __call__(self, report):

44 """Finds changelists suspected of being responsible for the crash report.	45 """Finds changelists suspected of being responsible for the crash report.

45	46

46 Args:	47 Args:

47 report (CrashReport): the report to be analyzed.	48 report (CrashReport): the report to be analyzed.

48	49

49 Returns:	50 Returns:

50 List of Results, sorted by confidence from highest to lowest.	51 List of ``Suspect``s, sorted by confidence from highest to lowest.

51 """	52 """

52 if not report.regression_range:	53 if not report.regression_range:

53 logging.warning('ChangelistClassifier.__call__: Missing regression range '	54 logging.warning('ChangelistClassifier.__call__: Missing regression range '

54 'for report: %s', str(report))	55 'for report: %s', str(report))

55 return []	56 return []

56 last_good_version, first_bad_version = report.regression_range	57 last_good_version, first_bad_version = report.regression_range

57 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',	58 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',

58 last_good_version, first_bad_version)	59 last_good_version, first_bad_version)

59	60

60 # Restrict analysis to just the top n frames in each callstack.	61 # Restrict analysis to just the top n frames in each callstack.

(...skipping 28 matching lines...) Expand all Loading...
89 continue	90 continue

90 regression_deps_rolls[dep_path] = dep_roll	91 regression_deps_rolls[dep_path] = dep_roll

91	92

92 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(	93 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(

93 regression_deps_rolls, stack_deps, self.repository)	94 regression_deps_rolls, stack_deps, self.repository)

94 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(	95 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(

95 stacktrace, stack_deps)	96 stacktrace, stack_deps)

96	97

97 # TODO: argument order is inconsistent from others. Repository should	98 # TODO: argument order is inconsistent from others. Repository should

98 # be last argument.	99 # be last argument.

99 results = FindMatchResults(dep_to_file_to_changelogs,	100 suspects = FindSuspects(dep_to_file_to_changelogs,

100 dep_to_file_to_stack_infos,	101 dep_to_file_to_stack_infos,

101 stack_deps, self.repository, ignore_cls)	102 stack_deps, self.repository, ignore_cls)

102 if not results:	103 if not suspects:

103 return []	104 return []

104	105

105 # TODO(wrengr): we should be able to do this map/filter/sort in one pass.	106 # Set confidence, reasons, and changed_files.

106 # Set result.confidence, result.reasons and result.changed_files.

107 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])	107 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

108 map(aggregated_scorer.Score, results)	108 map(aggregated_scorer.Score, suspects)

109	109

110 # Filter all the 0 confidence results.	110 # Filter all the 0 confidence results.

111 results = filter(lambda r: r.confidence != 0, results)	111 suspects = filter(lambda suspect: suspect.confidence != 0, suspects)

112 if not results:	112 if not suspects:

113 return []	113 return []

114	114

115 sorted_results = sorted(results, key=lambda r: -r.confidence)	115 suspects.sort(key=lambda suspect: -suspect.confidence)

116	116

117 max_results = (1 if sorted_results[0].confidence > self.confidence_threshold	117 max_results = (1 if suspects[0].confidence > self.confidence_threshold

118 else self.top_n_results)	118 else self.top_n_results)

119	119

120 return sorted_results[:max_results]	120 return suspects[:max_results]

121	121

122	122

123 def GetDepsInCrashStack(crash_stack, crash_deps):	123 def GetDepsInCrashStack(crash_stack, crash_deps):

124 """Gets Dependencies in crash stack."""	124 """Gets Dependencies in crash stack."""

125 if not crash_stack:	125 if not crash_stack:

126 return {}	126 return {}

127	127

128 stack_deps = {}	128 stack_deps = {}

129 for frame in crash_stack:	129 for frame in crash_stack:

130 if frame.dep_path:	130 if frame.dep_path:

(...skipping 126 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
257 if frame.dep_path not in stack_deps:	257 if frame.dep_path not in stack_deps:

258 continue	258 continue

259	259

260 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append((	260 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append((

261 frame, callstack.priority))	261 frame, callstack.priority))

262	262

263 return dep_to_file_to_stack_infos	263 return dep_to_file_to_stack_infos

264	264

265	265

266 # TODO(katesonia): Remove the repository argument after refactoring cl committed.	266 # TODO(katesonia): Remove the repository argument after refactoring cl committed.

267 def FindMatchResults(dep_to_file_to_changelogs,	267 def FindSuspects(dep_to_file_to_changelogs,

268 dep_to_file_to_stack_infos,	268 dep_to_file_to_stack_infos,

269 stack_deps, repository,	269 stack_deps, repository,

270 ignore_cls=None):	270 ignore_cls=None):

271 """Finds results by matching stacktrace and changelogs in regression range.	271 """Finds suspects by matching stacktrace and changelogs in regression range.

272	272

273 This method only applies to those crashes with regression range.	273 This method only applies to those crashes with regression range.

274	274

275 Args:	275 Args:

276 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path	276 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path

277 to ChangeLogs that touched this file.	277 to ChangeLogs that touched this file.

278 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path	278 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path

279 to a list of stack information of this file. A file may occur in several	279 to a list of stack information of this file. A file may occur in several

280 frames, one stack info consists of a StackFrame and the callstack priority	280 frames, one stack info consists of a StackFrame and the callstack priority

281 of it.	281 of it.

282 stack_deps (dict): Represents all the dependencies shown in the crash stack.	282 stack_deps (dict): Represents all the dependencies shown in the crash stack.

283 repository (Repository): Repository to get changelogs and blame from.	283 repository (Repository): Repository to get changelogs and blame from.

284 ignore_cls (set): Set of reverted revisions.	284 ignore_cls (set): Set of reverted revisions.

285	285

286 Returns:	286 Returns:

287 A list of MatchResult instances with confidence and reason unset.	287 A list of ``Suspect`` instances with confidence and reason unset.

288 """	288 """

289 match_results = MatchResults(ignore_cls)	289 suspects = Suspects(ignore_cls)

290	290

291 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():	291 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():

292 file_to_changelogs = dep_to_file_to_changelogs[dep]	292 file_to_changelogs = dep_to_file_to_changelogs[dep]

293	293

294 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():	294 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():

295 for touched_file_path, changelogs in file_to_changelogs.iteritems():	295 for touched_file_path, changelogs in file_to_changelogs.iteritems():

296 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):	296 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):

297 continue	297 continue

298	298

299 repository.repo_url = stack_deps[dep].repo_url	299 repository.repo_url = stack_deps[dep].repo_url

300 blame = repository.GetBlame(touched_file_path,	300 blame = repository.GetBlame(touched_file_path,

301 stack_deps[dep].revision)	301 stack_deps[dep].revision)

302	302

303 # Generate/update each result(changelog) in changelogs, blame is used	303 # Generate/update each suspect(changelog) in changelogs, blame is used

304 # to calculate distance between touched lines and crashed lines in file.	304 # to calculate distance between touched lines and crashed lines in file.

305 match_results.GenerateMatchResults(	305 suspects.GenerateSuspects(

306 touched_file_path, dep, stack_infos, changelogs, blame)	306 touched_file_path, dep, stack_infos, changelogs, blame)

307	307

308 return match_results.values()	308 return suspects.values()

OLD	NEW

« no previous file with comments | « no previous file | appengine/findit/crash/component_classifier.py » ('j') | no next file with comments »