appengine/findit/crash/changelist_classifier.py - Issue 2608483002: Changed FindSuspects to take a Repository factory, rather than mutating it

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2608483002: Changed FindSuspects to take a Repository factory, rather than mutating it (Closed)

Patch Set: added todo for https://crbug.com/677224 Created 3 years, 12 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import logging	5 import logging

6 from collections import defaultdict	6 from collections import defaultdict

7 from collections import namedtuple	7 from collections import namedtuple

8	8

9 from common import chrome_dependency_fetcher	9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher

10 from crash import crash_util	10 from crash import crash_util

11 from crash.suspect import StackInfo	11 from crash.suspect import StackInfo

12 from crash.suspect import Suspect	12 from crash.suspect import Suspect

13 from crash.suspect import SuspectMap	13 from crash.suspect import SuspectMap

14 from crash.scorers.aggregated_scorer import AggregatedScorer	14 from crash.scorers.aggregated_scorer import AggregatedScorer

15 from crash.scorers.min_distance import MinDistance	15 from crash.scorers.min_distance import MinDistance

16 from crash.scorers.top_frame_index import TopFrameIndex	16 from crash.scorers.top_frame_index import TopFrameIndex

17 from crash.stacktrace import CallStack	17 from crash.stacktrace import CallStack

18 from crash.stacktrace import Stacktrace	18 from crash.stacktrace import Stacktrace

19 from libs.gitiles.diff import ChangeType	19 from libs.gitiles.diff import ChangeType

20	20

21	21

22 class ChangelistClassifier(namedtuple('ChangelistClassifier',	22 class ChangelistClassifier(namedtuple('ChangelistClassifier',

23 ['repository', 'top_n_results', 'confidence_threshold'])):	23 ['repository', 'get_repository', 'top_n_results', 'confidence_threshold'])):

24 __slots__ = ()	24 __slots__ = ()

25	25

26 def __new__(cls, repository, top_n_results=3, confidence_threshold=0.999):	26 def __new__(cls, repository, get_repository, top_n_results=3,

	27 confidence_threshold=0.999):

27 """Args:	28 """Args:

28 repository (Repository): the Git repository for getting CLs to classify.	29 repository (Repository): the Git repository of the main project

	30 we're trying to classify CLs from.

	31 get_repository (callable): a function from DEP urls to ``Repository``

	32 objects, so we can get changelogs and blame for each dep. Notably,

	33 to keep the code here generic, we make no assumptions about

	34 which subclass of ``Repository`` this function returns. Thus,

	35 it is up to the caller to decide what class to return and handle

	36 any other arguments that class may require (e.g., an http client

	37 for ``GitilesRepository``).

29 top_n_results (int): maximum number of results to return.	38 top_n_results (int): maximum number of results to return.

30 confidence_threshold (float): In [0,1], above which we only return	39 confidence_threshold (float): In [0,1], above which we only return

31 the first suspect.	40 the first suspect.

32 """	41 """

33 return super(cls, ChangelistClassifier).__new__(	42 return super(cls, ChangelistClassifier).__new__(

34 cls, repository, top_n_results, confidence_threshold)	43 cls, repository, get_repository, top_n_results, confidence_threshold)

35	44

36 def __str__(self): # pragma: no cover	45 def __str__(self): # pragma: no cover

37 return ('%s(top_n_results=%d, confidence_threshold=%g)'	46 return ('%s(top_n_results=%d, confidence_threshold=%g)'

38 % (self.__class__.__name__,	47 % (self.__class__.__name__,

39 self.top_n_results,	48 self.top_n_results,

40 self.confidence_threshold))	49 self.confidence_threshold))

41	50

42 def __call__(self, report):	51 def __call__(self, report):

43 """Finds changelists suspected of being responsible for the crash report.	52 """Finds changelists suspected of being responsible for the crash report.

44	53

45 This function assumes the report's stacktrace has already had any necessary	54 This function assumes the report's stacktrace has already had any necessary

46 preprocessing (like filtering or truncating) applied.	55 preprocessing (like filtering or truncating) applied.

47	56

48 Args:	57 Args:

49 report (CrashReport): the report to be analyzed.	58 report (CrashReport): the report to be analyzed.

50	59

51 Returns:	60 Returns:

52 List of ``Suspect``s, sorted by confidence from highest to lowest.	61 List of ``Suspect``s, sorted by confidence from highest to lowest.

53 """	62 """

54 if not report.regression_range:	63 if not report.regression_range:

55 logging.warning('ChangelistClassifier.__call__: Missing regression range '	64 logging.warning('ChangelistClassifier.__call__: Missing regression range '

56 'for report: %s', str(report))	65 'for report: %s', str(report))

57 return []	66 return []

58 last_good_version, first_bad_version = report.regression_range	67 last_good_version, first_bad_version = report.regression_range

59 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',	68 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',

60 last_good_version, first_bad_version)	69 last_good_version, first_bad_version)

61	70

	71 dependency_fetcher = ChromeDependencyFetcher(self.repository)

	72

62 # We are only interested in the deps in crash stack (the callstack that	73 # We are only interested in the deps in crash stack (the callstack that

63 # caused the crash).	74 # caused the crash).

64 # TODO(wrengr): we may want to receive the crash deps as an argument,	75 # TODO(wrengr): we may want to receive the crash deps as an argument,

65 # so that when this method is called via Findit.FindCulprit, we avoid	76 # so that when this method is called via Findit.FindCulprit, we avoid

66 # doing redundant work creating it.	77 # doing redundant work creating it.

67 stack_deps = GetDepsInCrashStack(	78 stack_deps = GetDepsInCrashStack(

68 report.stacktrace.crash_stack,	79 report.stacktrace.crash_stack,

69 chrome_dependency_fetcher.ChromeDependencyFetcher(	80 dependency_fetcher.GetDependency(

70 self.repository).GetDependency(report.crashed_version,	81 report.crashed_version, report.platform))

71 report.platform))

72	82

73 # Get dep and file to changelogs, stack_info and blame dicts.	83 # Get dep and file to changelogs, stack_info and blame dicts.

74 dep_rolls = chrome_dependency_fetcher.ChromeDependencyFetcher(	84 dep_rolls = dependency_fetcher.GetDependencyRollsDict(

75 self.repository).GetDependencyRollsDict(	85 last_good_version, first_bad_version, report.platform)

76 last_good_version, first_bad_version, report.platform)

77	86

78 # Regression of a dep added/deleted (old_revision/new_revision is None) can	87 # Regression of a dep added/deleted (old_revision/new_revision is None) can

79 # not be known for sure and this case rarely happens, so just filter them	88 # not be known for sure and this case rarely happens, so just filter them

80 # out.	89 # out.

81 regression_deps_rolls = {}	90 regression_deps_rolls = {}

82 for dep_path, dep_roll in dep_rolls.iteritems():	91 for dep_path, dep_roll in dep_rolls.iteritems():

83 if not dep_roll.old_revision or not dep_roll.new_revision:	92 if not dep_roll.old_revision or not dep_roll.new_revision:

84 logging.info('Skip %s denpendency %s',	93 logging.info('Skip %s denpendency %s',

85 'added' if dep_roll.new_revision else 'deleted', dep_path)	94 'added' if dep_roll.new_revision else 'deleted', dep_path)

86 continue	95 continue

87 regression_deps_rolls[dep_path] = dep_roll	96 regression_deps_rolls[dep_path] = dep_roll

88	97

89 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(	98 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(

90 regression_deps_rolls, stack_deps, self.repository)	99 regression_deps_rolls, stack_deps, self.repository)

91 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(	100 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(

92 report.stacktrace, stack_deps)	101 report.stacktrace, stack_deps)

93	102

94 # TODO: argument order is inconsistent from others. Repository should

95 # be last argument.

96 suspects = FindSuspects(dep_to_file_to_changelogs,	103 suspects = FindSuspects(dep_to_file_to_changelogs,

97 dep_to_file_to_stack_infos,	104 dep_to_file_to_stack_infos,

98 stack_deps, self.repository, ignore_cls)	105 stack_deps, self.get_repository, ignore_cls)

99 if not suspects:	106 if not suspects:

100 return []	107 return []

101	108

102 # Set confidence, reasons, and changed_files.	109 # Set confidence, reasons, and changed_files.

103 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])	110 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

104 map(aggregated_scorer.Score, suspects)	111 map(aggregated_scorer.Score, suspects)

105	112

106 # Filter all the 0 confidence results.	113 # Filter all the 0 confidence results.

107 suspects = filter(lambda suspect: suspect.confidence != 0, suspects)	114 suspects = filter(lambda suspect: suspect.confidence != 0, suspects)

108 if not suspects:	115 if not suspects:

(...skipping 138 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
247 # We only care about those dependencies in crash stack.	254 # We only care about those dependencies in crash stack.

248 if frame.dep_path not in stack_deps:	255 if frame.dep_path not in stack_deps:

249 continue	256 continue

250	257

251 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append(	258 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append(

252 StackInfo(frame, callstack.priority))	259 StackInfo(frame, callstack.priority))

253	260

254 return dep_to_file_to_stack_infos	261 return dep_to_file_to_stack_infos

255	262

256	263

257 # TODO(katesonia): Remove the repository argument after refatoring cl committed.	264 # TODO(katesonia): Remove the repository argument after refatoring cl committed.
	Martin Barbella 2016/12/29 21:51:05 Is this now done, or was additional refactoring pl Is this now done, or was additional refactoring planned? Sharu Jiang 2016/12/29 21:58:20 No, this TODO is about moving this function to be Show quoted text On 2016/12/29 21:51:05, Martin Barbella wrote: > Is this now done, or was additional refactoring planned? No, this TODO is about moving this function to be a method of ChangelistClassifier.
258 def FindSuspects(dep_to_file_to_changelogs,	265 def FindSuspects(dep_to_file_to_changelogs,

259 dep_to_file_to_stack_infos,	266 dep_to_file_to_stack_infos,

260 stack_deps, repository,	267 stack_deps, get_repository,

261 ignore_cls=None):	268 ignore_cls=None):

262 """Finds suspects by matching stacktrace and changelogs in regression range.	269 """Finds suspects by matching stacktrace and changelogs in regression range.

263	270

264 This method only applies to those crashes with regression range.	271 This method only applies to those crashes with regression range.

265	272

266 Args:	273 Args:

267 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path	274 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path

268 to ChangeLogs that touched this file.	275 to ChangeLogs that touched this file.

269 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path	276 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path

270 to a list of stack information of this file. A file may occur in several	277 to a list of stack information of this file. A file may occur in several

271 frames, one stack info consist of a StackFrame and the callstack priority	278 frames, one stack info consist of a StackFrame and the callstack priority

272 of it.	279 of it.

273 stack_deps (dict): Represents all the dependencies shown in the crash stack.	280 stack_deps (dict): Represents all the dependencies shown in the crash stack.

274 repository (Repository): Repository to get changelogs and blame from.	281 get_repository (callable): a function from urls to ``Repository``

	282 objects, so we can get changelogs and blame for each dep.

275 ignore_cls (set): Set of reverted revisions.	283 ignore_cls (set): Set of reverted revisions.

276	284

277 Returns:	285 Returns:

278 A list of ``Suspect`` instances with confidence and reason unset.	286 A list of ``Suspect`` instances with confidence and reason unset.

279 """	287 """

280 suspects = SuspectMap(ignore_cls)	288 suspects = SuspectMap(ignore_cls)

281	289

282 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():	290 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():

283 file_to_changelogs = dep_to_file_to_changelogs[dep]	291 file_to_changelogs = dep_to_file_to_changelogs[dep]

284	292

285 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():	293 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():

286 for touched_file_path, changelogs in file_to_changelogs.iteritems():	294 for touched_file_path, changelogs in file_to_changelogs.iteritems():

287 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):	295 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):

288 continue	296 continue

289	297

290 repository.repo_url = stack_deps[dep].repo_url	298 repository = get_repository(stack_deps[dep].repo_url)

291 blame = repository.GetBlame(touched_file_path,	299 blame = repository.GetBlame(touched_file_path,

292 stack_deps[dep].revision)	300 stack_deps[dep].revision)

293	301

294 # Generate/update each suspect(changelog) in changelogs, blame is used	302 # Generate/update each suspect(changelog) in changelogs, blame is used

295 # to calculate distance between touched lines and crashed lines in file.	303 # to calculate distance between touched lines and crashed lines in file.

296 suspects.GenerateSuspects(	304 suspects.GenerateSuspects(

297 touched_file_path, dep, stack_infos, changelogs, blame)	305 touched_file_path, dep, stack_infos, changelogs, blame)

298	306

299 return suspects.values()	307 return suspects.values()

OLD	NEW

« no previous file with comments | « no previous file | appengine/findit/crash/findit_for_chromecrash.py » ('j') | no next file with comments »