appengine/findit/crash/changelist_classifier.py - Issue 2605943002: Removing the mutation in the factories for getting dep repositories

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2605943002: Removing the mutation in the factories for getting dep repositories (Closed)

Patch Set: Added the Factory method to CachedGitilesRepository Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « appengine/findit/common/test/chrome_dependency_fetcher_test.py ('k') | appengine/findit/crash/crash_pipeline.py » ('j') | appengine/findit/crash/crash_pipeline.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import logging	5 import logging

6 from collections import defaultdict	6 from collections import defaultdict

7 from collections import namedtuple	7 from collections import namedtuple

8	8

9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher	9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher

10 from crash import crash_util	10 from crash import crash_util

11 from crash.suspect import StackInfo	11 from crash.suspect import StackInfo

12 from crash.suspect import Suspect	12 from crash.suspect import Suspect

13 from crash.suspect import SuspectMap	13 from crash.suspect import SuspectMap

14 from crash.scorers.aggregated_scorer import AggregatedScorer	14 from crash.scorers.aggregated_scorer import AggregatedScorer

15 from crash.scorers.min_distance import MinDistance	15 from crash.scorers.min_distance import MinDistance

16 from crash.scorers.top_frame_index import TopFrameIndex	16 from crash.scorers.top_frame_index import TopFrameIndex

17 from crash.stacktrace import CallStack	17 from crash.stacktrace import CallStack

18 from crash.stacktrace import Stacktrace	18 from crash.stacktrace import Stacktrace

19 from libs.gitiles.diff import ChangeType	19 from libs.gitiles.diff import ChangeType

20	20

21	21

22 class ChangelistClassifier(namedtuple('ChangelistClassifier',	22 class ChangelistClassifier(namedtuple('ChangelistClassifier',

23 ['repository', 'get_repository', 'top_n_results', 'confidence_threshold'])):	23 ['get_repository', 'top_n_results', 'confidence_threshold'])):

24 __slots__ = ()	24 __slots__ = ()

25	25

26 def __new__(cls, repository, get_repository, top_n_results=3,	26 def __new__(cls, get_repository, top_n_results=3, confidence_threshold=0.999):

27 confidence_threshold=0.999):

28 """Args:	27 """Args:

29 repository (Repository): the Git repository of the main project

30 we're trying to classify CLs from.

31 get_repository (callable): a function from DEP urls to ``Repository``	28 get_repository (callable): a function from DEP urls to ``Repository``

32 objects, so we can get changelogs and blame for each dep. Notably,	29 objects, so we can get changelogs and blame for each dep. Notably,

33 to keep the code here generic, we make no assumptions about	30 to keep the code here generic, we make no assumptions about

34 which subclass of ``Repository`` this function returns. Thus,	31 which subclass of ``Repository`` this function returns. Thus,

35 it is up to the caller to decide what class to return and handle	32 it is up to the caller to decide what class to return and handle

36 any other arguments that class may require (e.g., an http client	33 any other arguments that class may require (e.g., an http client

37 for ``GitilesRepository``).	34 for ``GitilesRepository``).

38 top_n_results (int): maximum number of results to return.	35 top_n_results (int): maximum number of results to return.

39 confidence_threshold (float): In [0,1], above which we only return	36 confidence_threshold (float): In [0,1], above which we only return

40 the first suspect.	37 the first suspect.

41 """	38 """

42 return super(cls, ChangelistClassifier).__new__(	39 return super(cls, ChangelistClassifier).__new__(

43 cls, repository, get_repository, top_n_results, confidence_threshold)	40 cls, get_repository, top_n_results, confidence_threshold)

44	41

45 def __str__(self): # pragma: no cover	42 def __str__(self): # pragma: no cover

46 return ('%s(top_n_results=%d, confidence_threshold=%g)'	43 return ('%s(top_n_results=%d, confidence_threshold=%g)'

47 % (self.__class__.__name__,	44 % (self.__class__.__name__,

48 self.top_n_results,	45 self.top_n_results,

49 self.confidence_threshold))	46 self.confidence_threshold))

50	47

51 def __call__(self, report):	48 def __call__(self, report):

52 """Finds changelists suspected of being responsible for the crash report.	49 """Finds changelists suspected of being responsible for the crash report.

53	50

54 This function assumes the report's stacktrace has already had any necessary	51 This function assumes the report's stacktrace has already had any necessary

55 preprocessing (like filtering or truncating) applied.	52 preprocessing (like filtering or truncating) applied.

56	53

57 Args:	54 Args:

58 report (CrashReport): the report to be analyzed.	55 report (CrashReport): the report to be analyzed.

59	56

60 Returns:	57 Returns:

61 List of ``Suspect``s, sorted by confidence from highest to lowest.	58 List of ``Suspect``s, sorted by confidence from highest to lowest.

62 """	59 """

63 if not report.regression_range:	60 if not report.regression_range:

64 logging.warning('ChangelistClassifier.__call__: Missing regression range '	61 logging.warning('ChangelistClassifier.__call__: Missing regression range '

65 'for report: %s', str(report))	62 'for report: %s', str(report))

66 return []	63 return []

67 last_good_version, first_bad_version = report.regression_range	64 last_good_version, first_bad_version = report.regression_range

68 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',	65 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',

69 last_good_version, first_bad_version)	66 last_good_version, first_bad_version)

70	67

71 dependency_fetcher = ChromeDependencyFetcher(self.repository)	68 dependency_fetcher = ChromeDependencyFetcher(self.get_repository)

72	69

73 # We are only interested in the deps in crash stack (the callstack that	70 # We are only interested in the deps in crash stack (the callstack that

74 # caused the crash).	71 # caused the crash).

75 # TODO(wrengr): we may want to receive the crash deps as an argument,	72 # TODO(wrengr): we may want to receive the crash deps as an argument,

76 # so that when this method is called via Findit.FindCulprit, we avoid	73 # so that when this method is called via Findit.FindCulprit, we avoid

77 # doing redundant work creating it.	74 # doing redundant work creating it.

78 stack_deps = GetDepsInCrashStack(	75 stack_deps = GetDepsInCrashStack(

79 report.stacktrace.crash_stack,	76 report.stacktrace.crash_stack,

80 dependency_fetcher.GetDependency(	77 dependency_fetcher.GetDependency(

81 report.crashed_version, report.platform))	78 report.crashed_version, report.platform))

82	79

83 # Get dep and file to changelogs, stack_info and blame dicts.	80 # Get dep and file to changelogs, stack_info and blame dicts.

84 dep_rolls = dependency_fetcher.GetDependencyRollsDict(	81 dep_rolls = dependency_fetcher.GetDependencyRollsDict(

85 last_good_version, first_bad_version, report.platform)	82 last_good_version, first_bad_version, report.platform)

86	83

87 # Regression of a dep added/deleted (old_revision/new_revision is None) can	84 # Regression of a dep added/deleted (old_revision/new_revision is None) can

88 # not be known for sure and this case rarely happens, so just filter them	85 # not be known for sure and this case rarely happens, so just filter them

89 # out.	86 # out.

90 regression_deps_rolls = {}	87 regression_deps_rolls = {}

91 for dep_path, dep_roll in dep_rolls.iteritems():	88 for dep_path, dep_roll in dep_rolls.iteritems():

92 if not dep_roll.old_revision or not dep_roll.new_revision:	89 if not dep_roll.old_revision or not dep_roll.new_revision:

93 logging.info('Skip %s denpendency %s',	90 logging.info('Skip %s denpendency %s',

94 'added' if dep_roll.new_revision else 'deleted', dep_path)	91 'added' if dep_roll.new_revision else 'deleted', dep_path)

95 continue	92 continue

96 regression_deps_rolls[dep_path] = dep_roll	93 regression_deps_rolls[dep_path] = dep_roll

97	94

98 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(	95 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(

99 regression_deps_rolls, stack_deps, self.repository)	96 regression_deps_rolls, stack_deps, self.get_repository)

100 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(	97 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(

101 report.stacktrace, stack_deps)	98 report.stacktrace, stack_deps)

102	99

103 suspects = FindSuspects(dep_to_file_to_changelogs,	100 suspects = FindSuspects(dep_to_file_to_changelogs,

104 dep_to_file_to_stack_infos,	101 dep_to_file_to_stack_infos,

105 stack_deps, self.get_repository, ignore_cls)	102 stack_deps, self.get_repository, ignore_cls)

106 if not suspects:	103 if not suspects:

107 return []	104 return []

108	105

109 # Set confidence, reasons, and changed_files.	106 # Set confidence, reasons, and changed_files.

(...skipping 21 matching lines...) Expand all Loading...
131 stack_deps = {}	128 stack_deps = {}

132 for frame in crash_stack:	129 for frame in crash_stack:

133 if frame.dep_path:	130 if frame.dep_path:

134 stack_deps[frame.dep_path] = crash_deps[frame.dep_path]	131 stack_deps[frame.dep_path] = crash_deps[frame.dep_path]

135	132

136 return stack_deps	133 return stack_deps

137	134

138	135

139 # TODO(katesonia): Remove the repository argument after refatoring cl committed.	136 # TODO(katesonia): Remove the repository argument after refatoring cl committed.

140 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,	137 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,

141 repository):	138 get_repository):

142 """Gets a dict containing files touched by changelogs for deps in stack_deps.	139 """Gets a dict containing files touched by changelogs for deps in stack_deps.

143	140

144 Regression ranges for each dep is determined by regression_deps_rolls.	141 Regression ranges for each dep is determined by regression_deps_rolls.

145 Changelogs which were reverted are returned in a reverted_cls set.	142 Changelogs which were reverted are returned in a reverted_cls set.

146	143

147 Args:	144 Args:

148 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in	145 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in

149 regression range.	146 regression range.

150 stack_deps (dict): Represents all the dependencies shown in	147 stack_deps (dict): Represents all the dependencies shown in

151 the crash stack.	148 the crash stack.

152 repository (Repository): Repository to get changelogs from.	149 get_repository (callable): a function from DEP urls to ``Repository``

	150 objects, so we can get changelogs and blame for each dep. Notably,

	151 to keep the code here generic, we make no assumptions about

	152 which subclass of ``Repository`` this function returns. Thus,

	153 it is up to the caller to decide what class to return and handle

	154 any other arguments that class may require (e.g., an http client

	155 for ``GitilesRepository``).

153	156

154 Returns:	157 Returns:

155 A tuple (dep_to_file_to_changelogs, reverted_cls).	158 A tuple (dep_to_file_to_changelogs, reverted_cls).

156	159

157 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path	160 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path

158 to ChangeLogs that touched this file.	161 to ChangeLogs that touched this file.

159 For example:	162 For example:

160 {	163 {

161 'src/': {	164 'src/': {

162 'a.cc': [	165 'a.cc': [

(...skipping 29 matching lines...) Expand all Loading...
192 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))	195 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))

193 reverted_cls = set()	196 reverted_cls = set()

194	197

195 for dep in stack_deps:	198 for dep in stack_deps:

196 # If a dep is not in regression range, than it cannot be the dep of	199 # If a dep is not in regression range, than it cannot be the dep of

197 # culprits.	200 # culprits.

198 dep_roll = regression_deps_rolls.get(dep)	201 dep_roll = regression_deps_rolls.get(dep)

199 if not dep_roll:	202 if not dep_roll:

200 continue	203 continue

201	204

202 repository.repo_url = dep_roll.repo_url	205 repository = get_repository(dep_roll.repo_url)

203 changelogs = repository.GetChangeLogs(dep_roll.old_revision,	206 changelogs = repository.GetChangeLogs(dep_roll.old_revision,

204 dep_roll.new_revision)	207 dep_roll.new_revision)

205	208

206 for changelog in changelogs or []:	209 for changelog in changelogs or []:

207 # When someone reverts, we need to skip both the CL doing	210 # When someone reverts, we need to skip both the CL doing

208 # the reverting as well as the CL that got reverted. If	211 # the reverting as well as the CL that got reverted. If

209 # ``reverted_revision`` is true, then this CL reverts another one,	212 # ``reverted_revision`` is true, then this CL reverts another one,

210 # so we skip it and save the CL it reverts in ``reverted_cls`` to	213 # so we skip it and save the CL it reverts in ``reverted_cls`` to

211 # be filtered out later.	214 # be filtered out later.

212 if changelog.reverted_revision:	215 if changelog.reverted_revision:

(...skipping 85 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
298 repository = get_repository(stack_deps[dep].repo_url)	301 repository = get_repository(stack_deps[dep].repo_url)

299 blame = repository.GetBlame(touched_file_path,	302 blame = repository.GetBlame(touched_file_path,

300 stack_deps[dep].revision)	303 stack_deps[dep].revision)

301	304

302 # Generate/update each suspect(changelog) in changelogs, blame is used	305 # Generate/update each suspect(changelog) in changelogs, blame is used

303 # to calculate distance between touched lines and crashed lines in file.	306 # to calculate distance between touched lines and crashed lines in file.

304 suspects.GenerateSuspects(	307 suspects.GenerateSuspects(

305 touched_file_path, dep, stack_infos, changelogs, blame)	308 touched_file_path, dep, stack_infos, changelogs, blame)

306	309

307 return suspects.values()	310 return suspects.values()

OLD	NEW