appengine/findit/crash/changelist_classifier.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: Finally fixed the mock tests! Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

	5 import logging

5 from collections import defaultdict	6 from collections import defaultdict

6	7

	8 from common import chrome_dependency_fetcher

7 from common.diff import ChangeType	9 from common.diff import ChangeType

8 from common.git_repository import GitRepository	10 from common.git_repository import GitRepository

9 from common.http_client_appengine import HttpClientAppengine	11 from common.http_client_appengine import HttpClientAppengine

10 from crash import crash_util	12 from crash import crash_util

11 from crash.stacktrace import CallStack

12 from crash.stacktrace import Stacktrace

13 from crash.results import MatchResults	13 from crash.results import MatchResults

14 from crash.scorers.aggregated_scorer import AggregatedScorer	14 from crash.scorers.aggregated_scorer import AggregatedScorer

15 from crash.scorers.min_distance import MinDistance	15 from crash.scorers.min_distance import MinDistance

16 from crash.scorers.top_frame_index import TopFrameIndex	16 from crash.scorers.top_frame_index import TopFrameIndex

	17 from crash.stacktrace import CallStack

	18 from crash.stacktrace import Stacktrace

	19

	20 # TODO(wrengr): make this a namedtuple.

	21 class ChangelistClassifier(object):

	22 def __init__(self, repository,

	23 top_n_frames, top_n_results=3, confidence_threshold=0.999):

	24 """Args:

	25 repository (Repository): the Git repository for getting CLs to classify.

	26 top_n_frames (int): how many frames of each callstack to look at.

	27 top_n_results (int): maximum number of results to return.

	28 confidence_threshold (float): In [0,1], above which we only return

	29 the first result.

	30 """

	31 self._repository = repository

	32 self.top_n_frames = top_n_frames

	33 self.top_n_results = top_n_results

	34 self.confidence_threshold = confidence_threshold

	35

	36 def __str__(self): # pragma: no cover

	37 return ('%s(top_n_frames=%d, top_n_results=%d, confidence_threshold=%g)'

	38 % (self.__class__.__name__,

	39 self.top_n_frames,

	40 self.top_n_results,

	41 self.confidence_threshold))

	42

	43 def __call__(self, report):

	44 """Finds changelists suspected of being responsible for the crash report.

	45

	46 Args:

	47 report (CrashReport): the report to be analyzed.

	48

	49 Returns:

	50 List of Results, sorted by confidence from highest to lowest.

	51 """

	52 if not report.regression_range:

	53 logging.warning('ChangelistClassifier.__call__: Missing regression range '

	54 'for report: %s', str(report))

	55 return []

	56 last_good_version, first_bad_version = report.regression_range

	57 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',

	58 last_good_version, first_bad_version)

	59

	60 # Restrict analysis to just the top n frames in each callstack.

	61 # TODO(wrengr): move this to be a Stacktrace method?

	62 stacktrace = Stacktrace([

	63 CallStack(stack.priority,

	64 format_type=stack.format_type,

	65 language_type=stack.language_type,

	66 frame_list=stack[:self.top_n_frames])

	67 for stack in report.stacktrace])

	68

	69 # We are only interested in the deps in crash stack (the callstack that

	70 # caused the crash).

	71 # TODO(wrengr): we may want to receive the crash deps as an argument,

	72 # so that when this method is called via Findit.FindCulprit, we avoid

	73 # doing redundant work creating it.

	74 stack_deps = GetDepsInCrashStack(

	75 report.stacktrace.crash_stack,

	76 chrome_dependency_fetcher.ChromeDependencyFetcher(self._repository

	77 ).GetDependency(report.crashed_version, report.platform))

	78

	79 # Get dep and file to changelogs, stack_info and blame dicts.

	80 regression_deps_rolls = chrome_dependency_fetcher.ChromeDependencyFetcher(

	81 self._repository).GetDependencyRollsDict(

	82 last_good_version, first_bad_version, report.platform)

	83 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(

	84 regression_deps_rolls, stack_deps, self._repository)

	85 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(

	86 stacktrace, stack_deps)

	87

	88 # TODO: argument order is inconsistent from others. Repository should

	89 # be last argument.

	90 results = FindMatchResults(dep_to_file_to_changelogs,

	91 dep_to_file_to_stack_infos,

	92 stack_deps, self._repository, ignore_cls)

	93 if not results:

	94 return []

	95

	96 # TODO(wrengr): we should be able to do this map/filter/sort in one pass.

	97 # Set result.confidence, result.reasons and result.changed_files.

	98 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

	99 map(aggregated_scorer.Score, results)

	100

	101 # Filter all the 0 confidence results.

	102 results = filter(lambda r: r.confidence != 0, results)

	103 if not results:

	104 return []

	105

	106 sorted_results = sorted(results, key=lambda r: -r.confidence)

	107

	108 max_results = (1 if sorted_results[0].confidence > self.confidence_threshold

	109 else self.top_n_results)

	110

	111 return sorted_results[:max_results]

17	112

18	113

19 def GetDepsInCrashStack(crash_stack, crash_deps):	114 def GetDepsInCrashStack(crash_stack, crash_deps):

20 """Gets Dependencies in crash stack."""	115 """Gets Dependencies in crash stack."""

21 if not crash_stack:	116 if not crash_stack:

22 return {}	117 return {}

23	118

24 stack_deps = {}	119 stack_deps = {}

25 for frame in crash_stack:	120 for frame in crash_stack:

26 if frame.dep_path:	121 if frame.dep_path:

27 stack_deps[frame.dep_path] = crash_deps[frame.dep_path]	122 stack_deps[frame.dep_path] = crash_deps[frame.dep_path]

28	123

29 return stack_deps	124 return stack_deps

30	125

31	126

32 # TODO(katesonia): Remove the repository argument after refactoring cl committed.	127 # TODO(katesonia): Remove the repository argument after refactoring cl committed.

33 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,	128 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,

34 repository):	129 repository):

35 """Gets a dict containing files touched by changelogs for deps in stack_deps.	130 """Gets a dict containing files touched by changelogs for deps in stack_deps.

36	131

37 Regression ranges for each dep is determined by regression_deps_rolls.	132 Regression ranges for each dep is determined by regression_deps_rolls.

38 Those changelogs got reverted should be returned in a ignore_cls set.	133 Changelogs which were reverted are returned in a reverted_cls set.

39	134

40 Args:	135 Args:

41 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in	136 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in

42 regression range.	137 regression range.

43 stack_deps (dict): Represents all the dependencies shown in	138 stack_deps (dict): Represents all the dependencies shown in

44 the crash stack.	139 the crash stack.

45 repository (Repository): Repository to get changelogs from.	140 repository (Repository): Repository to get changelogs from.

46	141

47 Returns:	142 Returns:

48 A tuple (dep_to_file_to_changelogs, ignore_cls).	143 A tuple (dep_to_file_to_changelogs, reverted_cls).

49	144

50 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path	145 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path

51 to ChangeLogs that touched this file.	146 to ChangeLogs that touched this file.

52 For example:	147 For example:

53 {	148 {

54 'src/': {	149 'src/': {

55 'a.cc': [	150 'a.cc': [

56 ChangeLog.FromDict({	151 ChangeLog.FromDict({

57 'author_name': 'test@chromium.org',	152 'author_name': 'test@chromium.org',

58 'message': 'dummy',	153 'message': 'dummy',

(...skipping 14 matching lines...) Expand all Loading...
73 'https://repo.test/+/bcfd',	168 'https://repo.test/+/bcfd',

74 'code_review_url': 'https://codereview.chromium.org/3281',	169 'code_review_url': 'https://codereview.chromium.org/3281',

75 'committer_name': 'example@chromium.org',	170 'committer_name': 'example@chromium.org',

76 'revision': 'bcfd',	171 'revision': 'bcfd',

77 'reverted_revision': None	172 'reverted_revision': None

78 }),	173 }),

79 ]	174 ]

80 }	175 }

81 }	176 }

82	177

83 ignore_cls (set): A set of reverted revisions.	178 reverted_cls (set): A set of reverted revisions.

84 """	179 """

85 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))	180 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))

86 ignore_cls = set()	181 reverted_cls = set()

87	182

88 for dep in stack_deps:	183 for dep in stack_deps:

89 # If a dep is not in regression range, then it cannot be the dep of	184 # If a dep is not in regression range, then it cannot be the dep of

90 # culprits.	185 # culprits.

91 if dep not in regression_deps_rolls:	186 dep_roll = regression_deps_rolls.get(dep)

	187 if not dep_roll:

92 continue	188 continue

93	189

94 dep_roll = regression_deps_rolls[dep]	190 dep_roll = regression_deps_rolls[dep]

95	191

96 repository.repo_url = dep_roll.repo_url	192 repository.repo_url = dep_roll.repo_url

97 changelogs = repository.GetChangeLogs(dep_roll.old_revision,	193 changelogs = repository.GetChangeLogs(dep_roll.old_revision,

98 dep_roll.new_revision)	194 dep_roll.new_revision)

99	195

100 for changelog in changelogs:	196 for changelog in changelogs:

	197 # When someone reverts, we need to skip both the CL doing

	198 # the reverting as well as the CL that got reverted. If

	199 # |reverted_revision| is true, then this CL reverts another one,

	200 # so we skip it and save the CL it reverts in |reverted_cls| to

	201 # be filtered out later.

101 if changelog.reverted_revision:	202 if changelog.reverted_revision:

102 # Skip reverting cls and add reverted revisions to ignore_cls to later	203 reverted_cls.add(changelog.reverted_revision)

103 # filter those reverted revisions.

104 ignore_cls.add(changelog.reverted_revision)

105 continue	204 continue

106	205

107 for touched_file in changelog.touched_files:	206 for touched_file in changelog.touched_files:

108 if touched_file.change_type == ChangeType.DELETE:	207 if touched_file.change_type == ChangeType.DELETE:

109 continue	208 continue

110	209

111 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog)	210 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog)

112	211

113 return dep_to_file_to_changelogs, ignore_cls	212 return dep_to_file_to_changelogs, reverted_cls

114	213

115	214

116 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):	215 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):

117 """Gets a dict containing all the stack information of files in stacktrace.	216 """Gets a dict containing all the stack information of files in stacktrace.

118	217

119 Only gets stack information for files grouped by deps in stack_deps.	218 Only gets stack information for files grouped by deps in stack_deps.

120	219

121 Args:	220 Args:

122 stacktrace (Stacktrace): Parsed stacktrace object.	221 stacktrace (Stacktrace): Parsed stacktrace object.

123 stack_deps (dict): Represents all the dependencies shown in	222 stack_deps (dict): Represents all the dependencies shown in

124 the crash stack.	223 the crash stack.

125	224

126 Returns:	225 Returns:

127 A dict, maps dep path to a dict mapping file path to a list of stack	226 A dict, maps dep path to a dict mapping file path to a list of stack

128 inforamtion of this file. A file may occur in several frames, one stack info	227 information of this file. A file may occur in several frames, one

129 consist of a StackFrame and the callstack priority of it.	228 stack info consist of a StackFrame and the callstack priority of it.

130	229

131 For example:	230 For example:

132 {	231 {

133 'src/': {	232 'src/': {

134 'a.cc': [	233 'a.cc': [

135 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),	234 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),

136 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),	235 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),

137 ]	236 ]

138 }	237 }

139 }	238 }

(...skipping 18 matching lines...) Expand all Loading...
158 stack_deps, repository,	257 stack_deps, repository,

159 ignore_cls=None):	258 ignore_cls=None):

160 """Finds results by matching stacktrace and changelogs in regression range.	259 """Finds results by matching stacktrace and changelogs in regression range.

161	260

162 This method only applies to those crashes with regression range.	261 This method only applies to those crashes with regression range.

163	262

164 Args:	263 Args:

165 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path	264 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path

166 to ChangeLogs that touched this file.	265 to ChangeLogs that touched this file.

167 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path	266 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path

168 to a list of stack inforamtion of this file. A file may occur in several	267 to a list of stack information of this file. A file may occur in several

169 frames, one stack info consist of a StackFrame and the callstack priority	268 frames, one stack info consist of a StackFrame and the callstack priority

170 of it.	269 of it.

171 stack_deps (dict): Represents all the dependencies shown in the crash stack.	270 stack_deps (dict): Represents all the dependencies shown in the crash stack.

172 repository (Repository): Repository to get changelogs and blame from.	271 repository (Repository): Repository to get changelogs and blame from.

173 ignore_cls (set): Set of reverted revisions.	272 ignore_cls (set): Set of reverted revisions.

174	273

175 Returns:	274 Returns:

176 A list of MatchResult instances with confidence and reason unset.	275 A list of MatchResult instances with confidence and reason unset.

177 """	276 """

178 match_results = MatchResults(ignore_cls)	277 match_results = MatchResults(ignore_cls)

179	278

180 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():	279 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():

181 file_to_changelogs = dep_to_file_to_changelogs[dep]	280 file_to_changelogs = dep_to_file_to_changelogs[dep]

182 repository.repo_url = stack_deps[dep].repo_url	281 repository.repo_url = stack_deps[dep].repo_url

183	282

184 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():	283 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():

185 for touched_file_path, changelogs in file_to_changelogs.iteritems():	284 for touched_file_path, changelogs in file_to_changelogs.iteritems():

186 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):	285 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):

187 continue	286 continue

188	287

189 blame = repository.GetBlame(crashed_file_path,	288 blame = repository.GetBlame(crashed_file_path,

190 stack_deps[dep].revision)	289 stack_deps[dep].revision)

191	290

192 # Generate/update each result(changelog) in changelogs, blame is used	291 # Generate/update each result(changelog) in changelogs, blame is used

193 # to calculate distance between touched lines and crashed lines in file.	292 # to calculate distance between touched lines and crashed lines in file.

194 match_results.GenerateMatchResults(	293 match_results.GenerateMatchResults(

195 crashed_file_path, dep, stack_infos, changelogs, blame)	294 crashed_file_path, dep, stack_infos, changelogs, blame)

196	295

197 return match_results.values()	296 return match_results.values()

198

199

200 # TODO(katesonia): Remove the repository argument after refactoring cl committed.

201 def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n,

202 repository):

203 """Finds culprit results for crash.

204

205 Args:

206 stacktrace (Stacktrace): Parsed Stacktrace object.

207 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in

208 regression range.

209 crashed_deps (dict of Dependencys): Represents all the dependencies of

210 crashed revision.

211 top_n (int): Top n frames of each stack to be analyzed.

212 repository (Repository): Repository to get changelogs and blame from.

213

214 Returns:

215 List of Results, sorted by confidence from highest to lowest.

216 """

217 if not regression_deps_rolls:

218 return []

219

220 # Findit will only analyze the top n frames in each callstacks.

221 stack_trace = Stacktrace([

222 CallStack(stack.priority,

223 format_type=stack.format_type,

224 language_type=stack.language_type,

225 frame_list=stack[:top_n])

226 for stack in stacktrace])

227

228 # We are only interested in the deps in crash stack (the callstack that

229 # caused the crash).

230 stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps)

231

232 # Get dep and file to changelogs, stack_info and blame dicts.

233 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(

234 regression_deps_rolls, stack_deps, repository)

235 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(

236 stack_trace, stack_deps)

237

238 results = FindMatchResults(dep_to_file_to_changelogs,

239 dep_to_file_to_stack_infos,

240 stack_deps, repository, ignore_cls)

241

242 if not results:

243 return []

244

245 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

246

247 # Set result.confidence, result.reasons and result.changed_files.

248 map(aggregated_scorer.Score, results)

249

250 # Filter all the 0 confidence results.

251 results = filter(lambda r: r.confidence != 0, results)

252 if not results:

253 return []

254

255 sorted_results = sorted(results, key=lambda r: -r.confidence)

256

257 if sorted_results[0].confidence > 0.999:

258 return sorted_results[:1]

259

260 return sorted_results[:3]

OLD	NEW

« no previous file with comments | « appengine/findit/crash/azalea.py ('k') | appengine/findit/crash/classifier.py » ('j') | no next file with comments »