appengine/findit/crash/changelist_classifier.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: trying to fix some tests Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

	5 import logging

5 from collections import defaultdict	6 from collections import defaultdict

6	7

	8 from common import chromium_deps

7 from common.diff import ChangeType	9 from common.diff import ChangeType

8 from common.git_repository import GitRepository	10 from common.git_repository import GitRepository

9 from common.http_client_appengine import HttpClientAppengine	11 from common.http_client_appengine import HttpClientAppengine

10 from crash import crash_util	12 from crash import crash_util

11 from crash.stacktrace import CallStack

12 from crash.stacktrace import Stacktrace

13 from crash.results import MatchResults	13 from crash.results import MatchResults

14 from crash.scorers.aggregated_scorer import AggregatedScorer	14 from crash.scorers.aggregated_scorer import AggregatedScorer

15 from crash.scorers.min_distance import MinDistance	15 from crash.scorers.min_distance import MinDistance

16 from crash.scorers.top_frame_index import TopFrameIndex	16 from crash.scorers.top_frame_index import TopFrameIndex

	17 from crash.stacktrace import CallStack

	18 from crash.stacktrace import Stacktrace

	19

	20 class ChangelistClassifier(object):

	21 def __init__(self, top_n_frames, top_n_results=3, confidence_threshold=0.999):

	22 """Args:

	23 top_n_frames (int): how many frames of each callstack to look at.

	24 top_n_results (int): maximum number of results to return.

	25 confidence_threshold (float): In [0,1], above which we only return

	26 the first result.

	27 """

	28 self.top_n_frames = top_n_frames

	29 self.top_n_results = top_n_results

	30 self.confidence_threshold = confidence_threshold

	31

	32 def __call__(self, report):

	33 """Finds changelists suspected of being responsible for the crash report.

	34

	35 Args:

	36 report (CrashReport): the report to be analyzed.

	37

	38 Returns:

	39 List of Results, sorted by confidence from highest to lowest.

	40 """

	41 if not report.regression_range:

	42 return []

	43 last_good_version, first_bad_version = report.regression_range

	44 logging.info('Regression range %s:%s', last_good_version, first_bad_version)

	45

	46 # Restrict analysis to just the top n frames in each callstack.

	47 # TODO(wrengr): move this to be a Stacktrace method?

	48 stacktrace = Stacktrace([

	49 CallStack(stack.priority,

	50 format_type=stack.format_type,

	51 language_type=stack.language_type,

	52 frame_list=stack[:self.top_n_frames])

	53 for stack in report.stacktrace])

	54

	55 # We are only interested in the deps in crash stack (the callstack that

	56 # caused the crash).

	57 # TODO(wrengr): we may want to receive the crash deps as an argument,

	58 # so that when this method is called via Findit.FindCulprit, we avoid

	59 # doing redundant work creating it.

	60 stack_deps = GetDepsInCrashStack(report.stacktrace.crash_stack,

	61 chromium_deps.GetChromeDependency(

	62 report.crashed_version, report.platform))

	63

	64 # Get dep and file to changelogs, stack_info and blame dicts.

	65 regression_deps_rolls = chromium_deps.GetDEPSRollsDict(

	66 last_good_version, first_bad_version, report.platform)

	67 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(

	68 regression_deps_rolls, stack_deps)

	69 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(

	70 stacktrace, stack_deps)

	71

	72 results = FindMatchResults(dep_to_file_to_changelogs,

	73 dep_to_file_to_stack_infos,

	74 stack_deps, ignore_cls)

	75 if not results:

	76 return []

	77

	78 # TODO(wrengr): we should be able to do this map/filter/sort in one pass.

	79 # Set result.confidence, result.reasons and result.changed_files.

	80 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

	81 map(aggregated_scorer.Score, results)

	82

	83 # Filter all the 0 confidence results.

	84 results = filter(lambda r: r.confidence != 0, results)

	85 if not results:

	86 return []

	87

	88 sorted_results = sorted(results, key=lambda r: -r.confidence)

	89

	90 max_results = (1 if sorted_results[0].confidence > self.confidence_threshold

	91 else self.top_n_results)

	92

	93 return sorted_results[:max_results]

17	94

18	95

19 def GetDepsInCrashStack(crash_stack, crash_deps):	96 def GetDepsInCrashStack(crash_stack, crash_deps):

20 """Gets Dependencies in crash stack."""	97 """Gets Dependencies in crash stack."""

21 if not crash_stack:	98 if not crash_stack:

22 return {}	99 return {}

23	100

24 stack_deps = {}	101 stack_deps = {}

25 for frame in crash_stack:	102 for frame in crash_stack:

26 if frame.dep_path:	103 if frame.dep_path:

27 stack_deps[frame.dep_path] = crash_deps[frame.dep_path]	104 stack_deps[frame.dep_path] = crash_deps[frame.dep_path]

28	105

29 return stack_deps	106 return stack_deps

30	107

31	108 # TODO(wrengr): come up with a design to clean up these

	109 # FooForFilesGroupedByDeps functions.

32 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps):	110 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps):

33 """Gets a dict containing files touched by changelogs for deps in stack_deps.	111 """Gets a dict containing files touched by changelogs for deps in stack_deps.

34	112

35 The regression range for each dep is determined by regression_deps_rolls.	113 The regression range for each dep is determined by regression_deps_rolls.

36 Those changelogs got reverted should be returned in a ignore_cls set.	114 Changelogs which were reverted are returned in a reverted_cls set.

37	115

38 Args:	116 Args:

39 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in	117 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in

40 regression range.	118 regression range.

41 stack_deps (dict): Represents all the dependencies shown in	119 stack_deps (dict): Represents all the dependencies shown in

42 the crash stack.	120 the crash stack.

43	121

44 Returns:	122 Returns:

45 A tuple (dep_to_file_to_changelogs, ignore_cls).	123 A tuple (dep_to_file_to_changelogs, reverted_cls).

46	124

47 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path	125 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path

48 to ChangeLogs that touched this file.	126 to ChangeLogs that touched this file.

49 For example:	127 For example:

50 {	128 {

51 'src/': {	129 'src/': {

52 'a.cc': [	130 'a.cc': [

53 ChangeLog.FromDict({	131 ChangeLog.FromDict({

54 'author_name': 'test@chromium.org',	132 'author_name': 'test@chromium.org',

55 'message': 'dummy',	133 'message': 'dummy',

(...skipping 14 matching lines...) Expand all Loading...
70 'https://repo.test/+/bcfd',	148 'https://repo.test/+/bcfd',

71 'code_review_url': 'https://codereview.chromium.org/3281',	149 'code_review_url': 'https://codereview.chromium.org/3281',

72 'committer_name': 'example@chromium.org',	150 'committer_name': 'example@chromium.org',

73 'revision': 'bcfd',	151 'revision': 'bcfd',

74 'reverted_revision': None	152 'reverted_revision': None

75 }),	153 }),

76 ]	154 ]

77 }	155 }

78 }	156 }

79	157

80 ignore_cls (set): A set of reverted revisions.	158 reverted_cls (set): A set of reverted revisions.

81 """	159 """

82 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))	160 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))

83 ignore_cls = set()	161 reverted_cls = set()

84	162

85 for dep in stack_deps:	163 for dep in stack_deps:

86 # If a dep is not in regression range, then it cannot be the dep of	164 # If a dep is not in regression range, then it cannot be the dep of

87 # culprits.	165 # culprits.

88 if dep not in regression_deps_rolls:	166 dep_roll = regression_deps_rolls.get(dep)

	167 if not dep_roll:

89 continue	168 continue

90	169

91 dep_roll = regression_deps_rolls[dep]

92

93 git_repository = GitRepository(dep_roll.repo_url, HttpClientAppengine())	170 git_repository = GitRepository(dep_roll.repo_url, HttpClientAppengine())

94 changelogs = git_repository.GetChangeLogs(dep_roll.old_revision,	171 changelogs = git_repository.GetChangeLogs(dep_roll.old_revision,

95 dep_roll.new_revision)	172 dep_roll.new_revision)

96	173

97 for changelog in changelogs:	174 for changelog in changelogs:

	175 # When someone reverts, we need to skip both the CL doing

	176 # the reverting as well as the CL that got reverted. If

	177 # |reverted_revision| is true, then this CL reverts another one,

	178 # so we skip it and save the CL it reverts in |reverted_cls| to

	179 # be filtered out later.

98 if changelog.reverted_revision:	180 if changelog.reverted_revision:

99 # Skip reverting cls and add reverted revisions to ignore_cls to later	181 reverted_cls.add(changelog.reverted_revision)

100 # filter those reverted revisions.

101 ignore_cls.add(changelog.reverted_revision)

102 continue	182 continue

103	183

104 for touched_file in changelog.touched_files:	184 for touched_file in changelog.touched_files:

105 if touched_file.change_type == ChangeType.DELETE:	185 if touched_file.change_type == ChangeType.DELETE:

106 continue	186 continue

107	187

108 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog)	188 dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog)

109	189

110 return dep_to_file_to_changelogs, ignore_cls	190 return dep_to_file_to_changelogs, reverted_cls

111	191

112	192

113 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):	193 def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):

114 """Gets a dict containing all the stack information of files in stacktrace.	194 """Gets a dict containing all the stack information of files in stacktrace.

115	195

116 Only gets stack information for files grouped by deps in stack_deps.	196 Only gets stack information for files grouped by deps in stack_deps.

117	197

118 Args:	198 Args:

119 stacktrace (Stacktrace): Parsed stacktrace object.	199 stacktrace (Stacktrace): Parsed stacktrace object.

120 stack_deps (dict): Represents all the dependencies shown in	200 stack_deps (dict): Represents all the dependencies shown in

121 the crash stack.	201 the crash stack.

122	202

123 Returns:	203 Returns:

124 A dict, maps dep path to a dict mapping file path to a list of stack	204 A dict, maps dep path to a dict mapping file path to a list of stack

125 inforamtion of this file. A file may occur in several frames, one stack info	205 information of this file. A file may occur in several frames, one

126 consist of a StackFrame and the callstack priority of it.	206 stack info consist of a StackFrame and the callstack priority of it.

127	207

128 For example:	208 For example:

129 {	209 {

130 'src/': {	210 'src/': {

131 'a.cc': [	211 'a.cc': [

132 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),	212 (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),

133 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),	213 (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),

134 ]	214 ]

135 }	215 }

136 }	216 }

(...skipping 17 matching lines...) Expand all Loading...
154 stack_deps,	234 stack_deps,

155 ignore_cls=None):	235 ignore_cls=None):

156 """Finds results by matching stacktrace and changelogs in regression range.	236 """Finds results by matching stacktrace and changelogs in regression range.

157	237

158 This method only applies to those crashes with regression range.	238 This method only applies to those crashes with regression range.

159	239

160 Args:	240 Args:

161 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path	241 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path

162 to ChangeLogs that touched this file.	242 to ChangeLogs that touched this file.

163 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path	243 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path

164 to a list of stack inforamtion of this file. A file may occur in several	244 to a list of stack information of this file. A file may occur in several

165 frames, one stack info consist of a StackFrame and the callstack priority	245 frames, one stack info consist of a StackFrame and the callstack priority

166 of it.	246 of it.

167 stack_deps (dict): Represents all the dependencies shown in the crash stack.	247 stack_deps (dict): Represents all the dependencies shown in the crash stack.

168 ignore_cls (set): Set of reverted revisions.	248 ignore_cls (set): Set of reverted revisions.

169	249

170 Returns:	250 Returns:

171 A list of MatchResult instances with confidence and reason unset.	251 A list of MatchResult instances with confidence and reason unset.

172 """	252 """

173 match_results = MatchResults(ignore_cls)	253 match_results = MatchResults(ignore_cls)

174	254

175 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():	255 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():

176 file_to_changelogs = dep_to_file_to_changelogs[dep]	256 file_to_changelogs = dep_to_file_to_changelogs[dep]

177 git_repository = GitRepository(stack_deps[dep].repo_url,	257 git_repository = GitRepository(stack_deps[dep].repo_url,

178 HttpClientAppengine())	258 HttpClientAppengine())

179	259

180 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():	260 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():

181 for touched_file_path, changelogs in file_to_changelogs.iteritems():	261 for touched_file_path, changelogs in file_to_changelogs.iteritems():

182 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):	262 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):

183 continue	263 continue

184	264

185 blame = git_repository.GetBlame(crashed_file_path,	265 blame = git_repository.GetBlame(crashed_file_path,

186 stack_deps[dep].revision)	266 stack_deps[dep].revision)

187	267

188 # Generate/update each result(changelog) in changelogs, blame is used	268 # Generate/update each result(changelog) in changelogs, blame is used

189 # to calculate distance between touched lines and crashed lines in file.	269 # to calculate distance between touched lines and crashed lines in file.

190 match_results.GenerateMatchResults(	270 match_results.GenerateMatchResults(

191 crashed_file_path, dep, stack_infos, changelogs, blame)	271 crashed_file_path, dep, stack_infos, changelogs, blame)

192	272

193 return match_results.values()	273 return match_results.values()

194

195

196 def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n):

197 """Finds culprit results for crash.

198

199 Args:

200 stacktrace (Stacktrace): Parsed Stacktrace object.

201 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in

202 regression range.

203 crashed_deps (dict of Dependencys): Represents all the dependencies of

204 crashed revision.

205 top_n (int): Top n frames of each stack to be analyzed.

206

207 Returns:

208 List of Results, sorted by confidence from highest to lowest.

209 """

210 if not regression_deps_rolls:

211 return []

212

213 # Findit will only analyze the top n frames in each callstack.

214 stack_trace = Stacktrace([

215 CallStack(stack.priority,

216 format_type=stack.format_type,

217 language_type=stack.language_type,

218 frame_list=stack[:top_n])

219 for stack in stacktrace])

220

221 # We are only interested in the deps in crash stack (the callstack that

222 # caused the crash).

223 stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps)

224

225 # Get dep and file to changelogs, stack_info and blame dicts.

226 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(

227 regression_deps_rolls, stack_deps)

228 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(

229 stack_trace, stack_deps)

230

231 results = FindMatchResults(dep_to_file_to_changelogs,

232 dep_to_file_to_stack_infos,

233 stack_deps, ignore_cls)

234

235 if not results:

236 return []

237

238 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

239

240 # Set result.confidence, result.reasons and result.changed_files.

241 map(aggregated_scorer.Score, results)

242

243 # Filter all the 0 confidence results.

244 results = filter(lambda r: r.confidence != 0, results)

245 if not results:

246 return []

247

248 sorted_results = sorted(results, key=lambda r: -r.confidence)

249

250 if sorted_results[0].confidence > 0.999:

251 return sorted_results[:1]

252

253 return sorted_results[:3]

OLD	NEW

« appengine/findit/crash/azalea.py ('K') | « appengine/findit/crash/azalea.py ('k') | appengine/findit/crash/classifier.py » ('j') | appengine/findit/crash/crash_pipeline.py » ('J')