appengine/findit/crash/findit_for_crash.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Side by Side Diff: appengine/findit/crash/findit_for_crash.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: Finally fixed the mock tests! Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.

4

5 from collections import defaultdict

6

7 from common.diff import ChangeType

8 from common.git_repository import GitRepository

9 from common.http_client_appengine import HttpClientAppengine

10 from crash import crash_util

11 from crash.stacktrace import CallStack

12 from crash.stacktrace import Stacktrace

13 from crash.results import MatchResults

14 from crash.scorers.aggregated_scorer import AggregatedScorer

15 from crash.scorers.min_distance import MinDistance

16 from crash.scorers.top_frame_index import TopFrameIndex

17

18

def GetDepsInCrashStack(crash_stack, crash_deps):
  """Collects the dependencies that appear in the crash stack.

  Args:
    crash_stack (iterable of frames): Frames of the callstack that caused the
      crash; each frame exposes a ``dep_path`` attribute. May be None or empty.
    crash_deps (dict): Maps dep_path to the dependency of the crashed revision.

  Returns:
    A dict mapping every non-empty ``dep_path`` found in crash_stack to
    ``crash_deps[dep_path]``. An empty dict when there is no crash stack.

  Raises:
    KeyError: If a frame's dep_path is not a key of crash_deps.
  """
  if not crash_stack:
    return {}

  # Frames without a dep_path (None or '') cannot be attributed to any dep.
  return {
      frame.dep_path: crash_deps[frame.dep_path]
      for frame in crash_stack
      if frame.dep_path
  }

30

31

32 # TODO(katesonia): Remove the repository argument after the refactoring CL is committed.

def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,
                                       repository):
  """Groups the regression-range changelogs by dep and by touched file.

  Only deps present in both stack_deps and regression_deps_rolls are examined;
  the range of changelogs fetched for a dep is taken from its DependencyRoll.
  Changelogs that revert another revision are not grouped; the revision they
  revert is collected into ignore_cls instead.

  Note that ``repository.repo_url`` is overwritten for every examined dep.

  Args:
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack.
    repository (Repository): Repository to get changelogs from.

  Returns:
    A tuple (dep_to_file_to_changelogs, ignore_cls).

    dep_to_file_to_changelogs (defaultdict): Maps dep_path to a dict mapping
      the new path of a touched (non-deleted) file to the list of ChangeLogs
      that touched it. For example:
      {
          'src/': {
              'a.cc': [
                  ChangeLog.FromDict({
                      'revision': 'bcfd',
                      'commit_position': 175976,
                      'touched_files': [
                          {
                              'change_type': 'add',
                              'new_path': 'a.cc',
                              'old_path': 'b/a.cc'
                          },
                          ...
                      ],
                      'reverted_revision': None,
                      ...
                  }),
              ]
          }
      }

    ignore_cls (set): A set of reverted revisions.
  """
  reverted_revisions = set()
  changelogs_by_dep_and_file = defaultdict(lambda: defaultdict(list))

  # A dep that is not in the regression range cannot contain the culprit.
  rolled_deps = (dep for dep in stack_deps if dep in regression_deps_rolls)

  for dep in rolled_deps:
    roll = regression_deps_rolls[dep]

    # Point the shared repository at this dep before querying it.
    repository.repo_url = roll.repo_url

    for changelog in repository.GetChangeLogs(roll.old_revision,
                                              roll.new_revision):
      reverted = changelog.reverted_revision
      if reverted:
        # A reverting cl is never a suspect itself; remember what it reverted
        # so that revision can be filtered out later.
        reverted_revisions.add(reverted)
        continue

      for touched_file in changelog.touched_files:
        # Deleted files are skipped.
        if touched_file.change_type != ChangeType.DELETE:
          changelogs_by_dep_and_file[dep][touched_file.new_path].append(
              changelog)

  return changelogs_by_dep_and_file, reverted_revisions

114

115

def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):
  """Groups the frames of a stacktrace by dep and by file.

  Frames whose dep_path is not a key of stack_deps are ignored.

  Args:
    stacktrace (Stacktrace): Parsed stacktrace object; iterating it yields
      callstacks, and iterating a callstack yields its frames.
    stack_deps (dict): Represents all the dependencies shown in
      the crash stack.

  Returns:
    A defaultdict mapping dep path to a dict mapping file path to a list of
    stack infos for that file. A file may occur in several frames; one stack
    info is a (StackFrame, callstack priority) tuple.

    For example:
    {
        'src/': {
            'a.cc': [
                (StackFrame(0, 'src/', '', 'func', 'a.cc', [1]), 0),
                (StackFrame(2, 'src/', '', 'func', 'a.cc', [33]), 0),
            ]
        }
    }
  """
  stack_infos_by_dep_and_file = defaultdict(lambda: defaultdict(list))

  for callstack in stacktrace:
    for frame in callstack:
      dep_path = frame.dep_path
      # Only dependencies that show up in the crash stack are of interest.
      if dep_path in stack_deps:
        stack_info = (frame, callstack.priority)
        stack_infos_by_dep_and_file[dep_path][frame.file_path].append(
            stack_info)

  return stack_infos_by_dep_and_file

153

154

155 # TODO(katesonia): Remove the repository argument after the refactoring CL is committed.

def FindMatchResults(dep_to_file_to_changelogs,
                     dep_to_file_to_stack_infos,
                     stack_deps, repository,
                     ignore_cls=None):
  """Matches crashed files against files touched in the regression range.

  This method only applies to those crashes with regression range.

  Note that ``repository.repo_url`` is overwritten for every dep visited.

  Args:
    dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
      to ChangeLogs that touched this file.
    dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
      to a list of stack information of this file. A file may occur in several
      frames; one stack info consists of a StackFrame and the callstack
      priority of it.
    stack_deps (dict): Represents all the dependencies shown in the crash stack.
      Each value is read for its ``repo_url`` and ``revision``.
    repository (Repository): Repository to get blame from.
    ignore_cls (set): Set of reverted revisions, handed to MatchResults.

  Returns:
    The ``values()`` of the populated MatchResults: MatchResult instances with
    confidence and reason unset.
  """
  results = MatchResults(ignore_cls)

  for dep, stack_infos_by_file in dep_to_file_to_stack_infos.iteritems():
    changelogs_by_file = dep_to_file_to_changelogs[dep]
    dependency = stack_deps[dep]
    repository.repo_url = dependency.repo_url

    for crashed_path, stack_infos in stack_infos_by_file.iteritems():
      for touched_path, changelogs in changelogs_by_file.iteritems():
        if crash_util.IsSameFilePath(crashed_path, touched_path):
          # Blame of the crashed file at the crashed revision; it is used to
          # calculate the distance between touched lines and crashed lines.
          blame = repository.GetBlame(crashed_path, dependency.revision)

          # Generate/update one result per changelog in changelogs.
          results.GenerateMatchResults(
              crashed_path, dep, stack_infos, changelogs, blame)

  return results.values()

198

199

200 # TODO(katesonia): Remove the repository argument after the refactoring CL is committed.

def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n,
                   repository):
  """Finds culprit results for crash.

  Args:
    stacktrace (Stacktrace): Parsed Stacktrace object.
    regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
      regression range.
    crashed_deps (dict of Dependencys): Represents all the dependencies of
      crashed revision.
    top_n (int): Top n frames of each stack to be analyzed.
    repository (Repository): Repository to get changelogs and blame from.

  Returns:
    List of Results with non-zero confidence, sorted by confidence from
    highest to lowest. At most three results are returned, and only the top
    one when its confidence exceeds 0.999. An empty list is returned when
    there is no regression range or nothing matched.
  """
  if not regression_deps_rolls:
    return []

  # Findit will only analyze the top n frames in each callstack.
  stack_trace = Stacktrace([
      CallStack(stack.priority,
                format_type=stack.format_type,
                language_type=stack.language_type,
                frame_list=stack[:top_n])
      for stack in stacktrace])

  # We are only interested in the deps in crash stack (the callstack that
  # caused the crash).
  stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps)

  # Get dep and file to changelogs, stack_info and blame dicts.
  dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
      regression_deps_rolls, stack_deps, repository)
  dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
      stack_trace, stack_deps)

  results = FindMatchResults(dep_to_file_to_changelogs,
                             dep_to_file_to_stack_infos,
                             stack_deps, repository, ignore_cls)

  if not results:
    return []

  aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

  # Set result.confidence, result.reasons and result.changed_files.
  # Score() is called purely for its side effects, so use an explicit loop:
  # a lazy map() (as in Python 3) would never run it.
  for result in results:
    aggregated_scorer.Score(result)

  # Filter all the 0 confidence results. A list (not a lazy filter object) is
  # required so that the emptiness check below is meaningful.
  results = [result for result in results if result.confidence != 0]
  if not results:
    return []

  sorted_results = sorted(results, key=lambda r: -r.confidence)

  # A near-certain top result makes the runners-up noise; report it alone.
  if sorted_results[0].confidence > 0.999:
    return sorted_results[:1]

  return sorted_results[:3]

OLD	NEW

« no previous file with comments | « appengine/findit/crash/findit_for_clusterfuzz.py ('k') | appengine/findit/crash/occurrence.py » ('j') | no next file with comments »