appengine/findit/crash/suspect.py - Issue 2707603002: [Predator] Generate all changelogs in regression ranges instead of only matched changelogs

Side by Side Diff: appengine/findit/crash/suspect.py

Issue 2707603002: [Predator] Generate all changelogs in regression ranges instead of only matched changelogs (Closed)

Patch Set: . Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 from collections import namedtuple	5 from collections import namedtuple

6	6

7 from libs.gitiles.commit_util import DistanceBetweenLineRanges	7 from libs.gitiles.commit_util import DistanceBetweenLineRanges

8	8

9	9

10 # TODO(wrengr): we should change things to use integers with None as

11 # "infinity", rather than using floats.

12 # TODO(http://crbug.com/644476): this class needs a better name.

13 class AnalysisInfo(namedtuple('AnalysisInfo',

14 ['min_distance', 'min_distance_frame'])):

15 __slots__ = ()

16

17 def __str__(self): # pragma: no cover

18 return ('%s(min_distance = %d, min_distance_frame = %s)'

19 % (self.__class__.__name__, self.min_distance, self.min_distance_frame))

20

21

22 # TODO(wrengr): it's not clear why the ``priority`` is stored at all,

23 # given that every use in this file discards it. ``Result.file_to_stack_infos``

24 # should just store pointers directly to the frames themselves rather

25 # than needing this intermediate object.

26 # TODO(http://crbug.com/644476): this class needs a better name.

27 class StackInfo(namedtuple('StackInfo', ['frame', 'priority'])):

28 """Pair of a frame and the ``priority`` of the ``CallStack`` it came from."""

29 __slots__ = ()

30

31 def __str__(self): # pragma: no cover

32 return ('%s(frame = %s, priority = %f)'

33 % (self.__class__.__name__, self.frame, self.priority))

34

35

36 # TODO(wrengr): maybe break this into separate unanalyzed suspect,	10 # TODO(wrengr): maybe break this into separate unanalyzed suspect,

37 # and analyzed suspect; so we can distinguish the input to	11 # and analyzed suspect; so we can distinguish the input to

38 # ``ChangelistClassifier`` from the output of it (which will amend each	12 # ``ChangelistClassifier`` from the output of it (which will amend each

39 # suspect with extra metadata like the confidence and reasons).	13 # suspect with extra metadata like the confidence and reasons).

40 class Suspect(object):	14 class Suspect(object):

41 """A suspected changelog to be classified as a possible ``Culprit``.	15 """A suspected changelog to be classified as a possible ``Culprit``.

42	16

43 That is, for each ``CrashReport`` the ``Predator.FindCulprit`` method	17 That is, for each ``CrashReport`` the ``Predator.FindCulprit`` method

44 receives, it will generate a bunch of these suspects and then inspect	18 receives, it will generate a bunch of these suspects and then inspect

45 them to determine the ``Culprit`` it returns.	19 them to determine the ``Culprit`` it returns.

46 """	20 """

47	21

48 def __init__(self, changelog, dep_path,	22 def __init__(self, changelog, dep_path,

49 confidence=None, reasons=None, changed_files=None):	23 confidence=None, reasons=None, changed_files=None):

50 if not isinstance(confidence, (int, float, type(None))): # pragma: no cover	24 if not isinstance(confidence, (int, float, type(None))): # pragma: no cover

51 raise TypeError(	25 raise TypeError(

52 'In the ``confidence`` argument to the Result constructor, '	26 'In the ``confidence`` argument to the Result constructor, '

53 'expected a number or None, but got a %s object instead.'	27 'expected a number or None, but got a %s object instead.'

54 % confidence.__class__.__name__)	28 % confidence.__class__.__name__)

55 self.changelog = changelog	29 self.changelog = changelog

56 self.dep_path = dep_path	30 self.dep_path = dep_path

57 self.confidence = None if confidence is None else float(confidence)	31 self.confidence = None if confidence is None else float(confidence)

58 self.reasons = reasons	32 self.reasons = reasons

59 self.changed_files = changed_files	33 self.changed_files = changed_files

60	34

61 # TODO(katesonia): These file_to_* should be deprecated once we deprecate

62 # the scorer-based changelist_classifier.

63 # TODO(wrengr): (a) make these two fields private/readonly

64 # TODO(wrengr): (b) zip them together.

65 # TODO(wrengr): (c) move them to the relevant features instead.

66 self.file_to_stack_infos = {}

67 self.file_to_analysis_info = {}

68

69 def ToDict(self):	35 def ToDict(self):

70 return {	36 return {

71 'url': self.changelog.commit_url,	37 'url': self.changelog.commit_url,

72 'review_url': self.changelog.code_review_url,	38 'review_url': self.changelog.code_review_url,

73 'revision': self.changelog.revision,	39 'revision': self.changelog.revision,

74 'project_path': self.dep_path,	40 'project_path': self.dep_path,

75 'author': self.changelog.author.email,	41 'author': self.changelog.author.email,

76 'time': str(self.changelog.author.time),	42 'time': str(self.changelog.author.time),

77 'reasons': self.reasons,	43 'reasons': self.reasons,

78 'changed_files': self.changed_files,	44 'changed_files': self.changed_files,

79 'confidence': self.confidence,	45 'confidence': self.confidence,

80 }	46 }

81	47

82 # TODO(katesonia): This is unusable for logging because in all the

83 # cases that need logging it returns the empty string! We should print

84 # this out in a more useful way (e.g., how CrashConfig is printed)

85 # so that callers don't have to use ``str(result.ToDict())`` instead. If

86 # we want a method that does what this one does, we should give it a

87 # different name that indicates what it's actually printing out.

88 def ToString(self):	48 def ToString(self):

89 if not self.file_to_stack_infos:	49 return str(self.ToDict())

90 return ''

91

92 lines = []

93 for file_path, stack_infos in self.file_to_stack_infos.iteritems():

94 line_parts = []

95 for frame, _ in stack_infos:

96 line_parts.append('frame #%d' % frame.index)

97

98 lines.append('Changed file %s crashed in %s' % (

99 file_path, ', '.join(line_parts)))

100

101 return '\n'.join(lines)

102	50

103 def __str__(self):	51 def __str__(self):

104 return self.ToString()	52 return self.ToString()

105

106

107 def _UpdateSuspect(self, file_path, stack_infos, blame):

108 """Updates the ``Suspect`` with file path and its stack_infos and blame.

109

110 When a file_path is found both shown in stacktrace and touched by

111 the revision of this result, update result with the information of

112 this file.

113

114 Inserts the file path and its stack infos, and updates the min distance

115 if less distance is found between touched lines of this result and

116 crashed lines in the file path.

117

118 Args:

119 file_path (str): File path of the crashed file.

120 stack_infos (list of StackInfo): List of the frames of this file

121 together with their callstack priorities.

122 blame (Blame): Blame object of this file.

123 """

124 self.file_to_stack_infos[file_path] = stack_infos

125

126 if not blame:

127 return

128

129 min_distance = float('inf')

130 min_distance_frame = stack_infos[0].frame

131 for region in blame:

132 if region.revision != self.changelog.revision:

133 continue

134

135 region_start = region.start

136 region_end = region_start + region.count - 1

137 for frame, _ in stack_infos:

138 frame_start = frame.crashed_line_numbers[0]

139 frame_end = frame.crashed_line_numbers[-1]

140 distance = DistanceBetweenLineRanges((frame_start, frame_end),

141 (region_start, region_end))

142 if distance < min_distance:

143 min_distance = distance

144 min_distance_frame = frame

145

146 self.file_to_analysis_info[file_path] = AnalysisInfo(

147 min_distance = min_distance,

148 min_distance_frame = min_distance_frame,

149 )

150

151

152 class SuspectMap(dict):

153 """A map from revisions to the ``Suspect`` object for that revision."""

154

155 def __init__(self, ignore_cls=None):

156 super(SuspectMap, self).__init__()

157 self._ignore_cls = ignore_cls

158

159 def GenerateSuspects(self, file_path, dep_path, stack_infos, changelogs,

160 blame):

161 """Compute suspects from a list of CLs, and store them in this map.

162

163 Suspects are generated based on newly found file path, its stack_infos,

164 and all the changelogs that touched this file in the dep in regression

165 ranges; reverted changelogs should be ignored.

166

167 Args:

168 file_path (str): File path of the crashed file.

169 dep_path (str): Path of the dependency of the file.

170 stack_infos (list): List of stack_info dicts, represents frames of this

171 file and the callstack priorities of those frames.

172 changelogs (list): List of Changelog objects in the dep in regression

173 range which touched the file.

174 blame (Blame): Blame of the file.

175 """

176 for changelog in changelogs:

177 if self._ignore_cls and changelog.revision in self._ignore_cls:

178 continue

179

180 try:

181 suspect = self[changelog.revision]

182 except KeyError:

183 suspect = Suspect(changelog, dep_path)

184 self[changelog.revision] = suspect

185

186 suspect._UpdateSuspect(file_path, stack_infos, blame)

OLD	NEW

« no previous file with comments | « appengine/findit/crash/stacktrace.py ('k') | appengine/findit/crash/test/changelist_classifier_test.py » ('j') | no next file with comments »