appengine/findit/crash/suspect.py - Issue 2588513002: [Predator] renamed "Result" to "Suspect"

Side by Side Diff: appengine/findit/crash/suspect.py

Issue 2588513002: [Predator] renamed "Result" to "Suspect" (Closed)

Patch Set: Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« appengine/findit/crash/loglinear/changelist_features/test/min_distance_test.py ('K') | « appengine/findit/crash/scorers/test/top_frame_index_test.py ('k') | appengine/findit/crash/test/changelist_classifier_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 from collections import namedtuple	5 from collections import namedtuple

6	6

7	7

8 # TODO(wrengr): we should change things to use integers with None as	8 # TODO(wrengr): we should change things to use integers with None as

9 # \"infinity\", rather than using floats.	9 # \"infinity\", rather than using floats.

10 # TODO(http://crbug.com/644476): this class needs a better name.	10 # TODO(http://crbug.com/644476): this class needs a better name.

(...skipping 12 matching lines...) Expand all Loading...
23 # than needing this intermediate object.	23 # than needing this intermediate object.

24 # TODO(http://crbug.com/644476): this class needs a better name.	24 # TODO(http://crbug.com/644476): this class needs a better name.

25 class StackInfo(namedtuple('StackInfo', ['frame', 'priority'])):	25 class StackInfo(namedtuple('StackInfo', ['frame', 'priority'])):

26 """Pair of a frame and the ``priority`` of the ``CallStack`` it came from."""	26 """Pair of a frame and the ``priority`` of the ``CallStack`` it came from."""

27 __slots__ = ()	27 __slots__ = ()

28	28

29 def __str__(self): # pragma: no cover	29 def __str__(self): # pragma: no cover

30 return 'StackInfo(frame = %s, priority = %f)' % (self.frame, self.priority)	30 return 'StackInfo(frame = %s, priority = %f)' % (self.frame, self.priority)

31	31

32	32

33 # TODO(http://crbug.com/644476): this class needs a better name.	33 # TODO(wrengr): break this into separate unanalyzed suspect, and analyzed

34 class Result(object):	34 # suspect; so we can distinguish the input to ``ChangelistClassifier``

35 """Represents findit culprit result."""	35 # from the output of it (which will amend each suspect with extra metadata

	36 # like the confidence and reasons).

	37 class Suspect(object):

	38 """A suspected changelog to be classified as a possible ``Culprit``.

	39

	40 That is, for each ``CrashReport`` the ``Predator.FindCulprit`` method

	41 receives, it will generate a bunch of these suspects and then inspect

	42 them to determine the ``Culprit`` it returns.

	43 """

36	44

37 def __init__(self, changelog, dep_path,	45 def __init__(self, changelog, dep_path,

38 confidence=None, reasons=None, changed_files=None):	46 confidence=None, reasons=None, changed_files=None):

39 assert isinstance(confidence, (int, float, type(None))), TypeError(	47 assert isinstance(confidence, (int, float, type(None))), TypeError(

40 'In the ``confidence`` argument to the Result constructor, '	48 'In the ``confidence`` argument to the Result constructor, '

41 'expected a number or None, but got a %s object instead.'	49 'expected a number or None, but got a %s object instead.'

42 % confidence.__class__.__name__)	50 % confidence.__class__.__name__)

43 self.changelog = changelog	51 self.changelog = changelog

44 self.dep_path = dep_path	52 self.dep_path = dep_path

45 self.confidence = None if confidence is None else float(confidence)	53 self.confidence = None if confidence is None else float(confidence)

46 self.reasons = reasons	54 self.reasons = reasons

47 self.changed_files = changed_files	55 self.changed_files = changed_files

48	56

49 # TODO(wrengr): (a) make these two fields private/readonly	57 # TODO(wrengr): (a) make these two fields private/readonly

50 # TODO(wrengr): (b) zip them together.	58 # TODO(wrengr): (b) zip them together.

	59 # TODO(wrengr): (c) move them to the relevant features instead.

51 self.file_to_stack_infos = {}	60 self.file_to_stack_infos = {}

52 self.file_to_analysis_info = {}	61 self.file_to_analysis_info = {}

53	62

54 def ToDict(self):	63 def ToDict(self):

55 return {	64 return {

56 'url': self.changelog.commit_url,	65 'url': self.changelog.commit_url,

57 'review_url': self.changelog.code_review_url,	66 'review_url': self.changelog.code_review_url,

58 'revision': self.changelog.revision,	67 'revision': self.changelog.revision,

59 'project_path': self.dep_path,	68 'project_path': self.dep_path,

60 'author': self.changelog.author_email,	69 'author': self.changelog.author_email,

(...skipping 21 matching lines...) Expand all Loading...
82	91

83 lines.append('Changed file %s crashed in %s' % (	92 lines.append('Changed file %s crashed in %s' % (

84 file_path, ', '.join(line_parts)))	93 file_path, ', '.join(line_parts)))

85	94

86 return '\n'.join(lines)	95 return '\n'.join(lines)

87	96

88 def __str__(self):	97 def __str__(self):

89 return self.ToString()	98 return self.ToString()

90	99

91	100

92 class MatchResult(Result):	101 def _UpdateSuspect(suspect, file_path, stack_infos, blame):

93 """Represents findit culprit result got from match algorithm."""	102 """Updates a ``Suspect`` with file path and its stack_infos and blame.

94	103

95 def Update(self, file_path, stack_infos, blame):	104 When a file_path is found both shown in stacktrace and touched by

96 """Updates a match result with file path and its stack_infos and blame.	105 the revision of this result, update result with the information of

	106 this file.

97	107

98 When a file_path is found both shown in stacktrace and touched by	108 Inserts the file path and its stack infos, and updates the min distance

99 the revision of this result, update result with the information of	109 if less distance is found between touched lines of this result and

100 this file.	110 crashed lines in the file path.

101	111

102 Inserts the file path and its stack infos, and updates the min distance	112 Args:

103 if less distance is found between touched lines of this result and	113 suspect (Suspect): the suspect to be updated.

104 crashed lines in the file path.	114 file_path (str): File path of the crashed file.

	115 stack_infos (list of StackInfo): List of the frames of this file

	116 together with their callstack priorities.

	117 blame (Blame): Blame oject of this file.

	118 """

	119 suspect.file_to_stack_infos[file_path] = stack_infos

105	120

106 Args:	121 if not blame:

107 file_path (str): File path of the crashed file.	122 return

108 stack_infos (list of StackInfo): List of the frames of this file

109 together with their callstack priorities.

110 blame (Blame): Blame oject of this file.

111 """

112 self.file_to_stack_infos[file_path] = stack_infos

113	123

114 if not blame:	124 min_distance = float('inf')

115 return	125 min_distance_frame = stack_infos[0][0]

	126 for region in blame:

	127 if region.revision != suspect.changelog.revision:

	128 continue

116	129

117 min_distance = float('inf')	130 region_start = region.start

118 min_distance_frame = stack_infos[0][0]	131 region_end = region_start + region.count - 1

119 for region in blame:	132 for frame, _ in stack_infos:

120 if region.revision != self.changelog.revision:	133 frame_start = frame.crashed_line_numbers[0]

121 continue	134 frame_end = frame.crashed_line_numbers[-1]

	135 distance = _DistanceBetweenLineRanges((frame_start, frame_end),

	136 (region_start, region_end))

	137 if distance < min_distance:

	138 min_distance = distance

	139 min_distance_frame = frame

122	140

123 region_start = region.start	141 suspect.file_to_analysis_info[file_path] = AnalysisInfo(

124 region_end = region_start + region.count - 1	142 min_distance = min_distance,

125 for frame, _ in stack_infos:	143 min_distance_frame = min_distance_frame,

126 frame_start = frame.crashed_line_numbers[0]	144 )

127 frame_end = frame.crashed_line_numbers[-1]

128 distance = _DistanceBetweenLineRanges((frame_start, frame_end),

129 (region_start, region_end))

130 if distance < min_distance:

131 min_distance = distance

132 min_distance_frame = frame

133

134 self.file_to_analysis_info[file_path] = AnalysisInfo(

135 min_distance = min_distance,

136 min_distance_frame = min_distance_frame,

137 )

138	145

139	146

140 def _DistanceBetweenLineRanges((start1, end1), (start2, end2)):	147 def _DistanceBetweenLineRanges((start1, end1), (start2, end2)):

141 """Given two ranges, compute the (unsigned) distance between them.	148 """Given two ranges, compute the (unsigned) distance between them.

142	149

143 Args:	150 Args:

144 start1: the start of the first range	151 start1: the start of the first range

145 end1: the end of the first range. Must be greater than start1.	152 end1: the end of the first range. Must be greater than start1.

146 start2: the start of the second range	153 start2: the start of the second range

147 end2: the end of the second range. Must be greater than start2.	154 end2: the end of the second range. Must be greater than start2.

148	155

149 Returns:	156 Returns:

150 If the end of the earlier range comes before the start of the later	157 If the end of the earlier range comes before the start of the later

151 range, then the difference between those points. Otherwise, returns	158 range, then the difference between those points. Otherwise, returns

152 zero (because the ranges overlap)."""	159 zero (because the ranges overlap)."""

153 assert end1 >= start1, ValueError(	160 assert end1 >= start1, ValueError(

154 'the first range is empty: %d < %d' % (end1, start1))	161 'the first range is empty: %d < %d' % (end1, start1))

155 assert end2 >= start2, ValueError(	162 assert end2 >= start2, ValueError(

156 'the second range is empty: %d < %d' % (end2, start2))	163 'the second range is empty: %d < %d' % (end2, start2))

157 # There are six possible cases, but in all the cases where the two	164 # There are six possible cases, but in all the cases where the two

158 # ranges overlap, the latter two differences will be negative.	165 # ranges overlap, the latter two differences will be negative.

159 return max(0, start2 - end1, start1 - end2)	166 return max(0, start2 - end1, start1 - end2)

160	167

161	168

162 class MatchResults(dict):	169 class Suspects(dict):

163 """A map from revisions to the MatchResult object for that revision."""	170 """A map from revisions to the ``Suspect`` object for that revision."""

164	171

165 def __init__(self, ignore_cls=None):	172 def __init__(self, ignore_cls=None):

166 super(MatchResults, self).__init__()	173 super(Suspects, self).__init__()

167 self._ignore_cls = ignore_cls	174 self._ignore_cls = ignore_cls

168	175

169 def GenerateMatchResults(self, file_path, dep_path,	176 def GenerateSuspects(self, file_path, dep_path, stack_infos, changelogs,

170 stack_infos, changelogs, blame):	177 blame):
	stgao 2016/12/19 18:30:31 nit: not aligned nit: not aligned wrengr 2016/12/19 18:33:42 Follows the 4-space indentation rule, which is all Show quoted text On 2016/12/19 18:30:31, stgao (slow on Monday) wrote: > nit: not aligned Follows the 4-space indentation rule, which is allowed in as an alternative to the alignment rule.
171 """Compute match results from a list of CLs, and store them.	178 """Compute suspects from a list of CLs, and store them.

172	179

173 Match results are generated based on newly found file path, its stack_infos,	180 Suspects are generated based on newly found file path, its stack_infos,

174 and all the changelogs that touched this file in the dep in regression	181 and all the changelogs that touched this file in the dep in regression

175 ranges, those reverted changelogs should be ignored.	182 ranges, those reverted changelogs should be ignored.

176	183

177 Args:	184 Args:

178 file_path (str): File path of the crashed file.	185 file_path (str): File path of the crashed file.

179 dep_path (str): Path of the dependency of the file.	186 dep_path (str): Path of the dependency of the file.

180 stack_infos (list): List of stack_info dicts, represents frames of this	187 stack_infos (list): List of stack_info dicts, represents frames of this

181 file and the callstack priorities of those frames.	188 file and the callstack priorities of those frames.

182 changelogs (list): List of Changelog objects in the dep in regression	189 changelogs (list): List of Changelog objects in the dep in regression

183 range which touched the file.	190 range which touched the file.

184 blame (Blame): Blame of the file.	191 blame (Blame): Blame of the file.

185 """	192 """

186 for changelog in changelogs:	193 for changelog in changelogs:

187 if self._ignore_cls and changelog.revision in self._ignore_cls:	194 if self._ignore_cls and changelog.revision in self._ignore_cls:

188 continue	195 continue

189	196

190 if changelog.revision not in self:	197 if changelog.revision not in self:

191 self[changelog.revision] = MatchResult(changelog, dep_path)	198 self[changelog.revision] = Suspect(changelog, dep_path)

192	199

193 match_result = self[changelog.revision]	200 _UpdateSuspect(self[changelog.revision], file_path, stack_infos, blame)

194 match_result.Update(file_path, stack_infos, blame)

OLD	NEW