Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(182)

Side by Side Diff: appengine/findit/crash/loglinear/changelist_features/min_distance.py

Issue 2704843002: [Predator] Add TouchCrashedDirectory feature. (Closed)
Patch Set: . Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging 5 import logging
6 import math 6 import math
7 7
8 from crash.loglinear.changelist_classifier import StackInfo 8 from crash.crash_match import FrameInfo
9 from crash.loglinear.feature import ChangedFile 9 from crash.loglinear.feature import ChangedFile
10 from crash.loglinear.feature import Feature 10 from crash.loglinear.feature import Feature
11 from crash.loglinear.feature import FeatureValue 11 from crash.loglinear.feature import FeatureValue
12 from crash.loglinear.feature import LogLinearlyScaled 12 from crash.loglinear.feature import LogLinearlyScaled
13 from libs.gitiles.diff import ChangeType 13 from libs.gitiles.diff import ChangeType
14 import libs.math.logarithms as lmath 14 import libs.math.logarithms as lmath
15 15
16 16
17 class ModifiedFrameInfo(object): 17 class Distance(object):
18 """Represents the closest frame to a changelog which modified it. 18 """Represents the closest frame to a changelog which modified it.
19 19
20 The "closest" means that the distance between crashed lines in the frame and 20 The "closest" means that the distance between crashed lines in the frame and
21 touched lines in a changelog is minimum. 21 touched lines in a changelog is minimum.
22 22
23 Properties: 23 Properties:
24 distance (int or float('inf')): The distance between crashed lines and 24 distance (int or float('inf')): The distance between crashed lines and
25 touched lines, if a changelog doesn't show in blame of the crashed file of 25 touched lines, if a changelog doesn't show in blame of the crashed file of
26 the crashed version (either it didn't touch the crashed file or it got 26 the crashed version (either it didn't touch the crashed file or it got
27 overwritten by other cls), the distance would be infinite. 27 overwritten by other cls), the distance would be infinite.
28 frame (StackFrame): The frame which got modified. 28 frame (StackFrame): The frame which has the minimum distance to touched
29 lines.
29 """ 30 """
30 31
31 def __init__(self, distance, frame): 32 def __init__(self, distance, frame):
32 self.distance = distance 33 self.distance = distance
33 self.frame = frame 34 self.frame = frame
34 35
35 def Update(self, distance, frame): 36 def Update(self, distance, frame):
36 if distance < self.distance: 37 if distance < self.distance:
37 self.distance = distance 38 self.distance = distance
38 self.frame = frame 39 self.frame = frame
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
98 Args: 99 Args:
99 maximum (float): An upper bound on the min_distance to consider. 100 maximum (float): An upper bound on the min_distance to consider.
100 """ 101 """
101 self._get_repository = get_repository 102 self._get_repository = get_repository
102 self._maximum = maximum 103 self._maximum = maximum
103 104
104 @property 105 @property
105 def name(self): 106 def name(self):
106 return 'MinDistance' 107 return 'MinDistance'
107 108
108 def DistanceBetweenTouchedFileAndStacktrace( 109 def DistanceBetweenTouchedFileAndFrameInfos(
109 self, revision, touched_file, stack_infos, crash_dependency): 110 self, revision, touched_file, frame_infos, crash_dependency):
110 """Gets ``ModifiedFrameInfo`` between touched and crashed lines in a file. 111 """Gets ``Distance`` between touched and crashed lines in a file.
111 112
112 Args: 113 Args:
113 revision (str): The revision of the suspect. 114 revision (str): The revision of the suspect.
114 touched_file (FileChangeInfo): The file touched by the suspect. 115 touched_file (FileChangeInfo): The file touched by the suspect.
115 stack_infos (list of StackInfos): List of information of frames in the 116 frame_infos (list of FrameInfos): List of information of frames in the
116 stacktrace which contains ``touched_file``. 117 stacktrace which contains ``touched_file``.
117 crash_dependency (Dependency): The dependency of crashed revision. N.B. The 118 crash_dependency (Dependency): The dependency of crashed revision. N.B. The
118 crashed revision is the revision where crash happens, however the 119 crashed revision is the revision where crash happens, however the
119 first parameter ``revision`` is the revision of the suspect cl, which 120 first parameter ``revision`` is the revision of the suspect cl, which
120 must be before the crashed revision. 121 must be before the crashed revision.
121 122
122 Returns: 123 Returns:
123 ``ModifiedFrameInfo`` object of touched file and stacktrace. 124 ``Distance`` object of touched file and stacktrace.
124 """ 125 """
125 # TODO(katesonia) ``GetBlame`` is called for the same file every time 126 # TODO(katesonia) ``GetBlame`` is called for the same file every time
126 # there is a suspect that touched it, which can be very expensive. 127 # there is a suspect that touched it, which can be very expensive.
127 # The blame information can either be cached through repository (cached 128 # The blame information can either be cached through repository (cached
128 # by memcache based on repo url, revision and file path), or this 129 # by memcache based on repo url, revision and file path), or this
129 # function can have a static in-memory cache to cache blame for touched 130 # function can have a static in-memory cache to cache blame for touched
130 # files, however since blame information is big, it's not a good idea to 131 # files, however since blame information is big, it's not a good idea to
131 # keep it in memory. 132 # keep it in memory.
132 repository = self._get_repository(crash_dependency.repo_url) 133 repository = self._get_repository(crash_dependency.repo_url)
133 blame = repository.GetBlame(touched_file.new_path, 134 blame = repository.GetBlame(touched_file.new_path,
134 crash_dependency.revision) 135 crash_dependency.revision)
135 if not blame: 136 if not blame:
136 logging.warning('Failed to get blame information for %s', 137 logging.warning('Failed to get blame information for %s',
137 touched_file.new_path) 138 touched_file.new_path)
138 return None 139 return None
139 140
140 # Distance of this file. 141 # Distance of this file.
141 modified_frame_info = ModifiedFrameInfo(float('inf'), None) 142 distance = Distance(float('inf'), None)
142 for region in blame: 143 for region in blame:
143 if region.revision != revision: 144 if region.revision != revision:
144 continue 145 continue
145 146
146 region_start = region.start 147 region_start = region.start
147 region_end = region_start + region.count - 1 148 region_end = region_start + region.count - 1
148 for stack_info in stack_infos: 149 for frame_info in frame_infos:
149 frame_start = stack_info.frame.crashed_line_numbers[0] 150 frame_start = frame_info.frame.crashed_line_numbers[0]
150 frame_end = stack_info.frame.crashed_line_numbers[-1] 151 frame_end = frame_info.frame.crashed_line_numbers[-1]
151 distance = DistanceBetweenLineRanges((frame_start, frame_end), 152 line_distance = DistanceBetweenLineRanges((frame_start, frame_end),
152 (region_start, region_end)) 153 (region_start, region_end))
153 modified_frame_info.Update(distance, stack_info.frame) 154 distance.Update(line_distance, frame_info.frame)
154 155
155 return modified_frame_info 156 return distance
156 157
157 def __call__(self, report): 158 def __call__(self, report):
158 """Returns the scaled min ``ModifiedFrameInfo.distance`` across all files. 159 """Returns the scaled min ``Distance.distance`` across all files.
159 160
160 Args: 161 Args:
161 report (CrashReport): the crash report being analyzed. 162 report (CrashReport): the crash report being analyzed.
162 163
163 Returns: 164 Returns:
164 A function from ``Suspect`` to the minimum distance between (the code 165 A function from ``Suspect`` to the minimum distance between (the code
165 for) a stack frame in that suspect and the CL in that suspect, as a 166 for) a stack frame in that suspect and the CL in that suspect, as a
166 log-domain ``float``. 167 log-domain ``float``.
167 """ 168 """
168 def FeatureValueGivenReport(suspect, touched_file_to_stack_infos): 169 def FeatureValueGivenReport(suspect, matches):
169 """Function mapping suspect related data to MinDistance FeatureValue. 170 """Function mapping suspect related data to MinDistance FeatureValue.
170 171
171 Args: 172 Args:
172 suspect (Suspect): The suspected changelog and some meta information 173 suspect (Suspect): The suspected changelog and some meta information
173 about it. 174 about it.
174 touched_file_to_stack_infos(dict): Dict mapping ``FileChangeInfo`` to 175 matches(dict): Dict mapping crashed group(CrashedFile, CrashedDirectory)
175 a list of ``StackInfo``s representing all the frames that the suspect 176 to a list of ``Match``s representing all frames and all touched files
176 touched. 177 matched in the same crashed group(same crashed file or crashed
178 directory).
177 179
178 Returns: 180 Returns:
179 The ``FeatureValue`` of this feature. 181 The ``FeatureValue`` of this feature.
180 """ 182 """
181 if not touched_file_to_stack_infos: 183 if not matches:
182 FeatureValue(self.name, lmath.LOG_ZERO, 184 FeatureValue(self.name, lmath.LOG_ZERO,
183 'No file got touched by the suspect.', None) 185 'No file got touched by the suspect.', None)
184 186
185 modified_frame_info = ModifiedFrameInfo(float('inf'), None) 187 distance = Distance(float('inf'), None)
186 touched_file_to_modified_frame_info = {} 188 touched_file_to_distance = {}
187 for touched_file, stack_infos in touched_file_to_stack_infos.iteritems(): 189 for match in matches.itervalues():
190 if len(match.touched_files) != 1:
191 logging.warning('There should be only one touched file per crashed '
192 'file group.')
193 continue
194
195 touched_file = match.touched_files[0]
188 # Records the closest frame (the frame has minimum distance between 196 # Records the closest frame (the frame has minimum distance between
189 # crashed lines and touched lines) for each touched file of the suspect. 197 # crashed lines and touched lines) for each touched file of the suspect.
190 modified_frame_info_per_file = ( 198 distance_per_file = self.DistanceBetweenTouchedFileAndFrameInfos(
191 self.DistanceBetweenTouchedFileAndStacktrace( 199 suspect.changelog.revision, touched_file,
192 suspect.changelog.revision, touched_file, stack_infos, 200 match.frame_infos, report.dependencies[suspect.dep_path])
193 report.dependencies[suspect.dep_path]))
194 # Failed to get blame information of a file. 201 # Failed to get blame information of a file.
195 if not modified_frame_info_per_file: 202 if not distance_per_file:
196 logging.warning('suspect\'s change cannot be blamed due to lack of' 203 logging.warning('suspect\'s change cannot be blamed due to lack of'
197 'blame information for crashed file %s' % 204 'blame information for crashed file %s' %
198 touched_file.new_path) 205 touched_file.new_path)
199 continue 206 continue
200 207
201 # It is possible that a changelog doesn't show in the blame of a file, 208 # It is possible that a changelog doesn't show in the blame of a file,
202 # in this case, treat the changelog as if it didn't change the file. 209 # in this case, treat the changelog as if it didn't change the file.
203 if modified_frame_info_per_file.IsInfinity(): 210 if distance_per_file.IsInfinity():
204 continue 211 continue
205 212
206 touched_file_to_modified_frame_info[ 213 touched_file_to_distance[touched_file] = distance_per_file
207 touched_file] = modified_frame_info_per_file 214 distance.Update(distance_per_file.distance,
208 modified_frame_info.Update(modified_frame_info_per_file.distance, 215 distance_per_file.frame)
209 modified_frame_info_per_file.frame)
210 216
211 return FeatureValue( 217 return FeatureValue(
212 name = self.name, 218 name = self.name,
213 value = LogLinearlyScaled(float(modified_frame_info.distance), 219 value = LogLinearlyScaled(float(distance.distance),
214 float(self._maximum)), 220 float(self._maximum)),
215 reason = ('Minimum distance is %d' % int(modified_frame_info.distance) 221 reason = ('Minimum distance is %d' % int(distance.distance)
216 if not math.isinf(modified_frame_info.distance) else 222 if not math.isinf(distance.distance) else
217 'Minimum distance is infinity'), 223 'Minimum distance is infinity'),
218 changed_files = MinDistanceFeature.ChangedFiles( 224 changed_files = MinDistanceFeature.ChangedFiles(
219 suspect, touched_file_to_modified_frame_info, 225 suspect, touched_file_to_distance,
220 report.crashed_version)) 226 report.crashed_version))
221 227
222 return FeatureValueGivenReport 228 return FeatureValueGivenReport
223 229
224 @staticmethod 230 @staticmethod
225 def ChangedFiles(suspect, touched_file_to_modified_frame_info, 231 def ChangedFiles(suspect, touched_file_to_distance, crashed_version):
226 crashed_version):
227 """Get all the changed files causing this feature to blame this result. 232 """Get all the changed files causing this feature to blame this result.
228 233
229 Args: 234 Args:
230 suspect (Suspect): the suspect being blamed. 235 suspect (Suspect): the suspect being blamed.
231 touched_file_to_modified_frame_info (dict): Dict mapping file name to 236 touched_file_to_distance (dict): Dict mapping file name to
232 ``ModifiedFrameInfo``s. 237 ``Distance``s.
233 crashed_version (str): Crashed version. 238 crashed_version (str): Crashed version.
234 239
235 Returns: 240 Returns:
236 List of ``ChangedFile`` objects sorted by frame index. For example: 241 List of ``ChangedFile`` objects sorted by frame index. For example:
237 242
238 [ChangedFile( 243 [ChangedFile(
239 file = 'render_frame_impl.cc', 244 file = 'render_frame_impl.cc',
240 blame_url = 'https://chr.com/../render_frame_impl.cc#1586', 245 blame_url = 'https://chr.com/../render_frame_impl.cc#1586',
241 reasons = ['Minimum distance (LOC) 1, frame #5'] 246 reasons = ['Minimum distance (LOC) 1, frame #5']
242 )] 247 )]
243 """ 248 """
244 frame_index_to_changed_files = {} 249 frame_index_to_changed_files = {}
245 250
246 for touched_file, modified_frame_info in ( 251 for touched_file, distance in (
247 touched_file_to_modified_frame_info.iteritems()): 252 touched_file_to_distance.iteritems()):
248 file_name = touched_file.new_path.split('/')[-1] 253 file_name = touched_file.new_path.split('/')[-1]
249 if modified_frame_info.frame is None: # pragma: no cover 254 if distance.frame is None: # pragma: no cover
250 logging.warning('Missing the min_distance_frame for file %s' % 255 logging.warning('Missing the min_distance_frame for file %s' %
251 file_name) 256 file_name)
252 continue 257 continue
253 258
254 frame_index_to_changed_files[ 259 frame_index_to_changed_files[distance.frame.index] = ChangedFile(
255 modified_frame_info.frame.index] = ChangedFile(
256 name=file_name, 260 name=file_name,
257 blame_url=modified_frame_info.frame.BlameUrl(crashed_version), 261 blame_url=distance.frame.BlameUrl(crashed_version),
258 reasons=['Distance from touched lines and crashed lines is %d, in' 262 reasons=['Distance from touched lines and crashed lines is %d, in'
259 ' frame #%d' % (modified_frame_info.distance, 263 ' frame #%d' % (distance.distance,
260 modified_frame_info.frame.index)]) 264 distance.frame.index)])
261 265
262 if not frame_index_to_changed_files: # pragma: no cover 266 if not frame_index_to_changed_files: # pragma: no cover
263 logging.warning('Found no changed files for suspect: %s', str(suspect)) 267 logging.warning('Found no changed files for suspect: %s', str(suspect))
264 return [] 268 return []
265 269
266 # Sort changed file by frame index. 270 # Sort changed file by frame index.
267 _, changed_files = zip(*sorted(frame_index_to_changed_files.iteritems(), 271 _, changed_files = zip(*sorted(frame_index_to_changed_files.iteritems(),
268 key=lambda x: x[0])) 272 key=lambda x: x[0]))
269 273
270 return list(changed_files) 274 return list(changed_files)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698