| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import math | 6 import math |
| 7 | 7 |
| 8 from crash.loglinear.changelist_classifier import StackInfo |
| 8 from crash.loglinear.feature import ChangedFile | 9 from crash.loglinear.feature import ChangedFile |
| 9 from crash.loglinear.feature import Feature | 10 from crash.loglinear.feature import Feature |
| 10 from crash.loglinear.feature import FeatureValue | 11 from crash.loglinear.feature import FeatureValue |
| 11 from crash.loglinear.feature import LogLinearlyScaled | 12 from crash.loglinear.feature import LogLinearlyScaled |
| 13 from libs.gitiles.diff import ChangeType |
| 12 import libs.math.logarithms as lmath | 14 import libs.math.logarithms as lmath |
| 13 | 15 |
| 14 # N.B., this must not be infinity, else we'll start getting NaN values | 16 |
| 15 # from LinearMinDistanceFeature (and SquaredMinDistanceFeature). | 17 |
| 16 DEFAULT_MAXIMUM = 50 | 18 class ModifiedFrameInfo(object): |
| 19 """Represents the closest frame to a changelog which modified it. |
| 20 |
| 21 The "closest" means that the distance between crashed lines in the frame and |
| 22 touched lines in a changelog is minimum. |
| 23 |
| 24 Properties: |
| 25 distance (int or float('inf')): The distance between crashed lines and |
| 26 touched lines, if a changelog doesn't show in blame of the crashed file of |
| 27 the crashed version (either it didn't touch the crashed file or it got |
| 28 overwritten by other cls), the distance would be infinite. |
| 29 frame (StackFrame): The frame which got modified. |
| 30 """ |
| 31 |
| 32 def __init__(self, distance, frame): |
| 33 self.distance = distance |
| 34 self.frame = frame |
| 35 |
| 36 def Update(self, distance, frame): |
| 37 if distance < self.distance: |
| 38 self.distance = distance |
| 39 self.frame = frame |
| 40 |
| 41 # TODO(katesonia): we should change things to use integers with None as |
| 42 # \"infinity\", rather than using floats. |
| 43 def IsInfinity(self): |
| 44 return math.isinf(self.distance) |
| 45 |
| 46 def __str__(self): # pragma: no cover |
| 47 return 'Min distance(distance = %f, frame = %s)' % (float(self.distance), |
| 48 str(self.frame)) |
| 49 |
| 50 def __eq__(self, other): |
| 51 return self.distance == other.distance and self.frame == other.frame |
| 52 |
| 53 |
| 54 def DistanceBetweenLineRanges((start1, end1), (start2, end2)): |
| 55 """Given two ranges, compute the (unsigned) distance between them. |
| 56 |
| 57 Args: |
| 58 start1 (int): the first line included in the first range. |
| 59 end1 (int): the last line included in the first range. Must be |
| 60 greater than or equal to ``start1``. |
| 61 start2 (int): the first line included in the second range. |
| 62 end2 (int): the last line included in the second range. Must be |
| 63 greater than or equal to ``start1``. |
| 64 |
| 65 Returns: |
| 66 If the end of the earlier range comes before the start of the later |
| 67 range, then the difference between those points. Otherwise, returns |
| 68 zero (because the ranges overlap). |
| 69 """ |
| 70 if end1 < start1: |
| 71 raise ValueError('the first range is empty: %d < %d' % (end1, start1)) |
| 72 if end2 < start2: |
| 73 raise ValueError('the second range is empty: %d < %d' % (end2, start2)) |
| 74 # There are six possible cases, but in all the cases where the two |
| 75 # ranges overlap, the latter two differences will be negative. |
| 76 return max(0, start2 - end1, start1 - end2) |
| 17 | 77 |
| 18 | 78 |
| 19 class MinDistanceFeature(Feature): | 79 class MinDistanceFeature(Feature): |
| 20 """Returns the minimum min_distance scaled between -inf and 0. | 80 """Returns the minimum min_distance scaled between -inf and 0. |
| 21 | 81 |
| 22 That is, the normal-domain value is scaled linearly between 0 and 1, | 82 That is, the normal-domain value is scaled linearly between 0 and 1, |
| 23 but since we want to return a log-domain value we take the logarithm | 83 but since we want to return a log-domain value we take the logarithm |
| 24 of that (hence -inf to 0). This ensures that when a suspect has a | 84 of that (hence -inf to 0). This ensures that when a suspect has a |
| 25 linearly-scaled value of 0 (aka log-scaled value of -inf) we absolutely | 85 linearly-scaled value of 0 (aka log-scaled value of -inf) we absolutely |
| 26 refuse to blame that suspect. This heuristic behavior is intended. Before | 86 refuse to blame that suspect. This heuristic behavior is intended. Before |
| 27 changing it to be less aggressive about refusing to blame the suspect, | 87 changing it to be less aggressive about refusing to blame the suspect, |
| 28 we should delta test to be sure the new heuristic acts as indented. | 88 we should delta test to be sure the new heuristic acts as indented. |
| 29 | 89 |
| 30 When the actual minimum min_distance is zero, we return the log-domain | 90 When the actual minimum min_distance is zero, we return the log-domain |
| 31 value 0 (aka normal-domain value of 1). When the suspect has no files | 91 value 0 (aka normal-domain value of 1). When the suspect has no files |
| 32 or the actual minimum min_distance is greater than the ``maximum``, | 92 or the actual minimum min_distance is greater than the ``maximum``, |
| 33 we return the log-domain value -inf (aka normal-domain value of 0). In | 93 we return the log-domain value -inf (aka normal-domain value of 0). In |
| 34 between we scale the normal-domain values linearly, which means the | 94 between we scale the normal-domain values linearly, which means the |
| 35 log-domain values are scaled exponentially. | 95 log-domain values are scaled exponentially. |
| 36 """ | 96 """ |
| 37 def __init__(self, maximum=None): | 97 def __init__(self, get_repository, maximum): |
| 38 """ | 98 """ |
| 39 Args: | 99 Args: |
| 40 maximum (float): An upper bound on the min_distance to | 100 maximum (float): An upper bound on the min_distance to consider. |
| 41 consider. This argument is optional and defaults to | |
| 42 ``DEFAULT_MAXIMUM``. | |
| 43 """ | 101 """ |
| 44 if maximum is None: | 102 self._get_repository = get_repository |
| 45 maximum = DEFAULT_MAXIMUM | |
| 46 self._maximum = maximum | 103 self._maximum = maximum |
| 47 | 104 |
| 48 @property | 105 @property |
| 49 def name(self): | 106 def name(self): |
| 50 return 'MinDistance' | 107 return 'MinDistance' |
| 51 | 108 |
| 109 def DistanceBetweenTouchedFileAndStacktrace( |
| 110 self, revision, touched_file, stack_infos, crash_dependency): |
| 111 """Gets ``ModifiedFrameInfo`` between touched and crashed lines in a file. |
| 112 |
| 113 Args: |
| 114 revision (str): The revision of the suspect. |
| 115 touched_file (FileChangeInfo): The file touched by the suspect. |
| 116 stack_infos (list of StackInfos): List of information of frames in the |
| 117 stacktrace which contains ``touched_file``. |
| 118 crash_dependency (Dependency): The depedency of crashed revision. N.B. The |
| 119 crashed revision is the revision where crash happens, however the |
| 120 first parameter ``revision`` is the revision of the suspect cl, which |
| 121 must be before the crashed revision. |
| 122 |
| 123 Returns: |
| 124 ``ModifiedFrameInfo`` object of touched file and stacktrace. |
| 125 """ |
| 126 # TODO(katesonia) ``GetBlame`` is called for the same file everytime |
| 127 # there is a suspect that touched it, which can be very expensive. |
| 128 # The blame information can either be cached through repository (cached |
| 129 # by memcache based on repo url, revision and file path), or this |
| 130 # function can have a static in-memory cache to cache blame for touched |
| 131 # files, however since blame information is big, it's not a good idea to |
| 132 # keep it in memory. |
| 133 repository = self._get_repository(crash_dependency.repo_url) |
| 134 blame = repository.GetBlame(touched_file.new_path, |
| 135 crash_dependency.revision) |
| 136 if not blame: |
| 137 logging.warning('Failed to get blame information for %s', |
| 138 touched_file.new_path) |
| 139 return None |
| 140 |
| 141 # Distance of this file. |
| 142 modified_frame_info = ModifiedFrameInfo(float('inf'), None) |
| 143 for region in blame: |
| 144 if region.revision != revision: |
| 145 continue |
| 146 |
| 147 region_start = region.start |
| 148 region_end = region_start + region.count - 1 |
| 149 for stack_info in stack_infos: |
| 150 frame_start = stack_info.frame.crashed_line_numbers[0] |
| 151 frame_end = stack_info.frame.crashed_line_numbers[-1] |
| 152 distance = DistanceBetweenLineRanges((frame_start, frame_end), |
| 153 (region_start, region_end)) |
| 154 modified_frame_info.Update(distance, stack_info.frame) |
| 155 |
| 156 return modified_frame_info |
| 157 |
| 52 def __call__(self, report): | 158 def __call__(self, report): |
| 53 """Returns the scaled min ``AnalysisInfo.min_distance`` across all files. | 159 """Returns the scaled min ``ModifiedFrameInfo.distance`` across all files. |
| 54 | 160 |
| 55 Args: | 161 Args: |
| 56 report (CrashReport): the crash report being analyzed. | 162 report (CrashReport): the crash report being analyzed. |
| 57 | 163 |
| 58 Returns: | 164 Returns: |
| 59 A function from ``Suspect`` to the minimum distance between (the code | 165 A function from ``Suspect`` to the minimum distance between (the code |
| 60 for) a stack frame in that suspect and the CL in that suspect, as a | 166 for) a stack frame in that suspect and the CL in that suspect, as a |
| 61 log-domain ``float``. | 167 log-domain ``float``. |
| 62 """ | 168 """ |
| 63 def FeatureValueGivenReport(suspect): | 169 def FeatureValueGivenReport(suspect, touched_file_to_stack_infos): |
| 64 analyses = suspect.file_to_analysis_info | 170 """Function mapping suspect related data to MinDistance FeatureValue. |
| 65 if not analyses: | |
| 66 message = 'No AnalysisInfo for any file in suspect: %s' % str(suspect) | |
| 67 logging.warning(message) | |
| 68 return FeatureValue(self.name, lmath.LOG_ZERO, message, None) | |
| 69 | 171 |
| 70 min_distance = min(per_file_analysis.min_distance | 172 Args: |
| 71 for per_file_analysis in analyses.itervalues()) | 173 suspect (Suspect): The suspected changelog and some meta information |
| 174 about it. |
| 175 touched_file_to_stack_infos(dict): Dict mapping ``FileChangeInfo`` to |
| 176 a list of ``StackInfo``s representing all the frames that the suspect |
| 177 touched. |
| 178 |
| 179 Returns: |
| 180 The ``FeatureValue`` of this feature. |
| 181 """ |
| 182 if not touched_file_to_stack_infos: |
| 183 FeatureValue(self.name, lmath.LOG_ZERO, |
| 184 'No file got touched by the suspect.', None) |
| 185 |
| 186 modified_frame_info = ModifiedFrameInfo(float('inf'), None) |
| 187 touched_file_to_modified_frame_info = {} |
| 188 for touched_file, stack_infos in touched_file_to_stack_infos.iteritems(): |
| 189 # Records the closest frame (the frame has minimum distance between |
| 190 # crashed lines and touched lines) for each touched file of the suspect. |
| 191 modified_frame_info_per_file = ( |
| 192 self.DistanceBetweenTouchedFileAndStacktrace( |
| 193 suspect.changelog.revision, touched_file, stack_infos, |
| 194 report.dependencies[suspect.dep_path])) |
| 195 # Failed to get blame information of a file. |
| 196 if not modified_frame_info_per_file: |
| 197 logging.warning('suspect\'s change cannot be blamed due to lack of' |
| 198 'blame information for crashed file %s' % |
| 199 touched_file.new_path) |
| 200 continue |
| 201 |
| 202 # It is possible that a changelog doesn't show in the blame of a file, |
| 203 # in this case, treat the changelog as if it didn't change the file. |
| 204 if modified_frame_info_per_file.IsInfinity(): |
| 205 continue |
| 206 |
| 207 touched_file_to_modified_frame_info[ |
| 208 touched_file] = modified_frame_info_per_file |
| 209 modified_frame_info.Update(modified_frame_info_per_file.distance, |
| 210 modified_frame_info_per_file.frame) |
| 72 | 211 |
| 73 return FeatureValue( | 212 return FeatureValue( |
| 74 name = self.name, | 213 name = self.name, |
| 75 value = LogLinearlyScaled(float(min_distance), float(self._maximum)), | 214 value = LogLinearlyScaled(float(modified_frame_info.distance), |
| 76 reason = ('Minimum distance is %d' % min_distance | 215 float(self._maximum)), |
| 77 if not math.isinf(min_distance) else | 216 reason = ('Minimum distance is %d' % int(modified_frame_info.distance) |
| 217 if not math.isinf(modified_frame_info.distance) else |
| 78 'Minimum distance is infinity'), | 218 'Minimum distance is infinity'), |
| 79 changed_files = self._ChangedFiles(suspect), | 219 changed_files = MinDistanceFeature.ChangedFiles( |
| 80 ) | 220 suspect, touched_file_to_modified_frame_info, |
| 221 report.crashed_version)) |
| 81 | 222 |
| 82 return FeatureValueGivenReport | 223 return FeatureValueGivenReport |
| 83 | 224 |
| 84 def _ChangedFiles(self, suspect): | 225 @staticmethod |
| 85 """Get all the changed files causing this feature to blame this suspect. | 226 def ChangedFiles(suspect, touched_file_to_modified_frame_info, |
| 227 crashed_version): |
| 228 """Get all the changed files causing this feature to blame this result. |
| 86 | 229 |
| 87 Arg: | 230 Arg: |
| 88 suspect (Suspect): the suspect being blamed. | 231 suspect (Suspect): the suspect being blamed. |
| 232 touched_file_to_modified_frame_info (dict): Dict mapping file name to |
| 233 ``ModifiedFrameInfo``s. |
| 234 crashed_version (str): Crashed version. |
| 89 | 235 |
| 90 Returns: | 236 Returns: |
| 91 List of ``ChangedFile`` objects sorted by frame index. For example: | 237 List of ``ChangedFile`` objects sorted by frame index. For example: |
| 92 | 238 |
| 93 [ChangedFile( | 239 [ChangedFile( |
| 94 file = 'render_frame_impl.cc', | 240 file = 'render_frame_impl.cc', |
| 95 blame_url = 'https://chr.com/../render_frame_impl.cc#1586', | 241 blame_url = 'https://chr.com/../render_frame_impl.cc#1586', |
| 96 reasons = ['Minimum distance (LOC) 1, frame #5'] | 242 reasons = ['Minimum distance (LOC) 1, frame #5'] |
| 97 )] | 243 )] |
| 98 """ | 244 """ |
| 99 index_to_changed_files = {} | 245 frame_index_to_changed_files = {} |
| 100 | 246 |
| 101 for file_path, analysis_info in suspect.file_to_analysis_info.iteritems(): | 247 for touched_file, modified_frame_info in ( |
| 102 file_name = file_path.split('/')[-1] | 248 touched_file_to_modified_frame_info.iteritems()): |
| 103 frame = analysis_info.min_distance_frame | 249 file_name = touched_file.new_path.split('/')[-1] |
| 104 if frame is None: # pragma: no cover | 250 if modified_frame_info.frame is None: # pragma: no cover |
| 105 logging.warning('Missing the min_distance_frame for %s' | 251 logging.warning('Missing the min_distance_frame for file %s' % |
| 106 % str(analysis_info)) | 252 file_name) |
| 107 continue | 253 continue |
| 108 | 254 |
| 109 # It is possible that a changelog doesn't show in the blame of a file, | 255 frame_index_to_changed_files[ |
| 110 # in this case, treat the changelog as if it didn't change the file. | 256 modified_frame_info.frame.index] = ChangedFile( |
| 111 if math.isinf(analysis_info.min_distance): # pragma: no cover | 257 name=file_name, |
| 112 logging.warning('min_distance is infinite for %s' % str(analysis_info)) | 258 blame_url=modified_frame_info.frame.BlameUrl(crashed_version), |
| 113 continue | 259 reasons=['Distance from touched lines and crashed lines is %d, in' |
| 260 ' frame #%d' % (modified_frame_info.distance, |
| 261 modified_frame_info.frame.index)]) |
| 114 | 262 |
| 115 index_to_changed_files[frame.index] = ChangedFile( | 263 if not frame_index_to_changed_files: # pragma: no cover |
| 116 name = file_name, | |
| 117 blame_url = frame.BlameUrl(suspect.changelog.revision), | |
| 118 reasons = ['Minimum distance (LOC) %d, frame #%d' % ( | |
| 119 analysis_info.min_distance, frame.index)] | |
| 120 ) | |
| 121 | |
| 122 if not index_to_changed_files: # pragma: no cover | |
| 123 logging.warning('Found no changed files for suspect: %s', str(suspect)) | 264 logging.warning('Found no changed files for suspect: %s', str(suspect)) |
| 265 return [] |
| 124 | 266 |
| 125 # Sort changed file by frame index. | 267 # Sort changed file by frame index. |
| 126 _, changed_files = zip(*sorted(index_to_changed_files.items(), | 268 _, changed_files = zip(*sorted(frame_index_to_changed_files.iteritems(), |
| 127 key=lambda x: x[0])) | 269 key=lambda x: x[0])) |
| 128 | 270 |
| 129 return list(changed_files) | 271 return list(changed_files) |
| OLD | NEW |