Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import math | 6 import math |
| 7 | 7 |
| 8 from crash.loglinear.changelist_classifier import StackInfo | |
| 8 from crash.loglinear.feature import ChangedFile | 9 from crash.loglinear.feature import ChangedFile |
| 9 from crash.loglinear.feature import Feature | 10 from crash.loglinear.feature import Feature |
| 10 from crash.loglinear.feature import FeatureValue | 11 from crash.loglinear.feature import FeatureValue |
| 11 from crash.loglinear.feature import LogLinearlyScaled | 12 from crash.loglinear.feature import LogLinearlyScaled |
| 13 from libs.gitiles.diff import ChangeType | |
| 12 import libs.math.logarithms as lmath | 14 import libs.math.logarithms as lmath |
| 13 | 15 |
| 14 # N.B., this must not be infinity, else we'll start getting NaN values | 16 |
| 15 # from LinearMinDistanceFeature (and SquaredMinDistanceFeature). | 17 # TODO(katesonia): we should change things to use integers with None as |
|
Martin Barbella
2017/01/24 22:06:40
Could you move this to IsInfinity?
Sharu Jiang
2017/01/25 19:13:31
Done.
| |
| 16 DEFAULT_MAXIMUM = 50 | 18 # \"infinity\", rather than using floats. |
| 19 # TODO(http://crbug.com/644476): this class needs a better name. | |
| 20 class DistanceInfo(object): | |
|
Martin Barbella
2017/01/24 22:06:40
Agreed that DistanceInfo isn't the best name for t
Sharu Jiang
2017/01/25 19:13:31
Changed to ``ModifiedFrameInfo``.
| |
| 21 | |
| 22 def __init__(self, distance, frame): | |
| 23 self.distance = distance | |
| 24 self.frame = frame | |
| 25 | |
| 26 def Update(self, distance, frame): | |
| 27 if distance < self.distance: | |
| 28 self.distance = distance | |
| 29 self.frame = frame | |
| 30 | |
| 31 def IsInfinity(self): | |
| 32 return math.isinf(self.distance) | |
| 33 | |
| 34 def __str__(self): # pragma: no cover | |
| 35 return 'Min distance(distance = %f, frame = %s)' % (float(self.distance), | |
| 36 str(self.frame)) | |
| 37 | |
| 38 def __eq__(self, other): | |
| 39 return self.distance == other.distance and self.frame == other.frame | |
| 40 | |
| 41 | |
| 42 def DistanceBetweenLineRanges((start1, end1), (start2, end2)): | |
| 43 """Given two ranges, compute the (unsigned) distance between them. | |
| 44 | |
| 45 Args: | |
| 46 start1 (int): the first line included in the first range. | |
| 47 end1 (int): the last line included in the first range. Must be | |
| 48 greater than or equal to ``start1``. | |
| 49 start2 (int): the first line included in the second range. | |
| 50 end2 (int): the last line included in the second range. Must be | |
| 51 greater than or equal to ``start1``. | |
| 52 | |
| 53 Returns: | |
| 54 If the end of the earlier range comes before the start of the later | |
| 55 range, then the difference between those points. Otherwise, returns | |
| 56 zero (because the ranges overlap). | |
| 57 """ | |
| 58 if end1 < start1: | |
| 59 raise ValueError('the first range is empty: %d < %d' % (end1, start1)) | |
| 60 if end2 < start2: | |
| 61 raise ValueError('the second range is empty: %d < %d' % (end2, start2)) | |
| 62 # There are six possible cases, but in all the cases where the two | |
| 63 # ranges overlap, the latter two differences will be negative. | |
| 64 return max(0, start2 - end1, start1 - end2) | |
| 17 | 65 |
| 18 | 66 |
| 19 class MinDistanceFeature(Feature): | 67 class MinDistanceFeature(Feature): |
| 20 """Returns the minimum min_distance scaled between -inf and 0. | 68 """Returns the minimum min_distance scaled between -inf and 0. |
| 21 | 69 |
| 22 That is, the normal-domain value is scaled linearly between 0 and 1, | 70 That is, the normal-domain value is scaled linearly between 0 and 1, |
| 23 but since we want to return a log-domain value we take the logarithm | 71 but since we want to return a log-domain value we take the logarithm |
| 24 of that (hence -inf to 0). This ensures that when a suspect has a | 72 of that (hence -inf to 0). This ensures that when a suspect has a |
| 25 linearly-scaled value of 0 (aka log-scaled value of -inf) we absolutely | 73 linearly-scaled value of 0 (aka log-scaled value of -inf) we absolutely |
| 26 refuse to blame that suspect. This heuristic behavior is intended. Before | 74 refuse to blame that suspect. This heuristic behavior is intended. Before |
| 27 changing it to be less aggressive about refusing to blame the suspect, | 75 changing it to be less aggressive about refusing to blame the suspect, |
| 28 we should delta test to be sure the new heuristic acts as indented. | 76 we should delta test to be sure the new heuristic acts as indented. |
| 29 | 77 |
| 30 When the actual minimum min_distance is zero, we return the log-domain | 78 When the actual minimum min_distance is zero, we return the log-domain |
| 31 value 0 (aka normal-domain value of 1). When the suspect has no files | 79 value 0 (aka normal-domain value of 1). When the suspect has no files |
| 32 or the actual minimum min_distance is greater than the ``maximum``, | 80 or the actual minimum min_distance is greater than the ``maximum``, |
| 33 we return the log-domain value -inf (aka normal-domain value of 0). In | 81 we return the log-domain value -inf (aka normal-domain value of 0). In |
| 34 between we scale the normal-domain values linearly, which means the | 82 between we scale the normal-domain values linearly, which means the |
| 35 log-domain values are scaled exponentially. | 83 log-domain values are scaled exponentially. |
| 36 """ | 84 """ |
| 37 def __init__(self, maximum=None): | 85 def __init__(self, get_repository, maximum): |
| 38 """ | 86 """ |
| 39 Args: | 87 Args: |
| 40 maximum (float): An upper bound on the min_distance to | 88 maximum (float): An upper bound on the min_distance to consider. |
| 41 consider. This argument is optional and defaults to | |
| 42 ``DEFAULT_MAXIMUM``. | |
| 43 """ | 89 """ |
| 44 if maximum is None: | 90 self._get_repository = get_repository |
| 45 maximum = DEFAULT_MAXIMUM | |
| 46 self._maximum = maximum | 91 self._maximum = maximum |
| 47 | 92 |
| 48 @property | 93 @property |
| 49 def name(self): | 94 def name(self): |
| 50 return 'MinDistance' | 95 return 'MinDistance' |
| 51 | 96 |
| 97 def DistanceInfoBetweenTouchedFileAndStacktrace( | |
|
Martin Barbella
2017/01/24 22:06:40
"Info" seems unnecessary to me, especially since i
Sharu Jiang
2017/01/25 19:13:31
Deleted it.
| |
| 98 self, revision, touched_file, stack_infos, crash_dependency): | |
| 99 """Gets ``DistanceInfo`` between touched lines and crashed lines for a file. | |
| 100 | |
| 101 Args: | |
| 102 revision (str): The revision of the suspect. | |
| 103 touched_file (FileChangeInfo): The file touched by the suspect. | |
| 104 stack_infos (list of StackInfos): List of information of frames in the | |
| 105 stacktrace which contains ``touched_file``. | |
| 106 crash_dependency (Dependency): The depedency of crashed revision. N.B. The | |
| 107 crashed revision is the revision where crash happens, however the | |
| 108 first parameter ``revision`` is the revision of the suspect cl, which | |
| 109 must be before the crashed revision. | |
| 110 | |
| 111 Returns: | |
| 112 ``DistanceInfo`` object of touched file and stacktrace. | |
| 113 """ | |
| 114 # TODO(katesonia) ``GetBlame`` is called for the same file everytime | |
| 115 # there is a suspect that touched it, which can be very expensive. | |
| 116 # The blame information can either be cached through repository (cached | |
| 117 # by memcache based on repo url, revision and file path), or this | |
| 118 # function can have a static in-memory cache to cache blame for touched | |
| 119 # files, however since blame information is big, it's not a good idea to | |
| 120 # keep it in memory. | |
| 121 repository = self._get_repository(crash_dependency.repo_url) | |
| 122 blame = repository.GetBlame(touched_file.new_path, | |
| 123 crash_dependency.revision) | |
| 124 if not blame: | |
| 125 logging.warning('Failed to get blame information for %s', | |
| 126 touched_file.new_path) | |
| 127 return None | |
| 128 | |
| 129 # Distance of this file. | |
| 130 distance_info = DistanceInfo(float('inf'), None) | |
| 131 for region in blame: | |
| 132 if region.revision != revision: | |
| 133 continue | |
| 134 | |
| 135 region_start = region.start | |
| 136 region_end = region_start + region.count - 1 | |
| 137 for stack_info in stack_infos: | |
| 138 frame_start = stack_info.frame.crashed_line_numbers[0] | |
| 139 frame_end = stack_info.frame.crashed_line_numbers[-1] | |
| 140 distance = DistanceBetweenLineRanges((frame_start, frame_end), | |
| 141 (region_start, region_end)) | |
| 142 distance_info.Update(distance, stack_info.frame) | |
| 143 | |
| 144 return distance_info | |
| 145 | |
| 52 def __call__(self, report): | 146 def __call__(self, report): |
| 53 """Returns the scaled min ``AnalysisInfo.min_distance`` across all files. | 147 """Returns the scaled min ``DistanceInfo.distance`` across all files. |
| 54 | 148 |
| 55 Args: | 149 Args: |
| 56 report (CrashReport): the crash report being analyzed. | 150 report (CrashReport): the crash report being analyzed. |
| 57 | 151 |
| 58 Returns: | 152 Returns: |
| 59 A function from ``Suspect`` to the minimum distance between (the code | 153 A function from ``Suspect`` to the minimum distance between (the code |
| 60 for) a stack frame in that suspect and the CL in that suspect, as a | 154 for) a stack frame in that suspect and the CL in that suspect, as a |
| 61 log-domain ``float``. | 155 log-domain ``float``. |
| 62 """ | 156 """ |
| 63 def FeatureValueGivenReport(suspect): | 157 def FeatureValueGivenReport(suspect, touched_file_to_stack_infos): |
| 64 analyses = suspect.file_to_analysis_info | 158 """Function mapping suspect related data to MinDistance FeatureValue. |
| 65 if not analyses: | |
| 66 message = 'No AnalysisInfo for any file in suspect: %s' % str(suspect) | |
| 67 logging.warning(message) | |
| 68 return FeatureValue(self.name, lmath.LOG_ZERO, message, None) | |
| 69 | 159 |
| 70 min_distance = min(per_file_analysis.min_distance | 160 Args: |
| 71 for per_file_analysis in analyses.itervalues()) | 161 suspect (Suspect): The suspected changelog and some meta information |
| 162 about it. | |
| 163 touched_file_to_stack_infos(dict): Dict mapping ``FileChangeInfo`` to | |
| 164 a list of ``StackInfo``s representing all the frames that the suspect | |
| 165 touched. | |
| 166 | |
| 167 Returns: | |
| 168 The ``FeatureValue`` of this feature. | |
| 169 """ | |
| 170 if not touched_file_to_stack_infos: | |
| 171 FeatureValue(self.name, lmath.LOG_ZERO, | |
| 172 'No file got touched by the suspect.', None) | |
| 173 | |
| 174 distance_info = DistanceInfo(float('inf'), None) | |
| 175 touched_file_to_distance_info = {} | |
| 176 for touched_file, stack_infos in touched_file_to_stack_infos.iteritems(): | |
| 177 distance_info_per_file = ( | |
| 178 self.DistanceInfoBetweenTouchedFileAndStacktrace( | |
| 179 suspect.changelog.revision, touched_file, stack_infos, | |
| 180 report.dependencies[suspect.dep_path])) | |
| 181 # Failed to get blame information of a file. | |
| 182 if not distance_info_per_file: | |
| 183 logging.warning('suspect\'s change cannot be blamed due to lack of' | |
| 184 'blame information for crashed file %s' % | |
| 185 touched_file.new_path) | |
| 186 continue | |
| 187 | |
| 188 # It is possible that a changelog doesn't show in the blame of a file, | |
| 189 # in this case, treat the changelog as if it didn't change the file. | |
| 190 if distance_info_per_file.IsInfinity(): | |
| 191 continue | |
| 192 | |
| 193 touched_file_to_distance_info[touched_file] = distance_info_per_file | |
| 194 distance_info.Update(distance_info_per_file.distance, | |
| 195 distance_info_per_file.frame) | |
| 72 | 196 |
| 73 return FeatureValue( | 197 return FeatureValue( |
| 74 name = self.name, | 198 name = self.name, |
| 75 value = LogLinearlyScaled(float(min_distance), float(self._maximum)), | 199 value = LogLinearlyScaled(float(distance_info.distance), |
| 76 reason = ('Minimum distance is %d' % min_distance), | 200 float(self._maximum)), |
| 77 changed_files = self._ChangedFiles(suspect), | 201 reason = ('Minimum distance is %f' % float(distance_info.distance)), |
| 78 ) | 202 changed_files = MinDistanceFeature.ChangedFiles( |
| 203 suspect, touched_file_to_distance_info, report.crashed_version)) | |
| 79 | 204 |
| 80 return FeatureValueGivenReport | 205 return FeatureValueGivenReport |
| 81 | 206 |
| 82 def _ChangedFiles(self, suspect): | 207 @staticmethod |
| 83 """Get all the changed files causing this feature to blame this suspect. | 208 def ChangedFiles(suspect, touched_file_to_distance_info, crashed_version): |
| 209 """Get all the changed files causing this feature to blame this result. | |
| 84 | 210 |
| 85 Arg: | 211 Arg: |
| 86 suspect (Suspect): the suspect being blamed. | 212 suspect (Suspect): the suspect being blamed. |
| 213 touched_file_to_distance_info (dict): Dict mapping file name to | |
| 214 ``DistanceInfo``s. | |
| 215 crashed_version (str): Crashed version. | |
| 87 | 216 |
| 88 Returns: | 217 Returns: |
| 89 List of ``ChangedFile`` objects sorted by frame index. For example: | 218 List of ``ChangedFile`` objects sorted by frame index. For example: |
| 90 | 219 |
| 91 [ChangedFile( | 220 [ChangedFile( |
| 92 file = 'render_frame_impl.cc', | 221 file = 'render_frame_impl.cc', |
| 93 blame_url = 'https://chr.com/../render_frame_impl.cc#1586', | 222 blame_url = 'https://chr.com/../render_frame_impl.cc#1586', |
| 94 reasons = ['Minimum distance (LOC) 1, frame #5'] | 223 reasons = ['Minimum distance (LOC) 1, frame #5'] |
| 95 )] | 224 )] |
| 96 """ | 225 """ |
| 97 index_to_changed_files = {} | 226 frame_index_to_changed_files = {} |
| 98 | 227 |
| 99 for file_path, analysis_info in suspect.file_to_analysis_info.iteritems(): | 228 for touched_file, distance_info in ( |
| 100 file_name = file_path.split('/')[-1] | 229 touched_file_to_distance_info.iteritems()): |
| 101 frame = analysis_info.min_distance_frame | 230 file_name = touched_file.new_path.split('/')[-1] |
| 102 if frame is None: # pragma: no cover | 231 if distance_info.frame is None: # pragma: no cover |
| 103 logging.warning('Missing the min_distance_frame for %s' | 232 logging.warning('Missing the min_distance_frame for file %s' % |
| 104 % str(analysis_info)) | 233 file_name) |
| 105 continue | 234 continue |
| 106 | 235 |
| 107 # It is possible that a changelog doesn't show in the blame of a file, | 236 frame_index_to_changed_files[distance_info.frame.index] = ChangedFile( |
| 108 # in this case, treat the changelog as if it didn't change the file. | 237 name=file_name, |
| 109 if math.isinf(analysis_info.min_distance): # pragma: no cover | 238 blame_url=distance_info.frame.BlameUrl(crashed_version), |
| 110 logging.warning('min_distance is infinite for %s' % str(analysis_info)) | 239 reasons=['Distance from touched lines and crashed lines is %d, in ' |
| 111 continue | 240 'frame #%d' % (distance_info.distance, |
| 241 distance_info.frame.index)]) | |
| 112 | 242 |
| 113 index_to_changed_files[frame.index] = ChangedFile( | 243 if not frame_index_to_changed_files: # pragma: no cover |
| 114 name = file_name, | |
| 115 blame_url = frame.BlameUrl(suspect.changelog.revision), | |
| 116 reasons = ['Minimum distance (LOC) %d, frame #%d' % ( | |
| 117 analysis_info.min_distance, frame.index)] | |
| 118 ) | |
| 119 | |
| 120 if not index_to_changed_files: # pragma: no cover | |
| 121 logging.warning('Found no changed files for suspect: %s', str(suspect)) | 244 logging.warning('Found no changed files for suspect: %s', str(suspect)) |
| 245 return [] | |
| 122 | 246 |
| 123 # Sort changed file by frame index. | 247 # Sort changed file by frame index. |
| 124 _, changed_files = zip(*sorted(index_to_changed_files.items(), | 248 _, changed_files = zip(*sorted(frame_index_to_changed_files.iteritems(), |
| 125 key=lambda x: x[0])) | 249 key=lambda x: x[0])) |
| 126 | 250 |
| 127 return list(changed_files) | 251 return list(changed_files) |
| OLD | NEW |