Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import math | 6 import math |
| 7 | 7 |
| 8 from crash.loglinear.changelist_classifier import StackInfo | |
| 8 from crash.loglinear.feature import ChangedFile | 9 from crash.loglinear.feature import ChangedFile |
| 9 from crash.loglinear.feature import Feature | 10 from crash.loglinear.feature import Feature |
| 10 from crash.loglinear.feature import FeatureValue | 11 from crash.loglinear.feature import FeatureValue |
| 11 from crash.loglinear.feature import LogLinearlyScaled | 12 from crash.loglinear.feature import LogLinearlyScaled |
| 13 from libs.gitiles.diff import ChangeType | |
| 12 import libs.math.logarithms as lmath | 14 import libs.math.logarithms as lmath |
| 13 | 15 |
| 14 # N.B., this must not be infinity, else we'll start getting NaN values | 16 # N.B., this must not be infinity, else we'll start getting NaN values |
| 15 # from LinearMinDistanceFeature (and SquaredMinDistanceFeature). | 17 # from LinearMinDistanceFeature (and SquaredMinDistanceFeature). |
| 16 DEFAULT_MAXIMUM = 50 | 18 DEFAULT_MAXIMUM = 50 |
| 17 | 19 |
| 18 | 20 |
| 21 # TODO(wrengr): we should change things to use integers with None as | |
|
chanli
2017/01/23 04:37:45
Nit: since wrengr has left the team, change the TO
Sharu Jiang
2017/01/23 21:44:46
Done.
| |
| 22 # \"infinity\", rather than using floats. | |
| 23 # TODO(http://crbug.com/644476): this class needs a better name. | |
| 24 class DistanceInfo(object): | |
| 25 | |
| 26 def __init__(self, distance, frame): | |
| 27 self.distance = distance | |
| 28 self.frame = frame | |
| 29 | |
| 30 def Update(self, distance, frame): | |
| 31 if distance < self.distance: | |
| 32 self.distance = distance | |
| 33 self.frame = frame | |
| 34 | |
| 35 def IsInfinity(self): | |
| 36 return math.isinf(self.distance) | |
| 37 | |
| 38 def __str__(self): # pragma: no cover | |
| 39 return 'Min distance(distance = %f, frame = %s)' % (float(self.distance), | |
| 40 str(self.frame)) | |
| 41 | |
| 42 def __eq__(self, other): | |
| 43 return self.distance == other.distance and self.frame == other.frame | |
| 44 | |
| 45 | |
| 46 def DistanceBetweenLineRanges((start1, end1), (start2, end2)): | |
| 47 """Given two ranges, compute the (unsigned) distance between them. | |
| 48 | |
| 49 Args: | |
| 50 start1 (int): the first line included in the first range. | |
| 51 end1 (int): the last line included in the first range. Must be | |
| 52 greater than or equal to ``start1``. | |
| 53 start2 (int): the first line included in the second range. | |
| 54 end2 (int): the last line included in the second range. Must be | |
| 55 greater than or equal to ``start1``. | |
| 56 | |
| 57 Returns: | |
| 58 If the end of the earlier range comes before the start of the later | |
| 59 range, then the difference between those points. Otherwise, returns | |
| 60 zero (because the ranges overlap). | |
| 61 """ | |
| 62 if end1 < start1: | |
| 63 raise ValueError('the first range is empty: %d < %d' % (end1, start1)) | |
| 64 if end2 < start2: | |
| 65 raise ValueError('the second range is empty: %d < %d' % (end2, start2)) | |
| 66 # There are six possible cases, but in all the cases where the two | |
| 67 # ranges overlap, the latter two differences will be negative. | |
| 68 return max(0, start2 - end1, start1 - end2) | |
| 69 | |
| 70 | |
| 19 class MinDistanceFeature(Feature): | 71 class MinDistanceFeature(Feature): |
| 20 """Returns the minimum min_distance scaled between -inf and 0. | 72 """Returns the minimum min_distance scaled between -inf and 0. |
| 21 | 73 |
| 22 That is, the normal-domain value is scaled linearly between 0 and 1, | 74 That is, the normal-domain value is scaled linearly between 0 and 1, |
| 23 but since we want to return a log-domain value we take the logarithm | 75 but since we want to return a log-domain value we take the logarithm |
| 24 of that (hence -inf to 0). This ensures that when a suspect has a | 76 of that (hence -inf to 0). This ensures that when a suspect has a |
| 25 linearly-scaled value of 0 (aka log-scaled value of -inf) we absolutely | 77 linearly-scaled value of 0 (aka log-scaled value of -inf) we absolutely |
| 26 refuse to blame that suspect. This heuristic behavior is intended. Before | 78 refuse to blame that suspect. This heuristic behavior is intended. Before |
| 27 changing it to be less aggressive about refusing to blame the suspect, | 79 changing it to be less aggressive about refusing to blame the suspect, |
| 28 we should delta test to be sure the new heuristic acts as indented. | 80 we should delta test to be sure the new heuristic acts as indented. |
| 29 | 81 |
| 30 When the actual minimum min_distance is zero, we return the log-domain | 82 When the actual minimum min_distance is zero, we return the log-domain |
| 31 value 0 (aka normal-domain value of 1). When the suspect has no files | 83 value 0 (aka normal-domain value of 1). When the suspect has no files |
| 32 or the actual minimum min_distance is greater than the ``maximum``, | 84 or the actual minimum min_distance is greater than the ``maximum``, |
| 33 we return the log-domain value -inf (aka normal-domain value of 0). In | 85 we return the log-domain value -inf (aka normal-domain value of 0). In |
| 34 between we scale the normal-domain values linearly, which means the | 86 between we scale the normal-domain values linearly, which means the |
| 35 log-domain values are scaled exponentially. | 87 log-domain values are scaled exponentially. |
| 36 """ | 88 """ |
| 37 def __init__(self, maximum=None): | 89 def __init__(self, get_repository, maximum=None): |
| 38 """ | 90 """ |
| 39 Args: | 91 Args: |
| 40 maximum (float): An upper bound on the min_distance to | 92 maximum (float): An upper bound on the min_distance to |
| 41 consider. This argument is optional and defaults to | 93 consider. This argument is optional and defaults to |
| 42 ``DEFAULT_MAXIMUM``. | 94 ``DEFAULT_MAXIMUM``. |
| 43 """ | 95 """ |
| 96 self._get_repository = get_repository | |
| 44 if maximum is None: | 97 if maximum is None: |
| 45 maximum = DEFAULT_MAXIMUM | 98 maximum = DEFAULT_MAXIMUM |
| 46 self._maximum = maximum | 99 self._maximum = maximum |
|
chanli
2017/01/23 04:37:45
Nit: this can be simplified to self._maximum = max
Sharu Jiang
2017/01/23 21:44:46
They are not exactly the same. If maximum is 0, we
| |
| 47 | 100 |
| 48 @property | 101 @property |
| 49 def name(self): | 102 def name(self): |
| 50 return 'MinDistance' | 103 return 'MinDistance' |
| 51 | 104 |
| 105 def DistanceInfoBetweenTouchedFileAndStacktrace( | |
| 106 self, revision, touched_file, stack_infos, crash_dependency): | |
| 107 """Gets ``DistanceInfo`` between touched lines and crashed lines for a file. | |
| 108 | |
| 109 Args: | |
|
chanli
2017/01/23 04:37:45
Nit: add docstring for revision
Sharu Jiang
2017/01/23 21:44:46
Done.
| |
| 110 touched_file (FileChangeInfo): The file touched by a suspect. | |
| 111 stack_infos (list of StackInfos): Information about frames that contains | |
| 112 this touched file and the priorities of their callstacks. | |
| 113 crash_dependency (Dependency): The depedency of crashed revision. | |
| 114 | |
| 115 Returns: | |
| 116 ``DistanceInfo`` object of touched file and stacktrace. | |
| 117 """ | |
| 118 # TODO(katesonia) ``GetBlame`` is called for the same file everytime | |
| 119 # there is a suspect that touched it, which can be very expensive. | |
| 120 # The blame information can either be cached through repository (cached | |
| 121 # by memcache based on repo url, revision and file path), or this | |
| 122 # function can have a static in-memory cache to cache blame for touched | |
| 123 # files, however since blame information is big, it's not a good idea to | |
| 124 # keep it in memory. | |
| 125 repository = self._get_repository(crash_dependency.repo_url) | |
| 126 blame = repository.GetBlame(touched_file.new_path, | |
| 127 crash_dependency.revision) | |
|
chanli
2017/01/23 04:37:45
Why not use revision directly? And crash_dependenc
Sharu Jiang
2017/01/23 21:44:46
We also need ``crash_dependency.repo_url`` in the
| |
| 128 if not blame: | |
| 129 logging.warning('Failed to get blame information for %s', | |
| 130 touched_file.new_path) | |
| 131 return None | |
| 132 | |
| 133 # Distance of this file. | |
| 134 distance_info = DistanceInfo(float('inf'), None) | |
| 135 for region in blame: | |
| 136 if region.revision != revision: | |
| 137 continue | |
| 138 | |
| 139 region_start = region.start | |
| 140 region_end = region_start + region.count - 1 | |
| 141 for stack_info in stack_infos: | |
| 142 frame_start = stack_info.frame.crashed_line_numbers[0] | |
| 143 frame_end = stack_info.frame.crashed_line_numbers[-1] | |
| 144 distance = DistanceBetweenLineRanges((frame_start, frame_end), | |
| 145 (region_start, region_end)) | |
| 146 distance_info.Update(distance, stack_info.frame) | |
| 147 | |
| 148 return distance_info | |
| 149 | |
| 52 def __call__(self, report): | 150 def __call__(self, report): |
| 53 """Returns the scaled min ``AnalysisInfo.min_distance`` across all files. | 151 """Returns the scaled min ``DistanceInfo.distance`` across all files. |
| 54 | 152 |
| 55 Args: | 153 Args: |
| 56 report (CrashReport): the crash report being analyzed. | 154 report (CrashReport): the crash report being analyzed. |
| 57 | 155 |
| 58 Returns: | 156 Returns: |
| 59 A function from ``Suspect`` to the minimum distance between (the code | 157 A function from ``Suspect`` to the minimum distance between (the code |
| 60 for) a stack frame in that suspect and the CL in that suspect, as a | 158 for) a stack frame in that suspect and the CL in that suspect, as a |
| 61 log-domain ``float``. | 159 log-domain ``float``. |
| 62 """ | 160 """ |
| 63 def FeatureValueGivenReport(suspect): | 161 def FeatureValueGivenReport(suspect, touched_file_to_stack_infos): |
| 64 analyses = suspect.file_to_analysis_info | 162 """Function mapping suspect related data to MinDistance FeatureValue. |
| 65 if not analyses: | |
| 66 message = 'No AnalysisInfo for any file in suspect: %s' % str(suspect) | |
| 67 logging.warning(message) | |
| 68 return FeatureValue(self.name, lmath.LOG_ZERO, message, None) | |
| 69 | 163 |
| 70 min_distance = min(per_file_analysis.min_distance | 164 Args: |
| 71 for per_file_analysis in analyses.itervalues()) | 165 suspect (Suspect): The suspected changelog and some meta information |
| 166 about it. | |
| 167 touched_file_to_stack_infos(dict): Dict mapping ``FileChangeInfo`` to | |
| 168 a list of ``StackInfo``s representing all the frames that the suspect | |
| 169 touched. | |
| 170 | |
| 171 Returns: | |
| 172 The ``FeatureValue`` of this feature. | |
| 173 """ | |
| 174 if not touched_file_to_stack_infos: | |
| 175 FeatureValue(self.name, lmath.LOG_ZERO, | |
| 176 'No file got touched by the suspect.', None) | |
| 177 | |
| 178 distance_info = DistanceInfo(float('inf'), None) | |
| 179 touched_file_to_distance_info = {} | |
| 180 for touched_file, stack_infos in touched_file_to_stack_infos.iteritems(): | |
| 181 distance_info_per_file = ( | |
| 182 self.DistanceInfoBetweenTouchedFileAndStacktrace( | |
| 183 suspect.changelog.revision, touched_file, stack_infos, | |
| 184 report.dependencies[suspect.dep_path])) | |
| 185 # It is possible that a changelog doesn't show in the blame of a file, | |
| 186 # in this case, treat the changelog as if it didn't change the file. | |
| 187 if not distance_info_per_file: | |
| 188 logging.warning('suspect\'s change cannot be blamed due to lack of' | |
| 189 'blame information for crashed file %s' % | |
| 190 touched_file.new_path) | |
| 191 continue | |
| 192 | |
| 193 # This file change shouldn't be blamed because it's a blame cl of the | |
| 194 # crashed file. | |
|
chanli
2017/01/23 04:37:45
I don't quite understand this comment... IIUC, dis
Sharu Jiang
2017/01/23 21:44:46
That's right, wrong comment... corrected it.
| |
| 195 if distance_info_per_file.IsInfinity(): | |
| 196 continue | |
| 197 | |
| 198 touched_file_to_distance_info[touched_file] = distance_info_per_file | |
| 199 distance_info.Update(distance_info_per_file.distance, | |
| 200 distance_info_per_file.frame) | |
| 72 | 201 |
| 73 return FeatureValue( | 202 return FeatureValue( |
| 74 name = self.name, | 203 name = self.name, |
| 75 value = LogLinearlyScaled(float(min_distance), float(self._maximum)), | 204 value = LogLinearlyScaled(float(distance_info.distance), |
| 76 reason = ('Minimum distance is %d' % min_distance), | 205 float(self._maximum)), |
| 77 changed_files = self._ChangedFiles(suspect), | 206 reason = ('Minimum distance is %f' % float(distance_info.distance)), |
| 78 ) | 207 changed_files = MinDistanceFeature.ChangedFiles( |
| 208 suspect, touched_file_to_distance_info, report.crashed_version)) | |
| 79 | 209 |
| 80 return FeatureValueGivenReport | 210 return FeatureValueGivenReport |
| 81 | 211 |
| 82 def _ChangedFiles(self, suspect): | 212 @staticmethod |
| 83 """Get all the changed files causing this feature to blame this suspect. | 213 def ChangedFiles(suspect, touched_file_to_distance_info, crashed_version): |
| 214 """Get all the changed files causing this feature to blame this result. | |
| 84 | 215 |
| 85 Arg: | 216 Arg: |
| 86 suspect (Suspect): the suspect being blamed. | 217 suspect (Suspect): the suspect being blamed. |
| 218 touched_file_to_stack_infos (dict): Dict mapping file name to | |
| 219 ``StackInfo``s. | |
|
chanli
2017/01/23 04:37:45
DistanceInfo?
Sharu Jiang
2017/01/23 21:44:46
Oops. Done.
| |
| 220 crashed_version (str): Crashed version. | |
| 87 | 221 |
| 88 Returns: | 222 Returns: |
| 89 List of ``ChangedFile`` objects sorted by frame index. For example: | 223 List of ``ChangedFile`` objects sorted by frame index. For example: |
| 90 | 224 |
| 91 [ChangedFile( | 225 [ChangedFile( |
| 92 file = 'render_frame_impl.cc', | 226 file = 'render_frame_impl.cc', |
| 93 blame_url = 'https://chr.com/../render_frame_impl.cc#1586', | 227 blame_url = 'https://chr.com/../render_frame_impl.cc#1586', |
| 94 reasons = ['Minimum distance (LOC) 1, frame #5'] | 228 reasons = ['Minimum distance (LOC) 1, frame #5'] |
| 95 )] | 229 )] |
| 96 """ | 230 """ |
| 97 index_to_changed_files = {} | 231 frame_index_to_changed_files = {} |
| 98 | 232 |
| 99 for file_path, analysis_info in suspect.file_to_analysis_info.iteritems(): | 233 for touched_file, distance_info in ( |
| 100 file_name = file_path.split('/')[-1] | 234 touched_file_to_distance_info.iteritems()): |
| 101 frame = analysis_info.min_distance_frame | 235 file_name = touched_file.new_path.split('/')[-1] |
| 102 if frame is None: # pragma: no cover | 236 if distance_info.frame is None: # pragma: no cover |
| 103 logging.warning('Missing the min_distance_frame for %s' | 237 logging.warning('Missing the min_distance_frame for file %s' % |
| 104 % str(analysis_info)) | 238 file_name) |
| 105 continue | 239 continue |
| 106 | 240 |
| 107 # It is possible that a changelog doesn't show in the blame of a file, | 241 frame_index_to_changed_files[distance_info.frame.index] = ChangedFile( |
| 108 # in this case, treat the changelog as if it didn't change the file. | 242 name=file_name, |
| 109 if math.isinf(analysis_info.min_distance): # pragma: no cover | 243 blame_url=distance_info.frame.BlameUrl(crashed_version), |
| 110 logging.warning('min_distance is infinite for %s' % str(analysis_info)) | 244 reasons=['Distance from touched lines and crashed lines is %d, in ' |
| 111 continue | 245 'frame #%d' % (distance_info.distance, |
| 246 distance_info.frame.index)]) | |
| 112 | 247 |
| 113 index_to_changed_files[frame.index] = ChangedFile( | 248 if not frame_index_to_changed_files: # pragma: no cover |
| 114 name = file_name, | |
| 115 blame_url = frame.BlameUrl(suspect.changelog.revision), | |
| 116 reasons = ['Minimum distance (LOC) %d, frame #%d' % ( | |
| 117 analysis_info.min_distance, frame.index)] | |
| 118 ) | |
| 119 | |
| 120 if not index_to_changed_files: # pragma: no cover | |
| 121 logging.warning('Found no changed files for suspect: %s', str(suspect)) | 249 logging.warning('Found no changed files for suspect: %s', str(suspect)) |
| 250 return [] | |
| 122 | 251 |
| 123 # Sort changed file by frame index. | 252 # Sort changed file by frame index. |
| 124 _, changed_files = zip(*sorted(index_to_changed_files.items(), | 253 _, changed_files = zip(*sorted(frame_index_to_changed_files.iteritems(), |
| 125 key=lambda x: x[0])) | 254 key=lambda x: x[0])) |
| 126 | 255 |
| 127 return list(changed_files) | 256 return list(changed_files) |
| OLD | NEW |