| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import math | 6 import math |
| 7 | 7 |
| 8 from crash.loglinear.changelist_classifier import StackInfo | 8 from crash.crash_match import FrameInfo |
| 9 from crash.loglinear.feature import ChangedFile | 9 from crash.loglinear.feature import ChangedFile |
| 10 from crash.loglinear.feature import Feature | 10 from crash.loglinear.feature import Feature |
| 11 from crash.loglinear.feature import FeatureValue | 11 from crash.loglinear.feature import FeatureValue |
| 12 from crash.loglinear.feature import LogLinearlyScaled | 12 from crash.loglinear.feature import LogLinearlyScaled |
| 13 from libs.gitiles.diff import ChangeType | 13 from libs.gitiles.diff import ChangeType |
| 14 import libs.math.logarithms as lmath | 14 import libs.math.logarithms as lmath |
| 15 | 15 |
| 16 | 16 |
| 17 class ModifiedFrameInfo(object): | 17 class Distance(object): |
| 18 """Represents the closest frame to a changelog which modified it. | 18 """Represents the closest frame to a changelog which modified it. |
| 19 | 19 |
| 20 The "closest" means that the distance between crashed lines in the frame and | 20 The "closest" means that the distance between crashed lines in the frame and |
| 21 touched lines in a changelog is minimum. | 21 touched lines in a changelog is minimum. |
| 22 | 22 |
| 23 Properties: | 23 Properties: |
| 24 distance (int or float('inf')): The distance between crashed lines and | 24 distance (int or float('inf')): The distance between crashed lines and |
| 25 touched lines, if a changelog doesn't show in blame of the crashed file of | 25 touched lines, if a changelog doesn't show in blame of the crashed file of |
| 26 the crashed version (either it didn't touch the crashed file or it got | 26 the crashed version (either it didn't touch the crashed file or it got |
| 27 overwritten by other cls), the distance would be infinite. | 27 overwritten by other cls), the distance would be infinite. |
| 28 frame (StackFrame): The frame which got modified. | 28 frame (StackFrame): The frame which has the minimum distance to touched |
| 29 lines. |
| 29 """ | 30 """ |
| 30 | 31 |
| 31 def __init__(self, distance, frame): | 32 def __init__(self, distance, frame): |
| 32 self.distance = distance | 33 self.distance = distance |
| 33 self.frame = frame | 34 self.frame = frame |
| 34 | 35 |
| 35 def Update(self, distance, frame): | 36 def Update(self, distance, frame): |
| 36 if distance < self.distance: | 37 if distance < self.distance: |
| 37 self.distance = distance | 38 self.distance = distance |
| 38 self.frame = frame | 39 self.frame = frame |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 98 Args: | 99 Args: |
| 99 maximum (float): An upper bound on the min_distance to consider. | 100 maximum (float): An upper bound on the min_distance to consider. |
| 100 """ | 101 """ |
| 101 self._get_repository = get_repository | 102 self._get_repository = get_repository |
| 102 self._maximum = maximum | 103 self._maximum = maximum |
| 103 | 104 |
| 104 @property | 105 @property |
| 105 def name(self): | 106 def name(self): |
| 106 return 'MinDistance' | 107 return 'MinDistance' |
| 107 | 108 |
| 108 def DistanceBetweenTouchedFileAndStacktrace( | 109 def DistanceBetweenTouchedFileAndFrameInfos( |
| 109 self, revision, touched_file, stack_infos, crash_dependency): | 110 self, revision, touched_file, frame_infos, crash_dependency): |
| 110 """Gets ``ModifiedFrameInfo`` between touched and crashed lines in a file. | 111 """Gets ``Distance`` between touched and crashed lines in a file. |
| 111 | 112 |
| 112 Args: | 113 Args: |
| 113 revision (str): The revision of the suspect. | 114 revision (str): The revision of the suspect. |
| 114 touched_file (FileChangeInfo): The file touched by the suspect. | 115 touched_file (FileChangeInfo): The file touched by the suspect. |
| 115 stack_infos (list of StackInfos): List of information of frames in the | 116 frame_infos (list of FrameInfos): List of information of frames in the |
| 116 stacktrace which contains ``touched_file``. | 117 stacktrace which contains ``touched_file``. |
| 117 crash_dependency (Dependency): The dependency of crashed revision. N.B. The | 118 crash_dependency (Dependency): The dependency of crashed revision. N.B. The |
| 118 crashed revision is the revision where crash happens, however the | 119 crashed revision is the revision where crash happens, however the |
| 119 first parameter ``revision`` is the revision of the suspect cl, which | 120 first parameter ``revision`` is the revision of the suspect cl, which |
| 120 must be before the crashed revision. | 121 must be before the crashed revision. |
| 121 | 122 |
| 122 Returns: | 123 Returns: |
| 123 ``ModifiedFrameInfo`` object of touched file and stacktrace. | 124 ``Distance`` object of touched file and stacktrace. |
| 124 """ | 125 """ |
| 125 # TODO(katesonia) ``GetBlame`` is called for the same file every time | 126 # TODO(katesonia) ``GetBlame`` is called for the same file every time |
| 126 # there is a suspect that touched it, which can be very expensive. | 127 # there is a suspect that touched it, which can be very expensive. |
| 127 # The blame information can either be cached through repository (cached | 128 # The blame information can either be cached through repository (cached |
| 128 # by memcache based on repo url, revision and file path), or this | 129 # by memcache based on repo url, revision and file path), or this |
| 129 # function can have a static in-memory cache to cache blame for touched | 130 # function can have a static in-memory cache to cache blame for touched |
| 130 # files, however since blame information is big, it's not a good idea to | 131 # files, however since blame information is big, it's not a good idea to |
| 131 # keep it in memory. | 132 # keep it in memory. |
| 132 repository = self._get_repository(crash_dependency.repo_url) | 133 repository = self._get_repository(crash_dependency.repo_url) |
| 133 blame = repository.GetBlame(touched_file.new_path, | 134 blame = repository.GetBlame(touched_file.new_path, |
| 134 crash_dependency.revision) | 135 crash_dependency.revision) |
| 135 if not blame: | 136 if not blame: |
| 136 logging.warning('Failed to get blame information for %s', | 137 logging.warning('Failed to get blame information for %s', |
| 137 touched_file.new_path) | 138 touched_file.new_path) |
| 138 return None | 139 return None |
| 139 | 140 |
| 140 # Distance of this file. | 141 # Distance of this file. |
| 141 modified_frame_info = ModifiedFrameInfo(float('inf'), None) | 142 distance = Distance(float('inf'), None) |
| 142 for region in blame: | 143 for region in blame: |
| 143 if region.revision != revision: | 144 if region.revision != revision: |
| 144 continue | 145 continue |
| 145 | 146 |
| 146 region_start = region.start | 147 region_start = region.start |
| 147 region_end = region_start + region.count - 1 | 148 region_end = region_start + region.count - 1 |
| 148 for stack_info in stack_infos: | 149 for frame_info in frame_infos: |
| 149 frame_start = stack_info.frame.crashed_line_numbers[0] | 150 frame_start = frame_info.frame.crashed_line_numbers[0] |
| 150 frame_end = stack_info.frame.crashed_line_numbers[-1] | 151 frame_end = frame_info.frame.crashed_line_numbers[-1] |
| 151 distance = DistanceBetweenLineRanges((frame_start, frame_end), | 152 line_distance = DistanceBetweenLineRanges((frame_start, frame_end), |
| 152 (region_start, region_end)) | 153 (region_start, region_end)) |
| 153 modified_frame_info.Update(distance, stack_info.frame) | 154 distance.Update(line_distance, frame_info.frame) |
| 154 | 155 |
| 155 return modified_frame_info | 156 return distance |
| 156 | 157 |
| 157 def __call__(self, report): | 158 def __call__(self, report): |
| 158 """Returns the scaled min ``ModifiedFrameInfo.distance`` across all files. | 159 """Returns the scaled min ``Distance.distance`` across all files. |
| 159 | 160 |
| 160 Args: | 161 Args: |
| 161 report (CrashReport): the crash report being analyzed. | 162 report (CrashReport): the crash report being analyzed. |
| 162 | 163 |
| 163 Returns: | 164 Returns: |
| 164 A function from ``Suspect`` to the minimum distance between (the code | 165 A function from ``Suspect`` to the minimum distance between (the code |
| 165 for) a stack frame in that suspect and the CL in that suspect, as a | 166 for) a stack frame in that suspect and the CL in that suspect, as a |
| 166 log-domain ``float``. | 167 log-domain ``float``. |
| 167 """ | 168 """ |
| 168 def FeatureValueGivenReport(suspect, touched_file_to_stack_infos): | 169 def FeatureValueGivenReport(suspect, matches): |
| 169 """Function mapping suspect related data to MinDistance FeatureValue. | 170 """Function mapping suspect related data to MinDistance FeatureValue. |
| 170 | 171 |
| 171 Args: | 172 Args: |
| 172 suspect (Suspect): The suspected changelog and some meta information | 173 suspect (Suspect): The suspected changelog and some meta information |
| 173 about it. | 174 about it. |
| 174 touched_file_to_stack_infos(dict): Dict mapping ``FileChangeInfo`` to | 175 matches(dict): Dict mapping crashed group(CrashedFile, CrashedDirectory) |
| 175 a list of ``StackInfo``s representing all the frames that the suspect | 176 to a ``Match`` representing all frames and all touched files |
| 176 touched. | 177 matched in the same crashed group(same crashed file or crashed |
| 178 directory). |
| 177 | 179 |
| 178 Returns: | 180 Returns: |
| 179 The ``FeatureValue`` of this feature. | 181 The ``FeatureValue`` of this feature. |
| 180 """ | 182 """ |
| 181 if not touched_file_to_stack_infos: | 183 if not matches: |
| 182 FeatureValue(self.name, lmath.LOG_ZERO, | 184 FeatureValue(self.name, lmath.LOG_ZERO, |
| 183 'No file got touched by the suspect.', None) | 185 'No file got touched by the suspect.', None) |
| 184 | 186 |
| 185 modified_frame_info = ModifiedFrameInfo(float('inf'), None) | 187 distance = Distance(float('inf'), None) |
| 186 touched_file_to_modified_frame_info = {} | 188 touched_file_to_distance = {} |
| 187 for touched_file, stack_infos in touched_file_to_stack_infos.iteritems(): | 189 for match in matches.itervalues(): |
| 190 if len(match.touched_files) != 1: |
| 191 logging.warning('There should be only one touched file per crashed ' |
| 192 'file group.') |
| 193 continue |
| 194 |
| 195 touched_file = match.touched_files[0] |
| 188 # Records the closest frame (the frame has minimum distance between | 196 # Records the closest frame (the frame has minimum distance between |
| 189 # crashed lines and touched lines) for each touched file of the suspect. | 197 # crashed lines and touched lines) for each touched file of the suspect. |
| 190 modified_frame_info_per_file = ( | 198 distance_per_file = self.DistanceBetweenTouchedFileAndFrameInfos( |
| 191 self.DistanceBetweenTouchedFileAndStacktrace( | 199 suspect.changelog.revision, touched_file, |
| 192 suspect.changelog.revision, touched_file, stack_infos, | 200 match.frame_infos, report.dependencies[suspect.dep_path]) |
| 193 report.dependencies[suspect.dep_path])) | |
| 194 # Failed to get blame information of a file. | 201 # Failed to get blame information of a file. |
| 195 if not modified_frame_info_per_file: | 202 if not distance_per_file: |
| 196 logging.warning('suspect\'s change cannot be blamed due to lack of' | 203 logging.warning('suspect\'s change cannot be blamed due to lack of' |
| 197 'blame information for crashed file %s' % | 204 'blame information for crashed file %s' % |
| 198 touched_file.new_path) | 205 touched_file.new_path) |
| 199 continue | 206 continue |
| 200 | 207 |
| 201 # It is possible that a changelog doesn't show in the blame of a file, | 208 # It is possible that a changelog doesn't show in the blame of a file, |
| 202 # in this case, treat the changelog as if it didn't change the file. | 209 # in this case, treat the changelog as if it didn't change the file. |
| 203 if modified_frame_info_per_file.IsInfinity(): | 210 if distance_per_file.IsInfinity(): |
| 204 continue | 211 continue |
| 205 | 212 |
| 206 touched_file_to_modified_frame_info[ | 213 touched_file_to_distance[touched_file] = distance_per_file |
| 207 touched_file] = modified_frame_info_per_file | 214 distance.Update(distance_per_file.distance, |
| 208 modified_frame_info.Update(modified_frame_info_per_file.distance, | 215 distance_per_file.frame) |
| 209 modified_frame_info_per_file.frame) | |
| 210 | 216 |
| 211 return FeatureValue( | 217 return FeatureValue( |
| 212 name = self.name, | 218 name = self.name, |
| 213 value = LogLinearlyScaled(float(modified_frame_info.distance), | 219 value = LogLinearlyScaled(float(distance.distance), |
| 214 float(self._maximum)), | 220 float(self._maximum)), |
| 215 reason = ('Minimum distance is %d' % int(modified_frame_info.distance) | 221 reason = ('Minimum distance is %d' % int(distance.distance) |
| 216 if not math.isinf(modified_frame_info.distance) else | 222 if not math.isinf(distance.distance) else |
| 217 'Minimum distance is infinity'), | 223 'Minimum distance is infinity'), |
| 218 changed_files = MinDistanceFeature.ChangedFiles( | 224 changed_files = MinDistanceFeature.ChangedFiles( |
| 219 suspect, touched_file_to_modified_frame_info, | 225 suspect, touched_file_to_distance, |
| 220 report.crashed_version)) | 226 report.crashed_version)) |
| 221 | 227 |
| 222 return FeatureValueGivenReport | 228 return FeatureValueGivenReport |
| 223 | 229 |
| 224 @staticmethod | 230 @staticmethod |
| 225 def ChangedFiles(suspect, touched_file_to_modified_frame_info, | 231 def ChangedFiles(suspect, touched_file_to_distance, crashed_version): |
| 226 crashed_version): | |
| 227 """Get all the changed files causing this feature to blame this result. | 232 """Get all the changed files causing this feature to blame this result. |
| 228 | 233 |
| 229 Args: | 234 Args: |
| 230 suspect (Suspect): the suspect being blamed. | 235 suspect (Suspect): the suspect being blamed. |
| 231 touched_file_to_modified_frame_info (dict): Dict mapping file name to | 236 touched_file_to_distance (dict): Dict mapping file name to |
| 232 ``ModifiedFrameInfo``s. | 237 ``Distance``s. |
| 233 crashed_version (str): Crashed version. | 238 crashed_version (str): Crashed version. |
| 234 | 239 |
| 235 Returns: | 240 Returns: |
| 236 List of ``ChangedFile`` objects sorted by frame index. For example: | 241 List of ``ChangedFile`` objects sorted by frame index. For example: |
| 237 | 242 |
| 238 [ChangedFile( | 243 [ChangedFile( |
| 239 file = 'render_frame_impl.cc', | 244 file = 'render_frame_impl.cc', |
| 240 blame_url = 'https://chr.com/../render_frame_impl.cc#1586', | 245 blame_url = 'https://chr.com/../render_frame_impl.cc#1586', |
| 241 reasons = ['Minimum distance (LOC) 1, frame #5'] | 246 reasons = ['Minimum distance (LOC) 1, frame #5'] |
| 242 )] | 247 )] |
| 243 """ | 248 """ |
| 244 frame_index_to_changed_files = {} | 249 frame_index_to_changed_files = {} |
| 245 | 250 |
| 246 for touched_file, modified_frame_info in ( | 251 for touched_file, distance in ( |
| 247 touched_file_to_modified_frame_info.iteritems()): | 252 touched_file_to_distance.iteritems()): |
| 248 file_name = touched_file.new_path.split('/')[-1] | 253 file_name = touched_file.new_path.split('/')[-1] |
| 249 if modified_frame_info.frame is None: # pragma: no cover | 254 if distance.frame is None: # pragma: no cover |
| 250 logging.warning('Missing the min_distance_frame for file %s' % | 255 logging.warning('Missing the min_distance_frame for file %s' % |
| 251 file_name) | 256 file_name) |
| 252 continue | 257 continue |
| 253 | 258 |
| 254 frame_index_to_changed_files[ | 259 frame_index_to_changed_files[distance.frame.index] = ChangedFile( |
| 255 modified_frame_info.frame.index] = ChangedFile( | |
| 256 name=file_name, | 260 name=file_name, |
| 257 blame_url=modified_frame_info.frame.BlameUrl(crashed_version), | 261 blame_url=distance.frame.BlameUrl(crashed_version), |
| 258 reasons=['Distance from touched lines and crashed lines is %d, in' | 262 reasons=['Distance from touched lines and crashed lines is %d, in' |
| 259 ' frame #%d' % (modified_frame_info.distance, | 263 ' frame #%d' % (distance.distance, |
| 260 modified_frame_info.frame.index)]) | 264 distance.frame.index)]) |
| 261 | 265 |
| 262 if not frame_index_to_changed_files: # pragma: no cover | 266 if not frame_index_to_changed_files: # pragma: no cover |
| 263 logging.warning('Found no changed files for suspect: %s', str(suspect)) | 267 logging.warning('Found no changed files for suspect: %s', str(suspect)) |
| 264 return [] | 268 return [] |
| 265 | 269 |
| 266 # Sort changed file by frame index. | 270 # Sort changed file by frame index. |
| 267 _, changed_files = zip(*sorted(frame_index_to_changed_files.iteritems(), | 271 _, changed_files = zip(*sorted(frame_index_to_changed_files.iteritems(), |
| 268 key=lambda x: x[0])) | 272 key=lambda x: x[0])) |
| 269 | 273 |
| 270 return list(changed_files) | 274 return list(changed_files) |
| OLD | NEW |