Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1872)

Unified Diff: appengine/findit/crash/loglinear/changelist_features/min_distance.py

Issue 2613153006: [Predator] Add TouchCrashedFileMetaFeature. (Closed)
Patch Set: Add comments. Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: appengine/findit/crash/loglinear/changelist_features/min_distance.py
diff --git a/appengine/findit/crash/loglinear/changelist_features/min_distance.py b/appengine/findit/crash/loglinear/changelist_features/min_distance.py
index 4f985a45bd07391855796ab19df89d9ea37f69ad..d60e82d758cca590c6e76b2c12b1877db7765a50 100644
--- a/appengine/findit/crash/loglinear/changelist_features/min_distance.py
+++ b/appengine/findit/crash/loglinear/changelist_features/min_distance.py
@@ -5,15 +5,75 @@
import logging
import math
+from crash.loglinear.changelist_classifier import StackInfo
from crash.loglinear.feature import ChangedFile
from crash.loglinear.feature import Feature
from crash.loglinear.feature import FeatureValue
from crash.loglinear.feature import LogLinearlyScaled
+from libs.gitiles.diff import ChangeType
import libs.math.logarithms as lmath
-# N.B., this must not be infinity, else we'll start getting NaN values
-# from LinearMinDistanceFeature (and SquaredMinDistanceFeature).
-DEFAULT_MAXIMUM = 50
+
+
+class ModifiedFrameInfo(object):
+ """Represents the closest frame to a changelog which modified it.
+
+ The "closest" means that the distance between crashed lines in the frame and
+ touched lines in a changelog is minimum.
+
+ Properties:
+ distance (int or float('inf')): The distance between crashed lines and
+ touched lines, if a changelog doesn't show in blame of the crashed file of
+ the crashed version (either it didn't touch the crashed file or it got
+ overwritten by other cls), the distance would be infinite.
+ frame (StackFrame): The frame which got modified.
+ """
+
+ def __init__(self, distance, frame):
+ self.distance = distance
+ self.frame = frame
+
+ def Update(self, distance, frame):
+ if distance < self.distance:
+ self.distance = distance
+ self.frame = frame
+
+ # TODO(katesonia): we should change things to use integers with None as
+ # \"infinity\", rather than using floats.
+ def IsInfinity(self):
+ return math.isinf(self.distance)
+
+ def __str__(self): # pragma: no cover
+ return 'Min distance(distance = %f, frame = %s)' % (float(self.distance),
+ str(self.frame))
+
+ def __eq__(self, other):
+ return self.distance == other.distance and self.frame == other.frame
+
+
+def DistanceBetweenLineRanges((start1, end1), (start2, end2)):
+ """Given two ranges, compute the (unsigned) distance between them.
+
+ Args:
+ start1 (int): the first line included in the first range.
+ end1 (int): the last line included in the first range. Must be
+ greater than or equal to ``start1``.
+ start2 (int): the first line included in the second range.
+ end2 (int): the last line included in the second range. Must be
+ greater than or equal to ``start1``.
+
+ Returns:
+ If the end of the earlier range comes before the start of the later
+ range, then the difference between those points. Otherwise, returns
+ zero (because the ranges overlap).
+ """
+ if end1 < start1:
+ raise ValueError('the first range is empty: %d < %d' % (end1, start1))
+ if end2 < start2:
+ raise ValueError('the second range is empty: %d < %d' % (end2, start2))
+ # There are six possible cases, but in all the cases where the two
+ # ranges overlap, the latter two differences will be negative.
+ return max(0, start2 - end1, start1 - end2)
class MinDistanceFeature(Feature):
@@ -34,23 +94,69 @@ class MinDistanceFeature(Feature):
between we scale the normal-domain values linearly, which means the
log-domain values are scaled exponentially.
"""
- def __init__(self, maximum=None):
+ def __init__(self, get_repository, maximum):
"""
Args:
- maximum (float): An upper bound on the min_distance to
- consider. This argument is optional and defaults to
- ``DEFAULT_MAXIMUM``.
+ maximum (float): An upper bound on the min_distance to consider.
"""
- if maximum is None:
- maximum = DEFAULT_MAXIMUM
+ self._get_repository = get_repository
self._maximum = maximum
@property
def name(self):
return 'MinDistance'
+ def DistanceBetweenTouchedFileAndStacktrace(
+ self, revision, touched_file, stack_infos, crash_dependency):
+ """Gets ``ModifiedFrameInfo`` between touched and crashed lines in a file.
+
+ Args:
+ revision (str): The revision of the suspect.
+ touched_file (FileChangeInfo): The file touched by the suspect.
+ stack_infos (list of StackInfos): List of information of frames in the
+ stacktrace which contains ``touched_file``.
+ crash_dependency (Dependency): The depedency of crashed revision. N.B. The
+ crashed revision is the revision where crash happens, however the
+ first parameter ``revision`` is the revision of the suspect cl, which
+ must be before the crashed revision.
+
+ Returns:
+ ``ModifiedFrameInfo`` object of touched file and stacktrace.
+ """
+ # TODO(katesonia) ``GetBlame`` is called for the same file everytime
+ # there is a suspect that touched it, which can be very expensive.
+ # The blame information can either be cached through repository (cached
+ # by memcache based on repo url, revision and file path), or this
+ # function can have a static in-memory cache to cache blame for touched
+ # files, however since blame information is big, it's not a good idea to
+ # keep it in memory.
+ repository = self._get_repository(crash_dependency.repo_url)
+ blame = repository.GetBlame(touched_file.new_path,
+ crash_dependency.revision)
+ if not blame:
+ logging.warning('Failed to get blame information for %s',
+ touched_file.new_path)
+ return None
+
+ # Distance of this file.
+ modified_frame_info = ModifiedFrameInfo(float('inf'), None)
+ for region in blame:
+ if region.revision != revision:
+ continue
+
+ region_start = region.start
+ region_end = region_start + region.count - 1
+ for stack_info in stack_infos:
+ frame_start = stack_info.frame.crashed_line_numbers[0]
+ frame_end = stack_info.frame.crashed_line_numbers[-1]
+ distance = DistanceBetweenLineRanges((frame_start, frame_end),
+ (region_start, region_end))
+ modified_frame_info.Update(distance, stack_info.frame)
+
+ return modified_frame_info
+
def __call__(self, report):
- """Returns the scaled min ``AnalysisInfo.min_distance`` across all files.
+ """Returns the scaled min ``ModifiedFrameInfo.distance`` across all files.
Args:
report (CrashReport): the crash report being analyzed.
@@ -60,32 +166,72 @@ class MinDistanceFeature(Feature):
for) a stack frame in that suspect and the CL in that suspect, as a
log-domain ``float``.
"""
- def FeatureValueGivenReport(suspect):
- analyses = suspect.file_to_analysis_info
- if not analyses:
- message = 'No AnalysisInfo for any file in suspect: %s' % str(suspect)
- logging.warning(message)
- return FeatureValue(self.name, lmath.LOG_ZERO, message, None)
+ def FeatureValueGivenReport(suspect, touched_file_to_stack_infos):
+ """Function mapping suspect related data to MinDistance FeatureValue.
+
+ Args:
+ suspect (Suspect): The suspected changelog and some meta information
+ about it.
+ touched_file_to_stack_infos(dict): Dict mapping ``FileChangeInfo`` to
+ a list of ``StackInfo``s representing all the frames that the suspect
+ touched.
+
+ Returns:
+ The ``FeatureValue`` of this feature.
+ """
+ if not touched_file_to_stack_infos:
+ FeatureValue(self.name, lmath.LOG_ZERO,
+ 'No file got touched by the suspect.', None)
- min_distance = min(per_file_analysis.min_distance
- for per_file_analysis in analyses.itervalues())
+ modified_frame_info = ModifiedFrameInfo(float('inf'), None)
+ touched_file_to_modified_frame_info = {}
+ for touched_file, stack_infos in touched_file_to_stack_infos.iteritems():
+ # Records the closest frame (the frame has minimum distance between
+ # crashed lines and touched lines) for each touched file of the suspect.
+ modified_frame_info_per_file = (
+ self.DistanceBetweenTouchedFileAndStacktrace(
+ suspect.changelog.revision, touched_file, stack_infos,
+ report.dependencies[suspect.dep_path]))
+ # Failed to get blame information of a file.
+ if not modified_frame_info_per_file:
+ logging.warning('suspect\'s change cannot be blamed due to lack of'
+ 'blame information for crashed file %s' %
+ touched_file.new_path)
+ continue
+
+ # It is possible that a changelog doesn't show in the blame of a file,
+ # in this case, treat the changelog as if it didn't change the file.
+ if modified_frame_info_per_file.IsInfinity():
+ continue
+
+ touched_file_to_modified_frame_info[
+ touched_file] = modified_frame_info_per_file
+ modified_frame_info.Update(modified_frame_info_per_file.distance,
+ modified_frame_info_per_file.frame)
return FeatureValue(
name = self.name,
- value = LogLinearlyScaled(float(min_distance), float(self._maximum)),
- reason = ('Minimum distance is %d' % min_distance
- if not math.isinf(min_distance) else
+ value = LogLinearlyScaled(float(modified_frame_info.distance),
+ float(self._maximum)),
+ reason = ('Minimum distance is %d' % int(modified_frame_info.distance)
+ if not math.isinf(modified_frame_info.distance) else
'Minimum distance is infinity'),
- changed_files = self._ChangedFiles(suspect),
- )
+ changed_files = MinDistanceFeature.ChangedFiles(
+ suspect, touched_file_to_modified_frame_info,
+ report.crashed_version))
return FeatureValueGivenReport
- def _ChangedFiles(self, suspect):
- """Get all the changed files causing this feature to blame this suspect.
+ @staticmethod
+ def ChangedFiles(suspect, touched_file_to_modified_frame_info,
+ crashed_version):
+ """Get all the changed files causing this feature to blame this result.
Arg:
suspect (Suspect): the suspect being blamed.
+ touched_file_to_modified_frame_info (dict): Dict mapping file name to
+ ``ModifiedFrameInfo``s.
+ crashed_version (str): Crashed version.
Returns:
List of ``ChangedFile`` objects sorted by frame index. For example:
@@ -96,34 +242,30 @@ class MinDistanceFeature(Feature):
reasons = ['Minimum distance (LOC) 1, frame #5']
)]
"""
- index_to_changed_files = {}
-
- for file_path, analysis_info in suspect.file_to_analysis_info.iteritems():
- file_name = file_path.split('/')[-1]
- frame = analysis_info.min_distance_frame
- if frame is None: # pragma: no cover
- logging.warning('Missing the min_distance_frame for %s'
- % str(analysis_info))
- continue
+ frame_index_to_changed_files = {}
- # It is possible that a changelog doesn't show in the blame of a file,
- # in this case, treat the changelog as if it didn't change the file.
- if math.isinf(analysis_info.min_distance): # pragma: no cover
- logging.warning('min_distance is infinite for %s' % str(analysis_info))
+ for touched_file, modified_frame_info in (
+ touched_file_to_modified_frame_info.iteritems()):
+ file_name = touched_file.new_path.split('/')[-1]
+ if modified_frame_info.frame is None: # pragma: no cover
+ logging.warning('Missing the min_distance_frame for file %s' %
+ file_name)
continue
- index_to_changed_files[frame.index] = ChangedFile(
- name = file_name,
- blame_url = frame.BlameUrl(suspect.changelog.revision),
- reasons = ['Minimum distance (LOC) %d, frame #%d' % (
- analysis_info.min_distance, frame.index)]
- )
+ frame_index_to_changed_files[
+ modified_frame_info.frame.index] = ChangedFile(
+ name=file_name,
+ blame_url=modified_frame_info.frame.BlameUrl(crashed_version),
+ reasons=['Distance from touched lines and crashed lines is %d, in'
+ ' frame #%d' % (modified_frame_info.distance,
+ modified_frame_info.frame.index)])
- if not index_to_changed_files: # pragma: no cover
+ if not frame_index_to_changed_files: # pragma: no cover
logging.warning('Found no changed files for suspect: %s', str(suspect))
+ return []
# Sort changed file by frame index.
- _, changed_files = zip(*sorted(index_to_changed_files.items(),
+ _, changed_files = zip(*sorted(frame_index_to_changed_files.iteritems(),
key=lambda x: x[0]))
return list(changed_files)

Powered by Google App Engine
This is Rietveld 408576698