Index: git_hyper_blame.py |
diff --git a/git_hyper_blame.py b/git_hyper_blame.py |
index 17424511ab178c62a7d22f8e3f77f343053b46bc..5a7daa0cf56b2db00fb5c98b391f7572fa62ed0c 100755 |
--- a/git_hyper_blame.py |
+++ b/git_hyper_blame.py |
@@ -149,6 +149,110 @@ def get_parsed_blame(filename, revision='HEAD'): |
return list(parse_blame(blame)) |
+# Map from (oldrev, newrev) to hunk list (caching the results of git diff, but |
+# only the hunk line numbers, not the actual diff contents). A hunk list holds |
+# (old, new) pairs of (start, length) pairs. NOTE(review): it was documented as |
+# possibly None (if the diff failed), but cache_diff_hunks never stores None. |
+diff_hunks_cache = {} |
+ |
+ |
def cache_diff_hunks(oldrev, newrev):
  """Returns the hunk ranges of the diff from oldrev to newrev, memoized.

  Args:
    oldrev: Left-hand side passed to git_common.diff.
    newrev: Right-hand side passed to git_common.diff.

  Returns:
    A list holding one ((oldstart, oldlength), (newstart, newlength)) tuple of
    ints for every line of the diff output that starts with '@@', in output
    order. The same list object is stored in diff_hunks_cache under
    (oldrev, newrev) and handed back on later calls with the same arguments.
  """
  key = (oldrev, newrev)
  if key in diff_hunks_cache:
    return diff_hunks_cache[key]

  def to_range(field):
    # |field| looks like '-12,3' or '+12'; drop the leading sign character.
    start, comma, length = field[1:].partition(',')
    # The ',length' part is optional; a missing length means 1.
    return int(start), int(length) if comma else 1

  # Ask for zero context lines (-U0) so that hunks are as small as possible.
  diff_text = git_common.diff(oldrev, newrev, '-U0')

  # A hunk header reads '@@ -old +new @@ ...': once split on spaces, fields 1
  # and 2 are the old and new ranges. Every other line of the diff is skipped.
  hunk_ranges = [
      tuple(to_range(field) for field in header.split(' ', 3)[1:3])
      for header in diff_text.split('\n')
      if header.startswith('@@')
  ]

  diff_hunks_cache[key] = hunk_ranges
  return hunk_ranges
+ |
+ |
def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
                              lineno):
  """Computes the approximate movement of a line number between two revisions.

  Consider line |lineno| in |filename| at |revision|. This function computes the
  line number of that line in |newfilename| at |newrevision|. This is
  necessarily approximate.

  Args:
    filename: The file (within the repo) at |revision|.
    newfilename: The name of the same file at |newrevision|.
    revision: A git revision.
    newrevision: Another git revision. Note: Can be ahead or behind |revision|.
    lineno: Line number within |filename| at |revision|.

  Returns:
    Line number within |newfilename| at |newrevision|. If the line lies inside
    a changed hunk, the result is clamped to that hunk's range in the new file
    (or, when the hunk was deleted outright, it is the line just before the
    deletion, but never less than 1). Otherwise the line is shifted by the net
    size change of the hunks that precede it.
  """
  # This doesn't work that well if there are a lot of line changes within the
  # hunk (demonstrated by GitHyperBlameLineMotionTest.testIntraHunkLineMotion).
  # A fuzzy heuristic that takes the text of the new line and tries to find a
  # deleted line within the hunk that mostly matches the new line could help.

  # Use the <revision>:<filename> syntax to diff between two blobs. This is the
  # only way to diff a file that has been renamed.
  old = '%s:%s' % (revision, filename)
  new = '%s:%s' % (newrevision, newfilename)
  hunks = cache_diff_hunks(old, new)

  # Net number of lines added by the hunks that lie entirely before lineno.
  # A hunk must only be counted once it is known to precede lineno; counting
  # the first hunk *after* lineno would shift an untouched line.
  cumulative_offset = 0

  # Find the hunk containing lineno (if any). Hunks are expected in ascending
  # line order, which is what lets the loop stop at the first one past lineno.
  for (oldstart, oldlength), (newstart, newlength) in hunks:
    if oldlength == 0:
      # Pure insertion. An empty old range is reported as the line *before*
      # the inserted text, so oldstart itself is untouched and only the lines
      # after it are pushed down.
      if lineno <= oldstart:
        # Gone too far.
        break
      cumulative_offset += newlength
      continue

    if lineno >= oldstart + oldlength:
      # Not there yet: this hunk ends before lineno, so its size change moves
      # lineno.
      cumulative_offset += newlength - oldlength
      continue

    if lineno < oldstart:
      # Gone too far: this hunk and all later ones start after lineno and
      # cannot move it.
      break

    # lineno is in [oldstart, oldstart + oldlength) at revision; the hunk
    # covers [newstart, newstart + newlength) at newrevision.

    # If newlength == 0, newstart will be the line before the deleted hunk.
    # Since the line must have been deleted, just return that as the nearest
    # line in the new file. Caution: newstart can be 0 in this case.
    if newlength == 0:
      return max(1, newstart)

    newend = newstart + newlength - 1

    # Move lineno based on the amount the entire hunk shifted.
    lineno = lineno + newstart - oldstart
    # Constrain the output within the range [newstart, newend].
    return min(newend, max(newstart, lineno))

  # Wasn't in a hunk. Figure out the line motion based on the difference in
  # length between the hunks that precede lineno.
  return lineno + cumulative_offset
+ |
+ |
def hyper_blame(ignored, filename, revision='HEAD', out=sys.stdout, |
err=sys.stderr): |
# Map from commit to parsed blame from that commit. |
@@ -189,23 +293,19 @@ def hyper_blame(ignored, filename, revision='HEAD', out=sys.stdout, |
# ignore this commit. |
break |
- # line.lineno_then is the line number in question at line.commit. |
- # TODO(mgiuca): This will be incorrect if line.commit added or removed |
- # lines. Translate that line number so that it refers to the position of |
- # the same line on previouscommit. |
- lineno_previous = line.lineno_then |
+ # line.lineno_then is the line number in question at line.commit. We need |
+ # to translate that line number so that it refers to the position of the |
+ # same line on previouscommit. |
+ lineno_previous = approx_lineno_across_revs( |
+ line.commit.filename, previousfilename, line.commit.commithash, |
+ previouscommit, line.lineno_then) |
logging.debug('ignore commit %s on line p%d/t%d/n%d', |
line.commit.commithash, lineno_previous, line.lineno_then, |
line.lineno_now) |
# Get the line at lineno_previous in the parent commit. |
- assert lineno_previous > 0 |
- try: |
- newline = parent_blame[lineno_previous - 1] |
- except IndexError: |
- # lineno_previous is a guess, so it may be past the end of the file. |
- # Just grab the last line in the file. |
- newline = parent_blame[-1] |
+ assert 1 <= lineno_previous <= len(parent_blame) |
+ newline = parent_blame[lineno_previous - 1] |
# Replace the commit and lineno_then, but not the lineno_now or context. |
logging.debug(' replacing with %r', newline) |