OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright 2016 The Chromium Authors. All rights reserved. | 2 # Copyright 2016 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Wrapper around git blame that ignores certain commits. | 6 """Wrapper around git blame that ignores certain commits. |
7 """ | 7 """ |
8 | 8 |
9 from __future__ import print_function | 9 from __future__ import print_function |
10 | 10 |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
142 row.insert(1, line.commit.filename) | 142 row.insert(1, line.commit.filename) |
143 table.append(row) | 143 table.append(row) |
144 print_table(table, align='llllrl' if show_filenames else 'lllrl', out=out) | 144 print_table(table, align='llllrl' if show_filenames else 'lllrl', out=out) |
145 | 145 |
146 | 146 |
def get_parsed_blame(filename, revision='HEAD'):
  """Runs a porcelain blame on a file and returns the parsed result as a list.

  Args:
    filename: The file to blame; forwarded to git_common.blame.
    revision: The revision to blame at (defaults to 'HEAD').

  Returns:
    A list holding every item produced by parse_blame for the blame output.
  """
  porcelain_output = git_common.blame(filename, revision=revision,
                                      porcelain=True)
  # parse_blame's result is materialized so callers can index it and take
  # its length.
  parsed_lines = list(parse_blame(porcelain_output))
  return parsed_lines
150 | 150 |
151 | 151 |
# Map from (oldrev, newrev) to hunk list (caching the results of git diff, but
# only the hunk line numbers, not the actual diff contents).
# A hunk list contains (old, new) pairs, where old and new are (start, length)
# pairs taken from the '@@ -start[,length] +start[,length] @@' hunk headers.
# cache_diff_hunks never stores None: if git_common.diff raises, nothing is
# cached for that key.
diff_hunks_cache = {}


def _parse_hunk_range(token):
  """Parses one side of a hunk header ('-12,3', '+7', ...).

  Args:
    token: A range token from a '@@' line, including its leading '-' or '+'.

  Returns:
    A (start, length) pair of ints. Length defaults to 1 when omitted.
  """
  # Chop the '-' or '+', then split off the optional ',length' part.
  start, comma, length = token[1:].partition(',')
  return int(start), int(length) if comma else 1


def cache_diff_hunks(oldrev, newrev):
  """Returns the hunk ranges of the diff between two revisions, with caching.

  Args:
    oldrev: The old side of the diff, passed as-is to git_common.diff.
    newrev: The new side of the diff, passed as-is to git_common.diff.

  Returns:
    A list of ((oldstart, oldlength), (newstart, newlength)) tuples, one per
    hunk, in the order the hunks appear in the diff output. The same list
    object is returned on subsequent calls with the same arguments.
  """
  key = (oldrev, newrev)
  try:
    return diff_hunks_cache[key]
  except KeyError:
    pass

  # Use -U0 to get the smallest possible hunks.
  diff = git_common.diff(oldrev, newrev, '-U0')

  # Get all the hunks. Only the '@@ -old +new @@ ...' header lines matter; the
  # second and third space-separated fields are the old and new ranges.
  hunks = []
  for line in diff.split('\n'):
    if not line.startswith('@@'):
      continue
    ranges = line.split(' ', 3)[1:3]
    hunks.append(tuple(_parse_hunk_range(r) for r in ranges))

  diff_hunks_cache[key] = hunks
  return hunks


def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
                              lineno):
  """Computes the approximate movement of a line number between two revisions.

  Consider line |lineno| in |filename| at |revision|. This function computes the
  line number of that line in |newfilename| at |newrevision|. This is
  necessarily approximate.

  Args:
    filename: The file (within the repo) at |revision|.
    newfilename: The name of the same file at |newrevision|.
    revision: A git revision.
    newrevision: Another git revision. Note: Can be ahead or behind |revision|.
    lineno: Line number within |filename| at |revision|.

  Returns:
    Line number within |newfilename| at |newrevision|.
  """
  # This doesn't work that well if there are a lot of line changes within the
  # hunk (demonstrated by GitHyperBlameLineMotionTest.testIntraHunkLineMotion).
  # A fuzzy heuristic that takes the text of the new line and tries to find a
  # deleted line within the hunk that mostly matches the new line could help.

  # Use the <revision>:<filename> syntax to diff between two blobs. This is the
  # only way to diff a file that has been renamed.
  old = '%s:%s' % (revision, filename)
  new = '%s:%s' % (newrevision, newfilename)
  hunks = cache_diff_hunks(old, new)

  # Net number of lines added by the hunks that lie entirely before lineno.
  # Hunks at or after lineno must not contribute, or lines preceding a change
  # would be shifted by a change that happens below them.
  cumulative_offset = 0

  # Find the hunk containing lineno (if any). This relies on the hunks being
  # listed in ascending line order.
  for (oldstart, oldlength), (newstart, newlength) in hunks:
    if oldlength == 0:
      # Pure insertion: the old range names the line *before* the insertion
      # point, so the (empty) hunk really sits just after oldstart. Line
      # oldstart itself is unaffected by it.
      oldstart += 1

    if lineno < oldstart:
      # Gone too far: this hunk and all later ones are below lineno.
      break

    if lineno >= oldstart + oldlength:
      # Not there yet: the hunk is entirely above lineno, so it shifts it.
      cumulative_offset += newlength - oldlength
      continue

    # lineno is in [oldstart, oldstart + oldlength) at revision, which became
    # [newstart, newstart + newlength) at newrevision.

    # If newlength == 0, newstart will be the line before the deleted hunk.
    # Since the line must have been deleted, just return that as the nearest
    # line in the new file. Caution: newstart can be 0 in this case.
    if newlength == 0:
      return max(1, newstart)

    newend = newstart + newlength - 1

    # Move lineno based on the amount the entire hunk shifted.
    shifted = lineno + newstart - oldstart
    # Constrain the output within the range [newstart, newend].
    return min(newend, max(newstart, shifted))

  # Wasn't in a hunk. Figure out the line motion based on the difference in
  # length of the hunks that precede lineno.
  return lineno + cumulative_offset
| 254 |
| 255 |
152 def hyper_blame(ignored, filename, revision='HEAD', out=sys.stdout, | 256 def hyper_blame(ignored, filename, revision='HEAD', out=sys.stdout, |
153 err=sys.stderr): | 257 err=sys.stderr): |
154 # Map from commit to parsed blame from that commit. | 258 # Map from commit to parsed blame from that commit. |
155 blame_from = {} | 259 blame_from = {} |
156 | 260 |
157 def cache_blame_from(filename, commithash): | 261 def cache_blame_from(filename, commithash): |
158 try: | 262 try: |
159 return blame_from[commithash] | 263 return blame_from[commithash] |
160 except KeyError: | 264 except KeyError: |
161 parsed = get_parsed_blame(filename, commithash) | 265 parsed = get_parsed_blame(filename, commithash) |
(...skipping 20 matching lines...) Expand all Loading... |
182 break | 286 break |
183 | 287 |
184 previouscommit, previousfilename = line.commit.previous.split(' ', 1) | 288 previouscommit, previousfilename = line.commit.previous.split(' ', 1) |
185 parent_blame = cache_blame_from(previousfilename, previouscommit) | 289 parent_blame = cache_blame_from(previousfilename, previouscommit) |
186 | 290 |
187 if len(parent_blame) == 0: | 291 if len(parent_blame) == 0: |
188 # The previous version of this file was empty, therefore, you can't | 292 # The previous version of this file was empty, therefore, you can't |
189 # ignore this commit. | 293 # ignore this commit. |
190 break | 294 break |
191 | 295 |
192 # line.lineno_then is the line number in question at line.commit. | 296 # line.lineno_then is the line number in question at line.commit. We need |
193 # TODO(mgiuca): This will be incorrect if line.commit added or removed | 297 # to translate that line number so that it refers to the position of the |
194 # lines. Translate that line number so that it refers to the position of | 298 # same line on previouscommit. |
195 # the same line on previouscommit. | 299 lineno_previous = approx_lineno_across_revs( |
196 lineno_previous = line.lineno_then | 300 line.commit.filename, previousfilename, line.commit.commithash, |
| 301 previouscommit, line.lineno_then) |
197 logging.debug('ignore commit %s on line p%d/t%d/n%d', | 302 logging.debug('ignore commit %s on line p%d/t%d/n%d', |
198 line.commit.commithash, lineno_previous, line.lineno_then, | 303 line.commit.commithash, lineno_previous, line.lineno_then, |
199 line.lineno_now) | 304 line.lineno_now) |
200 | 305 |
201 # Get the line at lineno_previous in the parent commit. | 306 # Get the line at lineno_previous in the parent commit. |
202 assert lineno_previous > 0 | 307 assert 1 <= lineno_previous <= len(parent_blame) |
203 try: | 308 newline = parent_blame[lineno_previous - 1] |
204 newline = parent_blame[lineno_previous - 1] | |
205 except IndexError: | |
206 # lineno_previous is a guess, so it may be past the end of the file. | |
207 # Just grab the last line in the file. | |
208 newline = parent_blame[-1] | |
209 | 309 |
210 # Replace the commit and lineno_then, but not the lineno_now or context. | 310 # Replace the commit and lineno_then, but not the lineno_now or context. |
211 logging.debug(' replacing with %r', newline) | 311 logging.debug(' replacing with %r', newline) |
212 line = BlameLine(newline.commit, line.context, lineno_previous, | 312 line = BlameLine(newline.commit, line.context, lineno_previous, |
213 line.lineno_now, True) | 313 line.lineno_now, True) |
214 | 314 |
215 # If any line has a different filename to the file's current name, turn on | 315 # If any line has a different filename to the file's current name, turn on |
216 # filename display for the entire blame output. | 316 # filename display for the entire blame output. |
217 if line.commit.filename != filename: | 317 if line.commit.filename != filename: |
218 show_filenames = True | 318 show_filenames = True |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
257 # Custom error message (the message from git-rev-parse is inappropriate). | 357 # Custom error message (the message from git-rev-parse is inappropriate). |
258 stderr.write('fatal: unknown revision \'%s\'.\n' % c) | 358 stderr.write('fatal: unknown revision \'%s\'.\n' % c) |
259 return e.returncode | 359 return e.returncode |
260 | 360 |
261 return hyper_blame(ignored, filename, args.revision, out=stdout, err=stderr) | 361 return hyper_blame(ignored, filename, args.revision, out=stdout, err=stderr) |
262 | 362 |
263 | 363 |
if __name__ == '__main__':  # pragma: no cover
  # Script entry point: main() writes its stdout to the stream provided by
  # git_common.less() (presumably a pager -- confirm in git_common), and its
  # return value becomes the process exit status.
  with git_common.less() as less_input:
    exit_status = main(sys.argv[1:], stdout=less_input)
    sys.exit(exit_status)
OLD | NEW |