| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 ## Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 ## Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
| 3 ## | 3 ## |
| 4 ## Use of this source code is governed by a BSD-style license | 4 ## Use of this source code is governed by a BSD-style license |
| 5 ## that can be found in the LICENSE file in the root of the source | 5 ## that can be found in the LICENSE file in the root of the source |
| 6 ## tree. An additional intellectual property rights grant can be found | 6 ## tree. An additional intellectual property rights grant can be found |
| 7 ## in the file PATENTS. All contributing project authors may | 7 ## in the file PATENTS. All contributing project authors may |
| 8 ## be found in the AUTHORS file in the root of the source tree. | 8 ## be found in the AUTHORS file in the root of the source tree. |
| 9 ## | 9 ## |
| 10 """Calculates the "intersection" of two unified diffs. | 10 """Calculates the "intersection" of two unified diffs. |
| 11 | 11 |
| 12 Given two diffs, A and B, it finds all hunks in B that had non-context lines | 12 Given two diffs, A and B, it finds all hunks in B that had non-context lines |
| 13 in A and prints them to stdout. This is useful to determine the hunks in B that | 13 in A and prints them to stdout. This is useful to determine the hunks in B that |
| 14 are relevant to A. The resulting file can be applied with patch(1) on top of A. | 14 are relevant to A. The resulting file can be applied with patch(1) on top of A. |
| 15 """ | 15 """ |
| 16 | 16 |
| 17 __author__ = "jkoleszar@google.com" | 17 __author__ = "jkoleszar@google.com" |
| 18 | 18 |
| 19 import re | |
| 20 import sys | 19 import sys |
| 21 | 20 |
| 22 | 21 import diff |
class DiffLines(object):
    """A container for one half (left or right side) of a diff hunk.

    Attributes:
      filename: file name given for this side of the diff.
      offset: line number of the first line of this side of the hunk.
      length: number of lines this side of the hunk is declared to hold.
      lines: the lines appended so far, each without its first character.
      delta_line_nums: line numbers (offset + index) of every appended line
        whose first character is not a space, i.e. the non-context lines.
    """

    def __init__(self, filename, offset, length):
        self.filename = filename
        self.offset = offset
        self.length = length
        self.lines = []
        self.delta_line_nums = []

    def Append(self, line):
        """Adds one diff line (including its leading marker) to this side.

        Args:
          line: the raw diff line; its first character is the marker and is
            stripped before the text is stored.

        Raises:
          AssertionError: if this side already holds `length` lines. The
            check runs before anything is stored, so a rejected line leaves
            the object unchanged, and it is not stripped by `python -O`.
        """
        marker = line[0]
        index = len(self.lines)
        if index + 1 > self.length:
            raise AssertionError(
                "%s: more than the declared %d lines appended"
                % (self.filename, self.length))
        if marker != " ":
            # Anything but a context line changes the file; remember where.
            self.delta_line_nums.append(self.offset + index)
        self.lines.append(line[1:])

    def Complete(self):
        """Returns True once exactly `length` lines have been appended."""
        return len(self.lines) == self.length

    def __contains__(self, item):
        """Returns True if line number `item` lies inside this hunk side."""
        return self.offset <= item <= self.offset + self.length - 1
| 45 | |
| 46 | |
class DiffHunk(object):
    """A container for one diff hunk, consisting of two DiffLines.

    Attributes:
      header: the header object passed to the constructor, stored as is.
      left: DiffLines built from file_a, start_a and len_a.
      right: DiffLines built from file_b, start_b and len_b.
      lines: every raw line appended to the hunk, in order.
    """

    def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b):
        self.header = header
        self.left = DiffLines(file_a, start_a, len_a)
        self.right = DiffLines(file_b, start_b, len_b)
        self.lines = []

    def Append(self, line):
        """Adds a line to the DiffHunk and its DiffLines children.

        "-" lines go to the left side, "+" lines to the right side and
        " " (context) lines to both. A line starting with a backslash
        ("\\ No newline at end of file") is kept in `lines` but counted on
        neither side, because it annotates the previous line rather than
        adding one.

        Raises:
          AssertionError: if the line starts with any other character
            (raised explicitly so the check survives `python -O`).
        """
        marker = line[:1]
        if marker == "-":
            self.left.Append(line)
        elif marker == "+":
            self.right.Append(line)
        elif marker == " ":
            self.left.Append(line)
            self.right.Append(line)
        elif marker == "\\":
            # Incomplete-line marker: recorded below, not part of either side.
            pass
        else:
            raise AssertionError("Unrecognized character at start of diff line "
                                 "%r" % marker)
        self.lines.append(line)

    def Complete(self):
        """Returns True when both sides hold their declared number of lines."""
        return self.left.Complete() and self.right.Complete()

    def __repr__(self):
        return "DiffHunk(%s, %s, len %d)" % (
            self.left.filename, self.right.filename,
            max(self.left.length, self.right.length))
| 77 | |
| 78 | |
def ParseDiffHunks(stream):
    """Walk a file-like object, yielding DiffHunks as they're parsed.

    Args:
      stream: object with a readline() method returning the lines of a
        unified diff, and an empty string at end of input.

    Yields:
      One DiffHunk per hunk. Its header is the list
      ["--- ..." line, "+++ ..." line, "@@ ..." line]. A completed hunk is
      held back until the following line has been read, so that a trailing
      "\\ No newline at end of file" marker can still be attached to it.

    Raises:
      AssertionError: if a hunk header appears before both file name lines,
        or if the input ends in the middle of a hunk.
    """
    file_regex = re.compile(r"(\+\+\+|---) (\S+)")
    range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")

    a_line = b_line = None
    a = b = None
    hunk = None      # hunk that is still collecting lines
    finished = None  # complete hunk waiting for a possible "\" marker line

    while True:
        line = stream.readline()
        if not line:
            break

        if line.startswith("\\"):
            # "\ No newline at end of file" annotates the previous line. Keep
            # it with the hunk it belongs to; it counts toward neither side,
            # so it is stored directly in the hunk's raw line list.
            owner = hunk if hunk is not None else finished
            if owner is not None:
                owner.lines.append(line)
            continue

        if finished is not None:
            yield finished
            finished = None

        if hunk is None:
            # Parse file names
            diff_file = file_regex.match(line)
            if diff_file:
                if line.startswith("---"):
                    a_line = line
                    a = diff_file.group(2)
                    continue
                if line.startswith("+++"):
                    b_line = line
                    b = diff_file.group(2)
                    continue

            # Parse offset/lengths
            diffrange = range_regex.match(line)
            if diffrange:
                if a_line is None or b_line is None:
                    raise AssertionError(
                        "Hunk header %r found before the file names" % line)

                # Unified format writes a range as "start,count"; when the
                # count is omitted the number is the start line and the
                # count is 1.
                start_a = int(diffrange.group(1))
                len_a = int(diffrange.group(3)) if diffrange.group(2) else 1
                start_b = int(diffrange.group(4))
                len_b = int(diffrange.group(6)) if diffrange.group(5) else 1

                header = [a_line, b_line, line]
                hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b)
        else:
            # Add the current line to the hunk
            hunk.Append(line)

            # See if the whole hunk has been parsed. If so, park it until
            # the next line shows whether a "\" marker follows, and prepare
            # for the next hunk.
            if hunk.Complete():
                finished = hunk
                hunk = None

    # Partial hunks are a parse error
    if hunk is not None:
        raise AssertionError("Input ended in the middle of %r" % hunk)

    if finished is not None:
        yield finished
| 134 | 22 |
| 135 | 23 |
| 136 def FormatDiffHunks(hunks): | 24 def FormatDiffHunks(hunks): |
| 137 """Re-serialize a list of DiffHunks.""" | 25 """Re-serialize a list of DiffHunks.""" |
| 138 r = [] | 26 r = [] |
| 139 last_header = None | 27 last_header = None |
| 140 for hunk in hunks: | 28 for hunk in hunks: |
| 141 this_header = hunk.header[0:2] | 29 this_header = hunk.header[0:2] |
| 142 if last_header != this_header: | 30 if last_header != this_header: |
| 143 r.extend(hunk.header) | 31 r.extend(hunk.header) |
| (...skipping 11 matching lines...) Expand all Loading... |
| 155 rhs_file = rhs_hunk.right.filename.split("/")[1:] | 43 rhs_file = rhs_hunk.right.filename.split("/")[1:] |
| 156 | 44 |
| 157 for lhs_hunk in lhs_hunks: | 45 for lhs_hunk in lhs_hunks: |
| 158 lhs_file = lhs_hunk.left.filename.split("/")[1:] | 46 lhs_file = lhs_hunk.left.filename.split("/")[1:] |
| 159 if lhs_file != rhs_file: | 47 if lhs_file != rhs_file: |
| 160 continue | 48 continue |
| 161 yield (rhs_hunk, lhs_hunk) | 49 yield (rhs_hunk, lhs_hunk) |
| 162 | 50 |
| 163 | 51 |
def main():
    """Prints the hunks of the newer diff that overlap changes in the older one.

    sys.argv[1] is the older diff and sys.argv[2] the newer diff. A hunk of
    the newer diff is selected when its left-hand line range contains at
    least one non-context line number from the right-hand side of a paired
    hunk of the older diff.

    Exit status: 1 when at least one hunk was printed, 2 when the two file
    arguments are missing; otherwise the function simply returns.
    """
    if len(sys.argv) < 3:
        # Use a status distinct from 1, which means "intersection found".
        sys.stderr.write("Usage: %s OLD_DIFF NEW_DIFF\n" % sys.argv[0])
        sys.exit(2)

    # Close each input as soon as it has been parsed.
    with open(sys.argv[1], "r") as old_file:
        old_hunks = list(diff.ParseDiffHunks(old_file))
    with open(sys.argv[2], "r") as new_file:
        new_hunks = list(diff.ParseDiffHunks(new_file))
    out_hunks = []

    # Join the right hand side of the older diff with the left hand side of the
    # newer diff.
    for old_hunk, new_hunk in ZipHunks(old_hunks, new_hunks):
        if new_hunk in out_hunks:
            continue
        old_lines = old_hunk.right
        new_lines = new_hunk.left

        # Determine if this hunk overlaps any non-context line from the other
        if any(i in new_lines for i in old_lines.delta_line_nums):
            out_hunks.append(new_hunk)

    if out_hunks:
        # Parenthesized single argument: same output under Python 2's print
        # statement and Python 3's print function.
        print(FormatDiffHunks(out_hunks))
        sys.exit(1)
| 186 | 74 |
| 187 if __name__ == "__main__": | 75 if __name__ == "__main__": |
| 188 main() | 76 main() |
| OLD | NEW |