OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 ## Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 ## Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
3 ## | 3 ## |
4 ## Use of this source code is governed by a BSD-style license | 4 ## Use of this source code is governed by a BSD-style license |
5 ## that can be found in the LICENSE file in the root of the source | 5 ## that can be found in the LICENSE file in the root of the source |
6 ## tree. An additional intellectual property rights grant can be found | 6 ## tree. An additional intellectual property rights grant can be found |
7 ## in the file PATENTS. All contributing project authors may | 7 ## in the file PATENTS. All contributing project authors may |
8 ## be found in the AUTHORS file in the root of the source tree. | 8 ## be found in the AUTHORS file in the root of the source tree. |
9 ## | 9 ## |
10 """Calculates the "intersection" of two unified diffs. | 10 """Calculates the "intersection" of two unified diffs. |
11 | 11 |
12 Given two diffs, A and B, it finds all hunks in B that had non-context lines | 12 Given two diffs, A and B, it finds all hunks in B that had non-context lines |
13 in A and prints them to stdout. This is useful to determine the hunks in B that | 13 in A and prints them to stdout. This is useful to determine the hunks in B that |
14 are relevant to A. The resulting file can be applied with patch(1) on top of A. | 14 are relevant to A. The resulting file can be applied with patch(1) on top of A. |
15 """ | 15 """ |
16 | 16 |
17 __author__ = "jkoleszar@google.com" | 17 __author__ = "jkoleszar@google.com" |
18 | 18 |
19 import re | |
20 import sys | 19 import sys |
21 | 20 |
22 | 21 import diff |
class DiffLines(object):
    """A container for one half (one side) of a diff hunk.

    Attributes:
      filename: name of the file this side refers to, as given by the caller.
      offset: line number assigned to the first line stored on this side.
      length: number of lines this side is declared to hold.
      lines: contents of the lines appended so far, with the leading
        one-character diff marker removed.
      delta_line_nums: line numbers (offset + position) of every appended
        line whose marker was not a space, i.e. the non-context lines.
    """

    def __init__(self, filename, offset, length):
        self.filename = filename
        self.offset = offset
        self.length = length
        self.lines = []
        self.delta_line_nums = []

    def Append(self, line):
        """Adds one raw diff line (marker character included) to this side.

        Args:
          line: the diff line; its first character is the diff marker.

        Raises:
          AssertionError: if this side already holds `length` lines. The
            container is left unmodified in that case.
        """
        index = len(self.lines)
        # Check before mutating so a rejected line leaves no trace, and raise
        # explicitly (instead of using an assert statement) so the check
        # still runs under `python -O`. The exception type is unchanged.
        if index >= self.length:
            raise AssertionError(
                "too many lines for %r: declared length is %d"
                % (self.filename, self.length))
        if line[0] != " ":
            self.delta_line_nums.append(self.offset + index)
        self.lines.append(line[1:])

    def Complete(self):
        """Returns True once exactly `length` lines have been appended."""
        return len(self.lines) == self.length

    def __contains__(self, item):
        """Returns True if line number `item` lies within this side's range."""
        return self.offset <= item < self.offset + self.length
45 | |
46 | |
class DiffHunk(object):
    """A container for one diff hunk, consisting of two DiffLines.

    Attributes:
      header: the header value supplied by the caller, stored unchanged.
      left: DiffLines built from (file_a, start_a, len_a); receives "-" and
        context lines.
      right: DiffLines built from (file_b, start_b, len_b); receives "+" and
        context lines.
      lines: every raw line appended to the hunk, in the order received.
    """

    def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b):
        self.header = header
        self.left = DiffLines(file_a, start_a, len_a)
        self.right = DiffLines(file_b, start_b, len_b)
        self.lines = []

    def Append(self, line):
        """Adds a line to the DiffHunk and its DiffLines children.

        "-" lines go to the left side, "+" lines to the right side and
        context lines (leading space) to both. A line starting with a
        backslash (the "\\ No newline at end of file" annotation written by
        diff) is kept in `lines` but counted on neither side.

        Args:
          line: one raw line from the body of a hunk.

        Raises:
          AssertionError: if the line is empty or starts with any other
            character.
        """
        # Slicing instead of indexing so that an empty string is reported as
        # an unrecognized line rather than raising IndexError.
        marker = line[:1]
        if marker == "-":
            self.left.Append(line)
        elif marker == "+":
            self.right.Append(line)
        elif marker == " ":
            self.left.Append(line)
            self.right.Append(line)
        elif marker == "\\":
            # Annotation about the preceding line; it is not file content, so
            # it must not count towards either side's declared length.
            pass
        else:
            # Raised explicitly: `assert False` would be stripped by
            # `python -O` and the bad line would be silently accepted.
            raise AssertionError("Unrecognized character at start of diff line "
                                 "%r" % marker)
        self.lines.append(line)

    def Complete(self):
        """Returns True once both sides hold their declared number of lines."""
        return self.left.Complete() and self.right.Complete()

    def __repr__(self):
        return "DiffHunk(%s, %s, len %d)" % (
            self.left.filename, self.right.filename,
            max(self.left.length, self.right.length))
77 | |
78 | |
def ParseDiffHunks(stream):
    """Walk a file-like object, yielding DiffHunks as they're parsed.

    Lines outside a hunk that are neither a "---"/"+++" file header nor an
    "@@" range header are ignored.

    Args:
      stream: object whose readline() returns successive lines of a unified
        diff and an empty value at end of input.

    Yields:
      A DiffHunk for each range header, as soon as the hunk reports itself
      complete.

    Raises:
      AssertionError: if a range header appears before both file headers
        have been seen, or if the input ends in the middle of a hunk.
    """

    file_regex = re.compile(r"(\+\+\+|---) (\S+)")
    range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")
    # Initialised up front so a diff that starts with a range header produces
    # a parse error instead of an unbound-variable error.
    a_line = b_line = None
    a = b = None
    hunk = None
    while True:
        line = stream.readline()
        if not line:
            break

        if hunk is None:
            # Parse file names
            diff_file = file_regex.match(line)
            if diff_file:
                if line.startswith("---"):
                    a_line = line
                    a = diff_file.group(2)
                    continue
                if line.startswith("+++"):
                    b_line = line
                    b = diff_file.group(2)
                    continue

            # Parse offset/lengths
            diffrange = range_regex.match(line)
            if diffrange:
                if a_line is None or b_line is None:
                    raise AssertionError(
                        "Hunk header %r seen before the ---/+++ file headers"
                        % line)

                # In the unified format "start,count" may be shortened to
                # just "start", in which case the count is 1. The first
                # number is therefore always the start line.
                start_a = int(diffrange.group(1))
                len_a = int(diffrange.group(3)) if diffrange.group(2) else 1
                start_b = int(diffrange.group(4))
                len_b = int(diffrange.group(6)) if diffrange.group(5) else 1

                header = [a_line, b_line, line]
                hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b)
        else:
            # Add the current line to the hunk
            hunk.Append(line)

            # See if the whole hunk has been parsed. If so, yield it and
            # prepare for the next hunk.
            if hunk.Complete():
                yield hunk
                hunk = None

    # Partial hunks are a parse error. Raised explicitly so the check is not
    # stripped by `python -O`.
    if hunk is not None:
        raise AssertionError("Input ended in the middle of %r" % (hunk,))
134 | 22 |
135 | 23 |
136 def FormatDiffHunks(hunks): | 24 def FormatDiffHunks(hunks): |
137 """Re-serialize a list of DiffHunks.""" | 25 """Re-serialize a list of DiffHunks.""" |
138 r = [] | 26 r = [] |
139 last_header = None | 27 last_header = None |
140 for hunk in hunks: | 28 for hunk in hunks: |
141 this_header = hunk.header[0:2] | 29 this_header = hunk.header[0:2] |
142 if last_header != this_header: | 30 if last_header != this_header: |
143 r.extend(hunk.header) | 31 r.extend(hunk.header) |
(...skipping 11 matching lines...) Expand all Loading... |
155 rhs_file = rhs_hunk.right.filename.split("/")[1:] | 43 rhs_file = rhs_hunk.right.filename.split("/")[1:] |
156 | 44 |
157 for lhs_hunk in lhs_hunks: | 45 for lhs_hunk in lhs_hunks: |
158 lhs_file = lhs_hunk.left.filename.split("/")[1:] | 46 lhs_file = lhs_hunk.left.filename.split("/")[1:] |
159 if lhs_file != rhs_file: | 47 if lhs_file != rhs_file: |
160 continue | 48 continue |
161 yield (rhs_hunk, lhs_hunk) | 49 yield (rhs_hunk, lhs_hunk) |
162 | 50 |
163 | 51 |
def main():
    """Prints the hunks of the newer diff that touch lines changed by the older.

    sys.argv[1] names the older diff and sys.argv[2] the newer one. Each
    hunk pair produced by ZipHunks is examined: when any non-context line
    number on the right side of the older hunk falls inside the left-side
    range of the newer hunk, the newer hunk is selected (once).

    Exit status: 1 after printing when at least one hunk was selected, 2 when
    fewer than two file arguments were given; otherwise the function simply
    returns.
    """
    if len(sys.argv) < 3:
        # Distinct from status 1, which signals "intersecting hunks found".
        sys.stderr.write("usage: %s <older.diff> <newer.diff>\n" % sys.argv[0])
        sys.exit(2)

    # Context managers close the inputs as soon as they have been parsed.
    with open(sys.argv[1], "r") as old_file:
        old_hunks = list(ParseDiffHunks(old_file))
    with open(sys.argv[2], "r") as new_file:
        new_hunks = list(ParseDiffHunks(new_file))
    out_hunks = []

    # Join the right hand side of the older diff with the left hand side of
    # the newer diff.
    for old_hunk, new_hunk in ZipHunks(old_hunks, new_hunks):
        if new_hunk in out_hunks:
            continue
        old_lines = old_hunk.right
        new_lines = new_hunk.left

        # Determine if this hunk overlaps any non-context line from the other
        if any(i in new_lines for i in old_lines.delta_line_nums):
            out_hunks.append(new_hunk)

    if out_hunks:
        # A single parenthesised argument prints the same way whether print
        # is the Python 2 statement or the Python 3 function.
        print(FormatDiffHunks(out_hunks))
        sys.exit(1)


if __name__ == "__main__":
    main()
OLD | NEW |