OLD | NEW |
| (Empty) |
1 # coding=utf8 | |
2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 """Utility functions to handle patches.""" | |
6 | |
7 import posixpath | |
8 import os | |
9 import re | |
10 | |
11 | |
class UnsupportedPatchFormat(Exception):
  """Raised when a patch for a file cannot be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: explanation of the rejection; may be empty or None.
  """

  def __init__(self, filename, status):
    # Both values are also forwarded to Exception so they end up in .args.
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.filename = filename
    self.status = status

  def __str__(self):
    """Returns the headline, plus the status on a second line when set."""
    headline = 'Can\'t process patch for file %s.' % self.filename
    if not self.status:
      return headline
    return headline + '\n%s' % self.status
23 | |
24 | |
class FilePatchBase(object):
  """Base description of a change applied to one file.

  Paths always use '/' as the separator, never os.sep, for consistency.
  Only subclasses may be instantiated.
  """
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    assert self.__class__ is not FilePatchBase
    self.filename = self._process_filename(filename)
    # Set when the file is copied or moved.
    self.source_filename = None

  @property
  def filename_utf8(self):
    """The file name encoded as UTF-8."""
    return self.filename.encode('utf-8')

  @property
  def source_filename_utf8(self):
    """The source file name encoded as UTF-8, or None when there is none."""
    if self.source_filename is None:
      return None
    return self.source_filename.encode('utf-8')

  @staticmethod
  def _process_filename(filename):
    """Normalizes backslashes to '/' and rejects unsupported file names.

    Raises:
      UnsupportedPatchFormat: the name contains a blacklisted sequence or
          starts with a blacklisted prefix.
    """
    normalized = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    for forbidden in ('%', '$', '..', '\'', '"'):
      if forbidden in normalized:
        raise UnsupportedPatchFormat(
            normalized, 'Can\'t use \'%s\' in filename.' % forbidden)
    for prefix in ('/', 'CON', 'COM'):
      if normalized.startswith(prefix):
        raise UnsupportedPatchFormat(
            normalized, 'Filename can\'t start with \'%s\'.' % prefix)
    return normalized

  def set_relpath(self, relpath):
    """Prefixes the file name (and source file name, if any) with |relpath|.

    An empty |relpath| is a no-op. The joined names go through the same
    validation as names given to the constructor.
    """
    if not relpath:
      return
    relpath = relpath.replace('\\', '/')
    if relpath[0] == '/':
      self._fail('Relative path starts with %s' % relpath[0])
    joined = posixpath.join(relpath, self.filename)
    self.filename = self._process_filename(joined)
    if self.source_filename:
      joined_source = posixpath.join(relpath, self.source_filename)
      self.source_filename = self._process_filename(joined_source)

  def _fail(self, msg):
    """Shortcut function to raise UnsupportedPatchFormat."""
    raise UnsupportedPatchFormat(self.filename, msg)

  def __str__(self):
    # Use a status-like board: one column each for Binary, Delete, New and
    # Rename/copy, blank when the flag is not set.
    board = ''.join((
        'B' if self.is_binary else ' ',
        'D' if self.is_delete else ' ',
        'N' if self.is_new else ' ',
        'R' if self.source_filename else ' ',
    ))
    out = board + ' '
    if self.source_filename:
      out += '%s->' % self.source_filename_utf8
    return out + self.filename_utf8

  def dump(self):
    """Dumps itself in a verbose way to help diagnosing."""
    return str(self)
106 | |
107 | |
class FilePatchDelete(FilePatchBase):
  """Patch entry that removes a file.

  Attributes:
    is_binary: caller-supplied flag telling whether the removed file is
        binary; it overrides the class-level default on the instance.
  """
  is_delete = True

  def __init__(self, filename, is_binary):
    # The base class validates and normalizes the file name.
    super(FilePatchDelete, self).__init__(filename)
    self.is_binary = is_binary
115 | |
116 | |
class FilePatchBinary(FilePatchBase):
  """Content of a new binary file.

  Attributes:
    data: the file content, stored exactly as given.
    svn_properties: the properties given by the caller, or an empty list when
        a false value was passed.
    is_new: caller-supplied flag telling whether the file is being added.
  """
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    super(FilePatchBinary, self).__init__(filename)
    self.data = data
    self.svn_properties = svn_properties or []
    self.is_new = is_new

  def get(self):
    """Returns the file content."""
    return self.data

  def __str__(self):
    """Returns the base class status line followed by the payload size."""
    # Call __str__ on the super proxy explicitly. str(super(...)) would
    # stringify the proxy object itself ('<super: ...>') because implicit
    # special method lookup uses the proxy's type, not the parent class.
    base = super(FilePatchBinary, self).__str__()
    return base + ' %d bytes' % len(self.data)
132 | |
133 | |
class Hunk(object):
  """Holds the numbers from one hunk header plus the hunk's body lines.

  Attributes:
    start_src, lines_src: start line and line count on the source side.
    start_dst, lines_dst: start line and line count on the destination side.
    variation: lines_dst - lines_src, computed once at construction.
    text: list that collects the body lines; starts empty.
  """

  def __init__(self, start_src, lines_src, start_dst, lines_dst):
    self.start_src, self.lines_src = start_src, lines_src
    self.start_dst, self.lines_dst = start_dst, lines_dst
    self.variation = self.lines_dst - self.lines_src
    self.text = []

  def __repr__(self):
    fields = (
        self.__class__.__name__,
        self.start_src,
        self.lines_src,
        self.start_dst,
        self.lines_dst,
    )
    return '%s<(%d, %d) to (%d, %d)>' % fields
149 | |
150 | |
class FilePatchDiff(FilePatchBase):
  """Patch for a single file, carried as the text of a git or svn diff.

  The diff is split into a header and hunks at construction time and both are
  sanity checked; every inconsistency is reported by raising
  UnsupportedPatchFormat through self._fail().
  """

  def __init__(self, filename, diff, svn_properties):
    """Parses and validates |diff|.

    Args:
      filename: path of the patched file.
      diff: full text of the diff for this file; must not be empty.
      svn_properties: properties to attach to the file, or a false value for
          none. Git header parsing may append ('svn:executable', value)
          tuples to it, so a list passed by the caller is modified in place.

    Raises:
      UnsupportedPatchFormat: the diff is empty, malformed, or inconsistent
          with |filename|.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    self.svn_properties = svn_properties or []
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    # Number of leading path components mangle() strips; set to 1 by the git
    # header check when the names carry the a/ and b/ prefixes.
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()
    self.hunks = self._split_hunks()
    if self.source_filename and not self.is_new:
      self._fail('If source_filename is set, is_new must be also be set')

  def get(self, for_git):
    """Returns the diff text: the header followed by the hunks.

    When |for_git| is false and the file has a source file name, every
    occurrence of the source name in the header is replaced by the file name.
    """
    if for_git or not self.source_filename:
      return self.diff_header + self.diff_hunks
    # patch is stupid. It patches the source_filename instead so get rid of
    # any source_filename reference if needed.
    return (
        self.diff_header.replace(
            self.source_filename_utf8, self.filename_utf8) +
        self.diff_hunks)

  def set_relpath(self, relpath):
    """Offsets the file names by |relpath| and rewrites the header to match.

    On the 'diff --git' line the 'a/<old source>' and 'b/<old name>' tokens
    are replaced by the new names without the a/ and b/ prefixes.
    """
    old_filename = self.filename_utf8
    old_source_filename = self.source_filename_utf8 or self.filename_utf8
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    filename = self.filename_utf8
    source_filename = self.source_filename_utf8 or self.filename_utf8
    lines = self.diff_header.splitlines(True)
    for i, line in enumerate(lines):
      if line.startswith('diff --git'):
        lines[i] = line.replace(
            'a/' + old_source_filename, source_filename).replace(
                'b/' + old_filename, filename)
      elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
        lines[i] = line.replace(old_source_filename, source_filename)
      elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
        lines[i] = line.replace(old_filename, filename)
    self.diff_header = ''.join(lines)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    The header runs up to and including the '--- ' / '+++ ' pair. When there
    is no '--- ' line the whole diff is header and the hunks are empty.

    Returns:
      A (header, hunks) tuple of strings.
    """
    header = []
    hunks = diff.splitlines(True)
    # Some diff may not have a ---/+++ set like a git rename with no change or
    # a svn diff with only property change; in that case the loop simply
    # drains every line into the header.
    while hunks:
      header.append(hunks.pop(0))
      if header[-1].startswith('--- '):
        break

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks and not hunks[0].startswith('@@ '):
        self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename_utf8)
    for i, line in enumerate(header):
      if line.split(' ', 1)[0] in header_lines or line.endswith(basename):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def _parse_hunk_range(self, text):
    """Parses one side of a hunk header: 'start' or 'start,count'.

    Returns:
      A (start, count) tuple of ints; count is 1 when it is omitted.

    Raises:
      UnsupportedPatchFormat: |text| has more than one comma or an empty
          number (e.g. '1,' or ',3'), which the header regexp lets through.
    """
    parts = text.split(',')
    if len(parts) > 2:
      self._fail('Hunk header is malformed')
    try:
      numbers = [int(part) for part in parts]
    except ValueError:
      # An empty field would otherwise escape as a bare ValueError.
      self._fail('Hunk header is malformed')
    if len(numbers) == 1:
      return numbers[0], 1
    return numbers[0], numbers[1]

  def _split_hunks(self):
    """Splits the hunks and does verification.

    May set self.is_new or self.is_delete when the diff has a single hunk
    whose source or destination range is '0,0'.

    Returns:
      The list of Hunk objects, with start lines adjusted as done below.
    """
    hunks = []
    for line in self.diff_hunks.splitlines(True):
      if line.startswith('@@'):
        match = re.match(r'^@@ -([\d,]+) \+([\d,]+) @@.*$', line)
        # File add will result in "-0,0 +1" but file deletion will result in
        # "-1,N +0,0" where N is the number of lines deleted. That's from diff
        # and svn diff. git diff doesn't exhibit this behavior.
        # svn diff for a single line file rewrite "@@ -1 +1 @@". Fun.
        # "@@ -1 +1,N @@" is also valid where N is the length of the new file.
        if not match:
          self._fail('Hunk header is unparsable')
        start_src, lines_src = self._parse_hunk_range(match.group(1))
        start_dst, lines_dst = self._parse_hunk_range(match.group(2))
        new_hunk = Hunk(start_src, lines_src, start_dst, lines_dst)
        if hunks:
          if new_hunk.start_src <= hunks[-1].start_src:
            self._fail('Hunks source lines are not ordered')
          if new_hunk.start_dst <= hunks[-1].start_dst:
            self._fail('Hunks destination lines are not ordered')
        hunks.append(new_hunk)
        continue
      # _split_header() guarantees non-empty hunk text starts with '@@ ', so
      # there is always a current hunk to attach body lines to.
      hunks[-1].text.append(line)

    if len(hunks) == 1:
      if hunks[0].start_src == 0 and hunks[0].lines_src == 0:
        self.is_new = True
      if hunks[0].start_dst == 0 and hunks[0].lines_dst == 0:
        self.is_delete = True

    if self.is_new and self.is_delete:
      self._fail('Hunk header is all 0')

    if not self.is_new and not self.is_delete:
      for hunk in hunks:
        # The added-minus-removed line count must match the header.
        variation = (
            sum(1 for i in hunk.text if i.startswith('+')) -
            sum(1 for i in hunk.text if i.startswith('-')))
        if variation != hunk.variation:
          self._fail(
              'Hunk header is incorrect: %d vs %d; %r' % (
                  variation, hunk.variation, hunk))
        if not hunk.start_src:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_src)
        if not hunk.start_dst:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_dst)
        hunk.start_src -= 1
        hunk.start_dst -= 1
    if self.is_new and hunks:
      hunks[0].start_dst -= 1
    if self.is_delete and hunks:
      hunks[0].start_src -= 1
    return hunks

  def mangle(self, string):
    """Mangle a file path.

    Converts backslashes to '/' and drops the first self.patchlevel path
    components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.

    Sets self.patchlevel, and self.source_filename / self.is_new when the old
    name differs from the file name.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      if match.group(1).startswith('a/') and match.group(2).startswith('b/'):
        self.patchlevel = 1
      old = self.mangle(match.group(1))
      new = self.mangle(match.group(2))

      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename_utf8, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    if old not in (self.filename_utf8, 'dev/null'):
      # Copy or rename.
      self.source_filename = old.decode('utf-8')
      self.is_new = True

    last_line = ''

    # |lines| is consumed from the front so the per-line check can peek at
    # the line that follows the one being processed.
    while lines:
      line = lines.pop(0)
      self._verify_git_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Args:
      lines: the header lines that follow |line|.
      line: the line to check.
      last_line: the line that preceded |line|, '' for the first one.

    Returns nothing; problems are reported by raising through self._fail().
    May set self.is_new / self.is_delete and append to self.svn_properties.

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
    """
    match = re.match(r'^(rename|copy) from (.+)$', line)
    old = self.source_filename_utf8 or self.filename_utf8
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if self.filename_utf8 != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^deleted file mode (\d{6})$', line)
    if match:
      # It is necessary to parse it because there may be no hunk, like when the
      # file was empty.
      self.is_delete = True
      return

    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # Only look at owner ACL for executable.
      # NOTE(review): mode[4] is the fifth of the six digits; in a mode such
      # as 100755 the owner digit is mode[3] - confirm the intended index.
      if bool(int(mode[4]) & 1):
        self.svn_properties.append(('svn:executable', '.'))
      elif not self.source_filename and self.is_new:
        # It's a new file, not from a rename/copy, then there's no property to
        # delete.
        self.svn_properties.append(('svn:executable', None))
      return

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != old:
        # git patches are always well formatted, do not allow random filenames.
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      if '/dev/null' == match.group(1):
        self.is_delete = True
      elif self.filename_utf8 != self.mangle(match.group(1)):
        self._fail(
            'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Args:
      lines: the header lines that follow |line|.
      line: the line to check.
      last_line: the line that preceded |line|, '' for the first one.

    Returns nothing; problems are reported by raising through self._fail().
    May set self.is_new, self.is_delete and self.source_filename.
    """
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        # guess the source filename.
        self.source_filename = match.group(1).decode('utf-8')
        self.is_new = True
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) == '/dev/null':
        self.is_delete = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def dump(self):
    """Dumps itself in a verbose way to help diagnosing."""
    return str(self) + '\n' + self.get(True)
504 | |
505 | |
class PatchSet(object):
  """A list of FilePatch* objects, kept sorted in order of application."""

  def __init__(self, patches):
    """Validates and sorts |patches|.

    Args:
      patches: iterable of FilePatchBase instances; any iterable works,
          including a generator.
    """
    # Materialize first: the input is walked once for validation and once
    # more by sorted(), so a one-shot iterator would otherwise be exhausted
    # by the first pass and yield an empty patch set.
    patches = list(patches)
    for p in patches:
      assert isinstance(p, FilePatchBase)

    def key(p):
      """Sort by ordering of application.

      File move are first.
      Deletes are last.
      """
      # The bool is necessary because None < 'string' but the reverse is needed.
      return (
          p.is_delete,
          # False is before True, so files *with* a source file will be first.
          not bool(p.source_filename),
          p.source_filename_utf8,
          p.filename_utf8)

    self.patches = sorted(patches, key=key)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for patch in self.patches:
      patch.set_relpath(relpath)

  def __iter__(self):
    """Yields the patches in order of application."""
    for patch in self.patches:
      yield patch

  def __getitem__(self, key):
    """Returns the patch (or slice of patches) at |key| in sorted order."""
    return self.patches[key]

  @property
  def filenames(self):
    """The file name of every patch, in sorted order."""
    return [p.filename for p in self.patches]
OLD | NEW |