Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(338)

Side by Side Diff: third_party/depot_tools_patch/patch.py

Issue 505153002: WebRTC: Remove android_apk recipe (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build
Patch Set: Rebased Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 # coding=utf8
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5 """Utility functions to handle patches."""
6
7 import posixpath
8 import os
9 import re
10
11
class UnsupportedPatchFormat(Exception):
  """Raised when a patch for a file cannot be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: optional extra detail appended to the message when truthy.
  """

  def __init__(self, filename, status):
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.status = status
    self.filename = filename

  def __str__(self):
    # First line names the file; the status, when present, goes on its own line.
    parts = ['Can\'t process patch for file %s.' % self.filename]
    if self.status:
      parts.append('%s' % self.status)
    return '\n'.join(parts)
23
24
class FilePatchBase(object):
  """Base description of a single file touched by a patch.

  Paths always use '/' as separator instead of os.sep, for consistency. This
  class is abstract: only subclasses may be instantiated.
  """
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    assert self.__class__ is not FilePatchBase
    self.filename = self._process_filename(filename)
    # Only populated when the file is copied or moved.
    self.source_filename = None

  @property
  def filename_utf8(self):
    """The file name encoded as UTF-8."""
    return self.filename.encode('utf-8')

  @property
  def source_filename_utf8(self):
    """The source file name encoded as UTF-8, or None when there is none."""
    if self.source_filename is None:
      return None
    return self.source_filename.encode('utf-8')

  @staticmethod
  def _process_filename(filename):
    """Normalizes separators to '/' and rejects unsupported file names.

    Raises:
      UnsupportedPatchFormat: the name contains a refused character sequence or
          starts with a refused prefix.
    """
    normalized = filename.replace('\\', '/')
    # A few character sequences are refused outright to keep things simple.
    for forbidden in ('%', '$', '..', '\'', '"'):
      if forbidden in normalized:
        raise UnsupportedPatchFormat(
            normalized, 'Can\'t use \'%s\' in filename.' % forbidden)
    for prefix in ('/', 'CON', 'COM'):
      if normalized.startswith(prefix):
        raise UnsupportedPatchFormat(
            normalized, 'Filename can\'t start with \'%s\'.' % prefix)
    return normalized

  def set_relpath(self, relpath):
    """Offsets the file name(s) into relpath; a falsy relpath is a no-op."""
    if not relpath:
      return
    base = relpath.replace('\\', '/')
    if base.startswith('/'):
      self._fail('Relative path starts with %s' % base[0])

    def rebase(name):
      return self._process_filename(posixpath.join(base, name))

    self.filename = rebase(self.filename)
    if self.source_filename:
      self.source_filename = rebase(self.source_filename)

  def _fail(self, msg):
    """Shortcut function to raise UnsupportedPatchFormat."""
    raise UnsupportedPatchFormat(self.filename, msg)

  def __str__(self):
    # Status-like board: one column per flag, blank when the flag is unset.
    board = ''.join((
        'B' if self.is_binary else ' ',
        'D' if self.is_delete else ' ',
        'N' if self.is_new else ' ',
        'R' if self.source_filename else ' ',
    ))
    out = board + ' '
    if self.source_filename:
      out += '%s->' % self.source_filename_utf8
    return out + self.filename_utf8

  def dump(self):
    """Dumps itself in a verbose way to help diagnosing."""
    return str(self)
106
107
class FilePatchDelete(FilePatchBase):
  """Patch entry describing the removal of a file.

  The is_binary flag is supplied by the caller and stored on the instance.
  """
  is_delete = True

  def __init__(self, filename, is_binary):
    super(FilePatchDelete, self).__init__(filename)
    # Overrides the class-level default for this instance only.
    self.is_binary = is_binary
115
116
class FilePatchBinary(FilePatchBase):
  """Content of a new binary file.

  Attributes:
    data: the raw content, returned as-is by get().
    svn_properties: list of svn properties; defaults to an empty list.
    is_new: whether the file is being added.
  """
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    super(FilePatchBinary, self).__init__(filename)
    self.data = data
    self.svn_properties = svn_properties or []
    self.is_new = is_new

  def get(self):
    """Returns the binary content."""
    return self.data

  def __str__(self):
    # str(super(...)) does not dispatch to the parent __str__: implicit special
    # method lookups are not proxied by super objects, so it would return the
    # '<super: ...>' repr. Call the parent method explicitly instead.
    base = super(FilePatchBinary, self).__str__()
    return base + ' %d bytes' % len(self.data)
132
133
class Hunk(object):
  """Container for the data parsed out of one diff hunk.

  Attributes:
    start_src, lines_src: start line and line count on the source side.
    start_dst, lines_dst: start line and line count on the destination side.
    variation: lines_dst minus lines_src.
    text: list collecting the lines belonging to the hunk; starts empty.
  """

  def __init__(self, start_src, lines_src, start_dst, lines_dst):
    self.start_src, self.lines_src = start_src, lines_src
    self.start_dst, self.lines_dst = start_dst, lines_dst
    self.variation = self.lines_dst - self.lines_src
    self.text = []

  def __repr__(self):
    src = (self.start_src, self.lines_src)
    dst = (self.start_dst, self.lines_dst)
    return '%s<(%d, %d) to (%d, %d)>' % (
        (self.__class__.__name__,) + src + dst)
149
150
class FilePatchDiff(FilePatchBase):
  """Patch for a single file, expressed as a textual diff.

  The diff is split into a header and hunks. The header is sanity checked
  against the expected git or svn layout, and the hunks are parsed into Hunk
  objects. Any inconsistency raises UnsupportedPatchFormat through _fail().
  """

  def __init__(self, filename, diff, svn_properties):
    """Parses and verifies |diff| for |filename|.

    Args:
      filename: name of the patched file.
      diff: the diff text; must not be empty.
      svn_properties: list of (name, value) properties or a falsy value. The
          list may be appended to while the git header is verified.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    self.svn_properties = svn_properties or []
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()
    self.hunks = self._split_hunks()
    if self.source_filename and not self.is_new:
      self._fail('If source_filename is set, is_new must be also be set')

  def get(self, for_git):
    """Returns the diff text, header followed by hunks.

    When not |for_git| and the file has a source file name, references to the
    source file name in the header are replaced by the file name.
    """
    if for_git or not self.source_filename:
      return self.diff_header + self.diff_hunks
    else:
      # patch is stupid. It patches the source_filename instead so get rid of
      # any source_filename reference if needed.
      return (
          self.diff_header.replace(
              self.source_filename_utf8, self.filename_utf8) +
          self.diff_hunks)

  def set_relpath(self, relpath):
    """Offsets the file names into |relpath| and rewrites the header to match."""
    old_filename = self.filename_utf8
    old_source_filename = self.source_filename_utf8 or self.filename_utf8
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    filename = self.filename_utf8
    source_filename = self.source_filename_utf8 or self.filename_utf8
    lines = self.diff_header.splitlines(True)
    for i, line in enumerate(lines):
      if line.startswith('diff --git'):
        lines[i] = line.replace(
            'a/' + old_source_filename, source_filename).replace(
                'b/' + old_filename, filename)
      elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
        lines[i] = line.replace(old_source_filename, source_filename)
      elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
        lines[i] = line.replace(old_filename, filename)
    self.diff_header = ''.join(lines)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns:
      A (header, hunks) tuple of strings.
    """
    header = []
    hunks = diff.splitlines(True)
    # Consume lines up to and including the '--- ' line. Some diffs may not have
    # a ---/+++ set, like a git rename with no change or a svn diff with only a
    # property change; in that case everything ends up in the header.
    while hunks:
      header.append(hunks.pop(0))
      if header[-1].startswith('--- '):
        break

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks:
        if not hunks[0].startswith('@@ '):
          self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename_utf8)
    for i, line in enumerate(header):
      if line.split(' ', 1)[0] in header_lines or line.endswith(basename):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def _split_hunks(self):
    """Splits the hunks and does verification.

    May set is_new or is_delete from the hunk header, and converts the hunk
    start lines to 0-based values.

    Returns:
      The list of Hunk objects, in order.
    """
    hunks = []
    for line in self.diff_hunks.splitlines(True):
      if line.startswith('@@'):
        match = re.match(r'^@@ -([\d,]+) \+([\d,]+) @@.*$', line)
        # File add will result in "-0,0 +1" but file deletion will result in
        # "-1,N +0,0" where N is the number of lines deleted. That's from diff
        # and svn diff. git diff doesn't exhibit this behavior.
        # svn diff for a single line file rewrite "@@ -1 +1 @@". Fun.
        # "@@ -1 +1,N @@" is also valid where N is the length of the new file.
        if not match:
          self._fail('Hunk header is unparsable')
        count = match.group(1).count(',')
        if not count:
          start_src = int(match.group(1))
          lines_src = 1
        elif count == 1:
          start_src, lines_src = map(int, match.group(1).split(',', 1))
        else:
          self._fail('Hunk header is malformed')

        count = match.group(2).count(',')
        if not count:
          start_dst = int(match.group(2))
          lines_dst = 1
        elif count == 1:
          start_dst, lines_dst = map(int, match.group(2).split(',', 1))
        else:
          self._fail('Hunk header is malformed')
        new_hunk = Hunk(start_src, lines_src, start_dst, lines_dst)
        if hunks:
          if new_hunk.start_src <= hunks[-1].start_src:
            self._fail('Hunks source lines are not ordered')
          if new_hunk.start_dst <= hunks[-1].start_dst:
            self._fail('Hunks destination lines are not ordered')
        hunks.append(new_hunk)
        continue
      # _split_header() guarantees the first hunk line is a '@@ ' header, so
      # there is always a current hunk to attach the line to.
      hunks[-1].text.append(line)

    if len(hunks) == 1:
      if hunks[0].start_src == 0 and hunks[0].lines_src == 0:
        self.is_new = True
      if hunks[0].start_dst == 0 and hunks[0].lines_dst == 0:
        self.is_delete = True

    if self.is_new and self.is_delete:
      self._fail('Hunk header is all 0')

    if not self.is_new and not self.is_delete:
      for hunk in hunks:
        variation = (
            len([1 for i in hunk.text if i.startswith('+')]) -
            len([1 for i in hunk.text if i.startswith('-')]))
        if variation != hunk.variation:
          self._fail(
              'Hunk header is incorrect: %d vs %d; %r' % (
                variation, hunk.variation, hunk))
        if not hunk.start_src:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_src)
        if not hunk.start_dst:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_dst)
        hunk.start_src -= 1
        hunk.start_dst -= 1
    if self.is_new and hunks:
      hunks[0].start_dst -= 1
    if self.is_delete and hunks:
      hunks[0].start_src -= 1
    return hunks

  def mangle(self, string):
    """Mangle a file path.

    Converts '\\' to '/' and strips the first |self.patchlevel| components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      if match.group(1).startswith('a/') and match.group(2).startswith('b/'):
        self.patchlevel = 1
      old = self.mangle(match.group(1))
      new = self.mangle(match.group(2))

      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename_utf8, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    if old not in (self.filename_utf8, 'dev/null'):
      # Copy or rename.
      self.source_filename = old.decode('utf-8')
      self.is_new = True

    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_git_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That's the only remaining invariant.
    if not self.filename_utf8 in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line):
    """Processes a single line of the git header.

    Works through side effects only (is_new, is_delete, svn_properties) and
    always returns None; inconsistencies raise through _fail().

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html

    Args:
      lines: the header lines remaining after |line|.
      line: the header line to process.
      last_line: the header line processed just before, or ''.
    """
    match = re.match(r'^(rename|copy) from (.+)$', line)
    old = self.source_filename_utf8 or self.filename_utf8
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
              (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if self.filename_utf8 != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
              (match.group(1), line))
      return

    match = re.match(r'^deleted file mode (\d{6})$', line)
    if match:
      # It is necessary to parse it because there may be no hunk, like when the
      # file was empty.
      self.is_delete = True
      return

    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # The mode is 6 octal digits: 3 for the file type, then one each for the
      # owner, group and other permissions. Only look at owner ACL for
      # executable, which is the digit at index 3.
      if bool(int(mode[3]) & 1):
        self.svn_properties.append(('svn:executable', '.'))
      elif not self.source_filename and self.is_new:
        # It's a new file, not from a rename/copy, then there's no property to
        # delete.
        self.svn_properties.append(('svn:executable', None))
      return

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != old:
        # git patches are always well formatted, do not allow random filenames.
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      if '/dev/null' == match.group(1):
        self.is_delete = True
      elif self.filename_utf8 != self.mangle(match.group(1)):
        self._fail(
            'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That's the only remaining invariant.
    if not self.filename_utf8 in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the svn header.

    Works through side effects only (is_new, is_delete, source_filename) and
    always returns None; inconsistencies raise through _fail().

    Args:
      lines: the header lines remaining after |line|.
      line: the header line to process.
      last_line: the header line processed just before, or ''.
    """
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        # guess the source filename.
        self.source_filename = match.group(1).decode('utf-8')
        self.is_new = True
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) == '/dev/null':
        self.is_delete = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def dump(self):
    """Dumps itself in a verbose way to help diagnosing."""
    return str(self) + '\n' + self.get(True)
504
505
class PatchSet(object):
  """An ordered collection of FilePatch* objects.

  The patches are stored sorted in the order they should be applied.
  """

  def __init__(self, patches):
    for candidate in patches:
      assert isinstance(candidate, FilePatchBase)

    def application_order(patch):
      """Sort key: file moves come first, deletes come last."""
      # False sorts before True, so negating the presence of a source file puts
      # the files *with* a source file first. The explicit bool is needed
      # because None < 'string' while the reverse ordering is wanted.
      has_no_source = not bool(patch.source_filename)
      return (
          patch.is_delete,
          has_no_source,
          patch.source_filename_utf8,
          patch.filename_utf8)

    self.patches = sorted(patches, key=application_order)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for entry in self.patches:
      entry.set_relpath(relpath)

  def __iter__(self):
    for entry in self.patches:
      yield entry

  def __getitem__(self, key):
    return self.patches[key]

  @property
  def filenames(self):
    """The file names of all patches, in application order."""
    return [entry.filename for entry in self.patches]
OLDNEW
« no previous file with comments | « third_party/depot_tools_patch/__init__.py ('k') | third_party/depot_tools_patch/test_support/__init__.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698