Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(14)

Unified Diff: third_party/depot_tools/patch.py

Issue 27575002: Patch path filtering script. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build
Patch Set: Now using patch.py and its test data Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/depot_tools/patch.py
diff --git a/third_party/depot_tools/patch.py b/third_party/depot_tools/patch.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b65ce1cfaed3bc1fbb70551f9cc43346a8f01de
--- /dev/null
+++ b/third_party/depot_tools/patch.py
@@ -0,0 +1,543 @@
+# coding=utf8
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Utility functions to handle patches."""
iannucci 2013/12/05 21:40:57 holy snapdragons! I don't think we need this stuff
kjellander_chromium 2013/12/10 20:46:02 I don't agree this is that bad, in fact it's what
iannucci 2013/12/10 21:32:23 Yeah... frankly, they're both bad. Having proper d
+
+import posixpath
+import os
+import re
+
+
class UnsupportedPatchFormat(Exception):
  """Raised when a patch for a file cannot be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: explanation of the rejection; may be empty or None.
  """

  def __init__(self, filename, status):
    # Both values are also forwarded to Exception so they show up in .args.
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.filename = filename
    self.status = status

  def __str__(self):
    """Returns the generic message, followed by the status when present."""
    message = 'Can\'t process patch for file %s.' % self.filename
    if not self.status:
      return message
    return message + '\n%s' % self.status
+
+
class FilePatchBase(object):
  """Base description of a single file being modified by a patch.

  '/' is always used as the path separator instead of os.sep, for consistency.
  This class cannot be instantiated directly; use a subclass.
  """
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    # Only subclasses may be instantiated.
    assert self.__class__ is not FilePatchBase
    self.filename = self._process_filename(filename)
    # Set when the file is copied or moved.
    self.source_filename = None

  @property
  def filename_utf8(self):
    """The file name encoded as UTF-8."""
    return self.filename.encode('utf-8')

  @property
  def source_filename_utf8(self):
    """The source file name encoded as UTF-8, or None if there is none."""
    if self.source_filename is None:
      return None
    return self.source_filename.encode('utf-8')

  @staticmethod
  def _process_filename(filename):
    """Converts backslashes to '/' and rejects unsupported file names.

    Raises:
      UnsupportedPatchFormat: the name contains a blacklisted sequence or
          starts with a blacklisted prefix.
    """
    normalized = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    for token in ('%', '$', '..', '\'', '"'):
      if token in normalized:
        raise UnsupportedPatchFormat(
            normalized, 'Can\'t use \'%s\' in filename.' % token)
    for prefix in ('/', 'CON', 'COM'):
      if normalized.startswith(prefix):
        raise UnsupportedPatchFormat(
            normalized, 'Filename can\'t start with \'%s\'.' % prefix)
    return normalized

  def set_relpath(self, relpath):
    """Prefixes the file name (and source file name, if set) with relpath.

    Does nothing when relpath is empty.
    """
    if not relpath:
      return
    base = relpath.replace('\\', '/')
    if base.startswith('/'):
      self._fail('Relative path starts with %s' % base[0])
    joined = posixpath.join(base, self.filename)
    self.filename = self._process_filename(joined)
    if self.source_filename:
      joined_source = posixpath.join(base, self.source_filename)
      self.source_filename = self._process_filename(joined_source)

  def _fail(self, msg):
    """Shortcut function to raise UnsupportedPatchFormat."""
    raise UnsupportedPatchFormat(self.filename, msg)

  def __str__(self):
    # Use a status-like board: one column per flag, blank when unset.
    board = ''.join((
        'B' if self.is_binary else ' ',
        'D' if self.is_delete else ' ',
        'N' if self.is_new else ' ',
        'R' if self.source_filename else ' ',
    ))
    out = board + ' '
    if self.source_filename:
      out += '%s->' % self.source_filename_utf8
    return out + self.filename_utf8

  def dump(self):
    """Dumps itself in a verbose way to help diagnosing."""
    return str(self)
+
+
class FilePatchDelete(FilePatchBase):
  """Patch entry that deletes a file."""
  is_delete = True

  def __init__(self, filename, is_binary):
    """Records the deleted file's name and whether it is binary.

    Args:
      filename: path of the file being deleted.
      is_binary: stored as-is on the instance, overriding the class default.
    """
    super(FilePatchDelete, self).__init__(filename)
    self.is_binary = is_binary
+
+
class FilePatchBinary(FilePatchBase):
  """Content of a new binary file.

  Attributes:
    data: the raw content of the file.
    svn_properties: list of properties; an empty list when none were given.
    is_new: whether the file is being created by this patch.
  """
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    super(FilePatchBinary, self).__init__(filename)
    self.data = data
    self.svn_properties = svn_properties or []
    self.is_new = is_new

  def get(self):
    """Returns the raw content of the file."""
    return self.data

  def __str__(self):
    # Call the parent's __str__ explicitly. str(super(...)) would look up
    # __str__ on the super proxy's own type and return the proxy's repr
    # ('<super: ...>') instead of the status board built by FilePatchBase.
    base = super(FilePatchBinary, self).__str__()
    return base + ' %d bytes' % len(self.data)
+
+
class Hunk(object):
  """Container for the data parsed out of one diff hunk.

  Attributes:
    start_src: first line of the hunk in the source file.
    lines_src: number of source lines covered by the hunk.
    start_dst: first line of the hunk in the destination file.
    lines_dst: number of destination lines covered by the hunk.
    variation: lines_dst - lines_src, computed once at construction.
    text: list that callers fill with the hunk's body lines.
  """

  def __init__(self, start_src, lines_src, start_dst, lines_dst):
    self.start_src = start_src
    self.lines_src = lines_src
    self.start_dst = start_dst
    self.lines_dst = lines_dst
    self.variation = lines_dst - lines_src
    self.text = []

  def __repr__(self):
    fields = (
        self.__class__.__name__,
        self.start_src,
        self.lines_src,
        self.start_dst,
        self.lines_dst,
    )
    return '%s<(%d, %d) to (%d, %d)>' % fields
+
+
class FilePatchDiff(FilePatchBase):
  """Patch for a single file, expressed as a textual diff.

  Attributes:
    diff_header: the diff text up to and including the '+++' line, if any.
    diff_hunks: the remaining diff text, starting at the first '@@' line.
    svn_properties: list of (name, value) tuples; may be extended while the
        git header is parsed.
    is_git_diff: True when the header contains a 'diff --git' line.
    patchlevel: number of leading path components dropped by mangle().
    hunks: list of Hunk objects parsed out of diff_hunks.
  """

  def __init__(self, filename, diff, svn_properties):
    """Parses and sanity checks |diff|.

    Raises:
      UnsupportedPatchFormat: the diff is empty or fails one of the header or
          hunk consistency checks.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    self.svn_properties = svn_properties or []
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()
    self.hunks = self._split_hunks()
    if self.source_filename and not self.is_new:
      self._fail('If source_filename is set, is_new must be also be set')

  def get(self, for_git):
    """Returns the full diff text.

    When |for_git| is false and the file has a source file name, references to
    the source file name in the header are replaced with the file name.
    """
    if for_git or not self.source_filename:
      return self.diff_header + self.diff_hunks
    else:
      # patch is stupid. It patches the source_filename instead so get rid of
      # any source_filename reference if needed.
      return (
          self.diff_header.replace(
              self.source_filename_utf8, self.filename_utf8) +
          self.diff_hunks)

  def set_relpath(self, relpath):
    """Offsets the file names, then rewrites them inside the diff header."""
    old_filename = self.filename_utf8
    old_source_filename = self.source_filename_utf8 or self.filename_utf8
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    filename = self.filename_utf8
    source_filename = self.source_filename_utf8 or self.filename_utf8
    lines = self.diff_header.splitlines(True)
    for i, line in enumerate(lines):
      if line.startswith('diff --git'):
        lines[i] = line.replace(
            'a/' + old_source_filename, source_filename).replace(
                'b/' + old_filename, filename)
      elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
        lines[i] = line.replace(old_source_filename, source_filename)
      elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
        lines[i] = line.replace(old_filename, filename)
    self.diff_header = ''.join(lines)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns:
      A (header, hunks) tuple of strings.
    """
    header = []
    hunks = diff.splitlines(True)
    # Move lines into the header up to and including the '--- ' line. Some
    # diff may not have a ---/+++ set like a git rename with no change or a svn
    # diff with only property change; in that case everything is header.
    while hunks:
      header.append(hunks.pop(0))
      if header[-1].startswith('--- '):
        break

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks:
        if not hunks[0].startswith('@@ '):
          self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename_utf8)
    # NOTE(review): lines keep their line ending (splitlines(True)), so the
    # endswith() check presumably only matches an unterminated line; confirm.
    for i, line in enumerate(header):
      if line.split(' ', 1)[0] in header_lines or line.endswith(basename):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def _parse_hunk_range(self, text):
    """Parses 'start' or 'start,count' from a hunk header into two ints.

    A missing count defaults to 1. Anything else, including empty fields such
    as '1,' that would make int() raise, is reported through _fail().
    """
    parts = text.split(',')
    if len(parts) > 2:
      self._fail('Hunk header is malformed')
    try:
      start = int(parts[0])
      count = int(parts[1]) if len(parts) == 2 else 1
    except ValueError:
      self._fail('Hunk header is malformed')
    return start, count

  def _split_hunks(self):
    """Splits the hunks and does verification.

    Also sets is_new / is_delete when the single hunk header says so.

    Returns:
      The list of parsed Hunk objects, with start lines adjusted.
    """
    hunks = []
    for line in self.diff_hunks.splitlines(True):
      if line.startswith('@@'):
        match = re.match(r'^@@ -([\d,]+) \+([\d,]+) @@.*$', line)
        # File add will result in "-0,0 +1" but file deletion will result in
        # "-1,N +0,0" where N is the number of lines deleted. That's from diff
        # and svn diff. git diff doesn't exhibit this behavior.
        # svn diff for a single line file rewrite "@@ -1 +1 @@". Fun.
        # "@@ -1 +1,N @@" is also valid where N is the length of the new file.
        if not match:
          self._fail('Hunk header is unparsable')
        start_src, lines_src = self._parse_hunk_range(match.group(1))
        start_dst, lines_dst = self._parse_hunk_range(match.group(2))
        new_hunk = Hunk(start_src, lines_src, start_dst, lines_dst)
        if hunks:
          if new_hunk.start_src <= hunks[-1].start_src:
            self._fail('Hunks source lines are not ordered')
          if new_hunk.start_dst <= hunks[-1].start_dst:
            self._fail('Hunks destination lines are not ordered')
        hunks.append(new_hunk)
        continue
      if not hunks:
        # Report the problem as a patch error instead of an IndexError.
        self._fail('Hunk data found before any hunk header')
      hunks[-1].text.append(line)

    if len(hunks) == 1:
      if hunks[0].start_src == 0 and hunks[0].lines_src == 0:
        self.is_new = True
      if hunks[0].start_dst == 0 and hunks[0].lines_dst == 0:
        self.is_delete = True

    if self.is_new and self.is_delete:
      self._fail('Hunk header is all 0')

    if not self.is_new and not self.is_delete:
      for hunk in hunks:
        variation = (
            len([1 for i in hunk.text if i.startswith('+')]) -
            len([1 for i in hunk.text if i.startswith('-')]))
        if variation != hunk.variation:
          self._fail(
              'Hunk header is incorrect: %d vs %d; %r' % (
                  variation, hunk.variation, hunk))
        if not hunk.start_src:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_src)
        if not hunk.start_dst:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_dst)
        hunk.start_src -= 1
        hunk.start_dst -= 1
    if self.is_new and hunks:
      hunks[0].start_dst -= 1
    if self.is_delete and hunks:
      hunks[0].start_src -= 1
    return hunks

  def mangle(self, string):
    """Mangle a file path.

    Converts backslashes to '/' and drops the first |patchlevel| components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      if match.group(1).startswith('a/') and match.group(2).startswith('b/'):
        self.patchlevel = 1
      old = self.mangle(match.group(1))
      new = self.mangle(match.group(2))

      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename_utf8, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    if old not in (self.filename_utf8, 'dev/null'):
      # Copy or rename.
      self.source_filename = old.decode('utf-8')
      self.is_new = True

    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_git_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line):
    """Processes a single line of the git header.

    Updates is_new, is_delete and svn_properties as a side effect and calls
    _fail() on any inconsistency. Always returns None.

    Args:
      lines: the header lines that follow |line|.
      line: the header line being processed.
      last_line: the header line processed just before, '' for the first one.

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
    """
    match = re.match(r'^(rename|copy) from (.+)$', line)
    old = self.source_filename_utf8 or self.filename_utf8
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if self.filename_utf8 != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^deleted file mode (\d{6})$', line)
    if match:
      # It is necessary to parse it because there may be no hunk, like when the
      # file was empty.
      self.is_delete = True
      return

    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # Only look at owner ACL for executable.
      if bool(int(mode[4]) & 1):
        self.svn_properties.append(('svn:executable', '.'))
      elif not self.source_filename and self.is_new:
        # It's a new file, not from a rename/copy, then there's no property to
        # delete.
        self.svn_properties.append(('svn:executable', None))
      return

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != old:
        # git patches are always well formatted, do not allow random filenames.
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      if '/dev/null' == match.group(1):
        self.is_delete = True
      elif self.filename_utf8 != self.mangle(match.group(1)):
        self._fail(
            'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the svn header.

    Updates is_new, is_delete and source_filename as a side effect and calls
    _fail() on any inconsistency. Always returns None.

    Args:
      lines: the header lines that follow |line|.
      line: the header line being processed.
      last_line: the header line processed just before, '' for the first one.
    """
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        # guess the source filename.
        self.source_filename = match.group(1).decode('utf-8')
        self.is_new = True
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) == '/dev/null':
        self.is_delete = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def dump(self):
    """Dumps itself in a verbose way to help diagnosing."""
    return str(self) + '\n' + self.get(True)
+
+
class PatchSet(object):
  """A list of FilePatch* objects, kept in the order they should be applied."""

  def __init__(self, patches):
    """Checks the type of every patch and stores them sorted."""
    for patch in patches:
      assert isinstance(patch, FilePatchBase)

    def application_order(patch):
      """Sort by ordering of application.

      File move are first.
      Deletes are last.
      """
      # The bool is necessary because None < 'string' but the reverse is needed.
      # False is before True, so files *with* a source file will be first.
      has_no_source = not bool(patch.source_filename)
      return (
          patch.is_delete,
          has_no_source,
          patch.source_filename_utf8,
          patch.filename_utf8)

    self.patches = sorted(patches, key=application_order)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for patch in self.patches:
      patch.set_relpath(relpath)

  def __iter__(self):
    """Iterates over the patches in application order."""
    return iter(self.patches)

  def __getitem__(self, key):
    """Returns the patch (or slice of patches) at |key|."""
    return self.patches[key]

  @property
  def filenames(self):
    """The file name of every patch, in application order."""
    return [patch.filename for patch in self.patches]

Powered by Google App Engine
This is Rietveld 408576698