OLD | NEW |
---|---|
1 # Copyright (C) 2009 Google Inc. All rights reserved. | 1 # Copyright (C) 2009 Google Inc. All rights reserved. |
2 # | 2 # |
3 # Redistribution and use in source and binary forms, with or without | 3 # Redistribution and use in source and binary forms, with or without |
4 # modification, are permitted provided that the following conditions are | 4 # modification, are permitted provided that the following conditions are |
5 # met: | 5 # met: |
6 # | 6 # |
7 # * Redistributions of source code must retain the above copyright | 7 # * Redistributions of source code must retain the above copyright |
8 # notice, this list of conditions and the following disclaimer. | 8 # notice, this list of conditions and the following disclaimer. |
9 # * Redistributions in binary form must reproduce the above | 9 # * Redistributions in binary form must reproduce the above |
10 # copyright notice, this list of conditions and the following disclaimer | 10 # copyright notice, this list of conditions and the following disclaimer |
(...skipping 16 matching lines...) Expand all Loading... | |
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | 28 |
29 """WebKit's Python module for interacting with patches.""" | 29 """WebKit's Python module for interacting with patches.""" |
30 | 30 |
31 import logging | 31 import logging |
32 import re | 32 import re |
33 | 33 |
34 _log = logging.getLogger(__name__) | 34 _log = logging.getLogger(__name__) |
35 | 35 |
36 | 36 |
37 # FIXME: This is broken. We should compile our regexps up-front | |
38 # instead of using a custom cache. | |
39 _regexp_compile_cache = {} | |
40 | |
41 | |
42 # FIXME: This function should be removed. | |
43 def match(pattern, string): | |
44 """Matches the string with the pattern, caching the compiled regexp.""" | |
45 if not pattern in _regexp_compile_cache: | |
46 _regexp_compile_cache[pattern] = re.compile(pattern) | |
47 return _regexp_compile_cache[pattern].match(string) | |
48 | |
49 | |
50 # FIXME: This belongs on DiffParser (e.g. as to_svn_diff()). | |
51 def git_diff_to_svn_diff(line): | |
52 """Converts a git formatted diff line to a svn formatted line. | |
53 | |
54 Args: | |
55 line: A string representing a line of the diff. | |
56 """ | |
57 # FIXME: This list should be a class member on DiffParser. | |
58 # These regexp patterns should be compiled once instead of every time. | |
59 conversion_patterns = (("^diff --git \w/(.+) \w/(?P<FilePath>.+)", lambda matched: "Index: " + matched.group('FilePath') + "\n"), | |
60 ("^new file.*", lambda matched: "\n"), | |
61 ("^index (([0-9a-f]{7}\.\.[0-9a-f]{7})|([0-9a-f]{40}\.\.[0-9a-f]{40})) [0-9]{6}", lambda matched: "===================================================================\n"), | |
62 ("^--- \w/(?P<FilePath>.+)", lambda matched: "--- " + matched.group('FilePath') + "\n"), | |
63 ("^\+\+\+ \w/(?P<FilePath>.+)", lambda matched: "+++ " + matched.group('FilePath') + "\n")) | |
64 | |
65 for pattern, conversion in conversion_patterns: | |
66 matched = match(pattern, line) | |
67 if matched: | |
68 return conversion(matched) | |
69 return line | |
70 | |
71 | |
72 # This function exists so we can unittest get_diff_converter function | |
73 def svn_diff_to_svn_diff(line): | |
74 return line | |
75 | |
76 | |
77 # FIXME: This method belongs on DiffParser | |
78 def get_diff_converter(lines): | |
79 """Gets a converter function of diff lines. | |
80 | |
81 Args: | |
82 lines: The lines of a diff file. | |
83 If this line is git formatted, we'll return a | |
84 converter from git to SVN. | |
85 """ | |
86 for i, line in enumerate(lines[:-1]): | |
87 # Stop when we find the first patch | |
88 if line[:3] == "+++" and lines[i + 1] == "---": | |
89 break | |
90 if match(r"^diff --git \w/", line): | |
91 return git_diff_to_svn_diff | |
92 return svn_diff_to_svn_diff | |
93 | |
94 _INITIAL_STATE = 1 | 37 _INITIAL_STATE = 1 |
95 _DECLARED_FILE_PATH = 2 | 38 _DECLARED_FILE_PATH = 2 |
96 _PROCESSING_CHUNK = 3 | 39 _PROCESSING_CHUNK = 3 |
97 | 40 |
98 | 41 |
99 class DiffFile(object): | 42 class DiffFile(object): |
100 """Contains the information for one file in a patch. | 43 """Contains the information for one file in a patch. |
101 | 44 |
102 The field "lines" is a list which contains tuples in this format: | 45 The field "lines" is a list which contains tuples in this format: |
103 (deleted_line_number, new_line_number, line_string) | 46 (deleted_line_number, new_line_number, line_string) |
(...skipping 23 matching lines...) Expand all Loading... | |
127 | 70 |
128 | 71 |
129 # If this is going to be called DiffParser, it should be a re-useable parser. | 72 # If this is going to be called DiffParser, it should be a re-useable parser. |
130 # Otherwise we should rename it to ParsedDiff or just Diff. | 73 # Otherwise we should rename it to ParsedDiff or just Diff. |
131 class DiffParser(object): | 74 class DiffParser(object): |
132 """A parser for a patch file. | 75 """A parser for a patch file. |
133 | 76 |
134 The field "files" is a dict whose key is the filename and value is | 77 The field "files" is a dict whose key is the filename and value is |
135 a DiffFile object. | 78 a DiffFile object. |
136 """ | 79 """ |
80 conversion_patterns = ( | |
81 (re.compile("^diff --git \w/(.+) \w/(?P<FilePath>.+)"), lambda matched: "Index: " + matched.group('FilePath') + "\n"), | |
82 (re.compile("^new file.*"), lambda matched: "\n"), | |
83 (re.compile("^index (([0-9a-f]{7}\.\.[0-9a-f]{7})|([0-9a-f]{40}\.\.[0-9a-f]{40})) [0-9]{6}"), lambda matched: ("=" * 67) + "\n"), | |
84 (re.compile("^--- \w/(?P<FilePath>.+)"), lambda matched: "--- " + matched.group('FilePath') + "\n"), | |
85 (re.compile("^\+\+\+ \w/(?P<FilePath>.+)"), lambda matched: "+++ " + matched.group('FilePath') + "\n"), | |
86 ) | |
87 | |
88 index_pattern = re.compile(r"^Index: (?P<FilePath>.+)") | |
89 lines_changed_pattern = re.compile(r"^@@ -(?P<OldStartLine>\d+)(,\d+)? \+(?P<NewStartLine>\d+)(,\d+)? @@") | |
90 diff_git_pattern = re.compile(r"^diff --git \w/") | |
Dirk Pranke
2014/10/08 01:45:47
I'm not generally a fan of class-level constants t
elecro
2014/10/08 08:14:16
Yeah, we can move them, but the FIXMEs mentioned t
| |
137 | 91 |
138 def __init__(self, diff_input): | 92 def __init__(self, diff_input): |
139 """Parses a diff. | 93 """Parses a diff. |
140 | 94 |
141 Args: | 95 Args: |
142 diff_input: An iterable object. | 96 diff_input: An iterable object. |
143 """ | 97 """ |
144 self.files = self._parse_into_diff_files(diff_input) | 98 self.files = self._parse_into_diff_files(diff_input) |
145 | 99 |
146 # FIXME: This function is way too long and needs to be broken up. | 100 # FIXME: This function is way too long and needs to be broken up. |
147 def _parse_into_diff_files(self, diff_input): | 101 def _parse_into_diff_files(self, diff_input): |
148 files = {} | 102 files = {} |
149 state = _INITIAL_STATE | 103 state = _INITIAL_STATE |
150 current_file = None | 104 current_file = None |
151 old_diff_line = None | 105 old_diff_line = None |
152 new_diff_line = None | 106 new_diff_line = None |
153 transform_line = get_diff_converter(diff_input) | 107 transform_line = self.get_diff_converter(diff_input) |
154 for line in diff_input: | 108 for line in diff_input: |
155 line = line.rstrip("\n") | 109 line = line.rstrip("\n") |
156 line = transform_line(line) | 110 line = transform_line(line) |
157 | 111 |
158 file_declaration = match(r"^Index: (?P<FilePath>.+)", line) | 112 file_declaration = self.index_pattern.match(line) |
159 if file_declaration: | 113 if file_declaration: |
160 filename = file_declaration.group('FilePath') | 114 filename = file_declaration.group('FilePath') |
161 current_file = DiffFile(filename) | 115 current_file = DiffFile(filename) |
162 files[filename] = current_file | 116 files[filename] = current_file |
163 state = _DECLARED_FILE_PATH | 117 state = _DECLARED_FILE_PATH |
164 continue | 118 continue |
165 | 119 |
166 lines_changed = match(r"^@@ -(?P<OldStartLine>\d+)(,\d+)? \+(?P<NewStartLine>\d+)(,\d+)? @@", line) | 120 lines_changed = self.lines_changed_pattern.match(line) |
167 if lines_changed: | 121 if lines_changed: |
168 if state != _DECLARED_FILE_PATH and state != _PROCESSING_CHUNK: | 122 if state != _DECLARED_FILE_PATH and state != _PROCESSING_CHUNK: |
169 _log.error('Unexpected line change without file path ' | 123 _log.error('Unexpected line change without file path ' |
170 'declaration: %r' % line) | 124 'declaration: %r' % line) |
171 old_diff_line = int(lines_changed.group('OldStartLine')) | 125 old_diff_line = int(lines_changed.group('OldStartLine')) |
172 new_diff_line = int(lines_changed.group('NewStartLine')) | 126 new_diff_line = int(lines_changed.group('NewStartLine')) |
173 state = _PROCESSING_CHUNK | 127 state = _PROCESSING_CHUNK |
174 continue | 128 continue |
175 | 129 |
176 if state == _PROCESSING_CHUNK: | 130 if state == _PROCESSING_CHUNK: |
177 if line.startswith('+'): | 131 if line.startswith('+'): |
178 current_file.add_new_line(new_diff_line, line[1:]) | 132 current_file.add_new_line(new_diff_line, line[1:]) |
179 new_diff_line += 1 | 133 new_diff_line += 1 |
180 elif line.startswith('-'): | 134 elif line.startswith('-'): |
181 current_file.add_deleted_line(old_diff_line, line[1:]) | 135 current_file.add_deleted_line(old_diff_line, line[1:]) |
182 old_diff_line += 1 | 136 old_diff_line += 1 |
183 elif line.startswith(' '): | 137 elif line.startswith(' '): |
184 current_file.add_unchanged_line(old_diff_line, new_diff_line, line[1:]) | 138 current_file.add_unchanged_line(old_diff_line, new_diff_line, line[1:]) |
185 old_diff_line += 1 | 139 old_diff_line += 1 |
186 new_diff_line += 1 | 140 new_diff_line += 1 |
187 elif line == '\\ No newline at end of file': | 141 elif line == '\\ No newline at end of file': |
188 # Nothing to do. We may still have some added lines. | 142 # Nothing to do. We may still have some added lines. |
189 pass | 143 pass |
190 else: | 144 else: |
191 _log.error('Unexpected diff format when parsing a ' | 145 _log.error('Unexpected diff format when parsing a ' |
192 'chunk: %r' % line) | 146 'chunk: %r' % line) |
193 return files | 147 return files |
148 | |
149 @classmethod | |
150 def get_diff_converter(cls, lines): | |
151 """Gets a converter function of diff lines. | |
152 | |
153 Args: | |
154 lines: The lines of a diff file. | |
155 If this line is git formatted, we'll return a | |
156 converter from git to SVN. | |
157 """ | |
158 for i, line in enumerate(lines[:-1]): | |
159 if cls.diff_git_pattern.match(line): | |
160 return cls.git_diff_to_svn_diff | |
161 return cls.svn_diff_to_svn_diff | |
162 | |
163 @classmethod | |
164 def git_diff_to_svn_diff(cls, line): | |
165 """Converts a git formatted diff line to a svn formatted line. | |
166 | |
167 Args: | |
168 line: A string representing a line of the diff. | |
169 """ | |
170 for pattern, conversion in cls.conversion_patterns: | |
171 matched = pattern.match(line) | |
172 if matched: | |
173 return conversion(matched) | |
174 return line | |
175 | |
176 # This function exists so we can unittest get_diff_converter function | |
177 @classmethod | |
178 def svn_diff_to_svn_diff(cls, line): | |
179 return line | |
OLD | NEW |