OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/env python | |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """Wrapper script to help run clang tools across Chromium code. | |
7 | |
8 The clang tool implementation doesn't take advantage of multiple cores, and if | |
9 it fails mysteriously in the middle, all the generated replacements will be | |
10 lost. | |
11 | |
12 Unfortunately, if the work is simply sharded across multiple cores by running | |
13 multiple RefactoringTools, problems arise when they attempt to rewrite a file at | |
14 the same time. To work around that, clang tools that are run using this tool | |
15 should output edits to stdout in the following format: | |
16 ==== BEGIN EDITS ==== | |
17 r:<file path>:<offset>:<length>:<replacement text> | |
18 r:<file path>:<offset>:<length>:<replacement text> | |
19 ...etc... | |
20 ==== END EDITS ==== | |
21 | |
22 Any generated edits are applied once the clang tool has finished running | |
23 across Chromium, regardless of whether some instances failed or not. | |
Nico
2013/03/29 22:30:36
Ok, I can see this being useful. Maybe we should t
dcheng
2013/03/29 22:42:55
I've had this discussion with them internally alre
| |
24 """ | |
25 | |
26 import collections | |
27 import functools | |
28 import multiprocessing | |
29 import os.path | |
30 import subprocess | |
31 import sys | |
32 | |
33 | |
# One edit emitted by a clang tool. The fields mirror the colon-separated
# columns of an edit line (minus the file path, which is used as the key under
# which the edit is stored).
Edit = collections.namedtuple(
    'Edit', 'edit_type offset length replacement')
36 | |
37 | |
def _GetFilesFromGit(paths=None):
  """Lists files known to git by running `git ls-files`.

  Args:
    paths: Optional prefix filter for the returned paths. May contain multiple
        entries; each one is appended to the `git ls-files` command line.

  Returns:
    The lines of the command's stdout, one entry per listed file.
  """
  git_command = ['git', 'ls-files'] + list(paths or [])
  git_process = subprocess.Popen(git_command, stdout=subprocess.PIPE)
  listing = git_process.communicate()[0]
  return listing.splitlines()
50 | |
51 | |
def _ExecuteTool(toolname, build_directory, filename):
  """Runs the clang tool over a single file and captures its output.

  This lives at module level (rather than on the dispatcher class) so that it
  can be pickled for the multiprocessing module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that always has a boolean under the key "status".

    When the tool exits with code 0, "status" is True and the captured stdout
    is stored under the key "stdout".

    Otherwise "status" is False, and the filename and the captured stderr are
    stored under the keys "filename" and "stderr" respectively.
  """
  tool_process = subprocess.Popen(
      (toolname, '-p', build_directory, filename),
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE)
  captured_out, captured_err = tool_process.communicate()
  if tool_process.returncode == 0:
    return {'status': True, 'stdout': captured_out}
  return {'status': False, 'filename': filename, 'stderr': captured_err}
81 | |
82 | |
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel.

  Each file is handed to _ExecuteTool in a worker process. Results are
  processed in the parent as they arrive: edits printed by successful runs are
  accumulated in `edits`, failures are reported on stdout, and a running
  progress line is printed.
  """

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    # Maps a file path to the list of Edit tuples collected for that file.
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """The edits collected so far, as a dict of file path -> list of Edit."""
    return self.__edits

  def Run(self):
    """Runs the tool over every file and collects the resulting edits."""
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # Always reap the worker processes, even if processing a result raised.
      # Once every result has been consumed the workers are idle, so
      # terminating them loses no work.
      pool.terminate()
      pool.join()
    # The progress line ends with '\r'; finish it with a newline.
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Updates the success/failure counters, records edits or reports the
    failure, and rewrites the progress line.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      self.__AddEditsFromStdout(result['stdout'])
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # '\r' returns to the start of the line so the next update overwrites it.
    sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, percentage))
    sys.stdout.flush()

  def __AddEditsFromStdout(self, stdout):
    """Extracts and adds the list of edits generated on the tool's stdout.

    Edits are the lines between a '==== BEGIN EDITS ====' line and a
    '==== END EDITS ====' line, each of the form
    <edit type>:<file path>:<offset>:<length>:<replacement text>.

    Output that lacks either marker is reported and treated as containing no
    edits, so that one misbehaving tool run cannot abort the whole batch and
    discard the edits already collected.

    Args:
      stdout: The stdout from running the clang tool.

    Raises:
      ValueError: If a line between the markers is malformed.
    """
    lines = stdout.splitlines()
    try:
      start_index = lines.index('==== BEGIN EDITS ====')
      end_index = lines.index('==== END EDITS ====')
    except ValueError:
      sys.stdout.write('\nIgnoring tool output without edit markers\n')
      return
    for line in lines[start_index + 1:end_index]:
      # Split at most 4 times so colons in the replacement text are preserved.
      edit_type, path, offset, length, replacement = line.split(':', 4)
      # TODO(dcheng): [6:] is a horrible hack to trim off ../../ and is fragile.
      self.__edits[path[6:]].append(
          Edit(edit_type, int(offset), int(length), replacement))
153 | |
154 | |
def _ApplyEdits(edits):
  """Rewrites files on disk according to the generated edits.

  Each file's edit list is sorted in place. A summary line with the number of
  applied edits and the number of files is printed at the end.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
  """
  applied = 0
  for filename, file_edits in edits.items():
    # Sorting puts duplicate edits next to each other so they can be skipped
    # cheaply. Walking the sorted list backwards applies edits from the end of
    # the file towards the start, so the offsets of edits that are still
    # pending never need to be adjusted.
    file_edits.sort()
    previous = None
    with open(filename, 'rb+') as handle:
      data = bytearray(handle.read())
      for current in file_edits[::-1]:
        if current == previous:
          continue
        previous = current
        start = current.offset
        data[start:start + current.length] = current.replacement
        if not current.replacement:
          # A pure deletion may leave a dangling list separator behind.
          _ExtendDeletionIfElementIsInList(data, start)
        applied += 1
      # Overwrite the file in place with the edited contents.
      handle.seek(0)
      handle.truncate()
      handle.write(data)
  print('Applied %d edits to %d files' % (applied, len(edits)))
183 | |
184 | |
# Byte values treated as whitespace when scanning around a deletion.
_WHITESPACE_BYTES = frozenset(bytearray(b'\t\n\r '))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Widens a deletion to swallow the separator of a deleted list element.

  This rewriter helper makes it easy for refactoring tools to remove elements
  from a list. Even when a matcher callback knows it is removing a list
  element, it may lack the information needed to remove it cleanly: another
  matcher callback may remove an adjacent element, or every element of the
  list may end up being removed.

  With this helper, refactoring tools can simply delete the element itself and
  leave the comma out of the replacement.

  The buffer is modified in place; nothing is returned.

  Args:
    contents: A bytearray with the deletion already applied.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  comma, colon, left_brace = ord(','), ord(':'), ord('{')

  # Look at the first non-whitespace byte to the left of the deletion.
  # colon_span stays 0 unless that byte is a colon, in which case it holds the
  # number of bytes from the colon up to the deletion point.
  colon_span = 0
  for scanned, byte in enumerate(reversed(contents[:offset]), 1):
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == comma:
      # A preceding comma means this was not the first element of a list, so
      # drop the comma (and the whitespace after it) as well.
      del contents[offset - scanned:offset]
      return
    if byte == colon:
      # A preceding colon hints that this may have been the first element of
      # an initializer list.
      colon_span = scanned
    # Any other byte: not a list element, the only element left in the list,
    # or the first element in the list. Either way, stop scanning leftwards.
    break

  # Look at the first non-whitespace byte to the right of the deletion.
  for scanned, byte in enumerate(contents[offset:], 1):
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == comma:
      # The first element of a list was removed, so drop the trailing comma
      # (and the whitespace before it) too.
      del contents[offset:offset + scanned]
      return
    if colon_span and byte == left_brace:
      # The only initializer left was removed, so drop the preceding colon.
      del contents[offset - colon_span:offset]
    # No special handling is needed for any other byte.
    return
244 | |
245 | |
def main(argv):
  """Runs a clang tool over the repository's source files and applies edits.

  Args:
    argv: Command-line arguments without the script name: the clang tool, the
        directory containing the compile database, and then any number of
        paths used to filter which files are processed.
  """
  if len(argv) < 2:
    print('\n'.join((
        'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...',
        ' <clang tool> is the clang tool that should be run.',
        ' <compile db> is the directory that contains the compile database',
        ' <path 1> <path2> ... can be used to filter what files are edited',
    )))
    sys.exit(1)

  tool_path, compile_db_dir = argv[0], argv[1]

  # TODO(dcheng): Assert that we're running from chromium/src.
  tracked_files = frozenset(_GetFilesFromGit(argv[2:]))
  # Filter out files that aren't C/C++/Obj-C/Obj-C++.
  source_suffixes = frozenset(('.c', '.cc', '.m', '.mm'))
  sources = [name for name in tracked_files
             if os.path.splitext(name)[1] in source_suffixes]
  dispatcher = _CompilerDispatcher(tool_path, compile_db_dir, sources)
  dispatcher.Run()

  # Drop edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  tracked_edits = {}
  for path, file_edits in dispatcher.edits.items():
    if path in tracked_files:
      tracked_edits[path] = file_edits
  _ApplyEdits(tracked_edits)
  # TODO(dcheng): Consider clang-formatting the result to avoid egregious style
  # violations.
269 | |
270 | |
if __name__ == '__main__':
  # Pass everything after the script name to main() and use its return value
  # as the process exit status.
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)
OLD | NEW |