OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 """Wrapper script to help run clang tools across Chromium code. | 5 """Wrapper script to help run clang tools across Chromium code. |
6 | 6 |
7 How to use this tool: | 7 How to use this tool: |
8 If you want to run the tool across all Chromium code: | 8 If you want to run the tool across all Chromium code: |
9 run_tool.py <tool> <path/to/compiledb> | 9 run_tool.py <tool> <path/to/compiledb> |
10 | 10 |
11 If you want to include all files mentioned in the compilation database: | 11 If you want to include all files mentioned in the compilation database |
12 (this will also include generated files, unlike the previous command): | |
12 run_tool.py <tool> <path/to/compiledb> --all | 13 run_tool.py <tool> <path/to/compiledb> --all |
13 | 14 |
14 If you only want to run the tool across just chrome/browser and content/browser: | 15 If you only want to run the tool across just chrome/browser and content/browser: |
15 run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser | 16 run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser |
16 | 17 |
17 Please see https://chromium.googlesource.com/chromium/src/+/master/docs/clang_tool_refactoring.md for more | 18 Please see docs/clang_tool_refactoring.md for more information, which documents |
18 information, which documents the entire automated refactoring flow in Chromium. | 19 the entire automated refactoring flow in Chromium. |
19 | 20 |
20 Why use this tool: | 21 Why use this tool: |
21 The clang tool implementation doesn't take advantage of multiple cores, and if | 22 The clang tool implementation doesn't take advantage of multiple cores, and if |
22 it fails mysteriously in the middle, all the generated replacements will be | 23 it fails mysteriously in the middle, all the generated replacements will be |
23 lost. | 24 lost. Additionally, if the work is simply sharded across multiple cores by |
25 running multiple RefactoringTools, problems arise when they attempt to rewrite a | |
26 file at the same time. | |
24 | 27 |
25 Unfortunately, if the work is simply sharded across multiple cores by running | 28 run_tool.py will |
26 multiple RefactoringTools, problems arise when they attempt to rewrite a file at | 29 1) run multiple instances of clang tool in parallel |
27 the same time. To work around that, clang tools that are run using this tool | 30 2) gather stdout from clang tool invocations |
28 should output edits to stdout in the following format: | 31 3) "atomically" forward #2 to stdout |
29 | 32 |
30 ==== BEGIN EDITS ==== | 33 Output of run_tool.py can be piped into extract_edits.py and then into |
31 r:<file path>:<offset>:<length>:<replacement text> | 34 apply_edits.py. These tools will extract individual edits and apply them to the |
32 r:<file path>:<offset>:<length>:<replacement text> | 35 source files. These tools assumme the clang tool emits the edits in the |
danakj (2016/12/23 15:45:06): assume
Łukasz Anforowicz (2016/12/27 22:33:27): Done.
| |
33 ...etc... | 36 following format: |
34 ==== END EDITS ==== | 37 ... |
38 ==== BEGIN EDITS ==== | |
39 r:::<file path>:::<offset>:::<length>:::<replacement text> | |
40 r:::<file path>:::<offset>:::<length>:::<replacement text> | |
41 ...etc... | |
42 ==== END EDITS ==== | |
43 ... | |
35 | 44 |
36 Any generated edits are applied once the clang tool has finished running | 45 extract_edits.py extracts only lines between BEGIN/END EDITS markers |
37 across Chromium, regardless of whether some instances failed or not. | 46 apply_edits.py reads edit lines from stdin and applies the edits |
38 """ | 47 """ |
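For illustration only (not part of this CL): a minimal sketch of a clang tool emitting one edit in the ":::"-delimited format the new docstring describes. The EmitEdits helper and the sample path/offset/length/replacement are hypothetical, and encoding newlines as NUL bytes is an assumption taken from the old _ExtractEditsFromStdout below, which decodes '\0' back into '\n'.

    import sys

    def EmitEdits(edits):
      # |edits| is a list of (path, offset, length, replacement) tuples.
      sys.stdout.write('==== BEGIN EDITS ====\n')
      for path, offset, length, replacement in edits:
        # Newlines inside |replacement| are assumed to be encoded as '\0'.
        sys.stdout.write('r:::%s:::%d:::%d:::%s\n' %
                         (path, offset, length, replacement.replace('\n', '\0')))
      sys.stdout.write('==== END EDITS ====\n')

    EmitEdits([('foo/bar.cc', 42, 3, 'scoped_refptr<Foo>')])

Per the docstring, output framed this way is then meant to be piped through extract_edits.py into apply_edits.py.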
39 | 48 |
40 import argparse | 49 import argparse |
41 import collections | |
42 import functools | 50 import functools |
43 import multiprocessing | 51 import multiprocessing |
44 import os | 52 import os |
45 import os.path | 53 import os.path |
46 import subprocess | 54 import subprocess |
47 import sys | 55 import sys |
56 import threading | |
48 | 57 |
49 script_dir = os.path.dirname(os.path.realpath(__file__)) | 58 script_dir = os.path.dirname(os.path.realpath(__file__)) |
50 tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib')) | 59 tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib')) |
51 sys.path.insert(0, tool_dir) | 60 sys.path.insert(0, tool_dir) |
52 | 61 |
53 from clang import compile_db | 62 from clang import compile_db |
54 | 63 |
55 Edit = collections.namedtuple('Edit', | |
56 ('edit_type', 'offset', 'length', 'replacement')) | |
57 | |
58 | 64 |
59 def _GetFilesFromGit(paths=None): | 65 def _GetFilesFromGit(paths=None): |
60 """Gets the list of files in the git repository. | 66 """Gets the list of files in the git repository. |
61 | 67 |
62 Args: | 68 Args: |
63 paths: Prefix filter for the returned paths. May contain multiple entries. | 69 paths: Prefix filter for the returned paths. May contain multiple entries. |
64 """ | 70 """ |
65 args = [] | 71 args = [] |
66 if sys.platform == 'win32': | 72 if sys.platform == 'win32': |
67 args.append('git.bat') | 73 args.append('git.bat') |
(...skipping 10 matching lines...) | |
78 def _GetFilesFromCompileDB(build_directory): | 84 def _GetFilesFromCompileDB(build_directory): |
79 """ Gets the list of files mentioned in the compilation database. | 85 """ Gets the list of files mentioned in the compilation database. |
80 | 86 |
81 Args: | 87 Args: |
82 build_directory: Directory that contains the compile database. | 88 build_directory: Directory that contains the compile database. |
83 """ | 89 """ |
84 return [os.path.join(entry['directory'], entry['file']) | 90 return [os.path.join(entry['directory'], entry['file']) |
85 for entry in compile_db.Read(build_directory)] | 91 for entry in compile_db.Read(build_directory)] |
86 | 92 |
87 | 93 |
88 def _ExtractEditsFromStdout(build_directory, stdout): | 94 _atomically_forward_tool_output_mutex = threading.Lock() |
89 """Extracts generated list of edits from the tool's stdout. | 95 def _AtomicallyForwardToolOutput(stdout): |
danakj (2016/12/23 15:45:06): name this |text| or something not |stdout|, which
dcheng (2016/12/27 07:30:14): Rather than doing this, should we just 'return' th
Łukasz Anforowicz (2016/12/27 22:33:27): Done (printing to stdout from the dispatcher). Th
| |
96 """Atomically forwards contents of |stdout| arg to stdout. | |
90 | 97 |
91 The expected format is documented at the top of this file. | 98 Atomically means that we don't allow 2 *interleaved* forwardings to take |
92 | 99 place. |
93 Args: | |
94 build_directory: Directory that contains the compile database. Used to | |
95 normalize the filenames. | |
96 stdout: The stdout from running the clang tool. | |
97 | |
98 Returns: | |
99 A dictionary mapping filenames to the associated edits. | |
100 """ | 100 """ |
101 lines = stdout.splitlines() | 101 with _atomically_forward_tool_output_mutex: |
102 start_index = lines.index('==== BEGIN EDITS ====') | 102 sys.stdout.write(stdout) |
103 end_index = lines.index('==== END EDITS ====') | 103 sys.stdout.flush() |
104 edits = collections.defaultdict(list) | |
105 for line in lines[start_index + 1:end_index]: | |
106 try: | |
107 edit_type, path, offset, length, replacement = line.split(':::', 4) | |
108 replacement = replacement.replace('\0', '\n') | |
109 # Normalize the file path emitted by the clang tool. | |
110 path = os.path.realpath(os.path.join(build_directory, path)) | |
111 edits[path].append(Edit(edit_type, int(offset), int(length), replacement)) | |
112 except ValueError: | |
113 print 'Unable to parse edit: %s' % line | |
114 return edits | |
115 | 104 |
116 | 105 |
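extract_edits.py itself is not part of this diff; the following is only a rough, hypothetical sketch of the extraction step the docstring describes (read run_tool.py output on stdin and forward just the lines between the markers):

    import sys

    inside_edits = False
    for line in sys.stdin:
      line = line.rstrip('\n')
      if line == '==== BEGIN EDITS ====':
        inside_edits = True
      elif line == '==== END EDITS ====':
        inside_edits = False
      elif inside_edits:
        print line  # forward only the r:::... edit lines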
117 def _ExecuteTool(toolname, tool_args, build_directory, filename): | 106 def _ExecuteTool(toolname, tool_args, build_directory, filename): |
118 """Executes the tool. | 107 """Executes the tool. |
119 | 108 |
120 This is defined outside the class so it can be pickled for the multiprocessing | 109 This is defined outside the class so it can be pickled for the multiprocessing |
121 module. | 110 module. |
122 | 111 |
123 Args: | 112 Args: |
124 toolname: Path to the tool to execute. | 113 toolname: Path to the tool to execute. |
125 tool_args: Arguments to be passed to the tool. Can be None. | 114 tool_args: Arguments to be passed to the tool. Can be None. |
126 build_directory: Directory that contains the compile database. | 115 build_directory: Directory that contains the compile database. |
127 filename: The file to run the tool over. | 116 filename: The file to run the tool over. |
128 | 117 |
129 Returns: | 118 Returns: |
130 A dictionary that must contain the key "status" and a boolean value | 119 A dictionary that must contain the key "status" and a boolean value |
131 associated with it. | 120 associated with it. |
132 | 121 |
133 If status is True, then the generated edits are stored with the key "edits" | 122 If status is True, then the generated edits are stored with the key "edits" |
danakj (2016/12/23 15:45:06): needs update
Łukasz Anforowicz (2016/12/27 22:33:27): Done. Thanks for catching this.
| |
134 in the dictionary. | 123 in the dictionary. |
135 | 124 |
136 Otherwise, the filename and the output from stderr are associated with the | 125 Otherwise, the filename and the output from stderr are associated with the |
137 keys "filename" and "stderr" respectively. | 126 keys "filename" and "stderr" respectively. |
138 """ | 127 """ |
139 args = [toolname, '-p', build_directory, filename] | 128 args = [toolname, '-p', build_directory, filename] |
140 if (tool_args): | 129 if (tool_args): |
141 args.extend(tool_args) | 130 args.extend(tool_args) |
142 command = subprocess.Popen( | 131 command = subprocess.Popen( |
143 args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 132 args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
144 stdout, stderr = command.communicate() | 133 stdout, stderr = command.communicate() |
145 if command.returncode != 0: | 134 if command.returncode != 0: |
146 return {'status': False, 'filename': filename, 'stderr': stderr} | 135 return {'status': False, 'filename': filename, 'stderr': stderr} |
147 else: | 136 else: |
148 return {'status': True, | 137 _AtomicallyForwardToolOutput(stdout) |
149 'edits': _ExtractEditsFromStdout(build_directory, stdout)} | 138 return {'status': True, 'filename': filename} |
150 | 139 |
151 | 140 |
152 class _CompilerDispatcher(object): | 141 class _CompilerDispatcher(object): |
153 """Multiprocessing controller for running clang tools in parallel.""" | 142 """Multiprocessing controller for running clang tools in parallel.""" |
154 | 143 |
155 def __init__(self, toolname, tool_args, build_directory, filenames): | 144 def __init__(self, toolname, tool_args, build_directory, filenames): |
156 """Initializer method. | 145 """Initializer method. |
157 | 146 |
158 Args: | 147 Args: |
159 toolname: Path to the tool to execute. | 148 toolname: Path to the tool to execute. |
160 tool_args: Arguments to be passed to the tool. Can be None. | 149 tool_args: Arguments to be passed to the tool. Can be None. |
161 build_directory: Directory that contains the compile database. | 150 build_directory: Directory that contains the compile database. |
162 filenames: The files to run the tool over. | 151 filenames: The files to run the tool over. |
163 """ | 152 """ |
164 self.__toolname = toolname | 153 self.__toolname = toolname |
165 self.__tool_args = tool_args | 154 self.__tool_args = tool_args |
166 self.__build_directory = build_directory | 155 self.__build_directory = build_directory |
167 self.__filenames = filenames | 156 self.__filenames = filenames |
168 self.__success_count = 0 | 157 self.__success_count = 0 |
169 self.__failed_count = 0 | 158 self.__failed_count = 0 |
170 self.__edit_count = 0 | |
171 self.__edits = collections.defaultdict(list) | |
172 | |
173 @property | |
174 def edits(self): | |
175 return self.__edits | |
176 | 159 |
177 @property | 160 @property |
178 def failed_count(self): | 161 def failed_count(self): |
179 return self.__failed_count | 162 return self.__failed_count |
180 | 163 |
181 def Run(self): | 164 def Run(self): |
182 """Does the grunt work.""" | 165 """Does the grunt work.""" |
183 pool = multiprocessing.Pool() | 166 pool = multiprocessing.Pool() |
184 result_iterator = pool.imap_unordered( | 167 result_iterator = pool.imap_unordered( |
185 functools.partial(_ExecuteTool, self.__toolname, self.__tool_args, | 168 functools.partial(_ExecuteTool, self.__toolname, self.__tool_args, |
186 self.__build_directory), | 169 self.__build_directory), |
187 self.__filenames) | 170 self.__filenames) |
188 for result in result_iterator: | 171 for result in result_iterator: |
189 self.__ProcessResult(result) | 172 self.__ProcessResult(result) |
190 sys.stdout.write('\n') | 173 sys.stderr.write('\n') |
191 sys.stdout.flush() | 174 sys.stderr.flush() |
danakj (2016/12/23 15:45:06): stderr doesn't need flush?
Łukasz Anforowicz (2016/12/27 22:33:26): Done.
| |
192 | 175 |
193 def __ProcessResult(self, result): | 176 def __ProcessResult(self, result): |
194 """Handles result processing. | 177 """Handles result processing. |
195 | 178 |
196 Args: | 179 Args: |
197 result: The result dictionary returned by _ExecuteTool. | 180 result: The result dictionary returned by _ExecuteTool. |
198 """ | 181 """ |
199 if result['status']: | 182 if result['status']: |
200 self.__success_count += 1 | 183 self.__success_count += 1 |
201 for k, v in result['edits'].iteritems(): | |
202 self.__edits[k].extend(v) | |
203 self.__edit_count += len(v) | |
204 else: | 184 else: |
205 self.__failed_count += 1 | 185 self.__failed_count += 1 |
206 sys.stdout.write('\nFailed to process %s\n' % result['filename']) | 186 sys.stderr.write('\nFailed to process %s\n' % result['filename']) |
207 sys.stdout.write(result['stderr']) | 187 sys.stderr.write(result['stderr']) |
208 sys.stdout.write('\n') | 188 sys.stderr.write('\n') |
209 percentage = (float(self.__success_count + self.__failed_count) / | 189 done_count = self.__success_count + self.__failed_count |
210 len(self.__filenames)) * 100 | 190 percentage = (float(done_count) / len(self.__filenames)) * 100 |
211 sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' % | 191 sys.stderr.write( |
212 (self.__success_count, self.__failed_count, | 192 'Processed %d files with %s tool (%d failures) [%.2f%%]\r' % |
213 self.__edit_count, percentage)) | 193 (done_count, self.__toolname, self.__failed_count, percentage)) |
214 sys.stdout.flush() | 194 sys.stderr.flush() |
danakj (2016/12/23 15:45:06): same
Łukasz Anforowicz (2016/12/27 22:33:27): Done.
| |
215 | |
216 | |
217 def _ApplyEdits(edits): | |
218 """Apply the generated edits. | |
219 | |
220 Args: | |
221 edits: A dict mapping filenames to Edit instances that apply to that file. | |
222 """ | |
223 edit_count = 0 | |
224 for k, v in edits.iteritems(): | |
225 # Sort the edits and iterate through them in reverse order. Sorting allows | |
226 # duplicate edits to be quickly skipped, while reversing means that | |
227 # subsequent edits don't need to have their offsets updated with each edit | |
228 # applied. | |
229 v.sort() | |
230 last_edit = None | |
231 with open(k, 'rb+') as f: | |
232 contents = bytearray(f.read()) | |
233 for edit in reversed(v): | |
234 if edit == last_edit: | |
235 continue | |
236 last_edit = edit | |
237 contents[edit.offset:edit.offset + edit.length] = edit.replacement | |
238 if not edit.replacement: | |
239 _ExtendDeletionIfElementIsInList(contents, edit.offset) | |
240 edit_count += 1 | |
241 f.seek(0) | |
242 f.truncate() | |
243 f.write(contents) | |
244 print 'Applied %d edits to %d files' % (edit_count, len(edits)) | |
245 | |
246 | |
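A tiny worked illustration (made-up offsets, not from the patch) of the reverse-order reasoning in the comment above: applying the highest-offset edit first keeps the lower offsets valid.

    contents = bytearray(b'aaaBBBccc')
    edits = [(0, 3, b'X'), (6, 3, b'Y')]  # (offset, length, replacement)
    for offset, length, replacement in sorted(edits, reverse=True):
      contents[offset:offset + length] = replacement
    assert contents == bytearray(b'XBBBY')

Applied front-to-back instead, the first replacement would shrink the buffer and the second edit's offset 6 would no longer point at 'ccc'.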
247 _WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' '))) | |
248 | |
249 | |
250 def _ExtendDeletionIfElementIsInList(contents, offset): | |
251 """Extends the range of a deletion if the deleted element was part of a list. | |
252 | |
253 This rewriter helper makes it easy for refactoring tools to remove elements | |
254 from a list. Even if a matcher callback knows that it is removing an element | |
255 from a list, it may not have enough information to accurately remove the list | |
256 element; for example, another matcher callback may end up removing an adjacent | |
257 list element, or all the list elements may end up being removed. | |
258 | |
259 With this helper, refactoring tools can simply remove the list element and not | |
260 worry about having to include the comma in the replacement. | |
261 | |
262 Args: | |
263 contents: A bytearray with the deletion already applied. | |
264 offset: The offset in the bytearray where the deleted range used to be. | |
265 """ | |
266 char_before = char_after = None | |
267 left_trim_count = 0 | |
268 for byte in reversed(contents[:offset]): | |
269 left_trim_count += 1 | |
270 if byte in _WHITESPACE_BYTES: | |
271 continue | |
272 if byte in (ord(','), ord(':'), ord('('), ord('{')): | |
273 char_before = chr(byte) | |
274 break | |
275 | |
276 right_trim_count = 0 | |
277 for byte in contents[offset:]: | |
278 right_trim_count += 1 | |
279 if byte in _WHITESPACE_BYTES: | |
280 continue | |
281 if byte == ord(','): | |
282 char_after = chr(byte) | |
283 break | |
284 | |
285 if char_before: | |
286 if char_after: | |
287 del contents[offset:offset + right_trim_count] | |
288 elif char_before in (',', ':'): | |
289 del contents[offset - left_trim_count:offset] | |
290 | 195 |
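A short worked example (illustrative values) of the comma-trimming behaviour documented above, assuming _ExtendDeletionIfElementIsInList as defined in the old code is in scope:

    contents = bytearray(b'f(a, b)')
    del contents[5:6]  # a tool edit deletes the trailing element "b" -> "f(a, )"
    _ExtendDeletionIfElementIsInList(contents, 5)
    assert contents == bytearray(b'f(a)')  # the preceding ", " is trimmed as well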
291 | 196 |
292 def main(): | 197 def main(): |
293 parser = argparse.ArgumentParser() | 198 parser = argparse.ArgumentParser() |
294 parser.add_argument('tool', help='clang tool to run') | 199 parser.add_argument('tool', help='clang tool to run') |
295 parser.add_argument('--all', action='store_true') | 200 parser.add_argument('--all', action='store_true') |
296 parser.add_argument( | 201 parser.add_argument( |
297 '--generate-compdb', | 202 '--generate-compdb', |
298 action='store_true', | 203 action='store_true', |
299 help='regenerate the compile database before running the tool') | 204 help='regenerate the compile database before running the tool') |
(...skipping 12 matching lines...) | |
312 os.environ['PATH'] = '%s%s%s' % ( | 217 os.environ['PATH'] = '%s%s%s' % ( |
313 os.path.abspath(os.path.join( | 218 os.path.abspath(os.path.join( |
314 os.path.dirname(__file__), | 219 os.path.dirname(__file__), |
315 '../../../third_party/llvm-build/Release+Asserts/bin')), | 220 '../../../third_party/llvm-build/Release+Asserts/bin')), |
316 os.pathsep, | 221 os.pathsep, |
317 os.environ['PATH']) | 222 os.environ['PATH']) |
318 | 223 |
319 if args.generate_compdb: | 224 if args.generate_compdb: |
320 compile_db.GenerateWithNinja(args.compile_database) | 225 compile_db.GenerateWithNinja(args.compile_database) |
321 | 226 |
322 filenames = set(_GetFilesFromGit(args.path_filter)) | |
323 if args.all: | 227 if args.all: |
324 source_filenames = set(_GetFilesFromCompileDB(args.compile_database)) | 228 source_filenames = set(_GetFilesFromCompileDB(args.compile_database)) |
325 else: | 229 else: |
230 git_filenames = set(_GetFilesFromGit(args.path_filter)) | |
326 # Filter out files that aren't C/C++/Obj-C/Obj-C++. | 231 # Filter out files that aren't C/C++/Obj-C/Obj-C++. |
327 extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm')) | 232 extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm')) |
328 source_filenames = [f | 233 source_filenames = [f |
329 for f in filenames | 234 for f in git_filenames |
330 if os.path.splitext(f)[1] in extensions] | 235 if os.path.splitext(f)[1] in extensions] |
236 | |
331 dispatcher = _CompilerDispatcher(args.tool, args.tool_args, | 237 dispatcher = _CompilerDispatcher(args.tool, args.tool_args, |
332 args.compile_database, | 238 args.compile_database, |
333 source_filenames) | 239 source_filenames) |
334 dispatcher.Run() | 240 dispatcher.Run() |
335 # Filter out edits to files that aren't in the git repository, since it's not | |
336 # useful to modify files that aren't under source control--typically, these | |
337 # are generated files or files in a git submodule that's not part of Chromium. | |
338 _ApplyEdits({k: v | |
339 for k, v in dispatcher.edits.iteritems() | |
340 if os.path.realpath(k) in filenames}) | |
341 return -dispatcher.failed_count | 241 return -dispatcher.failed_count |
342 | 242 |
343 | 243 |
344 if __name__ == '__main__': | 244 if __name__ == '__main__': |
345 sys.exit(main()) | 245 sys.exit(main()) |