Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python | |
| 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Wrapper script to help run clang tools across Chromium code. | |
| 7 | |
| 8 The clang tool implementation doesn't take advantage of multiple cores, and if | |
| 9 it fails mysteriously in the middle, all the generated replacements will be | |
| 10 lost. | |
| 11 | |
| 12 Unfortunately, if the work is simply sharded across multiple cores by running | |
| 13 multiple RefactoringTools, problems arise when they attempt to rewrite a file at | |
| 14 the same time. To work around that, clang tools that are run using this tool | |
| 15 should output edits to stdout in the following format: | |
| 16 ==== BEGIN EDITS ==== | |
| 17 r:<file path>:<offset>:<length>:<replacement text> | |
| 18 r:<file path>:<offset>:<length>:<replacement text> | |
| 19 ...etc... | |
| 20 ==== END EDITS ==== | |
| 21 | |
| 22 Any generated edits are applied once the clang tool has finished running | |
| 23 across Chromium, regardless of whether some instances failed or not. | |
|
Nico
2013/03/29 22:30:36
Ok, I can see this being useful. Maybe we should t
dcheng
2013/03/29 22:42:55
I've had this discussion with them internally alre
| |
| 24 """ | |
| 25 | |
| 26 import collections | |
| 27 import functools | |
| 28 import multiprocessing | |
| 29 import os.path | |
| 30 import subprocess | |
| 31 import sys | |
| 32 | |
| 33 | |
# One replacement reported by a clang tool: the kind of edit, the byte offset
# and length of the range being replaced, and the text to put there. The file
# the edit applies to is tracked separately, as the key the edit is stored
# under.
Edit = collections.namedtuple('Edit', 'edit_type offset length replacement')
| 36 | |
| 37 | |
def _GetFilesFromGit(paths=None):
  """Lists files known to git by running `git ls-files`.

  Args:
    paths: Optional path arguments forwarded to `git ls-files` to restrict the
        listing. May contain multiple entries; a falsy value means no filter.

  Returns:
    The lines of the command's stdout as a list, one entry per listed file.
  """
  git_command = ['git', 'ls-files'] + list(paths or ())
  process = subprocess.Popen(git_command, stdout=subprocess.PIPE)
  listing = process.communicate()[0]
  return listing.splitlines()
| 50 | |
| 51 | |
def _ExecuteTool(toolname, build_directory, filename):
  """Runs the clang tool over a single file and captures its output.

  Kept at module level (rather than as a method) so that it can be pickled
  and handed to worker processes by the multiprocessing module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database; passed to
        the tool via -p.
    filename: The file to run the tool over.

  Returns:
    A dictionary that always has a boolean under the key "status".

    When the tool exits with code 0, status is True and the captured stdout is
    stored under "stdout".

    Otherwise status is False, and the keys "filename" and "stderr" hold the
    file that was being processed and the captured stderr respectively.
  """
  process = subprocess.Popen(
      [toolname, '-p', build_directory, filename],
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE)
  tool_stdout, tool_stderr = process.communicate()
  if process.returncode == 0:
    return {'status': True, 'stdout': tool_stdout}
  return {'status': False, 'filename': filename, 'stderr': tool_stderr}
| 81 | |
| 82 | |
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over. Must support len(), since the
          progress percentage is computed from it.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    # Maps a file path to the list of Edits collected for that file.
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """The dict mapping file paths to the lists of Edits collected so far."""
    return self.__edits

  def Run(self):
    """Runs the tool over every file in a worker pool and collects results.

    Results are processed in whatever order the workers finish. The pool is
    always shut down before returning: it is closed normally on success and
    terminated if processing is interrupted by an exception.
    """
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    except BaseException:
      # Don't wait for outstanding work if something went wrong (including
      # Ctrl-C); just stop the workers and propagate the error.
      pool.terminate()
      raise
    else:
      pool.close()
    finally:
      pool.join()
    # The progress line ends in '\r'; move past it now that we're done.
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Records one tool result and refreshes the progress line.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      self.__AddEditsFromStdout(result['stdout'])
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # '\r' keeps rewriting the same terminal line with the latest counts.
    sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, percentage))
    sys.stdout.flush()

  def __AddEditsFromStdout(self, stdout):
    """Extracts and adds the list of edits generated on the tool's stdout.

    The expected format is documented at the top of this file. Output that
    does not contain a complete BEGIN/END block contributes no edits, so a
    single tool run that prints nothing useful cannot abort the whole run and
    throw away the edits already collected from other files.

    Args:
      stdout: The stdout from running the clang tool.
    """
    lines = stdout.splitlines()
    try:
      start_index = lines.index('==== BEGIN EDITS ====')
      # Only an END marker that follows the BEGIN marker closes the block.
      end_index = lines.index('==== END EDITS ====', start_index + 1)
    except ValueError:
      return
    for line in lines[start_index + 1:end_index]:
      # Split at most 4 times so that ':' inside the replacement text is kept.
      edit_type, path, offset, length, replacement = line.split(':', 4)
      # TODO(dcheng): [6:] is a horrible hack to trim off ../../ and is fragile.
      self.__edits[path[6:]].append(
          Edit(edit_type, int(offset), int(length), replacement))
| 153 | |
| 154 | |
def _ApplyEdits(edits):
  """Applies the generated edits to the files on disk.

  Each file is read fully, rewritten in memory and then written back in place.
  Exact duplicate edits for a file are applied only once. The lists passed in
  are not modified.

  Args:
    edits: A dict mapping filenames to lists of Edit instances that apply to
        that file.
  """
  edit_count = 0
  for path, file_edits in edits.items():
    # Walk the edits from the highest offset down. Sorting puts duplicate
    # edits next to each other so they can be quickly skipped, while going
    # back-to-front means that the offsets of the edits still to be applied
    # are not shifted by the ones already applied.
    last_edit = None
    with open(path, 'rb+') as f:
      contents = bytearray(f.read())
      for edit in sorted(file_edits, reverse=True):
        if edit == last_edit:
          continue
        last_edit = edit
        contents[edit.offset:edit.offset + edit.length] = edit.replacement
        if not edit.replacement:
          # A pure deletion may have left a dangling ',' or ':' behind.
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      # Overwrite the file from the start with the edited contents.
      f.seek(0)
      f.truncate()
      f.write(contents)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))
| 183 | |
| 184 | |
# Byte values (as ints, which is what iterating a bytearray yields) that are
# skipped over when looking for the separator next to a deletion.
_WHITESPACE_BYTES = frozenset(bytearray(b'\t\n\r '))


def _ScanPastWhitespace(data):
  """Finds the first non-whitespace byte in an iterable of byte values.

  Args:
    data: An iterable yielding byte values as ints.

  Returns:
    A (count, byte) tuple. count is the number of bytes consumed, including
    the non-whitespace byte if one was found. byte is that non-whitespace
    byte, or None if the data ran out first.
  """
  consumed = 0
  for value in data:
    consumed += 1
    if value not in _WHITESPACE_BYTES:
      return consumed, value
  return consumed, None


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Widens a deletion so that removing a list element leaves valid syntax.

  This rewriter helper makes it easy for refactoring tools to remove elements
  from a list. Even if a matcher callback knows that it is removing an element
  from a list, it may not have enough information to accurately remove the
  list element; for example, another matcher callback may end up removing an
  adjacent list element, or all the list elements may end up being removed.

  With this helper, refactoring tools can simply remove the list element and
  not worry about having to include the comma in the replacement.

  The bytes around the deletion point are inspected, ignoring whitespace:
    - A comma on the left means the element was not the first in its list;
      the comma and the whitespace after it are removed.
    - Otherwise, a comma on the right means the first element was removed;
      the whitespace and that trailing comma are removed.
    - Otherwise, a colon on the left combined with a left brace on the right
      means the only remaining initializer was removed; the colon and the
      whitespace after it are removed.
    - Anything else is left alone.

  Args:
    contents: A bytearray with the deletion already applied. Modified in
        place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  comma, colon, left_brace = 0x2c, 0x3a, 0x7b

  # Look leftwards from the deletion point.
  left_span, left_byte = _ScanPastWhitespace(reversed(contents[:offset]))
  if left_byte == comma:
    del contents[offset - left_span:offset]
    return

  # Look rightwards from the deletion point.
  right_span, right_byte = _ScanPastWhitespace(contents[offset:])
  if right_byte == comma:
    del contents[offset:offset + right_span]
  elif left_byte == colon and right_byte == left_brace:
    del contents[offset - left_span:offset]
| 244 | |
| 245 | |
def main(argv):
  """Runs a clang tool over the repository's source files and applies edits.

  Args:
    argv: The command-line arguments without the program name: the clang tool
        to run, the directory containing the compile database, and then any
        number of paths used to filter which files are processed.
  """
  if len(argv) < 2:
    print('Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...')
    print('  <clang tool> is the clang tool that should be run.')
    print('  <compile db> is the directory that contains the compile database')
    print('  <path 1> <path2> ... can be used to filter what files are edited')
    sys.exit(1)

  # TODO(dcheng): Assert that we're running from chromium/src.
  filenames = frozenset(_GetFilesFromGit(argv[2:]))
  # Filter out files that aren't C/C++/Obj-C/Obj-C++.
  extensions = frozenset(('.c', '.cc', '.m', '.mm'))
  dispatcher = _CompilerDispatcher(argv[0], argv[1],
                                   [f for f in filenames
                                    if os.path.splitext(f)[1] in extensions])
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k: v for k, v in dispatcher.edits.items()
               if k in filenames})
  # TODO(dcheng): Consider clang-formatting the result to avoid egregious style
  # violations.


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))
| OLD | NEW |