OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Wrapper script to help run clang tools across Chromium code. |
| 7 |
| 8 The clang tool implementation doesn't take advantage of multiple cores, and if |
| 9 it fails mysteriously in the middle, all the generated replacements will be |
| 10 lost. |
| 11 |
| 12 Unfortunately, if the work is simply sharded across multiple cores by running |
| 13 multiple RefactoringTools, problems arise when they attempt to rewrite a file at |
| 14 the same time. To work around that, clang tools that are run using this tool |
| 15 should output edits to stdout in the following format: |
| 16 ==== BEGIN EDITS ==== |
| 17 r:<file path>:<offset>:<length>:<replacement text> |
| 18 r:<file path>:<offset>:<length>:<replacement text> |
| 19 ...etc... |
| 20 ==== END EDITS ==== |
| 21 |
| 22 Any generated edits are applied once the clang tool has finished running |
| 23 across Chromium, regardless of whether some instances failed or not. |
| 24 """ |
| 25 |
| 26 import collections |
| 27 import functools |
| 28 import multiprocessing |
| 29 import os.path |
| 30 import subprocess |
| 31 import sys |
| 32 |
| 33 |
# One edit parsed from a single
# "r:<file path>:<offset>:<length>:<replacement text>" line of tool output.
# The file path is not part of the tuple; edits are stored keyed by path.
Edit = collections.namedtuple('Edit', 'edit_type offset length replacement')
| 36 |
| 37 |
def _GetFilesFromGit(paths=None):
  """Lists the files reported by `git ls-files`.

  Args:
    paths: Optional path arguments appended to the `git ls-files` command line
        to restrict the listing. May contain multiple entries. Nothing is
        appended when this is None or empty.

  Returns:
    A list with one entry per line of the command's stdout.
  """
  git_command = ['git', 'ls-files'] + list(paths or [])
  git_stdout, _ = subprocess.Popen(
      git_command, stdout=subprocess.PIPE).communicate()
  return git_stdout.splitlines()
| 50 |
| 51 |
def _ExtractEditsFromStdout(stdout):
  """Extracts generated list of edits from the tool's stdout.

  The expected format is documented at the top of this file. Output that does
  not contain both the BEGIN and END marker lines is treated as containing no
  edits. Lines between the markers that cannot be parsed are reported on stdout
  and skipped.

  Args:
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping filenames to the associated edits. Only files with at
    least one successfully parsed edit appear as keys.
  """
  edits = collections.defaultdict(list)
  lines = stdout.splitlines()
  try:
    start_index = lines.index('==== BEGIN EDITS ====')
    end_index = lines.index('==== END EDITS ====')
  except ValueError:
    # list.index() raises when a marker is missing. Letting that escape would
    # abort the whole parallel run from inside a worker, so report "no edits"
    # for this file instead.
    return edits
  for line in lines[start_index + 1:end_index]:
    try:
      edit_type, path, offset, length, replacement = line.split(':', 4)
      # Convert the numbers before touching |edits| so that a malformed line
      # does not leave behind an empty entry for its path.
      offset = int(offset)
      length = int(length)
    except ValueError:
      print('Unable to parse edit: %s' % line)
      continue
    # TODO(dcheng): [6:] is a horrible hack to trim off ../../ and is fragile.
    edits[path[6:]].append(Edit(edit_type, offset, length, replacement))
  return edits
| 76 |
| 77 |
def _ExecuteTool(toolname, build_directory, filename):
  """Runs the clang tool over one file and packages the outcome.

  This is a module-level function rather than a method so that the
  multiprocessing module can pickle it.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that always contains the key "status" with a boolean value.

    When the tool exits with code 0, "status" is True and the edits parsed
    from its stdout are stored under the key "edits".

    Otherwise "status" is False, and the keys "filename" and "stderr" hold the
    processed filename and the tool's captured stderr respectively.
  """
  process = subprocess.Popen(
      [toolname, '-p', build_directory, filename],
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE)
  tool_stdout, tool_stderr = process.communicate()
  if process.returncode == 0:
    return {'status': True, 'edits': _ExtractEditsFromStdout(tool_stdout)}
  return {'status': False, 'filename': filename, 'stderr': tool_stderr}
| 107 |
| 108 |
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over. Must support len().
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """The edits collected so far: a dict mapping filename to a list of edits."""
    return self.__edits

  def Run(self):
    """Runs the tool over every file in a process pool and collects results.

    Results are processed in completion order, and a progress line is written
    to stdout after each one.
    """
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # Always release the worker processes. On success every task has already
      # finished by the time the iterator is exhausted, so terminate() only
      # stops idle workers; on error it stops the remaining work instead of
      # leaking the pool.
      pool.terminate()
      pool.join()
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Merges the edits of a successful run into the collected edits, or reports
    the failing file and its stderr, then rewrites the progress line.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].items():
        self.__edits[k].extend(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # The trailing carriage return lets the next progress line overwrite this
    # one.
    sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, percentage))
    sys.stdout.flush()
| 164 |
| 165 |
def _ApplyEdits(edits):
  """Rewrites files on disk according to the generated edits.

  Each file's edit list is sorted in place, exact duplicates are applied only
  once, and a summary line is printed when all files have been rewritten.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
  """
  applied_count = 0
  for path, file_edits in edits.items():
    # Sorting puts identical edits next to each other so they can be skipped
    # cheaply. Walking the sorted list backwards applies the highest offsets
    # first, so the offsets of the edits still to come remain valid.
    file_edits.sort()
    with open(path, 'rb+') as f:
      contents = bytearray(f.read())
      previous = None
      for edit in reversed(file_edits):
        if edit == previous:
          continue
        previous = edit
        start = edit.offset
        contents[start:start + edit.length] = edit.replacement
        if not edit.replacement:
          # A pure deletion may leave a dangling list separator behind.
          _ExtendDeletionIfElementIsInList(contents, start)
        applied_count += 1
      # Overwrite the file in place with the edited contents.
      f.seek(0)
      f.truncate()
      f.write(contents)
  print('Applied %d edits to %d files' % (applied_count, len(edits)))
| 194 |
| 195 |
# Byte values of tab, newline, carriage return and space.
_WHITESPACE_BYTES = frozenset(bytearray(b'\t\n\r '))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Widens a deletion so that no dangling list separator is left behind.

  Refactoring tools can use this to delete a list element without knowing
  which neighbouring separator has to go with it. That is hard to decide inside
  a matcher callback, since another callback may delete an adjacent element, or
  every element of the list may end up deleted.

  The bytes around the deletion point are inspected, skipping whitespace:
  - a comma before it is removed together with the whitespace in between;
  - otherwise a comma after it is removed together with the whitespace in
    between;
  - otherwise, if a colon precedes it and a left brace follows it, the colon
    and the whitespace between the colon and the deletion point are removed.
  In every other case |contents| is left untouched.

  Args:
    contents: A bytearray with the deletion already applied. Modified in place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  comma, colon, left_brace = 0x2c, 0x3a, 0x7b

  # Look leftwards for the nearest non-whitespace byte. |left_span| ends up as
  # the distance from |offset| back to (and including) that byte.
  preceded_by_colon = False
  left_span = 0
  for left_span, byte in enumerate(reversed(contents[:offset]), 1):
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == comma:
      # Not the first element of a list: take the preceding comma along.
      del contents[offset - left_span:offset]
      return
    # A colon may introduce an initializer list. Any other byte means this is
    # not a list element, or it is the first or only element left.
    preceded_by_colon = byte == colon
    break

  # Look rightwards for the nearest non-whitespace byte.
  for right_span, byte in enumerate(contents[offset:], 1):
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == comma:
      # The first element of a list was removed: take the trailing comma along.
      del contents[offset:offset + right_span]
    elif preceded_by_colon and byte == left_brace:
      # The only initializer left was removed: drop the preceding colon too.
      del contents[offset - left_span:offset]
    # Nothing further is needed for any other byte.
    return
| 255 |
| 256 |
def main(argv):
  """Runs a clang tool over the git-tracked sources and applies its edits.

  Args:
    argv: Command-line arguments without the program name: the clang tool, the
        directory containing the compile database, then optional paths used to
        filter which files are processed.
  """
  if len(argv) < 2:
    usage_lines = (
        'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...',
        ' <clang tool> is the clang tool that should be run.',
        ' <compile db> is the directory that contains the compile database',
        ' <path 1> <path2> ... can be used to filter what files are edited',
    )
    for usage_line in usage_lines:
      print(usage_line)
    sys.exit(1)

  toolname, build_directory = argv[0], argv[1]
  # TODO(dcheng): Assert that we're running from chromium/src.
  filenames = frozenset(_GetFilesFromGit(argv[2:]))
  # Filter out files that aren't C/C++/Obj-C/Obj-C++.
  extensions = frozenset(('.c', '.cc', '.m', '.mm'))
  source_files = [
      f for f in filenames if os.path.splitext(f)[1] in extensions]
  dispatcher = _CompilerDispatcher(toolname, build_directory, source_files)
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  tracked_edits = dict(
      (path, file_edits) for path, file_edits in dispatcher.edits.items()
      if path in filenames)
  _ApplyEdits(tracked_edits)
  # TODO(dcheng): Consider clang-formatting the result to avoid egregious style
  # violations.


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))
OLD | NEW |