Index: tools/clang/scripts/run_tool.py |
diff --git a/tools/clang/scripts/run_tool.py b/tools/clang/scripts/run_tool.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..6eed3ca865bceab7be79d9a7ddfd51f93d4f21d6 |
--- /dev/null |
+++ b/tools/clang/scripts/run_tool.py |
@@ -0,0 +1,282 @@ |
+#!/usr/bin/env python |
+# Copyright (c) 2013 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+"""Wrapper script to help run clang tools across Chromium code. |
+ |
+How to use this tool: |
+If you want to run the tool across all Chromium code: |
+run_tool.py <tool> <path/to/compiledb> |
+ |
+If you only want to run the tool across just chrome/browser and content/browser: |
+run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser |
+ |
+Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more |
+information, which documents the entire automated refactoring flow in Chromium. |
+ |
+Why use this tool: |
+The clang tool implementation doesn't take advantage of multiple cores, and if |
+it fails mysteriously in the middle, all the generated replacements will be |
+lost. |
+ |
+Unfortunately, if the work is simply sharded across multiple cores by running |
+multiple RefactoringTools, problems arise when they attempt to rewrite a file at |
+the same time. To work around that, clang tools that are run using this tool |
+should output edits to stdout in the following format: |
+ |
+==== BEGIN EDITS ==== |
+r:<file path>:<offset>:<length>:<replacement text> |
+r:<file path>:<offset>:<length>:<replacement text> |
+...etc... |
+==== END EDITS ==== |
+ |
+Any generated edits are applied once the clang tool has finished running |
+across Chromium, regardless of whether some instances failed or not. |
+""" |
+ |
+import collections |
+import functools |
+import multiprocessing |
+import os.path |
+import subprocess |
+import sys |
+ |
+ |
# One replacement emitted by a clang tool, parsed from a line of the form
# "r:<file path>:<offset>:<length>:<replacement text>".  The file path is not
# stored in the tuple; edits are grouped per path in a dict instead.
Edit = collections.namedtuple('Edit', 'edit_type offset length replacement')
+ |
+ |
def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Runs `git ls-files` as a subprocess (in the current working directory) and
  splits its stdout into lines.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.
        When None or empty, no filter is passed to git.

  Returns:
    A list with one entry per line printed by `git ls-files`.
  """
  # '--' ends git's option parsing, so a filter that happens to start with '-'
  # is treated as a path instead of being interpreted as a command-line flag.
  args = ['git', 'ls-files', '--']
  if paths:
    args.extend(paths)
  command = subprocess.Popen(args, stdout=subprocess.PIPE)
  # NOTE(review): git's exit status is not checked; if git fails, whatever it
  # wrote to stdout (typically nothing) is returned as the file list.
  output, _ = command.communicate()
  return output.splitlines()
+ |
+ |
+def _ExtractEditsFromStdout(build_directory, stdout): |
+ """Extracts generated list of edits from the tool's stdout. |
+ |
+ The expected format is documented at the top of this file. |
+ |
+ Args: |
+ build_directory: Directory that contains the compile database. Used to |
+ normalize the filenames. |
+ stdout: The stdout from running the clang tool. |
+ |
+ Returns: |
+ A dictionary mapping filenames to the associated edits. |
+ """ |
+ lines = stdout.splitlines() |
+ start_index = lines.index('==== BEGIN EDITS ====') |
+ end_index = lines.index('==== END EDITS ====') |
+ edits = collections.defaultdict(list) |
+ for line in lines[start_index + 1:end_index]: |
+ try: |
+ edit_type, path, offset, length, replacement = line.split(':', 4) |
+ # Normalize the file path emitted by the clang tool to be relative to the |
+ # current working directory. |
+ path = os.path.relpath(os.path.join(build_directory, path)) |
+ edits[path].append(Edit(edit_type, int(offset), int(length), replacement)) |
+ except ValueError: |
+ print 'Unable to parse edit: %s' % line |
+ return edits |
+ |
+ |
+def _ExecuteTool(toolname, build_directory, filename): |
+ """Executes the tool. |
+ |
+ This is defined outside the class so it can be pickled for the multiprocessing |
+ module. |
+ |
+ Args: |
+ toolname: Path to the tool to execute. |
+ build_directory: Directory that contains the compile database. |
+ filename: The file to run the tool over. |
+ |
+ Returns: |
+ A dictionary that must contain the key "status" and a boolean value |
+ associated with it. |
+ |
+ If status is True, then the generated edits are stored with the key "edits" |
+ in the dictionary. |
+ |
+ Otherwise, the filename and the output from stderr are associated with the |
+ keys "filename" and "stderr" respectively. |
+ """ |
+ command = subprocess.Popen((toolname, '-p', build_directory, filename), |
+ stdout=subprocess.PIPE, |
+ stderr=subprocess.PIPE) |
+ stdout, stderr = command.communicate() |
+ if command.returncode != 0: |
+ return {'status': False, 'filename': filename, 'stderr': stderr} |
+ else: |
+ return {'status': True, |
+ 'edits': _ExtractEditsFromStdout(build_directory, stdout)} |
+ |
+ |
+class _CompilerDispatcher(object): |
+ """Multiprocessing controller for running clang tools in parallel.""" |
+ |
+ def __init__(self, toolname, build_directory, filenames): |
+ """Initializer method. |
+ |
+ Args: |
+ toolname: Path to the tool to execute. |
+ build_directory: Directory that contains the compile database. |
+ filenames: The files to run the tool over. |
+ """ |
+ self.__toolname = toolname |
+ self.__build_directory = build_directory |
+ self.__filenames = filenames |
+ self.__success_count = 0 |
+ self.__failed_count = 0 |
+ self.__edits = collections.defaultdict(list) |
+ |
+ @property |
+ def edits(self): |
+ return self.__edits |
+ |
+ def Run(self): |
+ """Does the grunt work.""" |
+ pool = multiprocessing.Pool() |
+ result_iterator = pool.imap_unordered( |
+ functools.partial(_ExecuteTool, self.__toolname, |
+ self.__build_directory), |
+ self.__filenames) |
+ for result in result_iterator: |
+ self.__ProcessResult(result) |
+ sys.stdout.write('\n') |
+ sys.stdout.flush() |
+ |
+ def __ProcessResult(self, result): |
+ """Handles result processing. |
+ |
+ Args: |
+ result: The result dictionary returned by _ExecuteTool. |
+ """ |
+ if result['status']: |
+ self.__success_count += 1 |
+ for k, v in result['edits'].iteritems(): |
+ self.__edits[k].extend(v) |
+ else: |
+ self.__failed_count += 1 |
+ sys.stdout.write('\nFailed to process %s\n' % result['filename']) |
+ sys.stdout.write(result['stderr']) |
+ sys.stdout.write('\n') |
+ percentage = ( |
+ float(self.__success_count + self.__failed_count) / |
+ len(self.__filenames)) * 100 |
+ sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % ( |
+ self.__success_count, self.__failed_count, percentage)) |
+ sys.stdout.flush() |
+ |
+ |
+def _ApplyEdits(edits): |
+ """Apply the generated edits. |
+ |
+ Args: |
+ edits: A dict mapping filenames to Edit instances that apply to that file. |
+ """ |
+ edit_count = 0 |
+ for k, v in edits.iteritems(): |
+ # Sort the edits and iterate through them in reverse order. Sorting allows |
+ # duplicate edits to be quickly skipped, while reversing means that |
+ # subsequent edits don't need to have their offsets updated with each edit |
+ # applied. |
+ v.sort() |
+ last_edit = None |
+ with open(k, 'rb+') as f: |
+ contents = bytearray(f.read()) |
+ for edit in reversed(v): |
+ if edit == last_edit: |
+ continue |
+ last_edit = edit |
+ contents[edit.offset:edit.offset + edit.length] = edit.replacement |
+ if not edit.replacement: |
+ _ExtendDeletionIfElementIsInList(contents, edit.offset) |
+ edit_count += 1 |
+ f.seek(0) |
+ f.truncate() |
+ f.write(contents) |
+ print 'Applied %d edits to %d files' % (edit_count, len(edits)) |
+ |
+ |
+_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' '))) |
+ |
+ |
+def _ExtendDeletionIfElementIsInList(contents, offset): |
+ """Extends the range of a deletion if the deleted element was part of a list. |
+ |
+ This rewriter helper makes it easy for refactoring tools to remove elements |
+ from a list. Even if a matcher callback knows that it is removing an element |
+ from a list, it may not have enough information to accurately remove the list |
+ element; for example, another matcher callback may end up removing an adjacent |
+ list element, or all the list elements may end up being removed. |
+ |
+ With this helper, refactoring tools can simply remove the list element and not |
+ worry about having to include the comma in the replacement. |
+ |
+ Args: |
+ contents: A bytearray with the deletion already applied. |
+ offset: The offset in the bytearray where the deleted range used to be. |
+ """ |
+ char_before = char_after = None |
+ left_trim_count = 0 |
+ for byte in reversed(contents[:offset]): |
+ left_trim_count += 1 |
+ if byte in _WHITESPACE_BYTES: |
+ continue |
+ if byte in (ord(','), ord(':'), ord('('), ord('{')): |
+ char_before = chr(byte) |
+ break |
+ |
+ right_trim_count = 0 |
+ for byte in contents[offset:]: |
+ right_trim_count += 1 |
+ if byte in _WHITESPACE_BYTES: |
+ continue |
+ if byte == ord(','): |
+ char_after = chr(byte) |
+ break |
+ |
+ if char_before: |
+ if char_after: |
+ del contents[offset:offset + right_trim_count] |
+ elif char_before in (',', ':'): |
+ del contents[offset - left_trim_count:offset] |
+ |
+ |
def main(argv):
  """Runs a clang tool over the repository and applies the resulting edits.

  Args:
    argv: Command-line arguments without the program name: the clang tool, the
        directory containing the compile database, then optional path filters.
        With fewer than two arguments, usage is printed and the process exits
        with status 1.
  """
  if len(argv) < 2:
    usage_lines = (
        'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...',
        ' <clang tool> is the clang tool that should be run.',
        ' <compile db> is the directory that contains the compile database',
        ' <path 1> <path2> ... can be used to filter what files are edited',
    )
    for usage_line in usage_lines:
      print(usage_line)
    sys.exit(1)

  tool_path, compile_db_dir = argv[0], argv[1]
  tracked_files = frozenset(_GetFilesFromGit(argv[2:]))

  # Only run the tool over C/C++/Obj-C/Obj-C++ implementation files.
  source_suffixes = frozenset(('.c', '.cc', '.m', '.mm'))
  translation_units = [
      candidate for candidate in tracked_files
      if os.path.splitext(candidate)[1] in source_suffixes]

  dispatcher = _CompilerDispatcher(tool_path, compile_db_dir,
                                   translation_units)
  dispatcher.Run()

  # Drop edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of
  # Chromium.
  tracked_edits = {}
  for path, file_edits in dispatcher.edits.items():
    if path in tracked_files:
      tracked_edits[path] = file_edits
  _ApplyEdits(tracked_edits)
  # TODO(dcheng): Consider clang-formatting the result to avoid egregious style
  # violations.


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))