| Index: tools/clang/scripts/run_tool.py
|
| diff --git a/tools/clang/scripts/run_tool.py b/tools/clang/scripts/run_tool.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..ee5f3a5b272809e679b6acadbcca334517541940
|
| --- /dev/null
|
| +++ b/tools/clang/scripts/run_tool.py
|
| @@ -0,0 +1,283 @@
|
| +#!/usr/bin/env python
|
| +# Copyright (c) 2013 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""Wrapper script to help run clang tools across Chromium code.
|
| +
|
| +The clang tool implementation doesn't take advantage of multiple cores, and if
|
| +it fails mysteriously in the middle, all the generated replacements will be
|
| +lost.
|
| +
|
| +Unfortunately, if the work is simply sharded across multiple cores by running
|
| +multiple RefactoringTools, problems arise when they attempt to rewrite a file at
|
| +the same time. To work around that, clang tools that are run using this tool
|
| +should output edits to stdout in the following format:
|
| +==== BEGIN EDITS ====
|
| +r:<file path>:<offset>:<length>:<replacement text>
|
| +r:<file path>:<offset>:<length>:<replacement text>
|
| +...etc...
|
| +==== END EDITS ====
|
| +
|
| +Any generated edits are applied once the clang tool has finished running
|
| +across Chromium, regardless of whether some instances failed or not.
|
| +"""
|
| +
|
| +import collections
|
| +import functools
|
| +import multiprocessing
|
| +import os.path
|
| +import subprocess
|
| +import sys
|
| +
|
| +
|
# One replacement reported by a clang tool. The fields mirror the
# "r:<file path>:<offset>:<length>:<replacement text>" line format described
# in the module docstring, minus the file path: edits are stored in dicts that
# are keyed by path instead.
Edit = collections.namedtuple(
    'Edit', 'edit_type offset length replacement')
|
| +
|
| +
|
def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Runs `git ls-files` in the current working directory.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.
        When empty or None, every tracked file is listed.

  Returns:
    A list with one entry per line of `git ls-files` output.

  Raises:
    subprocess.CalledProcessError: if git exits with a non-zero status (for
        example when not run from inside a git checkout). Previously the exit
        status was ignored and an empty/partial list was returned silently.
    OSError: if the git executable cannot be launched.
  """
  args = ['git', 'ls-files']
  if paths:
    args.extend(paths)
  # check_output raises on failure instead of letting the caller continue with
  # a bogus (usually empty) file list.
  return subprocess.check_output(args).splitlines()
|
| +
|
| +
|
def _ExtractEditsFromStdout(stdout):
  """Extracts generated list of edits from the tool's stdout.

  The expected format is documented at the top of this file: edit lines of the
  form "r:<file path>:<offset>:<length>:<replacement text>" enclosed between a
  "==== BEGIN EDITS ====" line and a "==== END EDITS ====" line.

  Args:
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping filenames to the associated edits (a list of Edit
    tuples per file). The dictionary is empty when the output has no complete
    BEGIN/END section. Lines inside the section that cannot be parsed are
    reported on stdout and skipped.
  """
  edits = collections.defaultdict(list)
  lines = stdout.splitlines()
  try:
    start_index = lines.index('==== BEGIN EDITS ====')
    end_index = lines.index('==== END EDITS ====', start_index + 1)
  except ValueError:
    # No (complete) edit section. Treat this as "no edits" instead of letting
    # the ValueError escape and abort the caller.
    return edits

  for line in lines[start_index + 1:end_index]:
    try:
      edit_type, path, offset, length, replacement = line.split(':', 4)
      edit = Edit(edit_type, int(offset), int(length), replacement)
    except ValueError:
      # Too few fields, or a non-numeric offset/length.
      print('Unable to parse edit: %s' % line)
      continue
    # Only touch the dictionary once the edit parsed successfully, so that a
    # malformed line does not leave behind an empty entry for its file.
    # TODO(dcheng): [6:] is a horrible hack to trim off ../../ and is fragile.
    edits[path[6:]].append(edit)
  return edits
|
| +
|
| +
|
def _ExecuteTool(toolname, build_directory, filename):
  """Runs the clang tool over a single file and reports the outcome.

  Kept as a module-level function (rather than a method) so that the
  multiprocessing module can pickle it.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database; passed to
        the tool via -p.
    filename: The file to run the tool over.

  Returns:
    A dictionary that always has a boolean "status" entry.

    When the tool exits with status 0, "status" is True and "edits" holds the
    edits parsed from the tool's stdout.

    Otherwise "status" is False, and "filename" and "stderr" hold the file
    that was being processed and the tool's captured stderr.
  """
  argv = [toolname, '-p', build_directory, filename]
  process = subprocess.Popen(
      argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  tool_stdout, tool_stderr = process.communicate()
  if process.returncode == 0:
    return {'status': True, 'edits': _ExtractEditsFromStdout(tool_stdout)}
  return {'status': False, 'filename': filename, 'stderr': tool_stderr}
|
| +
|
| +
|
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel.

  Fans _ExecuteTool out over a multiprocessing.Pool, merges the edits returned
  by each invocation and keeps a running success/failure tally that is printed
  as a progress line.
  """

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over. Must support len().
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """Dictionary mapping filenames to the edits collected so far."""
    return self.__edits

  def Run(self):
    """Runs the tool over every file and collects the results.

    Results are processed in completion order. The worker pool is always shut
    down before returning: closed after a normal run, terminated if anything
    (including KeyboardInterrupt) interrupts the loop, and joined either way so
    that no worker processes are left behind.
    """
    worker = functools.partial(_ExecuteTool, self.__toolname,
                               self.__build_directory)
    pool = multiprocessing.Pool()
    try:
      for result in pool.imap_unordered(worker, self.__filenames):
        self.__ProcessResult(result)
      pool.close()
    except BaseException:
      # Do not wait for outstanding work when bailing out.
      pool.terminate()
      raise
    finally:
      pool.join()
    # Move past the progress line, which ends in '\r' rather than '\n'.
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Merges the edits of a successful run into self.edits, or prints the
    filename and captured stderr of a failed one, then refreshes the progress
    line.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for filename, file_edits in result['edits'].items():
        self.__edits[filename].extend(file_edits)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # The trailing '\r' makes the next progress update overwrite this one.
    sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, percentage))
    sys.stdout.flush()
|
| +
|
| +
|
def _ApplyEdits(edits):
  """Apply the generated edits.

  Each file is read fully, patched in memory and written back in place.
  Identical edits for the same file are applied only once. The caller's edit
  lists are left untouched.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
        Replacement text must be acceptable for bytearray slice assignment
        (i.e. a byte string).
  """
  edit_count = 0
  for filename, file_edits in edits.items():
    # set() drops duplicate edits; sorting in descending order means edits
    # nearer the end of the file are applied first, so the offsets of the
    # edits that are still pending never need to be adjusted.
    unique_edits = sorted(set(file_edits), reverse=True)
    with open(filename, 'rb+') as f:
      contents = bytearray(f.read())
      for edit in unique_edits:
        contents[edit.offset:edit.offset + edit.length] = edit.replacement
        if not edit.replacement:
          # A pure deletion may leave a dangling list separator behind.
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      # Rewrite the file from the start; truncate first in case it shrank.
      f.seek(0)
      f.truncate()
      f.write(contents)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))
|
| +
|
| +
|
# Byte values (tab, newline, carriage return, space) that are skipped when
# looking for the separator next to a deleted list element.
_WHITESPACE_BYTES = frozenset(bytearray(b'\t\n\r '))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Widens a deletion so that no dangling list separator is left behind.

  Refactoring tools can delete just a list element and rely on this helper to
  clean up the neighbouring punctuation. A single matcher callback often
  cannot do that itself: another callback may delete an adjacent element, or
  every element of the list may end up being removed.

  Looking outwards from the deletion point and skipping whitespace:
    - a comma on the left is removed together with the whitespace after it;
    - otherwise a comma on the right is removed together with the whitespace
      before it;
    - otherwise, if a colon is on the left and a left brace is on the right
      (the last initializer of an initializer list was removed), the colon and
      the whitespace after it are removed.
  In every other case the buffer is left as is.

  Args:
    contents: A bytearray with the deletion already applied. Modified in
        place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  comma, colon, left_brace = bytearray(b',:{')

  # Walk backwards from the deletion point to the first non-whitespace byte.
  preceded_by_colon = False
  left_trim_count = 0
  for left_trim_count, byte in enumerate(reversed(contents[:offset]), 1):
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == comma:
      # Not the first element of the list: swallow the preceding comma.
      del contents[offset - left_trim_count:offset]
      return
    # A colon hints at the first entry of an initializer list; whether it has
    # to go depends on what follows. Any other byte means there is nothing to
    # trim on this side.
    preceded_by_colon = (byte == colon)
    break

  # Walk forwards from the deletion point to the first non-whitespace byte.
  for right_trim_count, byte in enumerate(contents[offset:], 1):
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == comma:
      # The first element of a list was removed: swallow the trailing comma.
      del contents[offset:offset + right_trim_count]
    elif preceded_by_colon and byte == left_brace:
      # The only remaining initializer was removed: swallow the colon too.
      del contents[offset - left_trim_count:offset]
    # Only the first non-whitespace byte is ever inspected.
    return
|
| +
|
| +
|
def main(argv):
  """Runs a clang tool over the repository's sources and applies its edits.

  Args:
    argv: Command-line arguments without the program name:
        argv[0] is the clang tool to run, argv[1] is the directory containing
        the compile database, and any further entries are path filters handed
        to `git ls-files`.

  Exits with status 1 after printing usage when fewer than two arguments are
  given.
  """
  if len(argv) < 2:
    print('Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...')
    print(' <clang tool> is the clang tool that should be run.')
    print(' <compile db> is the directory that contains the compile database')
    print(' <path 1> <path2> ... can be used to filter what files are edited')
    sys.exit(1)

  # TODO(dcheng): Assert that we're running from chromium/src.
  filenames = frozenset(_GetFilesFromGit(argv[2:]))
  # Filter out files that aren't C/C++/Obj-C/Obj-C++.
  extensions = frozenset(('.c', '.cc', '.m', '.mm'))
  source_files = [f for f in filenames
                  if os.path.splitext(f)[1] in extensions]
  dispatcher = _CompilerDispatcher(argv[0], argv[1], source_files)
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k: v for k, v in dispatcher.edits.items()
               if k in filenames})
  # TODO(dcheng): Consider clang-formatting the result to avoid egregious style
  # violations.
|
| +
|
| +
|
# Script entry point: hand everything after the program name to main() and use
# its return value as the process exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)
|
|
|