| Index: tools/clang/scripts/run_tool.py
 | 
| diff --git a/tools/clang/scripts/run_tool.py b/tools/clang/scripts/run_tool.py
 | 
| new file mode 100755
 | 
| index 0000000000000000000000000000000000000000..56cd5d24f8be2f400190136d9705972f9fb1e6c4
 | 
| --- /dev/null
 | 
| +++ b/tools/clang/scripts/run_tool.py
 | 
| @@ -0,0 +1,339 @@
 | 
| +#!/usr/bin/env python
 | 
| +# Copyright (c) 2013 The Chromium Authors. All rights reserved.
 | 
| +# Use of this source code is governed by a BSD-style license that can be
 | 
| +# found in the LICENSE file.
 | 
| +
 | 
| +"""Wrapper script to help run clang tools across Chromium code.
 | 
| +
 | 
| +How to use this tool:
 | 
| +If you want to run the tool across all Chromium code:
 | 
| +run_tool.py <tool> <path/to/compiledb>
 | 
| +
 | 
| +If you want to include all files mentioned in the compilation database:
 | 
| +run_tool.py <tool> <path/to/compiledb> --all
 | 
| +
 | 
| +If you only want to run the tool across just chrome/browser and content/browser:
 | 
| +run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser
 | 
| +
 | 
| +Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
 | 
| +information, which documents the entire automated refactoring flow in Chromium.
 | 
| +
 | 
| +Why use this tool:
 | 
| +The clang tool implementation doesn't take advantage of multiple cores, and if
 | 
| +it fails mysteriously in the middle, all the generated replacements will be
 | 
| +lost.
 | 
| +
 | 
| +Unfortunately, if the work is simply sharded across multiple cores by running
 | 
| +multiple RefactoringTools, problems arise when they attempt to rewrite a file at
 | 
| +the same time. To work around that, clang tools that are run using this tool
 | 
| +should output edits to stdout in the following format:
 | 
| +
 | 
| +==== BEGIN EDITS ====
 | 
| +r:::<file path>:::<offset>:::<length>:::<replacement text>
 | 
| +r:::<file path>:::<offset>:::<length>:::<replacement text>
 | 
| +...etc...
 | 
| +==== END EDITS ====
 | 
| +
 | 
| +Any generated edits are applied once the clang tool has finished running
 | 
| +across Chromium, regardless of whether some instances failed or not.
 | 
| +"""
 | 
| +
 | 
| +import collections
 | 
| +import functools
 | 
| +import json
 | 
| +import multiprocessing
 | 
| +import os.path
 | 
| +import pipes
 | 
| +import subprocess
 | 
| +import sys
 | 
| +
 | 
| +
 | 
| +# One replacement parsed from a clang tool's output. `edit_type`, `offset`,
 | 
| +# `length` and `replacement` are the first, third, fourth and fifth fields of
 | 
| +# an edit line; the file path (second field) is used as the key of the dict
 | 
| +# that holds the Edit instances rather than being stored in the tuple.
 | 
| +Edit = collections.namedtuple(
 | 
| +    'Edit', ('edit_type', 'offset', 'length', 'replacement'))
 | 
| +
 | 
| +
 | 
| +def _GetFilesFromGit(paths=None):
 | 
| +  """Lists the files known to git, as canonical absolute paths.
 | 
| +
 | 
| +  Runs `git ls-files` (`git.bat` on Windows) from the current working
 | 
| +  directory.
 | 
| +
 | 
| +  Args:
 | 
| +    paths: Prefix filter for the returned paths, forwarded to `git ls-files`.
 | 
| +      May contain multiple entries.
 | 
| +
 | 
| +  Returns:
 | 
| +    A list holding the os.path.realpath() of every line git printed.
 | 
| +  """
 | 
| +  git_binary = 'git.bat' if sys.platform == 'win32' else 'git'
 | 
| +  git_command = [git_binary, 'ls-files']
 | 
| +  git_command += paths or []
 | 
| +  listing = subprocess.Popen(
 | 
| +      git_command, stdout=subprocess.PIPE).communicate()[0]
 | 
| +  return [os.path.realpath(entry) for entry in listing.splitlines()]
 | 
| +
 | 
| +
 | 
| +def _GetFilesFromCompileDB(build_directory):
 | 
| +  """Gets the list of files mentioned in the compilation database.
 | 
| +
 | 
| +  Paths are canonicalized with os.path.realpath(), matching what
 | 
| +  _GetFilesFromGit() returns. Entries in compile_commands.json may be
 | 
| +  relative to the build directory (e.g. '../../base/foo.cc'); left as-is,
 | 
| +  such a path can never compare equal to the realpath()-normalized edit paths
 | 
| +  that main() checks against this list, so its edits would be dropped.
 | 
| +
 | 
| +  Args:
 | 
| +    build_directory: Directory that contains the compile database.
 | 
| +
 | 
| +  Returns:
 | 
| +    A list of canonical absolute paths, one per entry in the database.
 | 
| +  """
 | 
| +  compiledb_path = os.path.join(build_directory, 'compile_commands.json')
 | 
| +  with open(compiledb_path, 'rb') as compiledb_file:
 | 
| +    json_commands = json.load(compiledb_file)
 | 
| +
 | 
| +  return [os.path.realpath(os.path.join(entry['directory'], entry['file']))
 | 
| +          for entry in json_commands]
 | 
| +
 | 
| +
 | 
| +def _ExtractEditsFromStdout(build_directory, stdout):
 | 
| +  """Extracts generated list of edits from the tool's stdout.
 | 
| +
 | 
| +  Only the lines between the '==== BEGIN EDITS ====' and
 | 
| +  '==== END EDITS ====' markers are considered. Each of them must hold five
 | 
| +  ':::'-separated fields:
 | 
| +
 | 
| +    <edit type>:::<file path>:::<offset>:::<length>:::<replacement text>
 | 
| +
 | 
| +  NUL bytes in the replacement text are turned into newlines. Lines that do
 | 
| +  not parse are reported on stdout and skipped.
 | 
| +
 | 
| +  Args:
 | 
| +    build_directory: Directory that contains the compile database. Used to
 | 
| +      normalize the filenames.
 | 
| +    stdout: The stdout from running the clang tool.
 | 
| +
 | 
| +  Returns:
 | 
| +    A dictionary mapping filenames to the associated edits. It is empty when
 | 
| +    stdout does not contain both markers.
 | 
| +  """
 | 
| +  edits = collections.defaultdict(list)
 | 
| +  lines = stdout.splitlines()
 | 
| +  try:
 | 
| +    start_index = lines.index('==== BEGIN EDITS ====')
 | 
| +    end_index = lines.index('==== END EDITS ====')
 | 
| +  except ValueError:
 | 
| +    # list.index() raises when a marker is missing. Letting that escape would
 | 
| +    # propagate out of the worker process and abort the whole run, discarding
 | 
| +    # the edits already collected from other files.
 | 
| +    print 'Unable to find edit markers in tool output'
 | 
| +    return edits
 | 
| +
 | 
| +  for line in lines[start_index + 1:end_index]:
 | 
| +    # Keep the try body limited to the statements that can raise ValueError:
 | 
| +    # the tuple unpacking (too few fields) and the int() conversions.
 | 
| +    try:
 | 
| +      edit_type, path, offset, length, replacement = line.split(':::', 4)
 | 
| +      offset = int(offset)
 | 
| +      length = int(length)
 | 
| +    except ValueError:
 | 
| +      print 'Unable to parse edit: %s' % line
 | 
| +      continue
 | 
| +    replacement = replacement.replace('\0', '\n')
 | 
| +    # Normalize the file path emitted by the clang tool.
 | 
| +    path = os.path.realpath(os.path.join(build_directory, path))
 | 
| +    edits[path].append(Edit(edit_type, offset, length, replacement))
 | 
| +  return edits
 | 
| +
 | 
| +
 | 
| +def _ExecuteTool(toolname, build_directory, filename):
 | 
| +  """Runs the clang tool over a single file and packages the outcome.
 | 
| +
 | 
| +  Kept at module level (rather than as a method) so that it can be pickled
 | 
| +  for the multiprocessing module.
 | 
| +
 | 
| +  Args:
 | 
| +    toolname: Path to the tool to execute.
 | 
| +    build_directory: Directory that contains the compile database.
 | 
| +    filename: The file to run the tool over.
 | 
| +
 | 
| +  Returns:
 | 
| +    A dictionary that always has a boolean under the key "status".
 | 
| +
 | 
| +    When the tool exits with code 0, status is True and the edits parsed from
 | 
| +    its stdout are stored under the key "edits".
 | 
| +
 | 
| +    Otherwise status is False, and the filename and the captured stderr are
 | 
| +    stored under the keys "filename" and "stderr" respectively.
 | 
| +  """
 | 
| +  invocation = (toolname, '-p', build_directory, filename)
 | 
| +  tool_process = subprocess.Popen(
 | 
| +      invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 | 
| +  tool_stdout, tool_stderr = tool_process.communicate()
 | 
| +
 | 
| +  if tool_process.returncode == 0:
 | 
| +    parsed_edits = _ExtractEditsFromStdout(build_directory, tool_stdout)
 | 
| +    return {'status': True, 'edits': parsed_edits}
 | 
| +  return {'status': False, 'filename': filename, 'stderr': tool_stderr}
 | 
| +
 | 
| +
 | 
| +class _CompilerDispatcher(object):
 | 
| +  """Multiprocessing controller for running clang tools in parallel.
 | 
| +
 | 
| +  Fans the files out to a pool of worker processes, merges the edits each
 | 
| +  worker reports and keeps a running progress line on stdout.
 | 
| +  """
 | 
| +
 | 
| +  def __init__(self, toolname, build_directory, filenames):
 | 
| +    """Initializer method.
 | 
| +
 | 
| +    Args:
 | 
| +      toolname: Path to the tool to execute.
 | 
| +      build_directory: Directory that contains the compile database.
 | 
| +      filenames: The files to run the tool over. Must support len(), which
 | 
| +        is used to compute the progress percentage.
 | 
| +    """
 | 
| +    self.__toolname = toolname
 | 
| +    self.__build_directory = build_directory
 | 
| +    self.__filenames = filenames
 | 
| +    self.__success_count = 0
 | 
| +    self.__failed_count = 0
 | 
| +    self.__edit_count = 0
 | 
| +    self.__edits = collections.defaultdict(list)
 | 
| +
 | 
| +  @property
 | 
| +  def edits(self):
 | 
| +    """Dict mapping each filename to the list of edits collected for it."""
 | 
| +    return self.__edits
 | 
| +
 | 
| +  @property
 | 
| +  def failed_count(self):
 | 
| +    """Number of files for which the tool reported a failure."""
 | 
| +    return self.__failed_count
 | 
| +
 | 
| +  def Run(self):
 | 
| +    """Runs the tool over every file in parallel and collects the results."""
 | 
| +    pool = multiprocessing.Pool()
 | 
| +    try:
 | 
| +      result_iterator = pool.imap_unordered(
 | 
| +          functools.partial(_ExecuteTool, self.__toolname,
 | 
| +                            self.__build_directory),
 | 
| +          self.__filenames)
 | 
| +      for result in result_iterator:
 | 
| +        self.__ProcessResult(result)
 | 
| +    finally:
 | 
| +      # Always reap the worker processes, including when a worker raised or
 | 
| +      # the run was interrupted. At this point every result has either been
 | 
| +      # consumed or is no longer wanted, so terminate() discards nothing
 | 
| +      # useful.
 | 
| +      pool.terminate()
 | 
| +      pool.join()
 | 
| +    # Move past the progress line, which is rewritten in place using '\r'.
 | 
| +    sys.stdout.write('\n')
 | 
| +    sys.stdout.flush()
 | 
| +
 | 
| +  def __ProcessResult(self, result):
 | 
| +    """Handles result processing.
 | 
| +
 | 
| +    Updates the counters, merges any edits into the accumulated dict, reports
 | 
| +    failures and refreshes the progress line.
 | 
| +
 | 
| +    Args:
 | 
| +      result: The result dictionary returned by _ExecuteTool.
 | 
| +    """
 | 
| +    if result['status']:
 | 
| +      self.__success_count += 1
 | 
| +      for k, v in result['edits'].iteritems():
 | 
| +        self.__edits[k].extend(v)
 | 
| +        self.__edit_count += len(v)
 | 
| +    else:
 | 
| +      self.__failed_count += 1
 | 
| +      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
 | 
| +      sys.stdout.write(result['stderr'])
 | 
| +      sys.stdout.write('\n')
 | 
| +    percentage = (
 | 
| +        float(self.__success_count + self.__failed_count) /
 | 
| +        len(self.__filenames)) * 100
 | 
| +    # The trailing '\r' lets the next update overwrite this line.
 | 
| +    sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' % (
 | 
| +        self.__success_count, self.__failed_count, self.__edit_count,
 | 
| +        percentage))
 | 
| +    sys.stdout.flush()
 | 
| +
 | 
| +
 | 
| +def _ApplyEdits(edits, clang_format_diff_path):
 | 
| +  """Apply the generated edits.
 | 
| +
 | 
| +  Each file is read in full, patched in memory and rewritten in place. The
 | 
| +  per-file edit lists in `edits` are sorted in place as a side effect. A
 | 
| +  summary of the number of applied edits is printed at the end.
 | 
| +
 | 
| +  Args:
 | 
| +    edits: A dict mapping filenames to Edit instances that apply to that file.
 | 
| +    clang_format_diff_path: Path to the clang-format-diff.py helper to help
 | 
| +      automatically reformat diffs to avoid style violations. Pass None if the
 | 
| +      clang-format step should be skipped.
 | 
| +  """
 | 
| +  edit_count = 0
 | 
| +  for k, v in edits.iteritems():
 | 
| +    # Sort the edits and iterate through them in reverse order. Sorting allows
 | 
| +    # duplicate edits to be quickly skipped, while reversing means that
 | 
| +    # subsequent edits don't need to have their offsets updated with each edit
 | 
| +    # applied.
 | 
| +    v.sort()
 | 
| +    last_edit = None
 | 
| +    with open(k, 'rb+') as f:
 | 
| +      contents = bytearray(f.read())
 | 
| +      for edit in reversed(v):
 | 
| +        if edit == last_edit:
 | 
| +          continue
 | 
| +        last_edit = edit
 | 
| +        contents[edit.offset:edit.offset + edit.length] = edit.replacement
 | 
| +        # A pure deletion may leave a dangling separator behind; let the
 | 
| +        # helper widen the deleted range if the element was part of a list.
 | 
| +        if not edit.replacement:
 | 
| +          _ExtendDeletionIfElementIsInList(contents, edit.offset)
 | 
| +        edit_count += 1
 | 
| +      # Rewrite the file from the start with the patched contents.
 | 
| +      f.seek(0)
 | 
| +      f.truncate()
 | 
| +      f.write(contents)
 | 
| +    if clang_format_diff_path:
 | 
| +      # TODO(dcheng): python3.3 exposes this publicly as shlex.quote, but Chrome
 | 
| +      # uses python2.7. Use the deprecated interface until Chrome uses a newer
 | 
| +      # Python.
 | 
| +      # Both paths are quoted because the command goes through the shell: a
 | 
| +      # checkout path containing spaces or shell metacharacters would
 | 
| +      # otherwise break the invocation of clang-format-diff.py.
 | 
| +      if subprocess.call('git diff -U0 %s | python %s -i -p1 -style=file ' % (
 | 
| +          pipes.quote(k), pipes.quote(clang_format_diff_path)),
 | 
| +                         shell=True) != 0:
 | 
| +        print 'clang-format failed for %s' % k
 | 
| +  print 'Applied %d edits to %d files' % (edit_count, len(edits))
 | 
| +
 | 
| +
 | 
| +# Byte values (tab, newline, carriage return, space) that are skipped as
 | 
| +# whitespace when scanning the bytes on either side of a deletion.
 | 
| +_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))
 | 
| +
 | 
| +
 | 
| +def _ExtendDeletionIfElementIsInList(contents, offset):
 | 
| +  """Extends the range of a deletion if the deleted element was part of a list.
 | 
| +
 | 
| +  This rewriter helper makes it easy for refactoring tools to remove elements
 | 
| +  from a list. Even if a matcher callback knows that it is removing an element
 | 
| +  from a list, it may not have enough information to accurately remove the list
 | 
| +  element; for example, another matcher callback may end up removing an adjacent
 | 
| +  list element, or all the list elements may end up being removed.
 | 
| +
 | 
| +  With this helper, refactoring tools can simply remove the list element and not
 | 
| +  worry about having to include the comma in the replacement.
 | 
| +
 | 
| +  The bytearray is modified in place and nothing is returned. The first
 | 
| +  non-whitespace byte on each side of the deletion decides what happens:
 | 
| +    - Nothing is removed unless the byte before is one of ',', ':', '(', '{'.
 | 
| +    - If the byte after is ',', the whitespace after the deletion and that
 | 
| +      comma are removed.
 | 
| +    - Otherwise, if the byte before is ',' or ':', that byte and the
 | 
| +      whitespace following it are removed.
 | 
| +
 | 
| +  Args:
 | 
| +    contents: A bytearray with the deletion already applied.
 | 
| +    offset: The offset in the bytearray where the deleted range used to be.
 | 
| +  """
 | 
| +  char_before = char_after = None
 | 
| +  # Walk backwards from the deletion point. left_trim_count counts every byte
 | 
| +  # visited, i.e. the skipped whitespace plus the first non-whitespace byte.
 | 
| +  left_trim_count = 0
 | 
| +  for byte in reversed(contents[:offset]):
 | 
| +    left_trim_count += 1
 | 
| +    if byte in _WHITESPACE_BYTES:
 | 
| +      continue
 | 
| +    if byte in (ord(','), ord(':'), ord('('), ord('{')):
 | 
| +      char_before = chr(byte)
 | 
| +    # Stop at the first non-whitespace byte whether or not it matched.
 | 
| +    break
 | 
| +
 | 
| +  # Same scan going forwards; only a comma is of interest on this side.
 | 
| +  right_trim_count = 0
 | 
| +  for byte in contents[offset:]:
 | 
| +    right_trim_count += 1
 | 
| +    if byte in _WHITESPACE_BYTES:
 | 
| +      continue
 | 
| +    if byte == ord(','):
 | 
| +      char_after = chr(byte)
 | 
| +    break
 | 
| +
 | 
| +  if char_before:
 | 
| +    if char_after:
 | 
| +      # A comma follows: drop the whitespace after the deletion and the comma.
 | 
| +      del contents[offset:offset + right_trim_count]
 | 
| +    elif char_before in (',', ':'):
 | 
| +      # No comma follows: drop the preceding ',' or ':' together with the
 | 
| +      # whitespace between it and the deletion. A preceding '(' or '{' is
 | 
| +      # left untouched.
 | 
| +      del contents[offset - left_trim_count:offset]
 | 
| +
 | 
| +
 | 
| +def main(argv):
 | 
| +  """Runs a clang tool over the selected files and applies its edits.
 | 
| +
 | 
| +  Args:
 | 
| +    argv: Command line arguments without the script name: the clang tool, the
 | 
| +      directory containing the compile database, then either '--all' or any
 | 
| +      number of path filters.
 | 
| +
 | 
| +  Returns:
 | 
| +    1 if too few arguments were given, 2 if the tool failed on at least one
 | 
| +    file, and 0 otherwise.
 | 
| +  """
 | 
| +  if len(argv) < 2:
 | 
| +    print 'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...'
 | 
| +    print '  <clang tool> is the clang tool that should be run.'
 | 
| +    print '  <compile db> is the directory that contains the compile database'
 | 
| +    print '  <path 1> <path2> ... can be used to filter what files are edited'
 | 
| +    return 1
 | 
| +
 | 
| +  toolname, build_directory = argv[0], argv[1]
 | 
| +  path_filters = argv[2:]
 | 
| +
 | 
| +  script_directory = os.path.dirname(os.path.realpath(__file__))
 | 
| +  clang_format_diff_path = os.path.join(
 | 
| +      script_directory,
 | 
| +      '../../../third_party/llvm/tools/clang/tools/clang-format',
 | 
| +      'clang-format-diff.py')
 | 
| +  # TODO(dcheng): Allow this to be controlled with a flag as well.
 | 
| +  # TODO(dcheng): Shell escaping of args to git diff to clang-format is broken
 | 
| +  # on Windows.
 | 
| +  if sys.platform == 'win32' or not os.path.isfile(clang_format_diff_path):
 | 
| +    clang_format_diff_path = None
 | 
| +
 | 
| +  if list(path_filters) == ['--all']:
 | 
| +    # Every file in the compile database is both processed and editable.
 | 
| +    filenames = set(_GetFilesFromCompileDB(build_directory))
 | 
| +    source_filenames = filenames
 | 
| +  else:
 | 
| +    filenames = set(_GetFilesFromGit(path_filters))
 | 
| +    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
 | 
| +    extensions = frozenset(('.c', '.cc', '.m', '.mm'))
 | 
| +    source_filenames = []
 | 
| +    for candidate in filenames:
 | 
| +      if os.path.splitext(candidate)[1] in extensions:
 | 
| +        source_filenames.append(candidate)
 | 
| +
 | 
| +  dispatcher = _CompilerDispatcher(toolname, build_directory,
 | 
| +                                   source_filenames)
 | 
| +  dispatcher.Run()
 | 
| +
 | 
| +  # Filter out edits to files that aren't in the git repository, since it's not
 | 
| +  # useful to modify files that aren't under source control--typically, these
 | 
| +  # are generated files or files in a git submodule that's not part of Chromium.
 | 
| +  applicable_edits = {}
 | 
| +  for path, file_edits in dispatcher.edits.iteritems():
 | 
| +    if os.path.realpath(path) in filenames:
 | 
| +      applicable_edits[path] = file_edits
 | 
| +  _ApplyEdits(applicable_edits, clang_format_diff_path)
 | 
| +
 | 
| +  return 2 if dispatcher.failed_count != 0 else 0
 | 
| +
 | 
| +
 | 
| +# Script entry point: pass the arguments without the script name to main()
 | 
| +# and use its return value as the process exit status.
 | 
| +if __name__ == '__main__':
 | 
| +  sys.exit(main(sys.argv[1:]))
 | 
| 
 |