tools/clang/scripts/apply_edits.py - Issue 2599193002: Split run_tool.py into run_tool.py, extract_edits.py and apply_edits.py

Unified Diff: tools/clang/scripts/apply_edits.py

Issue 2599193002: Split run_tool.py into run_tool.py, extract_edits.py and apply_edits.py (Closed)

Patch Set: Addressed remaining nits. Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/clang/scripts/apply_edits.py

diff --git a/tools/clang/scripts/run_tool.py b/tools/clang/scripts/apply_edits.py

similarity index 33%

copy from tools/clang/scripts/run_tool.py

copy to tools/clang/scripts/apply_edits.py

index 42b085eb1a6f63e84ce778c7d3f80b1d09c273ea..7d373a95511c29bce85925d49e19b3c7fa357cfd 100755

--- a/tools/clang/scripts/run_tool.py

+++ b/tools/clang/scripts/apply_edits.py

@@ -2,39 +2,18 @@

# Use of this source code is governed by a BSD-style license that can be

# found in the LICENSE file.

-"""Wrapper script to help run clang tools across Chromium code.

+"""Applies edits generated by a clang tool that was run on Chromium code.

-How to use this tool:

-If you want to run the tool across all Chromium code:

-run_tool.py <tool> <path/to/compiledb>

+Synopsis:

-If you want to include all files mentioned in the compilation database:

-run_tool.py <tool> <path/to/compiledb> --all

+ cat run_tool.out | extract_edits.py | apply_edits.py <build dir> <filters...>

-If you only want to run the tool across just chrome/browser and content/browser:

-run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser

+For example, to apply edits only to WTF sources:

-Please see https://chromium.googlesource.com/chromium/src/+/master/docs/clang_tool_refactoring.md for more

-information, which documents the entire automated refactoring flow in Chromium.

+ ... | apply_edits.py out/gn third_party/WebKit/Source/wtf

-Why use this tool:

-The clang tool implementation doesn't take advantage of multiple cores, and if

-it fails mysteriously in the middle, all the generated replacements will be

-lost.

-Unfortunately, if the work is simply sharded across multiple cores by running

-multiple RefactoringTools, problems arise when they attempt to rewrite a file at

-the same time. To work around that, clang tools that are run using this tool

-should output edits to stdout in the following format:

-==== BEGIN EDITS ====

-r:<file path>:<offset>:<length>:<replacement text>

-...etc...

-==== END EDITS ====

-Any generated edits are applied once the clang tool has finished running

-across Chromium, regardless of whether some instances failed or not.

+In addition to filters specified on the command line, the tool also skips edits

+that apply to files that are not covered by git.

"""

import argparse

@@ -75,17 +54,7 @@ def _GetFilesFromGit(paths=None):

return [os.path.realpath(p) for p in output.splitlines()]

-def _GetFilesFromCompileDB(build_directory):

- """ Gets the list of files mentioned in the compilation database.

- Args:

- build_directory: Directory that contains the compile database.

- """

- return [os.path.join(entry['directory'], entry['file'])

- for entry in compile_db.Read(build_directory)]

-def _ExtractEditsFromStdout(build_directory, stdout):

+def _ParseEditsFromStdin(build_directory):

"""Extracts generated list of edits from the tool's stdout.

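Edits are read from stdin, one per line, in the form
<edit type>:::<path>:::<offset>:::<length>:::<replacement>, where NUL
characters in the replacement text stand for newlines.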

@@ -98,120 +67,69 @@ def _ExtractEditsFromStdout(build_directory, stdout):

Returns:

A dictionary mapping filenames to the associated edits.

"""

- lines = stdout.splitlines()

- start_index = lines.index('==== BEGIN EDITS ====')

- end_index = lines.index('==== END EDITS ====')

+ path_to_resolved_path = {}

+ def _ResolvePath(path):

+ if path in path_to_resolved_path:

+ return path_to_resolved_path[path]

+ if not os.path.isfile(path):

+ resolved_path = os.path.realpath(os.path.join(build_directory, path))

+ else:

+ resolved_path = path

+ if not os.path.isfile(resolved_path):

+ sys.stderr.write('Edit applies to a non-existent file: %s\n' % path)

+ resolved_path = None

+ path_to_resolved_path[path] = resolved_path

+ return resolved_path

edits = collections.defaultdict(list)

- for line in lines[start_index + 1:end_index]:

+ for line in sys.stdin:

+ line = line.rstrip("\n\r")

try:

edit_type, path, offset, length, replacement = line.split(':::', 4)

replacement = replacement.replace('\0', '\n')

- # Normalize the file path emitted by the clang tool.

- path = os.path.realpath(os.path.join(build_directory, path))

+ path = _ResolvePath(path)

+ if not path: continue

edits[path].append(Edit(edit_type, int(offset), int(length), replacement))

except ValueError:

- print 'Unable to parse edit: %s' % line

+ sys.stderr.write('Unable to parse edit: %s\n' % line)

return edits

-def _ExecuteTool(toolname, tool_args, build_directory, filename):

- """Executes the tool.

- This is defined outside the class so it can be pickled for the multiprocessing

- module.

- Args:

- toolname: Path to the tool to execute.

- tool_args: Arguments to be passed to the tool. Can be None.

- build_directory: Directory that contains the compile database.

- filename: The file to run the tool over.

- Returns:

- A dictionary that must contain the key "status" and a boolean value

- associated with it.

- If status is True, then the generated edits are stored with the key "edits"

- in the dictionary.

- Otherwise, the filename and the output from stderr are associated with the

- keys "filename" and "stderr" respectively.

- """

- args = [toolname, '-p', build_directory, filename]

- if (tool_args):

- args.extend(tool_args)

- command = subprocess.Popen(

- args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

- stdout, stderr = command.communicate()

- if command.returncode != 0:

- return {'status': False, 'filename': filename, 'stderr': stderr}

- else:

- return {'status': True,

- 'edits': _ExtractEditsFromStdout(build_directory, stdout)}

-class _CompilerDispatcher(object):

- """Multiprocessing controller for running clang tools in parallel."""

- def __init__(self, toolname, tool_args, build_directory, filenames):

- """Initializer method.

- Args:

- toolname: Path to the tool to execute.

- tool_args: Arguments to be passed to the tool. Can be None.

- build_directory: Directory that contains the compile database.

- filenames: The files to run the tool over.

- """

- self.__toolname = toolname

- self.__tool_args = tool_args

- self.__build_directory = build_directory

- self.__filenames = filenames

- self.__success_count = 0

- self.__failed_count = 0

- self.__edit_count = 0

- self.__edits = collections.defaultdict(list)

- @property

- def edits(self):

- return self.__edits

- @property

- def failed_count(self):

- return self.__failed_count

- def Run(self):

- """Does the grunt work."""

- pool = multiprocessing.Pool()

- result_iterator = pool.imap_unordered(

- functools.partial(_ExecuteTool, self.__toolname, self.__tool_args,

- self.__build_directory),

- self.__filenames)

- for result in result_iterator:

- self.__ProcessResult(result)

- sys.stdout.write('\n')

- sys.stdout.flush()

- def __ProcessResult(self, result):

- """Handles result processing.

- Args:

- result: The result dictionary returned by _ExecuteTool.

- """

- if result['status']:

- self.__success_count += 1

- for k, v in result['edits'].iteritems():

- self.__edits[k].extend(v)

- self.__edit_count += len(v)

- else:

- self.__failed_count += 1

- sys.stdout.write('\nFailed to process %s\n' % result['filename'])

- sys.stdout.write(result['stderr'])

- sys.stdout.write('\n')

- percentage = (float(self.__success_count + self.__failed_count) /

- len(self.__filenames)) * 100

- sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' %

- (self.__success_count, self.__failed_count,

- self.__edit_count, percentage))

- sys.stdout.flush()

+def _ApplyEditsToSingleFile(filename, edits):

+ # Sort the edits and iterate through them in reverse order. Sorting allows

+ # duplicate edits to be quickly skipped, while reversing means that

+ # subsequent edits don't need to have their offsets updated with each edit

+ # applied.

+ edit_count = 0

+ error_count = 0

+ edits.sort()

+ last_edit = None

+ with open(filename, 'rb+') as f:

+ contents = bytearray(f.read())

+ for edit in reversed(edits):

+ if edit == last_edit:

+ continue

+ if (last_edit is not None and edit.edit_type == last_edit.edit_type and

+ edit.offset == last_edit.offset and edit.length == last_edit.length):

+ sys.stderr.write(

+ 'Conflicting edit: %s at offset %d, length %d: "%s" != "%s"\n' %

+ (filename, edit.offset, edit.length, edit.replacement,

+ last_edit.replacement))

+ error_count += 1

+ continue

+ last_edit = edit

+ contents[edit.offset:edit.offset + edit.length] = edit.replacement

+ if not edit.replacement:

+ _ExtendDeletionIfElementIsInList(contents, edit.offset)

+ edit_count += 1

+ f.seek(0)

+ f.truncate()

+ f.write(contents)

+ return (edit_count, error_count)

def _ApplyEdits(edits):

@@ -221,27 +139,19 @@ def _ApplyEdits(edits):

edits: A dict mapping filenames to Edit instances that apply to that file.

"""

edit_count = 0

+ error_count = 0

+ done_files = 0

for k, v in edits.iteritems():

- # Sort the edits and iterate through them in reverse order. Sorting allows

- # duplicate edits to be quickly skipped, while reversing means that

- # subsequent edits don't need to have their offsets updated with each edit

- # applied.

- v.sort()

- last_edit = None

- with open(k, 'rb+') as f:

- contents = bytearray(f.read())

- for edit in reversed(v):

- if edit == last_edit:

- continue

- last_edit = edit

- contents[edit.offset:edit.offset + edit.length] = edit.replacement

- if not edit.replacement:

- _ExtendDeletionIfElementIsInList(contents, edit.offset)

- edit_count += 1

- f.seek(0)

- f.truncate()

- f.write(contents)

- print 'Applied %d edits to %d files' % (edit_count, len(edits))

+ tmp_edit_count, tmp_error_count = _ApplyEditsToSingleFile(k, v)

+ edit_count += tmp_edit_count

+ error_count += tmp_error_count

+ done_files += 1

+ percentage = (float(done_files) / len(edits)) * 100

+ sys.stderr.write('Applied %d edits (%d errors) to %d files [%.2f%%]\r' %

+ (edit_count, error_count, done_files, percentage))

+ sys.stderr.write('\n')

+ return -error_count

_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))

@@ -291,54 +201,20 @@ def _ExtendDeletionIfElementIsInList(contents, offset):

def main():

parser = argparse.ArgumentParser()

- parser.add_argument('tool', help='clang tool to run')

- parser.add_argument('--all', action='store_true')

parser.add_argument(

- '--generate-compdb',

- action='store_true',

- help='regenerate the compile database before running the tool')

- parser.add_argument(

- 'compile_database',

- help='path to the directory that contains the compile database')

+ 'build_directory',

+ help='path to the build dir (dir that edit paths are relative to)')

parser.add_argument(

'path_filter',

nargs='*',

help='optional paths to filter what files the tool is run on')

- parser.add_argument(

- '--tool-args', nargs='*',

- help='optional arguments passed to the tool')

args = parser.parse_args()

- os.environ['PATH'] = '%s%s%s' % (

- os.path.abspath(os.path.join(

- os.path.dirname(__file__),

- '../../../third_party/llvm-build/Release+Asserts/bin')),

- os.pathsep,

- os.environ['PATH'])

- if args.generate_compdb:

- compile_db.GenerateWithNinja(args.compile_database)

filenames = set(_GetFilesFromGit(args.path_filter))

- if args.all:

- source_filenames = set(_GetFilesFromCompileDB(args.compile_database))

- else:

- # Filter out files that aren't C/C++/Obj-C/Obj-C++.

- extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm'))

- source_filenames = [f

- for f in filenames

- if os.path.splitext(f)[1] in extensions]

- dispatcher = _CompilerDispatcher(args.tool, args.tool_args,

- args.compile_database,

- source_filenames)

- dispatcher.Run()

- # Filter out edits to files that aren't in the git repository, since it's not

- # useful to modify files that aren't under source control--typically, these

- # are generated files or files in a git submodule that's not part of Chromium.

- _ApplyEdits({k: v

- for k, v in dispatcher.edits.iteritems()

- if os.path.realpath(k) in filenames})

- return -dispatcher.failed_count

+ edits = _ParseEditsFromStdin(args.build_directory)

+ return _ApplyEdits(

+ {k: v for k, v in edits.iteritems()

+ if os.path.realpath(k) in filenames})

if __name__ == '__main__':

« no previous file with comments | « docs/clang_tool_refactoring.md ('k') | tools/clang/scripts/extract_edits.py » ('j') | no next file with comments »