tools/clang/scripts/run_tool.py - Issue 12746010: Implement clang tool that converts std::string("") to std::string().

Side by Side Diff: tools/clang/scripts/run_tool.py

Issue 12746010: Implement clang tool that converts std::string("") to std::string(). (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Update tool and rewriter to handle initializers. Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 #!/usr/bin/env python

	2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 """Wrapper script to help run clang tools across Chromium code.

	7

	8 The clang tool implementation doesn't take advantage of multiple cores, and if

	9 it fails mysteriously in the middle, all the generated replacements will be

	10 lost.

	11

	12 Unfortunately, if the work is simply sharded across multiple cores by running

	13 multiple RefactoringTools, problems arise when they attempt to rewrite a file at

	14 the same time. To work around that, clang tools that are run using this tool

	15 should output edits to stdout in the following format:

	16 ==== BEGIN EDITS ====

	17 r:<file path>:<offset>:<length>:<replacement text>

	18 r:<file path>:<offset>:<length>:<replacement text>

	19 ...etc...

	20 ==== END EDITS ====

	21

	22 Any generated edits are applied once the clang tool has finished running

	23 across Chromium, regardless of whether some instances failed or not.
	Nico 2013/03/29 22:30:36: Ok, I can see this being useful. Maybe we should talk to klimek@ about moving these features upstream though eventually.
	dcheng 2013/03/29 22:42:55: On 2013/03/29 22:30:36, Nico wrote: > Ok, I can see this being useful. Maybe we should talk to klimek@ about moving > these features upstream though eventually. I've had this discussion with them internally already. I think it will be upstreamed eventually but it's not going to happen just yet.
	24 """

	25

	26 import collections

	27 import functools

	28 import multiprocessing

	29 import os.path

	30 import subprocess

	31 import sys

	32

	33

	34 Edit = collections.namedtuple(

	35 'Edit', ('edit_type', 'offset', 'length', 'replacement'))

	36

	37

	38 def _GetFilesFromGit(paths = None):

	39 """Gets the list of files in the git repository.

	40

	41 Args:

	42 paths: Prefix filter for the returned paths. May contain multiple entries.

	43 """

	44 args = ['git', 'ls-files']

	45 if paths:

	46 args.extend(paths)

	47 command = subprocess.Popen(args, stdout=subprocess.PIPE)

	48 output, _ = command.communicate()

	49 return output.splitlines()

	50

	51

	52 def _ExecuteTool(toolname, build_directory, filename):

	53 """Executes the tool.

	54

	55 This is defined outside the class so it can be pickled for the multiprocessing

	56 module.

	57

	58 Args:

	59 toolname: Path to the tool to execute.

	60 build_directory: Directory that contains the compile database.

	61 filename: The file to run the tool over.

	62

	63 Returns:

	64 A dictionary that must contain the key "status" and a boolean value

	65 associated with it.

	66

	67 If status is True, then the corresponding stdout is stored with the key

	68 "stdout" in the dictionary.

	69

	70 Otherwise, the filename and the output from stderr are associated with the

	71 keys "filename" and "stderr" respectively.

	72 """

	73 command = subprocess.Popen((toolname, '-p', build_directory, filename),

	74 stdout=subprocess.PIPE,

	75 stderr=subprocess.PIPE)

	76 stdout, stderr = command.communicate()

	77 if command.returncode != 0:

	78 return {'status': False, 'filename': filename, 'stderr': stderr}

	79 else:

	80 return {'status': True, 'stdout': stdout}

	81

	82

	83 class _CompilerDispatcher(object):

	84 """Multiprocessing controller for running clang tools in parallel."""

	85

	86 def __init__(self, toolname, build_directory, filenames):

	87 """Initializer method.

	88

	89 Args:

	90 toolname: Path to the tool to execute.

	91 build_directory: Directory that contains the compile database.

	92 filenames: The files to run the tool over.

	93 """

	94 self.__toolname = toolname

	95 self.__build_directory = build_directory

	96 self.__filenames = filenames

	97 self.__success_count = 0

	98 self.__failed_count = 0

	99 self.__edits = collections.defaultdict(list)

	100

	101 @property

	102 def edits(self):

	103 return self.__edits

	104

	105 def Run(self):

	106 """Does the grunt work."""

	107 pool = multiprocessing.Pool()

	108 result_iterator = pool.imap_unordered(

	109 functools.partial(_ExecuteTool, self.__toolname,

	110 self.__build_directory),

	111 self.__filenames)

	112 for result in result_iterator:

	113 self.__ProcessResult(result)

	114 sys.stdout.write('\n')

	115 sys.stdout.flush()

	116

	117 def __ProcessResult(self, result):

	118 """Handles result processing.

	119

	120 Args:

	121 result: The result dictionary returned by _ExecuteTool.

	122 """

	123 if result['status']:

	124 self.__success_count += 1

	125 self.__AddEditsFromStdout(result['stdout'])

	126 else:

	127 self.__failed_count += 1

	128 sys.stdout.write('\nFailed to process %s\n' % result['filename'])

	129 sys.stdout.write(result['stderr'])

	130 sys.stdout.write('\n')

	131 percentage = (

	132 float(self.__success_count + self.__failed_count) /

	133 len(self.__filenames)) * 100

	134 sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (

	135 self.__success_count, self.__failed_count, percentage))

	136 sys.stdout.flush()

	137

	138 def __AddEditsFromStdout(self, stdout):

	139 """Extracts and add the list of edits generated on the tool's stdout.

	140

	141 The expected format is documented at the top of this file.

	142 Args:

	143 stdout: The stdout from running the clang tool.

	144 """

	145 lines = stdout.splitlines()

	146 start_index = lines.index('==== BEGIN EDITS ====')

	147 end_index = lines.index('==== END EDITS ====')

	148 for line in lines[start_index + 1:end_index]:

	149 edit_type, path, offset, length, replacement = line.split(':', 4)

	150 # TODO(dcheng): [6:] is a horrible hack to trim off ../../ and is fragile.

	151 self.__edits[path[6:]].append(

	152 Edit(edit_type, int(offset), int(length), replacement))

	153

	154

	155 def _ApplyEdits(edits):

	156 """Apply the generated edits.

	157

	158 Args:

	159 edits: A dict mapping filenames to Edit instances that apply to that file.

	160 """

	161 edit_count = 0

	162 for k, v in edits.iteritems():

	163 # Sort the edits and iterate through them in reverse order. Sorting allows

	164 # duplicate edits to be quickly skipped, while reversing means that

	165 # subsequent edits don't need to have their offsets updated with each edit

	166 # applied.

	167 v.sort()

	168 last_edit = None

	169 with open(k, 'rb+') as f:

	170 contents = bytearray(f.read())

	171 for edit in reversed(v):

	172 if edit == last_edit:

	173 continue

	174 last_edit = edit

	175 contents[edit.offset:edit.offset + edit.length] = edit.replacement

	176 if not edit.replacement:

	177 _ExtendDeletionIfElementIsInList(contents, edit.offset)

	178 edit_count += 1

	179 f.seek(0)

	180 f.truncate()

	181 f.write(contents)

	182 print 'Applied %d edits to %d files' % (edit_count, len(edits))

	183

	184

	185 _WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))

	186

	187

	188 def _ExtendDeletionIfElementIsInList(contents, offset):

	189 """Extends the range of a deletion if the deleted element was part of a list.

	190

	191 This rewriter helper makes it eay refactoring tools to remove elements from a

	192 list. Even if a matcher callback knows that it is removing an element from a

	193 list, it may not have enough information to accurately remove the list

	194 element; for example, another matcher callback may end up removing an adjacent

	195 list element, or all the list elements may end up being removed.

	196

	197 With this helper, refactoring tools can simply remove the list element and not

	198 worry about having to include the comma in the replacement.

	199

	200 Args:

	201 contents: A bytearray with the deletion already applied.

	202 offset: The offset in the bytearray where the deleted range used to be.

	203 """

	204 may_be_first_initializer = False

	205 left_trim_count = 0

	206 for byte in reversed(contents[:offset]):

	207 left_trim_count += 1

	208 if byte in _WHITESPACE_BYTES:

	209 continue

	210 if byte == 0x2c: # Comma

	211 # A preceding comma means that this is not the first element of a list.

	212 # Extend the deletion leftwards to include the comma.

	213 del contents[offset - left_trim_count:offset]

	214 return

	215 if byte == 0x3a: # Colon

	216 # A preceding colon signals that this may be the first element of an

	217 # initializer list.

	218 may_be_first_initializer = True

	219 break

	220 # Break the loop on all other characters. This is either:

	221 # - not a list element.

	222 # - the only element left in the list.

	223 # - the first element in the list.

	224 break

	225

	226 right_trim_count = 0

	227 for byte in contents[offset:]:

	228 right_trim_count += 1

	229 if byte in _WHITESPACE_BYTES:

	230 continue

	231 if byte == 0x2c: # Comma

	232 # Removing the first element of a list, so extend the deletion rightwards

	233 # to include the trailing comma.

	234 del contents[offset:offset + right_trim_count]

	235 return

	236 if may_be_first_initializer and byte == 0x7b: # Left brace

	237 # Removing the only initializer left, so extend the deletion leftwards to

	238 # include the preceding colon.

	239 del contents[offset - left_trim_count:offset]

	240 return

	241 # Break the loop on all other characters. No special handling is required at

	242 # this point.

	243 break

	244

	245

	246 def main(argv):

	247 if len(argv) < 2:

	248 print 'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...'

	249 print ' <clang tool> is the clang tool that should be run.'

	250 print ' <compile db> is the directory that contains the compile database'

	251 print ' <path 1> <path2> ... can be used to filter what files are edited'

	252 sys.exit(1)

	253

	254 # TODO(dcheng): Assert that we're running from chromium/src.

	255 filenames = frozenset(_GetFilesFromGit(argv[2:]))

	256 # Filter out files that aren't C/C++/Obj-C/Obj-C++.

	257 extensions = frozenset(('.c', '.cc', '.m', '.mm'))

	258 dispatcher = _CompilerDispatcher(argv[0], argv[1],

	259 [f for f in filenames

	260 if os.path.splitext(f)[1] in extensions])

	261 dispatcher.Run()

	262 # Filter out edits to files that aren't in the git repository, since it's not

	263 # useful to modify files that aren't under source control--typically, these

	264 # are generated files or files in a git submodule that's not part of Chromium.

	265 _ApplyEdits({k : v for k, v in dispatcher.edits.iteritems()

	266 if k in filenames})

	267 # TODO(dcheng): Consider clang-formatting the result to avoid egregious style

	268 # violations.

	269

	270

	271 if __name__ == '__main__':

	272 sys.exit(main(sys.argv[1:]))

OLD	NEW

« no previous file with comments | « tools/clang/empty_string/Makefile ('k') | tools/clang/scripts/update.sh » ('j') | tools/clang/scripts/update.sh » ('J')