Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: tools/clang/scripts/run_tool.py

Issue 12746010: Implement clang tool that converts std::string("") to std::string(). (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: More script cleanup Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Wrapper script to help run clang tools across Chromium code.
7
8 The clang tool implementation doesn't take advantage of multiple cores, and if
9 it fails mysteriously in the middle, all the generated replacements will be
10 lost.
11
12 Unfortunately, if the work is simply sharded across multiple cores by running
13 multiple RefactoringTools, problems arise when they attempt to rewrite a file at
14 the same time. To work around that, clang tools that are run using this tool
15 should output edits to stdout in the following format:
16 ==== BEGIN EDITS ====
17 r:<file path>:<offset>:<length>:<replacement text>
18 r:<file path>:<offset>:<length>:<replacement text>
19 ...etc...
20 ==== END EDITS ====
21
22 Any generated edits are applied once the clang tool has finished running
23 across Chromium, regardless of whether some instances failed or not.
24 """
25
26 import collections
27 import functools
28 import multiprocessing
29 import os.path
30 import subprocess
31 import sys
32
33
# One edit reported by a clang tool. |offset| and |length| select the range of
# the target file that |replacement| is substituted for. The field order
# matters: edits are sorted as plain tuples before they are applied.
Edit = collections.namedtuple(
    'Edit', ['edit_type', 'offset', 'length', 'replacement'])
36
37
def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.

  Returns:
    A list with one entry per line printed by `git ls-files`.

  Raises:
    subprocess.CalledProcessError: If git exits with a non-zero status.
  """
  args = ['git', 'ls-files']
  if paths:
    args.extend(paths)
  # check_output() raises when git fails, so a broken invocation (e.g. not
  # running inside a checkout) is reported instead of being mistaken for a
  # repository that simply contains no matching files.
  return subprocess.check_output(args).splitlines()
50
51
def _ExtractEditsFromStdout(stdout):
  """Extracts generated list of edits from the tool's stdout.

  The expected format is documented at the top of this file. Lines between the
  begin and end markers that cannot be parsed are reported and skipped. If the
  begin marker, or an end marker following it, is missing, no edits are
  returned.

  Args:
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping filenames to the associated edits.
  """
  edits = collections.defaultdict(list)
  lines = stdout.splitlines()
  try:
    start_index = lines.index('==== BEGIN EDITS ====')
    # Only accept an end marker that follows the begin marker.
    end_index = lines.index('==== END EDITS ====', start_index + 1)
  except ValueError:
    # No complete edits section in the output. Treat this as "no edits" rather
    # than raising, so that one file's output cannot abort the whole run.
    return edits
  for line in lines[start_index + 1:end_index]:
    try:
      edit_type, path, offset, length, replacement = line.split(':', 4)
      # Build the edit before touching |edits|, so that a malformed line does
      # not leave behind an empty entry for its path.
      edit = Edit(edit_type, int(offset), int(length), replacement)
    except ValueError:
      print('Unable to parse edit: %s' % line)
      continue
    # TODO(dcheng): [6:] is a horrible hack to trim off ../../ and is fragile.
    edits[path[6:]].append(edit)
  return edits
76
77
def _ExecuteTool(toolname, build_directory, filename):
  """Executes the tool.

  This is defined outside the class so it can be pickled for the multiprocessing
  module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that must contain the key "status" and a boolean value
    associated with it.

    If status is True, then the generated edits are stored with the key "edits"
    in the dictionary.

    Otherwise, the filename and the output from stderr are associated with the
    keys "filename" and "stderr" respectively. If the tool could not be
    started at all, "stderr" holds the text of the OS error instead.
  """
  try:
    command = subprocess.Popen((toolname, '-p', build_directory, filename),
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    stdout, stderr = command.communicate()
  except OSError as error:
    # Failing to launch the tool is reported like any other per-file failure,
    # so the exception does not escape and discard the edits already
    # collected from other files.
    return {'status': False, 'filename': filename, 'stderr': str(error)}
  if command.returncode != 0:
    return {'status': False, 'filename': filename, 'stderr': stderr}
  return {'status': True, 'edits': _ExtractEditsFromStdout(stdout)}
107
108
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """A dict mapping filenames to the edits collected for them so far."""
    return self.__edits

  def Run(self):
    """Runs the tool over every file in parallel and collects the results."""
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # Always reap the worker processes. On the normal path every result has
      # already been consumed, so terminate() only stops idle workers; on an
      # error it stops outstanding work instead of leaking the processes.
      pool.terminate()
      pool.join()
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Merges the edits of a successful run into |edits|, or reports the failure,
    and then refreshes the progress line on stdout.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].items():
        self.__edits[k].extend(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # The trailing \r keeps the progress report on a single, updating line.
    sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, percentage))
    sys.stdout.flush()
164
165
def _ApplyEdits(edits):
  """Rewrites files on disk according to the collected edits.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
  """
  applied = 0
  for path, file_edits in edits.items():
    # Sorting puts identical edits next to each other so that repeats can be
    # dropped cheaply. Walking the sorted list backwards applies the edit with
    # the highest offset first, so the offsets of the remaining edits stay
    # valid without any adjustment.
    file_edits.sort()
    previous = None
    with open(path, 'rb+') as source_file:
      data = bytearray(source_file.read())
      for edit in file_edits[::-1]:
        if edit == previous:
          continue
        previous = edit
        end = edit.offset + edit.length
        data[edit.offset:end] = edit.replacement
        if not edit.replacement:
          # A pure deletion may leave a dangling list separator behind.
          _ExtendDeletionIfElementIsInList(data, edit.offset)
        applied += 1
      # Overwrite the file in place with the edited contents.
      source_file.seek(0)
      source_file.truncate()
      source_file.write(data)
  print('Applied %d edits to %d files' % (applied, len(edits)))
194
195
_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Extends the range of a deletion if the deleted element was part of a list.

  This rewriter helper makes it easy for refactoring tools to remove elements
  from a list. Even if a matcher callback knows that it is removing an element
  from a list, it may not have enough information to accurately remove the list
  element; for example, another matcher callback may end up removing an adjacent
  list element, or all the list elements may end up being removed.

  With this helper, refactoring tools can simply remove the list element and not
  worry about having to include the comma in the replacement.

  The nearest non-whitespace byte on each side of the deletion decides what
  else is removed:
    - a preceding comma: the comma and the whitespace after it.
    - otherwise, a following comma: the comma and the whitespace before it.
    - otherwise, a preceding colon and a following left brace: the colon and
      the whitespace after it.
  In every other case |contents| is left untouched, as it is when |offset|
  does not lie within |contents|.

  Args:
    contents: A bytearray with the deletion already applied. Modified in place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  comma, colon, left_brace = ord(','), ord(':'), ord('{')
  size = len(contents)
  if not 0 <= offset <= size:
    return

  # Scan by index rather than slicing, so that inspecting a few neighbouring
  # bytes does not copy the rest of the file.
  left = offset - 1
  while left >= 0 and contents[left] in _WHITESPACE_BYTES:
    left -= 1
  may_be_first_initializer = False
  if left >= 0:
    if contents[left] == comma:
      # A preceding comma means that this is not the first element of a list.
      # Extend the deletion leftwards to include the comma.
      del contents[left:offset]
      return
    # A preceding colon signals that this may be the first element of an
    # initializer list. Any other byte means this is not a list element, or
    # that it is the first or only element left in the list.
    may_be_first_initializer = contents[left] == colon

  right = offset
  while right < size and contents[right] in _WHITESPACE_BYTES:
    right += 1
  if right >= size:
    return
  if contents[right] == comma:
    # Removing the first element of a list, so extend the deletion rightwards
    # to include the trailing comma.
    del contents[offset:right + 1]
  elif may_be_first_initializer and contents[right] == left_brace:
    # Removing the only initializer left, so extend the deletion leftwards to
    # include the preceding colon.
    del contents[left:offset]
255
256
def main(argv):
  """Runs a clang tool over the files in git and applies the edits it reports.

  Prints usage and exits with status 1 when fewer than two arguments are given.

  Args:
    argv: Command-line arguments without the program name: the clang tool to
        run, the directory containing the compile database, and then any
        number of paths used to filter the files that are processed.
  """
  if len(argv) < 2:
    usage = (
        'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...',
        ' <clang tool> is the clang tool that should be run.',
        ' <compile db> is the directory that contains the compile database',
        ' <path 1> <path2> ... can be used to filter what files are edited',
    )
    for usage_line in usage:
      print(usage_line)
    sys.exit(1)

  tool, build_directory = argv[0], argv[1]
  # TODO(dcheng): Assert that we're running from chromium/src.
  filenames = frozenset(_GetFilesFromGit(argv[2:]))
  # Filter out files that aren't C/C++/Obj-C/Obj-C++.
  extensions = frozenset(('.c', '.cc', '.m', '.mm'))
  sources = [f for f in filenames if os.path.splitext(f)[1] in extensions]
  dispatcher = _CompilerDispatcher(tool, build_directory, sources)
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  tracked_edits = {}
  for path, file_edits in dispatcher.edits.items():
    if path in filenames:
      tracked_edits[path] = file_edits
  _ApplyEdits(tracked_edits)
  # TODO(dcheng): Consider clang-formatting the result to avoid egregious style
  # violations.
280
281
if __name__ == '__main__':
  # Drop the program name; main() expects only the real arguments.
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698