1 #!/usr/bin/env python | |
2 # Copyright 2016 the V8 project authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """ | |
7 Script to transform and merge sancov files into human readable json-format. | |
8 | |
9 The script supports two actions: | |
10 all: Writes a json file with all instrumented lines of all executables. | |
11 merge: Merges sancov files with coverage output into an existing json file. | |
12 | |
13 The json data is structured as follows: | |
14 { | |
15 "version": 1, | |
16 "tests": ["executable1", "executable2", ...], | |
17 "files": { | |
18 "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...], | |
19 "file2": [...], | |
20 ... | |
21 } | |
22 } | |
23 | |
24 The executables are sorted and determine the test bit mask. Their index+1 is | |
25 the bit, e.g. executable1 = 1, executable3 = 4, etc. Hence, a line covered by | |
26 executable1 and executable3 will have bit_mask == 5. | |
27 | |
28 The line-number-bit_mask pairs are sorted by line number and don't contain | |
29 duplicates. | |
30 | |
31 The sancov tool is expected to be in the llvm compiler-rt third-party | |
32 directory. It's not checked out by default and must be added as a custom deps: | |
33 'v8/third_party/llvm/projects/compiler-rt': | |
34 'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git' | |
35 """ | |
36 | |
37 import argparse | |
38 import json | |
39 from multiprocessing import Pool, cpu_count | |
40 import os | |
41 import re | |
42 import subprocess | |
43 import sys | |
44 | |
45 | |
# Files to exclude from coverage. Dropping their data early adds more speed.
# The contained cc files are already excluded from instrumentation, but inlined
# data is referenced through v8's object files.
EXCLUSIONS = [
  'buildtools',
  'src/third_party',
  'third_party',
  'test',
  'testing',
]

# V8 checkout directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))

# Executable location. TODO(machenbach): Only release is supported for now.
BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')

# Path prefix added by the llvm symbolizer including trailing slash. Joining
# with '' appends the platform-specific path separator.
OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')
OUTPUT_PATH_OFFSET = len(OUTPUT_PATH_PREFIX)

# The sancov tool location.
SANCOV_TOOL = os.path.join(
    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py')

# Simple script to sanitize the PCs from objdump.
SANITIZE_PCS = os.path.join(BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')

# The llvm symbolizer location.
SYMBOLIZER = os.path.join(
    BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
    'llvm-symbolizer')

# Number of cpus.
CPUS = cpu_count()

# Regexp to find sancov files as output by sancov_merger.py. Also grabs the
# executable name in group 1.
SANCOV_FILE_RE = re.compile(r'^(.*)\.result.sancov$')
87 | |
88 | |
def executables():
  """Yields absolute paths of all executable files in the build directory."""
  for entry in os.listdir(BUILD_DIR):
    candidate = os.path.join(BUILD_DIR, entry)
    # Only regular files with the executable bit set qualify.
    if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
      yield candidate
95 | |
96 | |
def process_symbolizer_output(output):
  """Post-process llvm symbolizer output.

  Excludes files outside the v8 checkout or given in exclusion list above
  from further processing. Drops the character index in each line.

  Returns: A mapping of file names to lists of lines. The file names have
           relative paths to the v8 base directory. The lists of lines
           don't contain duplicate lines and are sorted.
  """
  # Map file names to sets of instrumented line numbers. Lines outside the
  # checkout path prefix (e.g. generated files in the build dir and absolute
  # paths to c++ library headers) are dropped; the prefix itself is redundant
  # and takes too much space, so it is stripped from the kept lines.
  file_map = {}
  for raw_line in output.strip().splitlines():
    if not raw_line.startswith(OUTPUT_PATH_PREFIX):
      continue
    stripped = raw_line[OUTPUT_PATH_OFFSET:]
    # Drop character number, we only care for line numbers. Each line has the
    # form: <file name>:<line number>:<character number>.
    file_name, line_number, _ = stripped.split(':')
    file_map.setdefault(file_name, set()).add(int(line_number))

  # Remove exclusion patterns from file map. It's cheaper to do it after the
  # mapping, as there are few excluded files and we don't want to do this
  # check for numerous lines in ordinary files.
  def keep(file_name):
    return not any(file_name.startswith(pattern) for pattern in EXCLUSIONS)

  # Return in serializable form and filter.
  return {name: sorted(lines)
          for name, lines in file_map.items() if keep(name)}
134 | |
135 | |
def get_instrumented_lines(executable):
  """Return the instrumented lines of an executable.

  Called through multiprocessing pool.

  Args:
    executable: Absolute path to the binary to inspect.

  Returns: Post-processed llvm output as returned by process_symbolizer_output.
  """
  # The first two pipes are from llvm's tool sancov.py with 0x added to the hex
  # numbers. The results are piped into the llvm symbolizer, which outputs for
  # each PC: <file name with abs path>:<line number>:<character number>.
  # We don't call the sancov tool to get more speed.
  # The first grep extracts the objdump lines that call one of the
  # __sanitizer_cov entry points; the second grep isolates the leading hex PC.
  # NOTE(review): shell=True with interpolated paths is safe only because
  # executable/SANITIZE_PCS/SYMBOLIZER come from the local checkout, not from
  # untrusted input.
  process = subprocess.Popen(
      'objdump -d %s | '
      'grep \'^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ '
      '<__sanitizer_cov\(_with_check\|\)\(@plt\|\)>\' | '
      'grep \'^\s\+[0-9a-f]\+\' -o | '
      '%s | '
      '%s --obj %s -functions=none' %
      (executable, SANITIZE_PCS, SYMBOLIZER, executable),
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      stdin=subprocess.PIPE,
      cwd=BASE_DIR,
      shell=True,
  )
  # communicate() waits for the whole pipeline; stderr is captured and dropped.
  output, _ = process.communicate()
  return process_symbolizer_output(output)
163 | |
164 | |
def merge_instrumented_line_results(exe_list, results):
  """Merge multiprocessing results for all instrumented lines.

  Args:
    exe_list: List of all executable names with absolute paths.
    results: List of results as returned by get_instrumented_lines.

  Returns: Dict to be used as json data as specified on the top of this page.
           The dictionary contains all instrumented lines of all files
           referenced by all executables.
  """
  # Union the instrumented line sets per file across all results. A plain
  # loop (instead of reduce/iteritems) keeps this compatible with both
  # python 2 and 3.
  merged = {}
  for result in results:
    for file_name, lines in result.items():
      merged.setdefault(file_name, set()).update(lines)

  # Return data as file->lines mapping. The lines are saved as lists
  # with (line number, test bits (as int)). The test bits are initialized with
  # 0, meaning instrumented, but no coverage.
  # The order of the test bits is given with key 'tests'. For now, these are
  # the executable names. We use a _list_ with two items instead of a tuple to
  # ease merging by allowing mutation of the second item. List comprehensions
  # (not map) guarantee real lists, which are json-serializable on python 3.
  return {
    'version': 1,
    'tests': sorted(os.path.basename(exe) for exe in exe_list),
    'files': {file_name: [[line, 0] for line in sorted(lines)]
              for file_name, lines in merged.items()},
  }
193 | |
194 | |
def write_instrumented(options):
  """Implements the 'all' action of this tool.

  Determines all instrumented lines of all executables in the build
  directory (in parallel) and writes them to options.json_output.
  """
  exe_list = list(executables())
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_instrumented_lines, exe_list)
  finally:
    pool.close()

  # Merge multiprocessing results and prepare output data.
  # Bug fix: exe_list must be passed as the first argument; the previous call
  # merge_instrumented_line_results(results) raised a TypeError.
  data = merge_instrumented_line_results(exe_list, results)

  # Write json output.
  with open(options.json_output, 'w') as f:
    json.dump(data, f, sort_keys=True)
210 | |
211 | |
def get_covered_lines(args):
  """Return the covered lines of an executable.

  Called through multiprocessing pool. The args are expected to unpack to:
    cov_dir: Folder with sancov files merged by sancov_merger.py.
    executable: The executable that was called to produce the given coverage
                data.
    sancov_file: The merged sancov file with coverage data.

  Returns: A tuple of post-processed llvm output as returned by
           process_symbolizer_output and the executable name.
  """
  # Single-tuple argument because pool.imap_unordered passes one item per call.
  cov_dir, executable, sancov_file = args

  # Let the sancov tool print the covered PCs and pipe them through the llvm
  # symbolizer. The tool's stderr is discarded via the shell redirect.
  # NOTE(review): shell=True is safe only because all interpolated paths come
  # from the local checkout and coverage folder, not from untrusted input.
  process = subprocess.Popen(
      '%s print %s 2> /dev/null | '
      '%s --obj %s -functions=none' %
      (SANCOV_TOOL,
       os.path.join(cov_dir, sancov_file),
       SYMBOLIZER,
       os.path.join(BUILD_DIR, executable)),
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      stdin=subprocess.PIPE,
      cwd=BASE_DIR,
      shell=True,
  )
  output, _ = process.communicate()
  return process_symbolizer_output(output), executable
243 | |
244 | |
def merge_covered_line_results(data, results):
  """Merge multiprocessing results for covered lines.

  The data is mutated, the results are merged into it in place.

  Args:
    data: Existing coverage data from json file containing all instrumented
          lines.
    results: List of results as returned by get_covered_lines.

  Raises:
    AssertionError: If more than 32 tests are present or a covered line is
                    not among the instrumented lines in data.
  """

  # List of executables and mapping to the test bit mask. Bit i belongs to
  # the i-th executable in sorted order (see module docstring).
  exe_list = data['tests']
  assert len(exe_list) <= 32, 'Max 32 different tests are supported.'
  test_bit_masks = {exe: 1 << i for i, exe in enumerate(exe_list)}

  def merge_lines(old_lines, new_lines, mask, file_name):
    """Merge the coverage data of a list of lines.

    Args:
      old_lines: Lines as list of pairs with line number and test bit mask.
                 The new lines will be merged into the list in place.
      new_lines: List of new (covered) lines (sorted).
      mask: The bit to be set for covered lines. The bit index is the test
            index of the executable that covered the line.
      file_name: Name of the file the lines belong to, for assert messages.
    """
    i = 0
    # Iterate over old and new lines, both are sorted.
    for line in new_lines:
      while old_lines[i][0] < line:
        # Forward instrumented lines not present in this coverage data.
        i += 1
        assert i < len(old_lines), (
            'Covered line %d of %s not in input file.' % (line, file_name))
      assert old_lines[i][0] == line, (
          'Covered line %d of %s not in input file.' % (line, file_name))

      # Add coverage information to the line.
      old_lines[i][1] |= mask

  def merge_files(result):
    """Merge one result into data (mutated in place).

    Args:
      result: New result to be merged in. The type is as returned by
              get_covered_lines.
    """
    file_map, executable = result
    files = data['files']
    # .items() (not iteritems) keeps this compatible with python 2 and 3.
    for file_name, lines in file_map.items():
      merge_lines(files[file_name], lines, test_bit_masks[executable],
                  file_name)

  # A plain loop replaces the former reduce(); reduce is not a builtin on
  # python 3 and added nothing here since data is mutated in place.
  for result in results:
    merge_files(result)
301 | |
302 | |
def merge(options):
  """Implements the 'merge' action of this tool.

  Merges sancov coverage output from options.coverage_dir into the existing
  json data in options.json_input and writes the result to
  options.json_output.
  """

  # Check if folder with coverage output exists.
  assert (os.path.exists(options.coverage_dir) and
          os.path.isdir(options.coverage_dir))

  # Inputs for multiprocessing. List of tuples of:
  # Coverage dir, executable name, sancov file name.
  inputs = []
  for f in os.listdir(options.coverage_dir):
    match = SANCOV_FILE_RE.match(f)
    if match:
      # Group 1 is the executable name encoded in the sancov file name.
      inputs.append((options.coverage_dir, match.group(1), f))

  # Post-process covered lines in parallel.
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_covered_lines, inputs)
  finally:
    pool.close()

  # Load existing json data file for merging the results.
  with open(options.json_input, 'r') as f:
    data = json.load(f)

  # Merge multiprocessing results. Mutates data.
  merge_covered_line_results(data, results)

  # Write merged results to file.
  with open(options.json_output, 'w') as f:
    json.dump(data, f, sort_keys=True)
335 | |
336 | |
def main():
  """Parses arguments and dispatches to the 'all' or 'merge' action.

  Returns: 0 on success, 1 on usage errors.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--coverage-dir',
                      help='Path to the sancov output files.')
  parser.add_argument('--json-input',
                      help='Path to an existing json file with coverage data.')
  parser.add_argument('--json-output', required=True,
                      help='Path to a file to write json output to.')
  parser.add_argument('action',
                      help='Action to perform. One of all|merge.')

  options = parser.parse_args()
  # Hoist the repeated lower() call; parenthesized single-argument print is
  # valid on both python 2 and 3.
  action = options.action.lower()
  if action == 'all':
    write_instrumented(options)
  elif action == 'merge':
    # The 'merge' action requires both optional flags.
    if not options.coverage_dir:
      print('--coverage-dir is required')
      return 1
    if not options.json_input:
      print('--json-input is required')
      return 1
    merge(options)
  else:
    print('Unsupported action.')
    return 1
  return 0
363 | |
364 | |
# Allow running as a script; exit with main()'s status code.
if __name__ == '__main__':
  sys.exit(main())