Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(72)

Side by Side Diff: tools/sanitizers/sancov_formatter.py

Issue 1737263003: [coverage] Enable sanitizer coverage. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Review kjellander Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright 2016 the V8 project authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """
7 Script to transform and merge sancov files into human readable json-format.
8
9 The script supports two actions:
10 all: Writes a json file with all instrumented lines of all executables.
11 merge: Merges sancov files with coverage output into an existing json file.
12
13 The json data is structured as follows:
14 {
15 "version": 1,
16 "tests": ["executable1", "executable2", ...],
17 "files": {
18 "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...],
19 "file2": [...],
20 ...
21 }
22 }
23
24 The executables are sorted and determine the test bit mask. Their index+1 is
25 the bit, e.g. executable1 = 1, executable3 = 4, etc. Hence, a line covered by
26 executable1 and executable3 will have bit_mask == 5.
27
28 The line-number-bit_mask pairs are sorted by line number and don't contain
29 duplicates.
30
31 The sancov tool is expected to be in the llvm compiler-rt third-party
32 directory. It's not checked out by default and must be added as a custom deps:
33 'v8/third_party/llvm/projects/compiler-rt':
34 'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
35 """
36
37 import argparse
38 import json
39 import os
40 import re
41 import subprocess
42 import sys
43
44 from multiprocessing import Pool, cpu_count
45
46
# Files to exclude from coverage. Dropping their data early adds more speed.
# The contained cc files are already excluded from instrumentation, but inlined
# data is referenced through v8's object files.
# NOTE: These are matched as path prefixes of file names relative to BASE_DIR.
EXCLUSIONS = [
  'buildtools',
  'src/third_party',
  'third_party',
  'test',
  'testing',
]

# V8 checkout directory (three levels up from this script's location).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))

# Executable location. TODO(machenbach): Only release is supported for now.
BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')

# Path prefix added by the llvm symbolizer including trailing slash.
OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')
OUTPUT_PATH_OFFSET = len(OUTPUT_PATH_PREFIX)

# The sancov tool location.
SANCOV_TOOL = os.path.join(
    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py')

# Simple script to sanitize the PCs from objdump.
SANITIZE_PCS = os.path.join(BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')

# The llvm symbolizer location.
SYMBOLIZER = os.path.join(
    BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
    'llvm-symbolizer')

# Number of cpus. Used to size the multiprocessing pools below.
CPUS = cpu_count()

# Regexp to find sancov files as output by sancov_merger.py. Also grabs the
# executable name in group 1.
SANCOV_FILE_RE = re.compile(r'^(.*)\.result.sancov$')
88
89
def executables():
  """Yield absolute paths of all executable files in the build directory."""
  for entry in os.listdir(BUILD_DIR):
    candidate = os.path.join(BUILD_DIR, entry)
    # Skip directories and anything without the executable bit set.
    if not os.path.isfile(candidate):
      continue
    if os.access(candidate, os.X_OK):
      yield candidate
96
97
def process_symbolizer_output(output):
  """Post-process llvm symbolizer output.

  Excludes files outside the v8 checkout or given in exclusion list above
  from further processing. Drops the character index in each line.

  Args:
    output: Raw llvm symbolizer output, one
        <file name>:<line number>:<character number> entry per line.

  Returns: A mapping of file names to lists of line numbers. The file names
           have relative paths to the v8 base directory. The lists of line
           numbers don't contain duplicates and are sorted.
  """
  # Drop path offset when iterating lines. The path is redundant and takes
  # too much space. Drop files outside that path, e.g. generated files in
  # the build dir and absolute paths to c++ library headers.
  def iter_lines():
    for line in output.strip().splitlines():
      if line.startswith(OUTPUT_PATH_PREFIX):
        yield line[len(OUTPUT_PATH_PREFIX):]

  # Map file names to sets of instrumented line numbers.
  file_map = {}
  for line in iter_lines():
    # Drop character number, we only care for line numbers. Each line has the
    # form: <file name>:<line number>:<character number>. Split from the
    # right so a stray colon inside the file name can't break parsing.
    file_name, number, _ = line.rsplit(':', 2)
    file_map.setdefault(file_name, set()).add(int(number))

  # Remove exclusion patterns from file map. It's cheaper to do it after the
  # mapping, as there are few excluded files and we don't want to do this
  # check for numerous lines in ordinary files.
  def keep(file_name):
    for e in EXCLUSIONS:
      if file_name.startswith(e):
        return False
    return True

  # Return in serializable form and filter.
  return {k: sorted(file_map[k]) for k in file_map if keep(k)}
135
136
def get_instrumented_lines(executable):
  """Return the instrumented lines of an executable.

  Called through multiprocessing pool.

  Returns: Post-processed llvm output as returned by process_symbolizer_output.
  """
  # The first two pipes are from llvm's tool sancov.py with 0x added to the hex
  # numbers. The results are piped into the llvm symbolizer, which outputs for
  # each PC: <file name with abs path>:<line number>:<character number>.
  # We don't call the sancov tool to get more speed.
  cmd = (
      'objdump -d %s | '
      'grep \'^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ '
      '<__sanitizer_cov\(_with_check\|\)\(@plt\|\)>\' | '
      'grep \'^\s\+[0-9a-f]\+\' -o | '
      '%s | '
      '%s --obj %s -functions=none' %
      (executable, SANITIZE_PCS, SYMBOLIZER, executable))
  proc = subprocess.Popen(
      cmd,
      shell=True,
      cwd=BASE_DIR,
      stdin=subprocess.PIPE,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
  )
  symbolizer_output, _ = proc.communicate()
  return process_symbolizer_output(symbolizer_output)
164
165
def merge_instrumented_line_results(exe_list, results):
  """Merge multiprocessing results for all instrumented lines.

  Args:
    exe_list: List of all executable names with absolute paths.
    results: List of results as returned by get_instrumented_lines.

  Returns: Dict to be used as json data as specified on the top of this page.
           The dictionary contains all instrumented lines of all files
           referenced by all executables.
  """
  # Merge the file->line-set mappings of all executables. An explicit loop is
  # used instead of reduce/iteritems for python 3 compatibility.
  result = {}
  for file_map in results:
    for file_name, lines in file_map.items():
      result.setdefault(file_name, set()).update(lines)

  # Return data as file->lines mapping. The lines are saved as lists
  # with (line number, test bits (as int)). The test bits are initialized with
  # 0, meaning instrumented, but no coverage.
  # The order of the test bits is given with key 'tests'. For now, these are
  # the executable names. We use a _list_ with two items instead of a tuple to
  # ease merging by allowing mutation of the second item.
  return {
    'version': 1,
    'tests': sorted(os.path.basename(e) for e in exe_list),
    'files': {f: [[l, 0] for l in sorted(result[f])] for f in result},
  }
194
195
def write_instrumented(options):
  """Implements the 'all' action of this tool.

  Collects the instrumented lines of all executables in the build directory
  in parallel and writes them to options.json_output in the json format
  described at the top of this file.
  """
  exe_list = list(executables())
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_instrumented_lines, exe_list)
  finally:
    pool.close()

  # Merge multiprocessing results and prepare output data.
  # Bug fix: merge_instrumented_line_results requires the executable list as
  # first argument to determine the test bit order; it was missing before.
  data = merge_instrumented_line_results(exe_list, results)

  # Write json output.
  with open(options.json_output, 'w') as f:
    json.dump(data, f, sort_keys=True)
211
212
def get_covered_lines(args):
  """Return the covered lines of an executable.

  Called through multiprocessing pool. The args are expected to unpack to:
    cov_dir: Folder with sancov files merged by sancov_merger.py.
    executable: The executable that was called to produce the given coverage
                data.
    sancov_file: The merged sancov file with coverage data.

  Returns: A tuple of post-processed llvm output as returned by
           process_symbolizer_output and the executable name.
  """
  cov_dir, executable, sancov_file = args

  # Let the sancov tool print the covered PCs and pipe them through the llvm
  # symbolizer.
  cmd = (
      '%s print %s 2> /dev/null | '
      '%s --obj %s -functions=none' %
      (SANCOV_TOOL,
       os.path.join(cov_dir, sancov_file),
       SYMBOLIZER,
       os.path.join(BUILD_DIR, executable)))
  proc = subprocess.Popen(
      cmd,
      shell=True,
      cwd=BASE_DIR,
      stdin=subprocess.PIPE,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
  )
  symbolizer_output, _ = proc.communicate()
  return process_symbolizer_output(symbolizer_output), executable
244
245
def merge_covered_line_results(data, results):
  """Merge multiprocessing results for covered lines.

  The data is mutated, the results are merged into it in place.

  Args:
    data: Existing coverage data from json file containing all instrumented
          lines.
    results: List of results as returned by get_covered_lines.
  """

  # List of executables and mapping to the test bit mask. The executable's
  # index in the sorted 'tests' list determines its bit.
  exe_list = data['tests']
  assert len(exe_list) <= 32, 'Max 32 different tests are supported.'
  test_bit_masks = {exe: 1 << i for i, exe in enumerate(exe_list)}

  def merge_lines(old_lines, new_lines, mask):
    """Merge the coverage data of a list of lines.

    Args:
      old_lines: Lines as list of pairs with line number and test bit mask.
                 The new lines will be merged into the list in place.
      new_lines: List of new (covered) lines (sorted).
      mask: The bit to be set for covered lines. The bit index is the test
            index of the executable that covered the line.
    """
    i = 0
    # Iterate over old and new lines, both are sorted.
    for l in new_lines:
      while old_lines[i][0] < l:
        # Forward instrumented lines not present in this coverage data.
        i += 1
        # TODO: Add more context to the assert message.
        assert i < len(old_lines), 'Covered line %d not in input file.' % l
      assert old_lines[i][0] == l, 'Covered line %d not in input file.' % l

      # Add coverage information to the line.
      old_lines[i][1] |= mask

  def merge_files(data, result):
    """Merge result into data.

    The data is mutated in place.

    Args:
      data: Merged coverage data from the previous merge step.
      result: New result to be merged in. The type is as returned by
              get_covered_lines.
    """
    file_map, executable = result
    files = data['files']
    # .items() instead of py2-only .iteritems() for python 3 compatibility.
    for file_name, lines in file_map.items():
      merge_lines(files[file_name], lines, test_bit_masks[executable])
    return data

  # Explicit loop instead of the py2-only builtin reduce(); data is mutated
  # in place.
  for result in results:
    merge_files(data, result)
302
303
def merge(options):
  """Implements the 'merge' action of this tool."""

  # The folder with the coverage output must exist.
  assert (os.path.exists(options.coverage_dir) and
          os.path.isdir(options.coverage_dir))

  # Multiprocessing inputs: one (coverage dir, executable name, sancov file
  # name) tuple per merged sancov file found in the coverage directory.
  candidates = (
      (f, SANCOV_FILE_RE.match(f)) for f in os.listdir(options.coverage_dir))
  inputs = [(options.coverage_dir, m.group(1), f)
            for f, m in candidates if m]

  # Post-process covered lines in parallel.
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_covered_lines, inputs)
  finally:
    pool.close()

  # Load existing json data file for merging the results.
  with open(options.json_input, 'r') as f:
    data = json.load(f)

  # Merge multiprocessing results. Mutates data.
  merge_covered_line_results(data, results)

  # Write merged results to file.
  with open(options.json_output, 'w') as f:
    json.dump(data, f, sort_keys=True)
336
337
def main():
  """Command line entry point.

  Returns: 0 on success, 1 on a usage error (argparse exits with 2 on an
           invalid action).
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--coverage-dir',
                      help='Path to the sancov output files.')
  parser.add_argument('--json-input',
                      help='Path to an existing json file with coverage data.')
  parser.add_argument('--json-output', required=True,
                      help='Path to a file to write json output to.')
  # Restrict valid actions at the argparse level; str.lower keeps the
  # historic case-insensitive behavior.
  parser.add_argument('action', type=str.lower, choices=('all', 'merge'),
                      help='Action to perform. One of all|merge.')

  options = parser.parse_args()
  if options.action == 'all':
    write_instrumented(options)
  elif options.action == 'merge':
    # Merging needs both the new coverage data and the existing baseline.
    if not options.coverage_dir:
      print('--coverage-dir is required')
      return 1
    if not options.json_input:
      print('--json-input is required')
      return 1
    merge(options)
  return 0
364
365
366 if __name__ == '__main__':
367 sys.exit(main())
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698