Chromium Code Reviews

Side by Side Diff: tools/sanitizers/sancov_formatter.py

Issue 1737263003: [coverage] Enable sanitizer coverage. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Review Andrii Created 4 years, 9 months ago
#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to transform and merge sancov files into human-readable json format.

The script supports two actions:
all: Writes a json file with all instrumented lines of all executables.
merge: Merges sancov files with coverage output into an existing json file.

The json data is structured as follows:
{
  "version": 1,
  "tests": ["executable1", "executable2", ...],
  "files": {
    "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...],
    "file2": [...],
    ...
  }
}

The executables are sorted and determine the test bit mask: the executable at
(zero-based) index i sets bit i, e.g. executable1 = 1, executable3 = 4, etc.
Hence, a line covered by executable1 and executable3 has bit_mask == 5.

The line-number/bit_mask pairs are sorted by line number and don't contain
duplicates.

The sancov tool is expected to be in the llvm compiler-rt third-party
directory. It's not checked out by default and must be added as a custom deps:
'v8/third_party/llvm/projects/compiler-rt':
    'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
"""
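
# Example of the resulting data layout (illustrative file and test names, not
# actual output): if data['tests'] == ['cctest', 'unittests'], then an entry
# [12, 3] in data['files']['src/foo.cc'] means that line 12 of src/foo.cc is
# instrumented and covered by both cctest (bit 0 -> 1) and unittests
# (bit 1 -> 2), since 1 | 2 == 3. A value of 0 means instrumented but not
# covered. At most 32 tests/executables are supported, as each one occupies a
# single bit in the mask (see the assert in merge_covered_line_results below).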

import argparse
import json
import os
import re
import subprocess
import sys

from multiprocessing import Pool, cpu_count


# Files to exclude from coverage. Dropping their data early adds more speed.
# The contained cc files are already excluded from instrumentation, but inlined
# data is referenced through v8's object files.
EXCLUSIONS = [
  'buildtools',
  'src/third_party',
  'third_party',
  'test',
  'testing',
]

# V8 checkout directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))

# Executable location. TODO(machenbach): Only release is supported for now.
BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')

# Path prefix added by the llvm symbolizer including trailing slash.
OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')

# The sancov tool location.
SANCOV_TOOL = os.path.join(
    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py')

# Simple script to sanitize the PCs from objdump.
SANITIZE_PCS = os.path.join(BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')

# The llvm symbolizer location.
SYMBOLIZER = os.path.join(
    BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
    'llvm-symbolizer')

# Number of cpus.
CPUS = cpu_count()

# Regexp to find sancov files as output by sancov_merger.py. Also grabs the
# executable name in group 1.
SANCOV_FILE_RE = re.compile(r'^(.*)\.result.sancov$')
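# For example, a merged file named 'd8.result.sancov' maps to the executable
# name 'd8' (group 1 of the regexp above).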


def executables():
  """Iterates over executable files in the build directory."""
  for f in os.listdir(BUILD_DIR):
    file_path = os.path.join(BUILD_DIR, f)
    if os.path.isfile(file_path) and os.access(file_path, os.X_OK):
      yield file_path


def process_symbolizer_output(output):
  """Post-process llvm symbolizer output.

  Excludes files outside the v8 checkout or given in the exclusion list above
  from further processing. Drops the character index in each line.

  Returns: A mapping of file names to lists of line numbers. The file names
           have relative paths to the v8 base directory. The lists of line
           numbers don't contain duplicate lines and are sorted.
  """
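  # Illustrative example (hypothetical path): a symbolizer line such as
  # '<OUTPUT_PATH_PREFIX>src/heap/heap.cc:42:7' contributes line 42 to the
  # entry 'src/heap/heap.cc' of the returned mapping; lines not starting with
  # OUTPUT_PATH_PREFIX and files matching EXCLUSIONS are dropped.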
  # Drop path prefix when iterating lines. The path is redundant and takes
  # too much space. Drop files outside that path, e.g. generated files in
  # the build dir and absolute paths to c++ library headers.
  def iter_lines():
    for line in output.strip().splitlines():
      if line.startswith(OUTPUT_PATH_PREFIX):
        yield line[len(OUTPUT_PATH_PREFIX):]

  # Map file names to sets of instrumented line numbers.
  file_map = {}
  for line in iter_lines():
    # Drop character number, we only care for line numbers. Each line has the
    # form: <file name>:<line number>:<character number>.
    file_name, number, _ = line.split(':')
    file_map.setdefault(file_name, set([])).add(int(number))

  # Remove exclusion patterns from file map. It's cheaper to do it after the
  # mapping, as there are few excluded files and we don't want to do this
  # check for numerous lines in ordinary files.
  def keep(file_name):
    for e in EXCLUSIONS:
      if file_name.startswith(e):
        return False
    return True

  # Return in serializable form and filter.
  return {k: sorted(file_map[k]) for k in file_map if keep(k)}


def get_instrumented_lines(executable):
  """Return the instrumented lines of an executable.

  Called through a multiprocessing pool.

  Returns: Post-processed llvm output as returned by process_symbolizer_output.
  """
  # The first two pipes are from llvm's tool sancov.py with 0x added to the hex
  # numbers. The results are piped into the llvm symbolizer, which outputs for
  # each PC: <file name with abs path>:<line number>:<character number>.
  # We don't call the sancov tool to get more speed.
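  # Roughly (illustrative, exact objdump formatting may differ): objdump prints
  # lines like '  4004f6:  e8 ...  callq 400460 <__sanitizer_cov@plt>'; the
  # first grep keeps only such coverage-callback call sites, the second grep
  # extracts the leading PC ('4004f6'), sanitize_pcs.py normalizes the PCs
  # (adding the '0x' prefix mentioned above), and the symbolizer turns each PC
  # into a <file>:<line>:<column> triple.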
  process = subprocess.Popen(
      'objdump -d %s | '
      'grep \'^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ '
      '<__sanitizer_cov\(_with_check\|\)\(@plt\|\)>\' | '
      'grep \'^\s\+[0-9a-f]\+\' -o | '
      '%s | '
      '%s --obj %s -functions=none' %
          (executable, SANITIZE_PCS, SYMBOLIZER, executable),
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      stdin=subprocess.PIPE,
      cwd=BASE_DIR,
      shell=True,
  )
  output, _ = process.communicate()
  return process_symbolizer_output(output)


def merge_instrumented_line_results(exe_list, results):
  """Merge multiprocessing results for all instrumented lines.

  Args:
    exe_list: List of all executable names with absolute paths.
    results: List of results as returned by get_instrumented_lines.

  Returns: Dict to be used as json data as specified on the top of this page.
           The dictionary contains all instrumented lines of all files
           referenced by all executables.
  """
  def merge_files(x, y):
    for file_name, lines in y.iteritems():
      x.setdefault(file_name, set([])).update(lines)
    return x
  result = reduce(merge_files, results, {})

  # Return data as file->lines mapping. The lines are saved as lists
  # with (line number, test bits (as int)). The test bits are initialized with
  # 0, meaning instrumented, but no coverage.
  # The order of the test bits is given with key 'tests'. For now, these are
  # the executable names. We use a _list_ with two items instead of a tuple to
  # ease merging by allowing mutation of the second item.
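  # E.g. (illustrative names): {'version': 1, 'tests': ['cctest', 'd8'],
  #                             'files': {'src/foo.cc': [[1, 0], [5, 0]]}}.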
  return {
    'version': 1,
    'tests': sorted(map(os.path.basename, exe_list)),
    'files': {f: map(lambda l: [l, 0], sorted(result[f])) for f in result},
  }


def write_instrumented(options):
  """Implements the 'all' action of this tool."""
  exe_list = list(executables())
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_instrumented_lines, exe_list)
  finally:
    pool.close()

  # Merge multiprocessing results and prepare output data.
  data = merge_instrumented_line_results(exe_list, results)

  # Write json output.
  with open(options.json_output, 'w') as f:
    json.dump(data, f, sort_keys=True)


def get_covered_lines(args):
  """Return the covered lines of an executable.

  Called through a multiprocessing pool. The args are expected to unpack to:
    cov_dir: Folder with sancov files merged by sancov_merger.py.
    executable: The executable that was called to produce the given coverage
                data.
    sancov_file: The merged sancov file with coverage data.

  Returns: A tuple of post-processed llvm output as returned by
           process_symbolizer_output and the executable name.
  """
  cov_dir, executable, sancov_file = args

  # Let the sancov tool print the covered PCs and pipe them through the llvm
  # symbolizer.
  process = subprocess.Popen(
      '%s print %s 2> /dev/null | '
      '%s --obj %s -functions=none' %
          (SANCOV_TOOL,
           os.path.join(cov_dir, sancov_file),
           SYMBOLIZER,
           os.path.join(BUILD_DIR, executable)),
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      stdin=subprocess.PIPE,
      cwd=BASE_DIR,
      shell=True,
  )
  output, _ = process.communicate()
  return process_symbolizer_output(output), executable


def merge_covered_line_results(data, results):
  """Merge multiprocessing results for covered lines.

  The data is mutated, the results are merged into it in place.

  Args:
    data: Existing coverage data from json file containing all instrumented
          lines.
    results: List of results as returned by get_covered_lines.
  """

  # List of executables and mapping to the test bit mask.
  exe_list = data['tests']
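  # Each executable occupies one bit in the per-line bit mask; the current
  # format supports at most 32 different tests (see also the format
  # description in the module docstring).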
  assert len(exe_list) <= 32, 'Max 32 different tests are supported.'
kjellander_chromium 2016/03/08 04:48:33 Maybe this limitation should be documented more visibly.
Michael Achenbach 2016/03/08 10:05:46 I'll add a comment here and in the top-level doc.
  test_bit_masks = {exe: 1 << i for i, exe in enumerate(exe_list)}

  def merge_lines(old_lines, new_lines, mask):
    """Merge the coverage data of a list of lines.

    Args:
      old_lines: Lines as list of pairs with line number and test bit mask.
                 The new lines will be merged into the list in place.
      new_lines: List of new (covered) lines (sorted).
      mask: The bit to be set for covered lines. The bit index is the test
            index of the executable that covered the line.
    """
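    # Illustrative example: with old_lines == [[1, 0], [3, 1], [7, 0]],
    # new_lines == [3, 7] and mask == 2, old_lines becomes
    # [[1, 0], [3, 3], [7, 2]] after merging.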
    i = 0
    # Iterate over old and new lines, both are sorted.
    for l in new_lines:
      while old_lines[i][0] < l:
        # Forward instrumented lines not present in this coverage data.
        i += 1
        # TODO: Add more context to the assert message.
        assert i < len(old_lines), 'Covered line %d not in input file.' % l
      assert old_lines[i][0] == l, 'Covered line %d not in input file.' % l

      # Add coverage information to the line.
      old_lines[i][1] |= mask

  def merge_files(data, result):
    """Merge result into data.

    The data is mutated in place.

    Args:
      data: Merged coverage data from the previous reduce step.
      result: New result to be merged in. The type is as returned by
              get_covered_lines.
    """
    file_map, executable = result
    files = data['files']
    for file_name, lines in file_map.iteritems():
      merge_lines(files[file_name], lines, test_bit_masks[executable])
    return data

  reduce(merge_files, results, data)


def merge(options):
  """Implements the 'merge' action of this tool."""

  # Check if folder with coverage output exists.
  assert (os.path.exists(options.coverage_dir) and
          os.path.isdir(options.coverage_dir))

  # Inputs for multiprocessing. List of tuples of:
  # Coverage dir, executable name, sancov file name.
  inputs = []
  for f in os.listdir(options.coverage_dir):
    match = SANCOV_FILE_RE.match(f)
    if match:
      inputs.append((options.coverage_dir, match.group(1), f))

  # Post-process covered lines in parallel.
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_covered_lines, inputs)
  finally:
    pool.close()

  # Load existing json data file for merging the results.
  with open(options.json_input, 'r') as f:
    data = json.load(f)

  # Merge multiprocessing results. Mutates data.
  merge_covered_line_results(data, results)

  # Write merged results to file.
  with open(options.json_output, 'w') as f:
    json.dump(data, f, sort_keys=True)
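

# Example invocations (paths are placeholders):
#   sancov_formatter.py all --json-output=coverage_all.json
#   sancov_formatter.py merge --coverage-dir=/path/to/sancov/files \
#       --json-input=coverage_all.json --json-output=coverage_merged.json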
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--coverage-dir',
                      help='Path to the sancov output files.')
  parser.add_argument('--json-input',
                      help='Path to an existing json file with coverage data.')
  parser.add_argument('--json-output', required=True,
                      help='Path to a file to write json output to.')
  parser.add_argument('action', choices=['all', 'merge'],
                      help='Action to perform.')

  options = parser.parse_args()
  if options.action.lower() == 'all':
    write_instrumented(options)
  elif options.action.lower() == 'merge':
    if not options.coverage_dir:
      print '--coverage-dir is required'
      return 1
    if not options.json_input:
      print '--json-input is required'
      return 1
    merge(options)
  return 0


if __name__ == '__main__':
  sys.exit(main())
