Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(33)

Side by Side Diff: tools/sanitizers/sancov_formatter.py

Issue 1737263003: [coverage] Enable sanitizer coverage. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Documentation Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright 2016 the V8 project authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """
tandrii(chromium) 2016/03/07 15:10:22 nit: afair, first line of doc shouldn't be empty,
Michael Achenbach 2016/03/07 16:07:20 Done.
7 Script to transform and merge sancov files into human readable json-format.
8
9 The script supports two actions:
10 all: Writes a json file with all instrumented lines of all executables.
11 merge: Merges sancov files with coverage output into an existing json file.
12
13 The json data is structured as follows:
14 {
15 "version": 1,
16 "tests": ["executable1", "executable2", ...],
17 "files": {
18 "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...],
19 "file2": [...],
20 ...
21 }
22 }
23
24 The executables are sorted and determine the test bit mask. Their index+1 is
25 the bit, e.g. executable1 = 1, executable3 = 4, etc. Hence, a line covered by
26 executable1 and executable3 will have bit_mask == 5.
27
28 The line-number-bit_mask pairs are sorted by line number and don't contain
29 duplicates.
30
31 The sancov tool is expected to be in the llvm compiler-rt third-party
32 directory. It's not checked out by default and must be added as a custom deps:
33 'v8/third_party/llvm/projects/compiler-rt':
34 'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
35 """
36
37 import argparse
38 import json
39 from multiprocessing import Pool, cpu_count
kjellander_chromium 2016/03/04 14:35:22 nit: move down the from-import: "Imports should be
Michael Achenbach 2016/03/04 15:00:25 Done.
40 import os
41 import re
42 import subprocess
43 import sys
44
45
# Files to exclude from coverage. Dropping their data early adds more speed.
# The contained cc files are already excluded from instrumentation, but inlined
# data is referenced through v8's object files.
EXCLUSIONS = [
  'buildtools',
  'src/third_party',
  'third_party',
  'test',
  'testing',
]

# V8 checkout directory (three levels above this script's location).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))

# Executable location. TODO(machenbach): Only release is supported for now.
BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')

# Path prefix added by the llvm symbolizer including trailing slash. Joining
# with the empty string is what appends the trailing separator.
OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')
kjellander_chromium 2016/03/04 14:35:22 it seems more readable to append os.sep than joini
Michael Achenbach 2016/03/08 10:05:46 I leave it as it is now - I think joining os.sep i
# Length of the prefix above; used to strip it from symbolizer output lines.
OUTPUT_PATH_OFFSET = len(OUTPUT_PATH_PREFIX)

# The sancov tool location. Expected in the llvm compiler-rt third-party
# directory (see module docstring for the required custom deps entry).
SANCOV_TOOL = os.path.join(
    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py')

# Simple script to sanitize the PCs from objdump.
SANITIZE_PCS = os.path.join(BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')

# The llvm symbolizer location.
SYMBOLIZER = os.path.join(
    BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
    'llvm-symbolizer')

# Number of cpus; sizes the multiprocessing pools below.
CPUS = cpu_count()

# Regexp to find sancov files as output by sancov_merger.py. Also grabs the
# executable name in group 1.
SANCOV_FILE_RE = re.compile(r'^(.*)\.result.sancov$')
87
88
def executables():
  """Yields absolute paths of all executable files in the build directory."""
  for entry in os.listdir(BUILD_DIR):
    candidate = os.path.join(BUILD_DIR, entry)
    if not os.path.isfile(candidate):
      continue
    if os.access(candidate, os.X_OK):
      yield candidate
95
96
def process_symbolizer_output(output):
  """Post-process llvm symbolizer output.

  Excludes files outside the v8 checkout or given in exclusion list above
  from further processing. Drops the character index in each line.

  Returns: A mapping of file names to lists of lines. The file names have
           relative paths to the v8 base directory. The lists of lines
           don't contain duplicate lines and are sorted.
  """
  # Map file names to sets of instrumented line numbers. Only lines starting
  # with the known output path prefix are relevant; the prefix is redundant
  # and takes too much space, so it is stripped. This also drops files
  # outside that path, e.g. generated files in the build dir and absolute
  # paths to c++ library headers.
  file_map = {}
  for raw_line in output.strip().splitlines():
    if not raw_line.startswith(OUTPUT_PATH_PREFIX):
      continue
    relative = raw_line[OUTPUT_PATH_OFFSET:]
    # Each line has the form <file name>:<line number>:<character number>.
    # Only the file name and the line number are kept.
    file_name, line_number, _ = relative.split(':')
    if file_name not in file_map:
      file_map[file_name] = set()
    file_map[file_name].add(int(line_number))

  # Remove exclusion patterns from the file map. It's cheaper to do it after
  # the mapping, as there are few excluded files and we don't want to do this
  # check for numerous lines in ordinary files.
  def keep(file_name):
    return not any(file_name.startswith(e) for e in EXCLUSIONS)

  # Return in serializable form (sorted lists instead of sets) and filter.
  return {name: sorted(lines)
          for name, lines in file_map.items() if keep(name)}
134
135
def get_instrumented_lines(executable):
  """Return the instrumented lines of an executable.

  Called trough multiprocessing pool.

  Returns: Post-processed llvm output as returned by process_symbolizer_output.
  """
  # The first two pipes are from llvm's tool sancov.py with 0x added to the hex
  # numbers. The results are piped into the llvm symbolizer, which outputs for
  # each PC: <file name with abs path>:<line number>:<character number>.
  # We don't call the sancov tool to get more speed.
  command = (
      'objdump -d %s | '
      'grep \'^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ '
      '<__sanitizer_cov\(_with_check\|\)\(@plt\|\)>\' | '
      'grep \'^\s\+[0-9a-f]\+\' -o | '
      '%s | '
      '%s --obj %s -functions=none'
  ) % (executable, SANITIZE_PCS, SYMBOLIZER, executable)
  proc = subprocess.Popen(
      command,
      stdin=subprocess.PIPE,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      cwd=BASE_DIR,
      shell=True,
  )
  stdout_data, _ = proc.communicate()
  return process_symbolizer_output(stdout_data)
163
164
def merge_instrumented_line_results(exe_list, results):
  """Merge multiprocessing results for all instrumented lines.

  Args:
    exe_list: List of all executable names with absolute paths.
    results: List of results as returned by get_instrumented_lines.

  Returns: Dict to be used as json data as specified on the top of this page.
           The dictionary contains all instrumented lines of all files
           referenced by all executables.
  """
  # Merge all file->lines mappings into one mapping of sets. Plain loops and
  # dict.items() instead of reduce/iteritems keep this working on both
  # python 2 and 3.
  merged = {}
  for result in results:
    for file_name, lines in result.items():
      merged.setdefault(file_name, set()).update(lines)

  # Return data as file->lines mapping. The lines are saved as lists
  # with (line number, test bits (as int)). The test bits are initialized with
  # 0, meaning instrumented, but no coverage.
  # The order of the test bits is given with key 'tests'. For now, these are
  # the executable names. We use a _list_ with two items instead of a tuple to
  # ease merging by allowing mutation of the second item. List comprehensions
  # (not map) guarantee real lists, which json.dump requires on python 3.
  return {
    'version': 1,
    'tests': sorted(os.path.basename(exe) for exe in exe_list),
    'files': {
      file_name: [[line, 0] for line in sorted(lines)]
      for file_name, lines in merged.items()
    },
  }
193
194
def write_instrumented(options):
  """Implements the 'all' action of this tool.

  Determines the instrumented lines of all executables in the build dir in
  parallel and writes the merged result to options.json_output.
  """
  exe_list = list(executables())
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_instrumented_lines, exe_list)
  finally:
    pool.close()

  # Merge multiprocessing results and prepare output data. The executable
  # list is required as well - it determines the test bit-mask order.
  # (Previously only `results` was passed, a TypeError at runtime.)
  data = merge_instrumented_line_results(exe_list, results)

  # Write json output.
  with open(options.json_output, 'w') as f:
    json.dump(data, f, sort_keys=True)
210
211
def get_covered_lines(args):
  """Return the covered lines of an executable.

  Called trough multiprocessing pool. The args are expected to unpack to:
    cov_dir: Folder with sancov files merged by sancov_merger.py.
    executable: The executable that was called to produce the given coverage
                data.
    sancov_file: The merged sancov file with coverage data.

  Returns: A tuple of post-processed llvm output as returned by
           process_symbolizer_output and the executable name.
  """
  cov_dir, executable, sancov_file = args

  # Let the sancov tool print the covered PCs and pipe them through the llvm
  # symbolizer.
  command = (
      '%s print %s 2> /dev/null | '
      '%s --obj %s -functions=none'
  ) % (
      SANCOV_TOOL,
      os.path.join(cov_dir, sancov_file),
      SYMBOLIZER,
      os.path.join(BUILD_DIR, executable),
  )
  proc = subprocess.Popen(
      command,
      stdin=subprocess.PIPE,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      cwd=BASE_DIR,
      shell=True,
  )
  stdout_data, _ = proc.communicate()
  return process_symbolizer_output(stdout_data), executable
243
244
def merge_covered_line_results(data, results):
  """Merge multiprocessing results for covered lines.

  The data is mutated, the results are merged into it in place.

  Args:
    data: Existing coverage data from json file containing all instrumented
          lines.
    results: List of results as returned by get_covered_lines.
  """

  # List of executables and mapping to the test bit mask.
  exe_list = data['tests']
  assert len(exe_list) <= 32, 'Max 32 different tests are supported.'
  test_bit_masks = {exe: 1 << i for i, exe in enumerate(exe_list)}

  def merge_lines(old_lines, new_lines, mask):
    """Merge the coverage data of a list of lines.

    Args:
      old_lines: Lines as list of pairs with line number and test bit mask.
                 The new lines will be merged into the list in place.
      new_lines: List of new (covered) lines (sorted).
      mask: The bit to be set for covered lines. The bit index is the test
            index of the executable that covered the line.
    """
    i = 0
    # Iterate over old and new lines, both are sorted.
    for l in new_lines:
      # Forward instrumented lines not present in this coverage data. The
      # bound is checked in the loop condition to avoid an IndexError when a
      # covered line lies beyond the last instrumented line.
      while i < len(old_lines) and old_lines[i][0] < l:
        i += 1
      assert i < len(old_lines), (
          'Covered line %d not in input file (beyond last instrumented '
          'line).' % l)
      assert old_lines[i][0] == l, (
          'Covered line %d not in input file (next instrumented line is '
          '%d).' % (l, old_lines[i][0]))

      # Add coverage information to the line.
      old_lines[i][1] |= mask

  # Merge all results into data in place. A plain loop with dict.items()
  # replaces the python-2-only reduce/iteritems combination.
  files = data['files']
  for file_map, executable in results:
    for file_name, lines in file_map.items():
      merge_lines(files[file_name], lines, test_bit_masks[executable])
301
302
def merge(options):
  """Implements the 'merge' action of this tool.

  Merges the coverage data of all sancov files in options.coverage_dir into
  the instrumentation data from options.json_input and writes the result to
  options.json_output.
  """

  # Check if folder with coverage output exists. An explicit check instead of
  # assert, as asserts are stripped when running with python -O; isdir also
  # implies existence, so no separate exists() check is needed.
  if not os.path.isdir(options.coverage_dir):
    raise OSError('Coverage directory %s not found.' % options.coverage_dir)

  # Inputs for multiprocessing. List of tuples of:
  # Coverage dir, executable name, sancov file name.
  inputs = []
  for f in os.listdir(options.coverage_dir):
    match = SANCOV_FILE_RE.match(f)
    if match:
      inputs.append((options.coverage_dir, match.group(1), f))

  # Post-process covered lines in parallel.
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_covered_lines, inputs)
  finally:
    pool.close()

  # Load existing json data file for merging the results.
  with open(options.json_input, 'r') as f:
    data = json.load(f)

  # Merge multiprocessing results. Mutates data.
  merge_covered_line_results(data, results)

  # Write merged results to file.
  with open(options.json_output, 'w') as f:
    json.dump(data, f, sort_keys=True)
335
336
def main():
  """Parses command line arguments and dispatches to the requested action.

  Returns: 0 on success, 1 on bad arguments (used as process exit code).
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--coverage-dir',
                      help='Path to the sancov output files.')
  parser.add_argument('--json-input',
                      help='Path to an existing json file with coverage data.')
  parser.add_argument('--json-output', required=True,
                      help='Path to a file to write json output to.')
  parser.add_argument('action',
                      help='Action to perform. One of all|merge.')

  options = parser.parse_args()
  # Normalize once instead of lowering the action for every comparison.
  # Parenthesized print calls below behave identically on python 2 and 3.
  action = options.action.lower()
  if action == 'all':
    write_instrumented(options)
  elif action == 'merge':
    if not options.coverage_dir:
      print('--coverage-dir is required')
      return 1
    if not options.json_input:
      print('--json-input is required')
      return 1
    merge(options)
  else:
    print('Unsupported action.')
    return 1
  return 0


if __name__ == '__main__':
  sys.exit(main())
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698