tools/sanitizers/sancov_formatter.py - Issue 1737263003: [coverage] Enable sanitizer coverage.

Side by Side Diff: tools/sanitizers/sancov_formatter.py

Issue 1737263003: [coverage] Enable sanitizer coverage. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Review kjellander Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 #!/usr/bin/env python

	2 # Copyright 2016 the V8 project authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 """

	7 Script to transform and merge sancov files into human readable json-format.

	8

	9 The script supports two actions:

	10 all: Writes a json file with all instrumented lines of all executables.

	11 merge: Merges sancov files with coverage output into an existing json file.

	12

	13 The json data is structured as follows:

	14 {

	15 "version": 1,

	16 "tests": ["executable1", "executable2", ...],

	17 "files": {

	18 "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...],

	19 "file2": [...],

	20 ...

	21 }

	22 }

	23

	24 The executables are sorted and determine the test bit mask. Their index+1 is

	25 the bit, e.g. executable1 = 1, executable3 = 4, etc. Hence, a line covered by

	26 executable1 and executable3 will have bit_mask == 5.

	27

	28 The line-number-bit_mask pairs are sorted by line number and don't contain

	29 duplicates.

	30

	31 The sancov tool is expected to be in the llvm compiler-rt third-party

	32 directory. It's not checked out by default and must be added as a custom deps:

	33 'v8/third_party/llvm/projects/compiler-rt':

	34 'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'

	35 """

	36

	37 import argparse

	38 import json

	39 import os

	40 import re

	41 import subprocess

	42 import sys

	43

	44 from multiprocessing import Pool, cpu_count

	45

	46

	47 # Files to exclude from coverage. Dropping their data early adds more speed.

	48 # The contained cc files are already excluded from instrumentation, but inlined

	49 # data is referenced through v8's object files.

	50 EXCLUSIONS = [

	51 'buildtools',

	52 'src/third_party',

	53 'third_party',

	54 'test',

	55 'testing',

	56 ]

	57

	58 # V8 checkout directory.

	59 BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(

	60 os.path.abspath(__file__))))

	61

	62 # Executable location. TODO(machenbach): Only release is supported for now.

	63 BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')

	64

	65 # Path prefix added by the llvm symbolizer including trailing slash.

	66 OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')

	67 OUTPUT_PATH_OFFSET = len(OUTPUT_PATH_PREFIX)

	68

	69 # The sancov tool location.

	70 SANCOV_TOOL = os.path.join(

	71 BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',

	72 'lib', 'sanitizer_common', 'scripts', 'sancov.py')

	73

	74 # Simple script to sanitize the PCs from objdump.

	75 SANITIZE_PCS = os.path.join(BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')

	76

	77 # The llvm symbolizer location.

	78 SYMBOLIZER = os.path.join(

	79 BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',

	80 'llvm-symbolizer')

	81

	82 # Number of cpus.

	83 CPUS = cpu_count()

	84

	85 # Regexp to find sancov files as output by sancov_merger.py. Also grabs the

	86 # executable name in group 1.

	87 SANCOV_FILE_RE = re.compile(r'^(.*)\.result.sancov$')

	88

	89

	90 def executables():

	91 """Iterates over executable files in the build directory."""

	92 for f in os.listdir(BUILD_DIR):

	93 file_path = os.path.join(BUILD_DIR, f)

	94 if os.path.isfile(file_path) and os.access(file_path, os.X_OK):

	95 yield file_path

	96

	97

	98 def process_symbolizer_output(output):

	99 """Post-process llvm symbolizer output.

	100

	101 Excludes files outside the v8 checkout or given in exclusion list above

	102 from further processing. Drops the character index in each line.

	103

	104 Returns: A mapping of file names to lists of lines. The file names have
	tandrii(chromium) 2016/03/07 15:10:22 here and next line: s/lines/line numbers here and next line: s/lines/line numbers Michael Achenbach 2016/03/07 16:07:21 Done. Show quoted text On 2016/03/07 15:10:22, tandrii(chromium) wrote: > here and next line: s/lines/line numbers Done.
	105 relative paths to the v8 base directory. The lists of lines

	106 don't contain duplicate lines and are sorted.

	107 """

	108 # Drop path offset when iterating lines. The path is redundant and takes

	109 # too much space. Drop files outside that path, e.g. generated files in

	110 # the build dir and absolute paths to c++ library headers.

	111 def iter_lines():

	112 for line in output.strip().splitlines():

	113 if line.startswith(OUTPUT_PATH_PREFIX):

	114 yield line[OUTPUT_PATH_OFFSET:]
	tandrii(chromium) 2016/03/07 15:10:22 nit: i'd use s/OUTPUT_PATH_OFFSET/len(OUTPUT_PATH_ nit: i'd use s/OUTPUT_PATH_OFFSET/len(OUTPUT_PATH_PREFIX) as it's easier to read. Michael Achenbach 2016/03/07 16:07:21 Done. Show quoted text On 2016/03/07 15:10:22, tandrii(chromium) wrote: > nit: i'd use s/OUTPUT_PATH_OFFSET/len(OUTPUT_PATH_PREFIX) > as it's easier to read. Done.
	115

	116 # Map file names to sets of instrumented line numbers.

	117 file_map = {}

	118 for line in iter_lines():

	119 # Drop character number, we only care for line numbers. Each line has the

	120 # form: <file name>:<line number>:<character number>.

	121 file_name, number, _ = line.split(':')

	122 file_map.setdefault(file_name, set([])).add(int(number))

	123

	124 # Remove exclusion patterns from file map. It's cheaper to do it after the

	125 # mapping, as there are few excluded files and we don't want to do this

	126 # check for numerous lines in ordinary files.

	127 def keep(file_name):

	128 for e in EXCLUSIONS:

	129 if file_name.startswith(e):

	130 return False

	131 return True

	132

	133 # Return in serializable form and filter.

	134 return {k:sorted(list(file_map[k])) for k in file_map if keep(k)}
	tandrii(chromium) 2016/03/07 15:10:22 nit: space after : also, omit "list", sorted works nit: space after : also, omit "list", sorted works on any iterables. Michael Achenbach 2016/03/07 16:07:21 Done. Show quoted text On 2016/03/07 15:10:22, tandrii(chromium) wrote: > nit: space after : > also, omit "list", sorted works on any iterables. Done.
	135

	136

	137 def get_instrumented_lines(executable):

	138 """Return the instrumented lines of an executable.

	139

	140 Called trough multiprocessing pool.

	141

	142 Returns: Post-processed llvm output as returned by process_symbolizer_output.

	143 """

	144 # The first two pipes are from llvm's tool sancov.py with 0x added to the hex

	145 # numbers. The results are piped into the llvm symbolizer, which outputs for

	146 # each PC: <file name with abs path>:<line number>:<character number>.

	147 # We don't call the sancov tool to get more speed.

	148 process = subprocess.Popen(

	149 'objdump -d %s \| '

	150 'grep \'^\s\+[0-9a-f]\+:.*\scall$q\\|$\s\+[0-9a-f]\+ '

	151 '<__sanitizer_cov$_with_check\\|$$@plt\\|$>\' \| '

	152 'grep \'^\s\+[0-9a-f]\+\' -o \| '

	153 '%s \| '

	154 '%s --obj %s -functions=none' %

	155 (executable, SANITIZE_PCS, SYMBOLIZER, executable),

	156 stdout=subprocess.PIPE,

	157 stderr=subprocess.PIPE,

	158 stdin=subprocess.PIPE,

	159 cwd=BASE_DIR,

	160 shell=True,

	161 )

	162 output, _ = process.communicate()

	163 return process_symbolizer_output(output)

	164

	165

	166 def merge_instrumented_line_results(exe_list, results):

	167 """Merge multiprocessing results for all instrumented lines.

	168

	169 Args:

	170 exe_list: List of all executable names with absolute paths.

	171 results: List of results as returned by get_instrumented_lines.

	172

	173 Returns: Dict to be used as json data as specified on the top of this page.

	174 The dictionary contains all instrumented lines of all files

	175 referenced by all executables.

	176 """

	177 def merge_files(x, y):

	178 for file_name, lines in y.iteritems():

	179 x.setdefault(file_name, set([])).update(lines)

	180 return x

	181 result = reduce(merge_files, results, {})

	182

	183 # Return data as file->lines mapping. The lines are saved as lists

	184 # with (line number, test bits (as int)). The test bits are initialized with

	185 # 0, meaning instrumented, but no coverage.

	186 # The order of the test bits is given with key 'tests'. For now, these are

	187 # the executable names. We use a _list_ with two items instead of a tuple to

	188 # ease merging by allowing mutation of the second item.

	189 return {

	190 'version': 1,

	191 'tests': sorted(map(os.path.basename, exe_list)),

	192 'files': {k:map(lambda x: [x, 0], sorted(result[k])) for k in result},
	tandrii(chromium) 2016/03/07 15:10:22 suggestion: s/k/f and s/x/l (at least that' suggestion: s/k/f and s/x/l (at least that's a bit easier to read since f = filename, l = line number.. Michael Achenbach 2016/03/07 16:07:21 Done. Show quoted text On 2016/03/07 15:10:22, tandrii(chromium) wrote: > suggestion: s/k/f and s/x/l (at least that's a bit easier to read since f > = filename, l = line number.. Done.
	193 }

	194

	195

	196 def write_instrumented(options):

	197 """Implements the 'all' action of this tool."""

	198 exe_list = list(executables())

	199 pool = Pool(CPUS)

	200 try:

	201 results = pool.imap_unordered(get_instrumented_lines, exe_list)

	202 finally:

	203 pool.close()

	204

	205 # Merge multiprocessing results and prepare output data.

	206 data = merge_instrumented_line_results(results)

	207

	208 # Write json output.

	209 with open(options.json_output, 'w') as f:

	210 json.dump(data, f, sort_keys=True)

	211

	212

	213 def get_covered_lines(args):

	214 """Return the covered lines of an executable.

	215

	216 Called trough multiprocessing pool. The args are expected to unpack to:

	217 cov_dir: Folder with sancov files merged by sancov_merger.py.

	218 executable: The executable that was called to produce the given coverage

	219 data.

	220 sancov_file: The merged sancov file with coverage data.

	221

	222 Returns: A tuple of post-processed llvm output as returned by

	223 process_symbolizer_output and the executable name.

	224 """

	225 cov_dir, executable, sancov_file = args

	226

	227 # Let the sancov tool print the covered PCs and pipe them through the llvm

	228 # symbolizer.

	229 process = subprocess.Popen(

	230 '%s print %s 2> /dev/null \| '

	231 '%s --obj %s -functions=none' %

	232 (SANCOV_TOOL,

	233 os.path.join(cov_dir, sancov_file),

	234 SYMBOLIZER,

	235 os.path.join(BUILD_DIR, executable)),

	236 stdout=subprocess.PIPE,

	237 stderr=subprocess.PIPE,

	238 stdin=subprocess.PIPE,

	239 cwd=BASE_DIR,

	240 shell=True,

	241 )

	242 output, _ = process.communicate()

	243 return process_symbolizer_output(output), executable

	244

	245

	246 def merge_covered_line_results(data, results):

	247 """Merge multiprocessing results for covered lines.

	248

	249 The data is mutated, the results are merged into it in place.

	250

	251 Args:

	252 data: Existing coverage data from json file containing all instrumented

	253 lines.

	254 results: List of results as returned by get_covered_lines.

	255 """

	256

	257 # List of executables and mapping to the test bit mask.

	258 exe_list = data['tests']

	259 assert len(exe_list) <= 32, 'Max 32 different tests are supported.'
	tandrii(chromium) 2016/03/07 15:10:22 why 32? ftr, python works just fine even with 67 why 32? ftr, python works just fine even with 67 bit integers: Show quoted text >>> bin((1<<66) \| 0x15) '0b1000000000000000000000000000000000000000000000000000000000000010101' Michael Achenbach 2016/03/07 16:07:21 ok - but not sure if there's a limit on the JS sid Show quoted text On 2016/03/07 15:10:22, tandrii(chromium) wrote: > why 32? > > ftr, python works just fine even with 67 bit integers: > >>> bin((1<<66) \| 0x15) > '0b1000000000000000000000000000000000000000000000000000000000000010101' ok - but not sure if there's a limit on the JS side for parsing a number of that size from JSON. So, I'd keep it like that for now.
	260 test_bit_masks = {exe:1<<i for i, exe in enumerate(exe_list)}

	261

	262 def merge_lines(old_lines, new_lines, mask):

	263 """Merge the coverage data of a list of lines.

	264

	265 Args:

	266 old_lines: Lines as list of pairs with line number and test bit mask.

	267 The new lines will be merged into the list in place.

	268 new_lines: List of new (covered) lines (sorted).

	269 mask: The bit to be set for covered lines. The bit index is the test

	270 index of the executable that covered the line.

	271 """

	272 i = 0
	tandrii(chromium) 2016/03/07 15:10:22 small suggestion: I personally avoid using index a small suggestion: I personally avoid using index and do this: while old_lines[0][0] < l: old_lines = old_lines[1:] but maybe that's too much C++ with ranges. Michael Achenbach 2016/03/07 16:07:21 ok - somewhat more readable, but also more expensi Show quoted text On 2016/03/07 15:10:22, tandrii(chromium) wrote: > small suggestion: I personally avoid using index and do this: > > while old_lines[0][0] < l: > old_lines = old_lines[1:] > > but maybe that's too much C++ with ranges. ok - somewhat more readable, but also more expensive. I'd keep the old version for speed here, since this is a hot function.
	273 # Iterate over old and new lines, both are sorted.

	274 for l in new_lines:

	275 while old_lines[i][0] < l:

	276 # Forward instrumented lines not present in this coverage data.

	277 i += 1

	278 # TODO: Add more context to the assert message.

	279 assert i < len(old_lines), 'Covered line %d not in input file.' % l

	280 assert old_lines[i][0] == l, 'Covered line %d not in input file.' % l

	281

	282 # Add coverage information to the line.

	283 old_lines[i][1] \|= mask

	284

	285 def merge_files(data, result):

	286 """Merge result into data.

	287

	288 The data is mutated in place.

	289

	290 Args:

	291 data: Merged coverage data from the previous reduce step.

	292 result: New result to be merged in. The type is as returned by

	293 get_covered_lines.

	294 """

	295 file_map, executable = result

	296 files = data['files']

	297 for file_name, lines in file_map.iteritems():

	298 merge_lines(files[file_name], lines, test_bit_masks[executable])

	299 return data

	300

	301 reduce(merge_files, results, data)

	302

	303

	304 def merge(options):

	305 """Implements the 'merge' action of this tool."""

	306

	307 # Check if folder with coverage output exists.

	308 assert (os.path.exists(options.coverage_dir) and

	309 os.path.isdir(options.coverage_dir))

	310

	311 # Inputs for multiprocessing. List of tuples of:

	312 # Coverage dir, executable name, sancov file name.

	313 inputs = []

	314 for f in os.listdir(options.coverage_dir):

	315 match = SANCOV_FILE_RE.match(f)

	316 if match:

	317 inputs.append((options.coverage_dir, match.group(1), f))

	318

	319 # Post-process covered lines in parallel.

	320 pool = Pool(CPUS)

	321 try:

	322 results = pool.imap_unordered(get_covered_lines, inputs)

	323 finally:

	324 pool.close()

	325

	326 # Load existing json data file for merging the results.

	327 with open(options.json_input, 'r') as f:

	328 data = json.load(f)

	329

	330 # Merge muliprocessing results. Mutates data.

	331 merge_covered_line_results(data, results)

	332

	333 # Write merged results to file.

	334 with open(options.json_output, 'w') as f:

	335 json.dump(data, f, sort_keys=True)

	336

	337

	338 def main():

	339 parser = argparse.ArgumentParser()

	340 parser.add_argument('--coverage-dir',

	341 help='Path to the sancov output files.')

	342 parser.add_argument('--json-input',

	343 help='Path to an existing json file with coverage data.')

	344 parser.add_argument('--json-output', required=True,

	345 help='Path to a file to write json output to.')

	346 parser.add_argument('action',
	tandrii(chromium) 2016/03/07 15:10:22 add arg choices = ('all', 'merge') https://docs. add arg choices = ('all', 'merge') https://docs.python.org/3/library/argparse.html#choices Michael Achenbach 2016/03/07 16:07:21 Done. Show quoted text On 2016/03/07 15:10:22, tandrii(chromium) wrote: > add arg > choices = ('all', 'merge') > > https://docs.python.org/3/library/argparse.html#choices Done.
	347 help='Action to perform. One of all\|merge.')

	348

	349 options = parser.parse_args()

	350 if options.action.lower() == 'all':

	351 write_instrumented(options)

	352 elif options.action.lower() == 'merge':

	353 if not options.coverage_dir:

	354 print '--coverage-dir is required'

	355 return 1

	356 if not options.json_input:

	357 print '--json-input is required'

	358 return 1

	359 merge(options)

	360 else:

	361 print 'Unsupported action.'

	362 return 1

	363 return 0

	364

	365

	366 if __name__ == '__main__':

	367 sys.exit(main())

OLD	NEW

« no previous file with comments | « tools/run-tests.py ('k') | tools/sanitizers/sancov_formatter_test.py » ('j') | tools/sanitizers/sancov_formatter_test.py » ('J')