OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # Copyright 2016 the V8 project authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Script to transform and merge sancov files into human readable json-format. |
| 7 |
| 8 The script supports two actions: |
| 9 all: Writes a json file with all instrumented lines of all executables. |
| 10 merge: Merges sancov files with coverage output into an existing json file. |
| 11 |
| 12 The json data is structured as follows: |
| 13 { |
| 14 "version": 1, |
| 15 "tests": ["executable1", "executable2", ...], |
| 16 "files": { |
| 17 "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...], |
| 18 "file2": [...], |
| 19 ... |
| 20 } |
| 21 } |
| 22 |
| 23 The executables are sorted and determine the test bit mask. Their index+1 is |
| 24 the bit, e.g. executable1 = 1, executable3 = 4, etc. Hence, a line covered by |
| 25 executable1 and executable3 will have bit_mask == 5 == 0b101. The number of |
| 26 tests is restricted to 52 in version 1, to allow javascript JSON parsing of |
| 27 the bitsets encoded as numbers. JS max safe int is (1 << 53) - 1. |
| 28 |
| 29 The line-number-bit_mask pairs are sorted by line number and don't contain |
| 30 duplicates. |
| 31 |
| 32 The sancov tool is expected to be in the llvm compiler-rt third-party |
| 33 directory. It's not checked out by default and must be added as a custom deps: |
| 34 'v8/third_party/llvm/projects/compiler-rt': |
| 35 'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git' |
| 36 """ |
| 37 |
| 38 import argparse |
| 39 import json |
| 40 import logging |
| 41 import os |
| 42 import re |
| 43 import subprocess |
| 44 import sys |
| 45 |
| 46 from multiprocessing import Pool, cpu_count |
| 47 |
| 48 |
# Show progress of the potentially long-running actions below.
logging.basicConfig(level=logging.INFO)

# Files to exclude from coverage. Dropping their data early adds more speed.
# The contained cc files are already excluded from instrumentation, but inlined
# data is referenced through v8's object files.
EXCLUSIONS = [
  'buildtools',
  'src/third_party',
  'third_party',
  'test',
  'testing',
]

# Executables found in the build output for which no coverage is generated.
# Exclude them from the coverage data file.
EXE_BLACKLIST = [
  'generate-bytecode-expectations',
  'hello-world',
  'mksnapshot',
  'parser-shell',
  'process',
  'shell',
]

# V8 checkout directory: three dirname calls go from this file
# (tools/sanitizers/<script>) up to the checkout root.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))

# Executable location. TODO(machenbach): Only release is supported for now.
BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')

# Path prefix added by the llvm symbolizer including trailing slash.
# Joining with '' as the last component guarantees the trailing separator.
OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')

# The sancov tool location.
SANCOV_TOOL = os.path.join(
    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py')

# Simple script to sanitize the PCs from objdump.
SANITIZE_PCS = os.path.join(BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')

# The llvm symbolizer location.
SYMBOLIZER = os.path.join(
    BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
    'llvm-symbolizer')

# Number of cpus, used to size the multiprocessing pools.
CPUS = cpu_count()
| 98 |
# Regexp to find sancov files as output by sancov_merger.py. Also grabs the
# executable name in group 1. Both dots are escaped; previously the second
# dot was a wildcard and would have matched e.g. 'fooXresultYsancov'.
SANCOV_FILE_RE = re.compile(r'^(.*)\.result\.sancov$')
| 102 |
| 103 |
def executables():
  """Iterates over executable files in the build directory."""
  for name in os.listdir(BUILD_DIR):
    candidate = os.path.join(BUILD_DIR, name)
    # Only regular files with the executable bit set count; blacklisted
    # executables produce no coverage and are skipped.
    is_executable = os.path.isfile(candidate) and os.access(candidate, os.X_OK)
    if is_executable and name not in EXE_BLACKLIST:
      yield candidate
| 112 |
| 113 |
def process_symbolizer_output(output):
  """Post-process llvm symbolizer output.

  Excludes files outside the v8 checkout or given in exclusion list above
  from further processing. Drops the character index in each line.

  Returns: A mapping of file names to lists of line numbers. The file names
           have relative paths to the v8 base directory. The lists of line
           numbers don't contain duplicate lines and are sorted.
  """
  prefix_len = len(OUTPUT_PATH_PREFIX)

  # Map file names to sets of instrumented line numbers. Only lines below
  # the v8 checkout are considered; the redundant path prefix is dropped to
  # save space (generated files in the build dir and absolute paths to c++
  # library headers fall out here).
  file_map = {}
  for line in output.strip().splitlines():
    if not line.startswith(OUTPUT_PATH_PREFIX):
      continue
    # Each line has the form <file name>:<line number>:<character number>.
    # Only the line number matters.
    file_name, number, _ = line[prefix_len:].split(':')
    file_map.setdefault(file_name, set()).add(int(number))

  def keep(file_name):
    # Exclusion patterns are checked per file after the mapping: there are
    # few excluded files, so this is cheaper than checking every line.
    for prefix in EXCLUSIONS:
      if file_name.startswith(prefix):
        return False
    return True

  # Return in serializable form and filter.
  return {name: sorted(lines)
          for name, lines in file_map.items() if keep(name)}
| 151 |
| 152 |
def get_instrumented_lines(executable):
  """Return the instrumented lines of an executable.

  Called through multiprocessing pool.

  Returns: Post-processed llvm output as returned by process_symbolizer_output.
  """
  # The first two pipes are from llvm's tool sancov.py with 0x added to the hex
  # numbers. The results are piped into the llvm symbolizer, which outputs for
  # each PC: <file name with abs path>:<line number>:<character number>.
  # We don't call the sancov tool to get more speed.
  cmd = (
      'objdump -d %s | '
      'grep \'^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ '
      '<__sanitizer_cov\(_with_check\|\)\(@plt\|\)>\' | '
      'grep \'^\s\+[0-9a-f]\+\' -o | '
      '%s | '
      '%s --obj %s -functions=none' %
      (executable, SANITIZE_PCS, SYMBOLIZER, executable))
  proc = subprocess.Popen(
      cmd,
      stdin=subprocess.PIPE,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      cwd=BASE_DIR,
      shell=True,
  )
  output, _ = proc.communicate()
  assert proc.returncode == 0
  return process_symbolizer_output(output)
| 181 |
| 182 |
def merge_instrumented_line_results(exe_list, results):
  """Merge multiprocessing results for all instrumented lines.

  Args:
    exe_list: List of all executable names with absolute paths.
    results: List of results as returned by get_instrumented_lines.

  Returns: Dict to be used as json data as specified on the top of this page.
           The dictionary contains all instrumented lines of all files
           referenced by all executables.
  """
  # Merge all file maps into one mapping of file names to sets of
  # instrumented line numbers. An explicit loop and .items() replace the
  # python-2-only reduce builtin and dict.iteritems, keeping identical
  # behavior while also working under python 3.
  merged = {}
  for result in results:
    for file_name, lines in result.items():
      merged.setdefault(file_name, set()).update(lines)

  # Return data as file->lines mapping. The lines are saved as lists
  # with (line number, test bits (as int)). The test bits are initialized with
  # 0, meaning instrumented, but no coverage.
  # The order of the test bits is given with key 'tests'. For now, these are
  # the executable names. We use a _list_ with two items instead of a tuple to
  # ease merging by allowing mutation of the second item. List comprehensions
  # (not map) guarantee real lists, which json serialization requires.
  return {
    'version': 1,
    'tests': sorted(os.path.basename(exe) for exe in exe_list),
    'files': {f: [[line, 0] for line in sorted(lines)]
              for f, lines in merged.items()},
  }
| 211 |
| 212 |
def write_instrumented(options):
  """Implements the 'all' action of this tool."""
  exe_list = list(executables())
  logging.info('Reading instrumented lines from %d executables.',
               len(exe_list))

  # Extract instrumented lines from all executables in parallel.
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_instrumented_lines, exe_list)
  finally:
    pool.close()

  # Merge multiprocessing results and prepare output data.
  coverage_data = merge_instrumented_line_results(exe_list, results)

  logging.info('Read data from %d executables, which covers %d files.',
               len(coverage_data['tests']), len(coverage_data['files']))
  logging.info('Writing results to %s', options.json_output)

  # Write json output.
  with open(options.json_output, 'w') as out_file:
    json.dump(coverage_data, out_file, sort_keys=True)
| 234 |
| 235 |
def get_covered_lines(args):
  """Return the covered lines of an executable.

  Called through multiprocessing pool. The args are expected to unpack to:
    cov_dir: Folder with sancov files merged by sancov_merger.py.
    executable: The executable that was called to produce the given coverage
                data.
    sancov_file: The merged sancov file with coverage data.

  Returns: A tuple of post-processed llvm output as returned by
           process_symbolizer_output and the executable name.
  """
  cov_dir, executable, sancov_file = args

  # Let the sancov tool print the covered PCs and pipe them through the llvm
  # symbolizer.
  cmd = (
      '%s print %s 2> /dev/null | '
      '%s --obj %s -functions=none' %
      (SANCOV_TOOL,
       os.path.join(cov_dir, sancov_file),
       SYMBOLIZER,
       os.path.join(BUILD_DIR, executable)))
  proc = subprocess.Popen(
      cmd,
      stdin=subprocess.PIPE,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      cwd=BASE_DIR,
      shell=True,
  )
  output, _ = proc.communicate()
  assert proc.returncode == 0
  return process_symbolizer_output(output), executable
| 268 |
| 269 |
def merge_covered_line_results(data, results):
  """Merge multiprocessing results for covered lines.

  The data is mutated, the results are merged into it in place.

  Args:
    data: Existing coverage data from json file containing all instrumented
          lines.
    results: List of results as returned by get_covered_lines.
  """

  # List of executables and mapping to the test bit mask. The number of
  # tests is restricted to 52, to allow javascript JSON parsing of
  # the bitsets encoded as numbers. JS max safe int is (1 << 53) - 1.
  exe_list = data['tests']
  assert len(exe_list) <= 52, 'Max 52 different tests are supported.'
  test_bit_masks = {exe: 1 << i for i, exe in enumerate(exe_list)}

  def merge_lines(old_lines, new_lines, mask, file_name):
    """Merge the coverage data of a list of lines.

    Args:
      old_lines: Lines as list of pairs with line number and test bit mask.
                 The new lines will be merged into the list in place.
      new_lines: List of new (covered) lines (sorted).
      mask: The bit to be set for covered lines. The bit index is the test
            index of the executable that covered the line.
      file_name: Name of the file the lines belong to. Only used to give
                 assertion failures more context.
    """
    i = 0
    # Iterate over old and new lines, both are sorted.
    for line in new_lines:
      while old_lines[i][0] < line:
        # Forward instrumented lines not present in this coverage data.
        i += 1
        assert i < len(old_lines), (
            'Covered line %d of %s not in input file.' % (line, file_name))
      assert old_lines[i][0] == line, (
          'Covered line %d of %s not in input file.' % (line, file_name))

      # Add coverage information to the line.
      old_lines[i][1] |= mask

  def merge_files(data, result):
    """Merge result into data.

    The data is mutated in place.

    Args:
      data: Merged coverage data from the previous merge step.
      result: New result to be merged in. The type is as returned by
              get_covered_lines.
    """
    file_map, executable = result
    files = data['files']
    # .items() instead of the python-2-only iteritems keeps this block
    # working under both python 2 and 3 with identical behavior.
    for file_name, lines in file_map.items():
      merge_lines(files[file_name], lines, test_bit_masks[executable],
                  file_name)
    return data

  # Explicit loop instead of the python-2-only reduce builtin; data is
  # mutated in place.
  for result in results:
    merge_files(data, result)
| 328 |
| 329 |
def merge(options):
  """Implements the 'merge' action of this tool."""

  # Check if folder with coverage output exists.
  assert (os.path.exists(options.coverage_dir) and
          os.path.isdir(options.coverage_dir))

  # Inputs for multiprocessing: tuples of coverage dir, executable name and
  # sancov file name.
  inputs = []
  for file_name in os.listdir(options.coverage_dir):
    match = SANCOV_FILE_RE.match(file_name)
    if match:
      inputs.append((options.coverage_dir, match.group(1), file_name))

  logging.info('Merging %d sancov files into %s',
               len(inputs), options.json_input)

  # Post-process covered lines in parallel.
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_covered_lines, inputs)
  finally:
    pool.close()

  # Load existing json data file for merging the results.
  with open(options.json_input, 'r') as in_file:
    data = json.load(in_file)

  # Merge multiprocessing results. Mutates data.
  merge_covered_line_results(data, results)

  logging.info('Merged data from %d executables, which covers %d files.',
               len(data['tests']), len(data['files']))
  logging.info('Writing results to %s', options.json_output)

  # Write merged results to file.
  with open(options.json_output, 'w') as out_file:
    json.dump(data, out_file, sort_keys=True)
| 369 |
| 370 |
def main():
  """Parse command-line options and dispatch to the selected action.

  Returns: Process exit code (0 on success, 1 on missing merge options).
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--coverage-dir',
                      help='Path to the sancov output files.')
  parser.add_argument('--json-input',
                      help='Path to an existing json file with coverage data.')
  parser.add_argument('--json-output', required=True,
                      help='Path to a file to write json output to.')
  parser.add_argument('action', choices=['all', 'merge'],
                      help='Action to perform.')

  options = parser.parse_args()
  action = options.action.lower()
  if action == 'all':
    write_instrumented(options)
  elif action == 'merge':
    # Parenthesized single-argument print behaves identically under
    # python 2 and 3; the bare print statements here previously made the
    # file a SyntaxError under python 3.
    if not options.coverage_dir:
      print('--coverage-dir is required')
      return 1
    if not options.json_input:
      print('--json-input is required')
      return 1
    merge(options)
  return 0
| 394 |
| 395 |
# Allow running this file as a standalone script; the exit code of main()
# is propagated to the shell.
if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |