tools/sanitizers/sancov_formatter.py - Issue 1737263003: [coverage] Enable sanitizer coverage.

Side by Side Diff: tools/sanitizers/sancov_formatter.py

Issue 1737263003: [coverage] Enable sanitizer coverage. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Review Andrii Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 #!/usr/bin/env python

	2 # Copyright 2016 the V8 project authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 """Script to transform and merge sancov files into human readable json-format.

	7

	8 The script supports two actions:

	9 all: Writes a json file with all instrumented lines of all executables.

	10 merge: Merges sancov files with coverage output into an existing json file.

	11

	12 The json data is structured as follows:

	13 {

	14 "version": 1,

	15 "tests": ["executable1", "executable2", ...],

	16 "files": {

	17 "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...],

	18 "file2": [...],

	19 ...

	20 }

	21 }

	22

	23 The executables are sorted and determine the test bit mask. The executable at

	24 index i owns the bit with value 1 << i, e.g. executable1 = 1, executable3 = 4,

	25 etc. Hence, a line covered by executable1 and executable3 has bit_mask == 5.

	26

	27 The line-number-bit_mask pairs are sorted by line number and don't contain

	28 duplicates.

	29

	30 The sancov tool is expected to be in the llvm compiler-rt third-party

	31 directory. It's not checked out by default and must be added as a custom deps:

	32 'v8/third_party/llvm/projects/compiler-rt':

	33 'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'

	34 """

	35

	36 import argparse

	37 import json

	38 import os

	39 import re

	40 import subprocess

	41 import sys

	42

	43 from multiprocessing import Pool, cpu_count

	44

	45

	46 # Files to exclude from coverage. Dropping their data early adds more speed.

	47 # The contained cc files are already excluded from instrumentation, but inlined

	48 # data is referenced through v8's object files.

	49 EXCLUSIONS = [

	50 'buildtools',

	51 'src/third_party',

	52 'third_party',

	53 'test',

	54 'testing',

	55 ]

	56

	57 # V8 checkout directory.

	58 BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(

	59 os.path.abspath(__file__))))

	60

	61 # Executable location. TODO(machenbach): Only release is supported for now.

	62 BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')

	63

	64 # Path prefix added by the llvm symbolizer including trailing slash.

	65 OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')

	66

	67 # The sancov tool location.

	68 SANCOV_TOOL = os.path.join(

	69 BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',

	70 'lib', 'sanitizer_common', 'scripts', 'sancov.py')

	71

	72 # Simple script to sanitize the PCs from objdump.

	73 SANITIZE_PCS = os.path.join(BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')

	74

	75 # The llvm symbolizer location.

	76 SYMBOLIZER = os.path.join(

	77 BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',

	78 'llvm-symbolizer')

	79

	80 # Number of cpus.

	81 CPUS = cpu_count()

	82

	83 # Regexp to find sancov files as output by sancov_merger.py. Also grabs the

	84 # executable name in group 1.

	85 SANCOV_FILE_RE = re.compile(r'^(.*)\.result.sancov$')

	86

	87

	88 def executables():

	89 """Iterates over executable files in the build directory."""

	90 for f in os.listdir(BUILD_DIR):

	91 file_path = os.path.join(BUILD_DIR, f)

	92 if os.path.isfile(file_path) and os.access(file_path, os.X_OK):

	93 yield file_path

	94

	95

	96 def process_symbolizer_output(output):

	97 """Post-process llvm symbolizer output.

	98

	99 Excludes files outside the v8 checkout or given in exclusion list above

	100 from further processing. Drops the character index in each line.

	101

	102 Returns: A mapping of file names to lists of line numbers. The file names

	103 have relative paths to the v8 base directory. The lists of line

	104 numbers don't contain duplicate lines and are sorted.

	105 """

	106 # Drop path prefix when iterating lines. The path is redundant and takes

	107 # too much space. Drop files outside that path, e.g. generated files in

	108 # the build dir and absolute paths to c++ library headers.

	109 def iter_lines():

	110 for line in output.strip().splitlines():

	111 if line.startswith(OUTPUT_PATH_PREFIX):

	112 yield line[len(OUTPUT_PATH_PREFIX):]

	113

	114 # Map file names to sets of instrumented line numbers.

	115 file_map = {}

	116 for line in iter_lines():

	117 # Drop character number, we only care for line numbers. Each line has the

	118 # form: <file name>:<line number>:<character number>.

	119 file_name, number, _ = line.split(':')

	120 file_map.setdefault(file_name, set([])).add(int(number))

	121

	122 # Remove exclusion patterns from file map. It's cheaper to do it after the

	123 # mapping, as there are few excluded files and we don't want to do this

	124 # check for numerous lines in ordinary files.

	125 def keep(file_name):

	126 for e in EXCLUSIONS:

	127 if file_name.startswith(e):

	128 return False

	129 return True

	130

	131 # Return in serializable form and filter.

	132 return {k: sorted(file_map[k]) for k in file_map if keep(k)}

	133

	134

	135 def get_instrumented_lines(executable):

	136 """Return the instrumented lines of an executable.

	137

	138 Called trough multiprocessing pool.

	139

	140 Returns: Post-processed llvm output as returned by process_symbolizer_output.

	141 """

	142 # The first two pipes are from llvm's tool sancov.py with 0x added to the hex

	143 # numbers. The results are piped into the llvm symbolizer, which outputs for

	144 # each PC: <file name with abs path>:<line number>:<character number>.

	145 # We don't call the sancov tool to get more speed.

	146 process = subprocess.Popen(

	147 'objdump -d %s \| '

	148 'grep \'^\s\+[0-9a-f]\+:.*\scall$q\\|$\s\+[0-9a-f]\+ '

	149 '<__sanitizer_cov$_with_check\\|$$@plt\\|$>\' \| '

	150 'grep \'^\s\+[0-9a-f]\+\' -o \| '

	151 '%s \| '

	152 '%s --obj %s -functions=none' %

	153 (executable, SANITIZE_PCS, SYMBOLIZER, executable),

	154 stdout=subprocess.PIPE,

	155 stderr=subprocess.PIPE,

	156 stdin=subprocess.PIPE,

	157 cwd=BASE_DIR,

	158 shell=True,

	159 )

	160 output, _ = process.communicate()

	161 return process_symbolizer_output(output)

	162

	163

	164 def merge_instrumented_line_results(exe_list, results):

	165 """Merge multiprocessing results for all instrumented lines.

	166

	167 Args:

	168 exe_list: List of all executable names with absolute paths.

	169 results: List of results as returned by get_instrumented_lines.

	170

	171 Returns: Dict to be used as json data as specified on the top of this page.

	172 The dictionary contains all instrumented lines of all files

	173 referenced by all executables.

	174 """

	175 def merge_files(x, y):

	176 for file_name, lines in y.iteritems():

	177 x.setdefault(file_name, set([])).update(lines)

	178 return x

	179 result = reduce(merge_files, results, {})

	180

	181 # Return data as file->lines mapping. The lines are saved as lists

	182 # with (line number, test bits (as int)). The test bits are initialized with

	183 # 0, meaning instrumented, but no coverage.

	184 # The order of the test bits is given with key 'tests'. For now, these are

	185 # the executable names. We use a _list_ with two items instead of a tuple to

	186 # ease merging by allowing mutation of the second item.

	187 return {

	188 'version': 1,

	189 'tests': sorted(map(os.path.basename, exe_list)),

	190 'files': {f: map(lambda l: [l, 0], sorted(result[f])) for f in result},

	191 }

	192

	193

	194 def write_instrumented(options):

	195 """Implements the 'all' action of this tool."""

	196 exe_list = list(executables())

	197 pool = Pool(CPUS)

	198 try:

	199 results = pool.imap_unordered(get_instrumented_lines, exe_list)

	200 finally:

	201 pool.close()

	202

	203 # Merge multiprocessing results and prepare output data.

	204 data = merge_instrumented_line_results(results)

	205

	206 # Write json output.

	207 with open(options.json_output, 'w') as f:

	208 json.dump(data, f, sort_keys=True)

	209

	210

	211 def get_covered_lines(args):

	212 """Return the covered lines of an executable.

	213

	214 Called trough multiprocessing pool. The args are expected to unpack to:

	215 cov_dir: Folder with sancov files merged by sancov_merger.py.

	216 executable: The executable that was called to produce the given coverage

	217 data.

	218 sancov_file: The merged sancov file with coverage data.

	219

	220 Returns: A tuple of post-processed llvm output as returned by

	221 process_symbolizer_output and the executable name.

	222 """

	223 cov_dir, executable, sancov_file = args

	224

	225 # Let the sancov tool print the covered PCs and pipe them through the llvm

	226 # symbolizer.

	227 process = subprocess.Popen(

	228 '%s print %s 2> /dev/null \| '

	229 '%s --obj %s -functions=none' %

	230 (SANCOV_TOOL,

	231 os.path.join(cov_dir, sancov_file),

	232 SYMBOLIZER,

	233 os.path.join(BUILD_DIR, executable)),

	234 stdout=subprocess.PIPE,

	235 stderr=subprocess.PIPE,

	236 stdin=subprocess.PIPE,

	237 cwd=BASE_DIR,

	238 shell=True,

	239 )

	240 output, _ = process.communicate()

	241 return process_symbolizer_output(output), executable

	242

	243

	244 def merge_covered_line_results(data, results):

	245 """Merge multiprocessing results for covered lines.

	246

	247 The data is mutated, the results are merged into it in place.

	248

	249 Args:

	250 data: Existing coverage data from json file containing all instrumented

	251 lines.

	252 results: List of results as returned by get_covered_lines.

	253 """

	254

	255 # List of executables and mapping to the test bit mask.

	256 exe_list = data['tests']

	257 assert len(exe_list) <= 32, 'Max 32 different tests are supported.'
	kjellander_chromium 2016/03/08 04:48:33 Maybe this limitation should be documented more vi Maybe this limitation should be documented more visibly? And why even have it? Many tests = slow seems like a fair consequence. Michael Achenbach 2016/03/08 10:05:46 I'll add a comment here and in the top-level doc. Show quoted text On 2016/03/08 04:48:33, kjellander (chromium) wrote: > Maybe this limitation should be documented more visibly? And why even have it? > Many tests = slow seems like a fair consequence. I'll add a comment here and in the top-level doc. I updated the number to 52 after reading the JS specs and trying around. The number of tests is restricted to 52, to allow javascript JSON parsing of the bitsets encoded as numbers. JS max safe int is (1 << 53) - 1, because JS represents bigger numbers as doubles. Beyond (1 << 53) - 1, precision is lost. In order to support more tests (or better test contexts) in a future version, we need a different encoding, e.g. hex strings or something.
	258 test_bit_masks = {exe:1<<i for i, exe in enumerate(exe_list)}

	259

	260 def merge_lines(old_lines, new_lines, mask):

	261 """Merge the coverage data of a list of lines.

	262

	263 Args:

	264 old_lines: Lines as list of pairs with line number and test bit mask.

	265 The new lines will be merged into the list in place.

	266 new_lines: List of new (covered) lines (sorted).

	267 mask: The bit to be set for covered lines. The bit index is the test

	268 index of the executable that covered the line.

	269 """

	270 i = 0

	271 # Iterate over old and new lines, both are sorted.

	272 for l in new_lines:

	273 while old_lines[i][0] < l:

	274 # Forward instrumented lines not present in this coverage data.

	275 i += 1

	276 # TODO: Add more context to the assert message.

	277 assert i < len(old_lines), 'Covered line %d not in input file.' % l

	278 assert old_lines[i][0] == l, 'Covered line %d not in input file.' % l

	279

	280 # Add coverage information to the line.

	281 old_lines[i][1] \|= mask

	282

	283 def merge_files(data, result):

	284 """Merge result into data.

	285

	286 The data is mutated in place.

	287

	288 Args:

	289 data: Merged coverage data from the previous reduce step.

	290 result: New result to be merged in. The type is as returned by

	291 get_covered_lines.

	292 """

	293 file_map, executable = result

	294 files = data['files']

	295 for file_name, lines in file_map.iteritems():

	296 merge_lines(files[file_name], lines, test_bit_masks[executable])

	297 return data

	298

	299 reduce(merge_files, results, data)

	300

	301

	302 def merge(options):

	303 """Implements the 'merge' action of this tool."""

	304

	305 # Check if folder with coverage output exists.

	306 assert (os.path.exists(options.coverage_dir) and

	307 os.path.isdir(options.coverage_dir))

	308

	309 # Inputs for multiprocessing. List of tuples of:

	310 # Coverage dir, executable name, sancov file name.

	311 inputs = []

	312 for f in os.listdir(options.coverage_dir):

	313 match = SANCOV_FILE_RE.match(f)

	314 if match:

	315 inputs.append((options.coverage_dir, match.group(1), f))

	316

	317 # Post-process covered lines in parallel.

	318 pool = Pool(CPUS)

	319 try:

	320 results = pool.imap_unordered(get_covered_lines, inputs)

	321 finally:

	322 pool.close()

	323

	324 # Load existing json data file for merging the results.

	325 with open(options.json_input, 'r') as f:

	326 data = json.load(f)

	327

	328 # Merge muliprocessing results. Mutates data.

	329 merge_covered_line_results(data, results)

	330

	331 # Write merged results to file.

	332 with open(options.json_output, 'w') as f:

	333 json.dump(data, f, sort_keys=True)

	334

	335

	336 def main():

	337 parser = argparse.ArgumentParser()

	338 parser.add_argument('--coverage-dir',

	339 help='Path to the sancov output files.')

	340 parser.add_argument('--json-input',

	341 help='Path to an existing json file with coverage data.')

	342 parser.add_argument('--json-output', required=True,

	343 help='Path to a file to write json output to.')

	344 parser.add_argument('action', choices=['all', 'merge'],

	345 help='Action to perform.')

	346

	347 options = parser.parse_args()

	348 if options.action.lower() == 'all':

	349 write_instrumented(options)

	350 elif options.action.lower() == 'merge':

	351 if not options.coverage_dir:

	352 print '--coverage-dir is required'

	353 return 1

	354 if not options.json_input:

	355 print '--json-input is required'

	356 return 1

	357 merge(options)

	358 return 0

	359

	360

	361 if __name__ == '__main__':

	362 sys.exit(main())

OLD	NEW

« build/coverage_wrapper.py ('K') | « tools/run-tests.py ('k') | tools/sanitizers/sancov_formatter_test.py » ('j') | tools/sanitizers/sancov_merger.py » ('J')