Index: tools/sanitizers/sancov_formatter.py
diff --git a/tools/sanitizers/sancov_formatter.py b/tools/sanitizers/sancov_formatter.py
new file mode 100755
index 0000000000000000000000000000000000000000..35827de82668694e84053bf3128ff7a7e7105c02
--- /dev/null
+++ b/tools/sanitizers/sancov_formatter.py
@@ -0,0 +1,397 @@
+#!/usr/bin/env python
+# Copyright 2016 the V8 project authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Script to transform and merge sancov files into human readable json-format. |
+ |
+The script supports two actions: |
+all: Writes a json file with all instrumented lines of all executables. |
+merge: Merges sancov files with coverage output into an existing json file. |
+ |
+The json data is structured as follows: |
+{ |
+ "version": 1, |
+ "tests": ["executable1", "executable2", ...], |
+ "files": { |
+ "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...], |
+ "file2": [...], |
+ ... |
+ } |
+} |
+ |
+The executables are sorted and determine the test bit mask. Their index+1 is |
+the bit, e.g. executable1 = 1, executable3 = 4, etc. Hence, a line covered by |
+executable1 and executable3 will have bit_mask == 5 == 0b101. The number of |
+tests is restricted to 52 in version 1, to allow javascript JSON parsing of |
+the bitsets encoded as numbers. JS max safe int is (1 << 53) - 1. |
+ |
+The line-number-bit_mask pairs are sorted by line number and don't contain |
+duplicates. |
+ |
+The sancov tool is expected to be in the llvm compiler-rt third-party |
+directory. It's not checked out by default and must be added as a custom deps: |
+'v8/third_party/llvm/projects/compiler-rt': |
+ 'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git' |
+""" |
+
+import argparse
+import json
+import logging
+import os
+import re
+import subprocess
+import sys
+
+from multiprocessing import Pool, cpu_count
+
+
+logging.basicConfig(level=logging.INFO)
+
+# Files to exclude from coverage. Dropping their data early adds more speed.
+# The contained cc files are already excluded from instrumentation, but inlined
+# data is referenced through v8's object files.
+EXCLUSIONS = [
+  'buildtools',
+  'src/third_party',
+  'third_party',
+  'test',
+  'testing',
+]
+
+# Executables found in the build output for which no coverage is generated.
+# Exclude them from the coverage data file.
+EXE_BLACKLIST = [
+  'generate-bytecode-expectations',
+  'hello-world',
+  'mksnapshot',
+  'parser-shell',
+  'process',
+  'shell',
+]
+
+# V8 checkout directory.
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
+    os.path.abspath(__file__))))
+
+# Executable location. TODO(machenbach): Only release is supported for now.
+BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')
+
+# Path prefix added by the llvm symbolizer including trailing slash.
+OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')
+
+# The sancov tool location.
+SANCOV_TOOL = os.path.join(
+    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
+    'lib', 'sanitizer_common', 'scripts', 'sancov.py')
+
+# Simple script to sanitize the PCs from objdump.
+SANITIZE_PCS = os.path.join(BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')
+
+# The llvm symbolizer location.
+SYMBOLIZER = os.path.join(
+    BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
+    'llvm-symbolizer')
+
+# Number of cpus.
+CPUS = cpu_count()
+
+# Regexp to find sancov files as output by sancov_merger.py. Also grabs the
+# executable name in group 1.
+SANCOV_FILE_RE = re.compile(r'^(.*)\.result\.sancov$')
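+# For example, a file named 'd8.result.sancov' would match, with group 1 ==
+# 'd8' (the executable name here is illustrative).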
+
+
+def executables():
+  """Iterates over executable files in the build directory."""
+  for f in os.listdir(BUILD_DIR):
+    file_path = os.path.join(BUILD_DIR, f)
+    if (os.path.isfile(file_path) and
+        os.access(file_path, os.X_OK) and
+        f not in EXE_BLACKLIST):
+      yield file_path
+
+
+def process_symbolizer_output(output):
+  """Post-process llvm symbolizer output.
+
+  Excludes files outside the v8 checkout or matching the exclusion list above
+  from further processing. Drops the character index in each line.
+
+  Returns: A mapping of file names to lists of line numbers. The file names
+           are relative to the v8 base directory. The lists of line numbers
+           don't contain duplicates and are sorted.
+  """
+  # Drop path prefix when iterating lines. The path is redundant and takes
+  # too much space. Drop files outside that path, e.g. generated files in
+  # the build dir and absolute paths to c++ library headers.
+  def iter_lines():
+    for line in output.strip().splitlines():
+      if line.startswith(OUTPUT_PATH_PREFIX):
+        yield line[len(OUTPUT_PATH_PREFIX):]
+
+  # Map file names to sets of instrumented line numbers.
+  file_map = {}
+  for line in iter_lines():
+    # Drop the character number; we only care about line numbers. Each line
+    # has the form: <file name>:<line number>:<character number>.
+    file_name, number, _ = line.split(':')
+    file_map.setdefault(file_name, set([])).add(int(number))
+
+  # Remove exclusion patterns from file map. It's cheaper to do it after the
+  # mapping, as there are few excluded files and we don't want to do this
+  # check for numerous lines in ordinary files.
+  def keep(file_name):
+    for e in EXCLUSIONS:
+      if file_name.startswith(e):
+        return False
+    return True
+
+  # Return in serializable form and filter.
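+  # For illustration (file name and line numbers are made up): a file_map of
+  # {'src/api.cc': {1240, 1234}, 'test/foo.cc': {7}} becomes
+  # {'src/api.cc': [1234, 1240]}; 'test/foo.cc' is dropped by the exclusion
+  # list and the line numbers are returned sorted.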
+  return {k: sorted(file_map[k]) for k in file_map if keep(k)}
+
+
+def get_instrumented_lines(executable):
+  """Return the instrumented lines of an executable.
+
+  Called through a multiprocessing pool.
+
+  Returns: Post-processed llvm output as returned by process_symbolizer_output.
+  """
+  # The first two pipe stages do what llvm's sancov.py tool does, with 0x
+  # added to the hex numbers. The results are piped into the llvm symbolizer,
+  # which outputs for each PC:
+  # <file name with abs path>:<line number>:<character number>.
+  # We don't call the sancov tool itself, for more speed.
+  process = subprocess.Popen(
+      'objdump -d %s | '
+      'grep \'^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ '
+      '<__sanitizer_cov\(_with_check\|\)\(@plt\|\)>\' | '
+      'grep \'^\s\+[0-9a-f]\+\' -o | '
+      '%s | '
+      '%s --obj %s -functions=none' %
+      (executable, SANITIZE_PCS, SYMBOLIZER, executable),
+      stdout=subprocess.PIPE,
+      stderr=subprocess.PIPE,
+      stdin=subprocess.PIPE,
+      cwd=BASE_DIR,
+      shell=True,
+  )
+  output, _ = process.communicate()
+  assert process.returncode == 0
+  return process_symbolizer_output(output)
+
+
+def merge_instrumented_line_results(exe_list, results):
+  """Merge multiprocessing results for all instrumented lines.
+
+  Args:
+    exe_list: List of all executable names with absolute paths.
+    results: List of results as returned by get_instrumented_lines.
+
+  Returns: Dict to be used as json data as specified at the top of this file.
+           The dictionary contains all instrumented lines of all files
+           referenced by all executables.
+  """
+  def merge_files(x, y):
+    for file_name, lines in y.iteritems():
+      x.setdefault(file_name, set([])).update(lines)
+    return x
+  result = reduce(merge_files, results, {})
+
+  # Return data as file->lines mapping. The lines are saved as lists
+  # with (line number, test bits (as int)). The test bits are initialized with
+  # 0, meaning instrumented, but no coverage.
+  # The order of the test bits is given with key 'tests'. For now, these are
+  # the executable names. We use a _list_ with two items instead of a tuple to
+  # ease merging by allowing mutation of the second item.
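+  # The returned structure then looks roughly like this (all names and
+  # numbers below are illustrative):
+  #   {'version': 1, 'tests': ['cctest', 'd8'],
+  #    'files': {'src/api.cc': [[1234, 0], [1240, 0]]}}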
+  return {
+    'version': 1,
+    'tests': sorted(map(os.path.basename, exe_list)),
+    'files': {f: map(lambda l: [l, 0], sorted(result[f])) for f in result},
+  }
+
+
+def write_instrumented(options):
+  """Implements the 'all' action of this tool."""
+  exe_list = list(executables())
+  logging.info('Reading instrumented lines from %d executables.' %
+               len(exe_list))
+  pool = Pool(CPUS)
+  try:
+    results = pool.imap_unordered(get_instrumented_lines, exe_list)
+  finally:
+    pool.close()
+
+  # Merge multiprocessing results and prepare output data.
+  data = merge_instrumented_line_results(exe_list, results)
+
+  logging.info('Read data from %d executables, which covers %d files.' %
+               (len(data['tests']), len(data['files'])))
+  logging.info('Writing results to %s' % options.json_output)
+
+  # Write json output.
+  with open(options.json_output, 'w') as f:
+    json.dump(data, f, sort_keys=True)
+
+
+def get_covered_lines(args):
+  """Return the covered lines of an executable.
+
+  Called through a multiprocessing pool. The args are expected to unpack to:
+    cov_dir: Folder with sancov files merged by sancov_merger.py.
+    executable: The executable that was called to produce the given coverage
+                data.
+    sancov_file: The merged sancov file with coverage data.
+
+  Returns: A tuple of post-processed llvm output as returned by
+           process_symbolizer_output and the executable name.
+  """
+  cov_dir, executable, sancov_file = args
+
+  # Let the sancov tool print the covered PCs and pipe them through the llvm
+  # symbolizer.
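+  # The resulting shell command looks roughly like this (paths and the
+  # executable name are illustrative):
+  #   .../sancov.py print /cov_dir/d8.result.sancov 2> /dev/null | \
+  #       .../llvm-symbolizer --obj out/Release/d8 -functions=none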
+  process = subprocess.Popen(
+      '%s print %s 2> /dev/null | '
+      '%s --obj %s -functions=none' %
+      (SANCOV_TOOL,
+       os.path.join(cov_dir, sancov_file),
+       SYMBOLIZER,
+       os.path.join(BUILD_DIR, executable)),
+      stdout=subprocess.PIPE,
+      stderr=subprocess.PIPE,
+      stdin=subprocess.PIPE,
+      cwd=BASE_DIR,
+      shell=True,
+  )
+  output, _ = process.communicate()
+  assert process.returncode == 0
+  return process_symbolizer_output(output), executable
+
+
+def merge_covered_line_results(data, results):
+  """Merge multiprocessing results for covered lines.
+
+  The data is mutated; the results are merged into it in place.
+
+  Args:
+    data: Existing coverage data from the json file containing all
+          instrumented lines.
+    results: List of results as returned by get_covered_lines.
+  """
+
+  # List of executables and mapping to the test bit mask. The number of
+  # tests is restricted to 52, to allow JavaScript JSON parsing of the bit
+  # masks encoded as numbers. JavaScript's max safe integer is 2^53 - 1.
+  exe_list = data['tests']
+  assert len(exe_list) <= 52, 'Max 52 different tests are supported.'
+  test_bit_masks = {exe: 1 << i for i, exe in enumerate(exe_list)}
+
+  def merge_lines(old_lines, new_lines, mask):
+    """Merge the coverage data of a list of lines.
+
+    Args:
+      old_lines: Lines as list of pairs with line number and test bit mask.
+                 The new lines will be merged into the list in place.
+      new_lines: List of new (covered) lines (sorted).
+      mask: The bit to be set for covered lines. The bit index is the test
+            index of the executable that covered the line.
+    """
+    i = 0
+    # Iterate over old and new lines, both are sorted.
+    for l in new_lines:
+      while old_lines[i][0] < l:
+        # Forward instrumented lines not present in this coverage data.
+        i += 1
+        # TODO: Add more context to the assert message.
+        assert i < len(old_lines), 'Covered line %d not in input file.' % l
+      assert old_lines[i][0] == l, 'Covered line %d not in input file.' % l
+
+      # Add coverage information to the line.
+      old_lines[i][1] |= mask
+
+  def merge_files(data, result):
+    """Merge result into data.
+
+    The data is mutated in place.
+
+    Args:
+      data: Merged coverage data from the previous reduce step.
+      result: New result to be merged in. The type is as returned by
+              get_covered_lines.
+    """
+    file_map, executable = result
+    files = data['files']
+    for file_name, lines in file_map.iteritems():
+      merge_lines(files[file_name], lines, test_bit_masks[executable])
+    return data
+
+  reduce(merge_files, results, data)
+
+
+def merge(options):
+  """Implements the 'merge' action of this tool."""
+
+  # Check if folder with coverage output exists.
+  assert (os.path.exists(options.coverage_dir) and
+          os.path.isdir(options.coverage_dir))
+
+  # Inputs for multiprocessing. List of tuples of:
+  # Coverage dir, executable name, sancov file name.
+  inputs = []
+  for f in os.listdir(options.coverage_dir):
+    match = SANCOV_FILE_RE.match(f)
+    if match:
+      inputs.append((options.coverage_dir, match.group(1), f))
+
+  logging.info('Merging %d sancov files into %s' %
+               (len(inputs), options.json_input))
+
+  # Post-process covered lines in parallel.
+  pool = Pool(CPUS)
+  try:
+    results = pool.imap_unordered(get_covered_lines, inputs)
+  finally:
+    pool.close()
+
+  # Load existing json data file for merging the results.
+  with open(options.json_input, 'r') as f:
+    data = json.load(f)
+
+  # Merge multiprocessing results. Mutates data.
+  merge_covered_line_results(data, results)
+
+  logging.info('Merged data from %d executables, which covers %d files.' %
+               (len(data['tests']), len(data['files'])))
+  logging.info('Writing results to %s' % options.json_output)
+
+  # Write merged results to file.
+  with open(options.json_output, 'w') as f:
+    json.dump(data, f, sort_keys=True)
+
+
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--coverage-dir',
+                      help='Path to the sancov output files.')
+  parser.add_argument('--json-input',
+                      help='Path to an existing json file with coverage data.')
+  parser.add_argument('--json-output', required=True,
+                      help='Path to a file to write json output to.')
+  parser.add_argument('action', choices=['all', 'merge'],
+                      help='Action to perform.')
+
+  options = parser.parse_args()
+  if options.action.lower() == 'all':
+    write_instrumented(options)
+  elif options.action.lower() == 'merge':
+    if not options.coverage_dir:
+      print '--coverage-dir is required'
+      return 1
+    if not options.json_input:
+      print '--json-input is required'
+      return 1
+    merge(options)
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())