Chromium Code Reviews (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out

Unified Diff: tools/sanitizers/

Issue 1737263003: [coverage] Enable sanitizer coverage. (Closed) Base URL:
Patch Set: Documentation Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/sanitizers/
diff --git a/tools/sanitizers/ b/tools/sanitizers/
new file mode 100755
index 0000000000000000000000000000000000000000..0ceb4f711583bccd0b87a8dafa1a91cc43e9f291
--- /dev/null
+++ b/tools/sanitizers/
@@ -0,0 +1,366 @@
+#!/usr/bin/env python
+# Copyright 2016 the V8 project authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
tandrii(chromium) 2016/03/07 15:10:22 nit: afair, first line of doc shouldn't be empty,
Michael Achenbach 2016/03/07 16:07:20 Done.
+"""Script to transform and merge sancov files into human readable json-format.
+The script supports two actions:
+all: Writes a json file with all instrumented lines of all executables.
+merge: Merges sancov files with coverage output into an existing json file.
+The json data is structured as follows:
+{
+  "version": 1,
+  "tests": ["executable1", "executable2", ...],
+  "files": {
+    "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...],
+    "file2": [...],
+    ...
+  }
+}
+The executables are sorted and determine the test bit mask. Their index+1 is
+the bit, e.g. executable1 = 1, executable3 = 4, etc. Hence, a line covered by
+executable1 and executable3 will have bit_mask == 5.
+The line-number-bit_mask pairs are sorted by line number and don't contain
+duplicates.
+The sancov tool is expected to be in the llvm compiler-rt third-party
+directory. It's not checked out by default and must be added as a custom deps:
+'v8/third_party/llvm/projects/compiler-rt':
+    'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
+"""
+import argparse
+import json
+from multiprocessing import Pool, cpu_count
kjellander_chromium 2016/03/04 14:35:22 nit: move down the from-import: "Imports should be
Michael Achenbach 2016/03/04 15:00:25 Done.
+import os
+import re
+import subprocess
+import sys
# Files to exclude from coverage. Dropping their data early adds more speed.
# The contained cc files are already excluded from instrumentation, but inlined
# data is referenced through v8's object files.
# Each entry is a path prefix; process_symbolizer_output() drops every file
# whose checkout-relative name starts with one of them.
EXCLUSIONS = [
    'buildtools',
    'src/third_party',
    'third_party',
    'test',
    'testing',
]
# V8 checkout directory: the directory two levels above the one that contains
# this script.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))

# Executable location. TODO(machenbach): Only release is supported for now.
BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')

# Path prefix added by the llvm symbolizer including trailing slash. The
# trailing empty component makes os.path.join() append the separator.
OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')

# Number of leading characters to cut from each symbolizer output line so that
# only the checkout-relative path remains. Read by
# process_symbolizer_output().
OUTPUT_PATH_OFFSET = len(OUTPUT_PATH_PREFIX)
kjellander_chromium 2016/03/04 14:35:22 it seems more readable to append os.sep than joini
Michael Achenbach 2016/03/08 10:05:46 I leave it as it is now - I think joining os.sep i
# The sancov tool location.
# NOTE(review): the script file name was missing (the path ended in an empty
# component, i.e. a directory). 'sancov.py' is restored from the upstream
# compiler-rt layout -- confirm against the checkout.
SANCOV_TOOL = os.path.join(
    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py')

# Simple script to sanitize the PCs from objdump.
# NOTE(review): file name restored as 'sanitize_pcs.py' -- confirm it matches
# the helper that lives next to this script.
SANITIZE_PCS = os.path.join(
    BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')

# The llvm symbolizer location.
SYMBOLIZER = os.path.join(
    BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
    'llvm-symbolizer')

# Number of cpus; used as the worker count of the multiprocessing pools.
CPUS = cpu_count()

# Regexp to find merged sancov files in the coverage directory. Also grabs the
# executable name in group 1. Both dots of the suffix are escaped so that only
# a literal '.result.sancov' ending matches.
SANCOV_FILE_RE = re.compile(r'^(.*)\.result\.sancov$')
def executables():
    """Yield the path of every executable regular file in BUILD_DIR.

    Only direct children of the build directory are inspected. An entry is
    yielded when it is a regular file and the current user may execute it.
    """
    for entry in os.listdir(BUILD_DIR):
        candidate = os.path.join(BUILD_DIR, entry)
        if not os.path.isfile(candidate):
            continue
        if os.access(candidate, os.X_OK):
            yield candidate
def process_symbolizer_output(output):
    """Post-process llvm symbolizer output.

    Excludes files outside the v8 checkout or given in the EXCLUSIONS list
    from further processing. Drops the character index in each line.

    Args:
      output: Symbolizer output as one string. Each relevant line has the
          form <file name>:<line number>:<character number>.

    Returns: A mapping of file names to lists of lines. The file names have
        relative paths to the v8 base directory. The lists of lines
        don't contain duplicate lines and are sorted.

    Raises:
      ValueError: If a line below the path prefix does not end in
          ':<line number>:<character number>'.
    """
    # Computed locally so this function only depends on the prefix itself.
    prefix_length = len(OUTPUT_PATH_PREFIX)
    excluded_prefixes = tuple(EXCLUSIONS)

    # Map file names to sets of instrumented line numbers.
    file_map = {}
    for line in output.strip().splitlines():
        # Drop files outside the prefix, e.g. generated files in the build
        # dir and absolute paths to c++ library headers.
        if not line.startswith(OUTPUT_PATH_PREFIX):
            continue
        # Drop the path prefix (redundant and takes too much space) and the
        # character number. Splitting from the right keeps a file name that
        # itself contains a colon intact.
        file_name, number, _ = line[prefix_length:].rsplit(':', 2)
        file_map.setdefault(file_name, set()).add(int(number))

    # Filter excluded files after the mapping: there are few excluded files,
    # so checking once per file is cheaper than checking once per line.
    # Return in serializable form (sorted lists instead of sets).
    return {
        file_name: sorted(lines)
        for file_name, lines in file_map.items()
        if not file_name.startswith(excluded_prefixes)
    }
def get_instrumented_lines(executable):
    """Return the instrumented lines of an executable.

    Called through multiprocessing pool.

    Args:
      executable: Path of the executable to disassemble.

    Returns: Post-processed llvm output as returned by
        process_symbolizer_output.
    """
    # Shell pipeline: disassemble with objdump, keep the addresses of calls
    # to __sanitizer_cov (optionally _with_check / @plt), pass them through
    # the SANITIZE_PCS script and feed the result to the llvm symbolizer,
    # which outputs for each PC:
    # <file name with abs path>:<line number>:<character number>.
    # The sancov tool is not called here to get more speed.
    command = (
        "objdump -d %s | "
        r"grep '^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ "
        r"<__sanitizer_cov\(_with_check\|\)\(@plt\|\)>' | "
        r"grep '^\s\+[0-9a-f]\+' -o | "
        "%s | "
        "%s --obj %s -functions=none"
    ) % (executable, SANITIZE_PCS, SYMBOLIZER, executable)

    pipeline = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.PIPE,
        cwd=BASE_DIR,
        shell=True,
    )
    # Only stdout is used; stderr is collected and discarded.
    symbolized, _ = pipeline.communicate()
    return process_symbolizer_output(symbolized)
def merge_instrumented_line_results(exe_list, results):
    """Merge multiprocessing results for all instrumented lines.

    Args:
      exe_list: List of all executable names with absolute paths.
      results: Iterable of results as returned by get_instrumented_lines,
          i.e. mappings of file name to a collection of line numbers.

    Returns: Dict to be used as json data as specified on the top of this
        page. The dictionary contains all instrumented lines of all files
        referenced by all executables.
    """
    # Union the line numbers per file over all results.
    merged = {}
    for file_map in results:
        for file_name, lines in file_map.items():
            merged.setdefault(file_name, set()).update(lines)

    # Return data as file->lines mapping. The lines are saved as lists
    # with (line number, test bits (as int)). The test bits are initialized
    # with 0, meaning instrumented, but no coverage.
    # The order of the test bits is given with key 'tests'. For now, these
    # are the executable names. We use a _list_ with two items instead of a
    # tuple to ease merging by allowing mutation of the second item.
    # Real lists (not lazy map objects) are built so the result can be dumped
    # as json on any Python version.
    return {
        'version': 1,
        'tests': sorted(os.path.basename(exe) for exe in exe_list),
        'files': {
            file_name: [[line, 0] for line in sorted(lines)]
            for file_name, lines in merged.items()
        },
    }
def write_instrumented(options):
    """Implements the 'all' action of this tool.

    Collects the instrumented lines of every executable in the build
    directory in parallel and writes the merged data as json.

    Args:
      options: Parsed command-line options; only options.json_output (path of
          the file to write) is read.
    """
    exe_list = list(executables())
    pool = Pool(CPUS)
    try:
        # Lazy iterator; the results are pulled while merging below.
        results = pool.imap_unordered(get_instrumented_lines, exe_list)
    finally:
        # No further tasks are submitted; already queued ones still run.
        pool.close()

    # Merge multiprocessing results and prepare output data. The executable
    # list is passed as well, it determines the 'tests' entry of the data.
    data = merge_instrumented_line_results(exe_list, results)

    # Write json output.
    with open(options.json_output, 'w') as f:
        json.dump(data, f, sort_keys=True)
def get_covered_lines(args):
    """Return the covered lines of an executable.

    Called through multiprocessing pool. The args are expected to unpack to:
      cov_dir: Folder with the merged sancov files.
      executable: The executable that was called to produce the given
          coverage data. It is looked up in BUILD_DIR.
      sancov_file: The merged sancov file with coverage data.

    Returns: A tuple of post-processed llvm output as returned by
        process_symbolizer_output and the executable name.
    """
    cov_dir, executable, sancov_file = args

    # Let the sancov tool print the covered PCs and pipe them through the
    # llvm symbolizer. The four placeholders are, in order: sancov tool,
    # sancov file, symbolizer, executable.
    command = (
        '%s print %s 2> /dev/null | '
        '%s --obj %s -functions=none'
    ) % (
        SANCOV_TOOL,
        os.path.join(cov_dir, sancov_file),
        SYMBOLIZER,
        os.path.join(BUILD_DIR, executable),
    )
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.PIPE,
        cwd=BASE_DIR,
        shell=True,
    )
    # Only stdout is used; stderr is collected and discarded.
    output, _ = process.communicate()
    return process_symbolizer_output(output), executable
def merge_covered_line_results(data, results):
    """Merge multiprocessing results for covered lines.

    The data is mutated, the results are merged into it in place. Nothing is
    returned.

    Args:
      data: Existing coverage data from json file containing all instrumented
          lines. Reads data['tests'] (list of executable names) and
          data['files'] (file name -> sorted list of [line, bit_mask]).
      results: Iterable of results as returned by get_covered_lines, i.e.
          tuples of (file name -> sorted covered lines, executable name).

    Raises:
      AssertionError: If more than 32 tests are listed or a covered line is
          not among the instrumented lines of its file.
      KeyError: If a result names a file or an executable unknown to data.
    """
    # List of executables and mapping to the test bit mask.
    exe_list = data['tests']
    assert len(exe_list) <= 32, 'Max 32 different tests are supported.'
    test_bit_masks = {exe: 1 << i for i, exe in enumerate(exe_list)}

    def merge_lines(old_lines, new_lines, mask):
        """Merge the coverage data of a list of lines.

        Args:
          old_lines: Lines as list of pairs with line number and test bit
              mask. The new lines will be merged into the list in place.
          new_lines: List of new (covered) lines (sorted).
          mask: The bit to be set for covered lines. The bit index is the
              test index of the executable that covered the line.
        """
        index = 0
        total = len(old_lines)
        # Iterate over old and new lines, both are sorted.
        for line in new_lines:
            # Forward instrumented lines not present in this coverage data.
            # The bound is checked before indexing, so an empty or exhausted
            # list ends in the assertion below instead of an IndexError.
            while index < total and old_lines[index][0] < line:
                index += 1
            # TODO: Add more context to the assert message.
            assert index < total and old_lines[index][0] == line, (
                'Covered line %d not in input file.' % line)
            # Add coverage information to the line.
            old_lines[index][1] |= mask

    files = data['files']
    for file_map, executable in results:
        for file_name, lines in file_map.items():
            merge_lines(files[file_name], lines, test_bit_masks[executable])
def merge(options):
    """Implements the 'merge' action of this tool.

    Symbolizes every merged sancov file found in the coverage directory in
    parallel, merges the covered lines into the existing json data and writes
    the result.

    Args:
      options: Parsed command-line options. Reads options.coverage_dir,
          options.json_input and options.json_output.
    """
    # Check if folder with coverage output exists.
    assert (os.path.exists(options.coverage_dir) and
            os.path.isdir(options.coverage_dir))

    # Inputs for multiprocessing. List of tuples of:
    # Coverage dir, executable name, sancov file name.
    inputs = []
    for f in os.listdir(options.coverage_dir):
        match = SANCOV_FILE_RE.match(f)
        if match:
            # Group 1 of the regexp is the executable name.
            inputs.append((options.coverage_dir, match.group(1), f))

    # Post-process covered lines in parallel.
    pool = Pool(CPUS)
    try:
        # Lazy iterator; the results are pulled while merging below.
        results = pool.imap_unordered(get_covered_lines, inputs)
    finally:
        # No further tasks are submitted; already queued ones still run.
        pool.close()

    # Load existing json data file for merging the results.
    with open(options.json_input, 'r') as f:
        data = json.load(f)

    # Merge multiprocessing results. Mutates data.
    merge_covered_line_results(data, results)

    # Write merged results to file.
    with open(options.json_output, 'w') as f:
        json.dump(data, f, sort_keys=True)
def main():
    """Parse the command line and run the requested action.

    Returns: 0 after a successful 'all' or 'merge' run; 1 if the action is
        unknown or an option required by 'merge' is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--coverage-dir',
        help='Path to the sancov output files.')
    parser.add_argument(
        '--json-input',
        help='Path to an existing json file with coverage data.')
    parser.add_argument(
        '--json-output', required=True,
        help='Path to a file to write json output to.')
    parser.add_argument(
        'action',
        help='Action to perform. One of all|merge.')
    options = parser.parse_args()

    # The action name is matched case-insensitively.
    action = options.action.lower()

    if action == 'all':
        write_instrumented(options)
        return 0

    if action != 'merge':
        print('Unsupported action.')
        return 1

    # 'merge' needs both the coverage folder and the existing json data.
    if not options.coverage_dir:
        print('--coverage-dir is required')
        return 1
    if not options.json_input:
        print('--json-input is required')
        return 1
    merge(options)
    return 0


if __name__ == '__main__':
    sys.exit(main())

Powered by Google App Engine
This is Rietveld 408576698