OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # Copyright 2016 the V8 project authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Script to transform and merge sancov files into human readable json-format. |
| 7 |
| 8 The script supports two actions: |
| 9 all: Writes a json file with all instrumented lines of all executables. |
| 10 merge: Merges sancov files with coverage output into an existing json file. |
| 11 |
| 12 The json data is structured as follows: |
| 13 { |
| 14 "version": 1, |
| 15 "tests": ["executable1", "executable2", ...], |
| 16 "files": { |
| 17 "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...], |
| 18 "file2": [...], |
| 19 ... |
| 20 } |
| 21 } |
| 22 |
| 23 The executables are sorted and determine the test bit mask. Their index+1 is |
| 24 the bit, e.g. executable1 = 1, executable3 = 4, etc. Hence, a line covered by |
| 25 executable1 and executable3 will have bit_mask == 5 == 0b101. The number of |
| 26 tests is restricted to 52 in version 1, to allow javascript JSON parsing of |
| 27 the bitsets encoded as numbers. JS max safe int is (1 << 53) - 1. |
| 28 |
| 29 The line-number-bit_mask pairs are sorted by line number and don't contain |
| 30 duplicates. |
| 31 |
| 32 The sancov tool is expected to be in the llvm compiler-rt third-party |
| 33 directory. It's not checked out by default and must be added as a custom deps: |
| 34 'v8/third_party/llvm/projects/compiler-rt': |
| 35 'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git' |
| 36 """ |
| 37 |
| 38 import argparse |
| 39 import json |
| 40 import logging |
| 41 import os |
| 42 import re |
| 43 import subprocess |
| 44 import sys |
| 45 |
| 46 from multiprocessing import Pool, cpu_count |
| 47 |
| 48 |
# Show progress of the potentially long-running actions below.
logging.basicConfig(level=logging.INFO)

# Files to exclude from coverage. Dropping their data early adds more speed.
# The contained cc files are already excluded from instrumentation, but inlined
# data is referenced through v8's object files.
EXCLUSIONS = [
  'buildtools',
  'src/third_party',
  'third_party',
  'test',
  'testing',
]

# Executables found in the build output for which no coverage is generated.
# Exclude them from the coverage data file.
EXE_BLACKLIST = [
  'generate-bytecode-expectations',
  'hello-world',
  'mksnapshot',
  'parser-shell',
  'process',
  'shell',
]

# V8 checkout directory: three dirname calls go from this file
# (tools/sanitizers/<script>) up to the checkout root.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))

# Executable location. TODO(machenbach): Only release is supported for now.
BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')

# Path prefix added by the llvm symbolizer including trailing slash.
# Joining with '' as the last component guarantees the trailing separator.
OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')

# The sancov tool location.
SANCOV_TOOL = os.path.join(
    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py')

# Simple script to sanitize the PCs from objdump.
SANITIZE_PCS = os.path.join(BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')

# The llvm symbolizer location.
SYMBOLIZER = os.path.join(
    BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
    'llvm-symbolizer')

# Number of cpus, used to size the multiprocessing pools.
CPUS = cpu_count()
| 98 |
# Regexp to find sancov files as output by sancov_merger.py. Also grabs the
# executable name in group 1. Both dots are escaped; previously the second
# dot was a wildcard and would have matched e.g. 'fooXresultYsancov'.
SANCOV_FILE_RE = re.compile(r'^(.*)\.result\.sancov$')
| 102 |
| 103 |
def executables():
  """Iterates over executable files in the build directory."""
  for name in os.listdir(BUILD_DIR):
    candidate = os.path.join(BUILD_DIR, name)
    # Only regular files with the executable bit set count; blacklisted
    # executables produce no coverage and are skipped.
    is_executable = os.path.isfile(candidate) and os.access(candidate, os.X_OK)
    if is_executable and name not in EXE_BLACKLIST:
      yield candidate
| 112 |
| 113 |
def process_symbolizer_output(output):
  """Post-process llvm symbolizer output.

  Excludes files outside the v8 checkout or given in exclusion list above
  from further processing. Drops the character index in each line.

  Returns: A mapping of file names to lists of line numbers. The file names
           have relative paths to the v8 base directory. The lists of line
           numbers don't contain duplicate lines and are sorted.
  """
  prefix_len = len(OUTPUT_PATH_PREFIX)

  # Map file names to sets of instrumented line numbers. Only lines below
  # the v8 checkout are considered; the redundant path prefix is dropped to
  # save space (generated files in the build dir and absolute paths to c++
  # library headers fall out here).
  file_map = {}
  for line in output.strip().splitlines():
    if not line.startswith(OUTPUT_PATH_PREFIX):
      continue
    # Each line has the form <file name>:<line number>:<character number>.
    # Only the line number matters.
    file_name, number, _ = line[prefix_len:].split(':')
    file_map.setdefault(file_name, set()).add(int(number))

  def keep(file_name):
    # Exclusion patterns are checked per file after the mapping: there are
    # few excluded files, so this is cheaper than checking every line.
    for prefix in EXCLUSIONS:
      if file_name.startswith(prefix):
        return False
    return True

  # Return in serializable form and filter.
  return {name: sorted(lines)
          for name, lines in file_map.items() if keep(name)}
| 151 |
| 152 |
def get_instrumented_lines(executable):
  """Return the instrumented lines of an executable.

  Called through multiprocessing pool.

  Returns: Post-processed llvm output as returned by process_symbolizer_output.
  """
  # The first two pipes are from llvm's tool sancov.py with 0x added to the hex
  # numbers. The results are piped into the llvm symbolizer, which outputs for
  # each PC: <file name with abs path>:<line number>:<character number>.
  # We don't call the sancov tool to get more speed.
  cmd = (
      'objdump -d %s | '
      'grep \'^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ '
      '<__sanitizer_cov\(_with_check\|\)\(@plt\|\)>\' | '
      'grep \'^\s\+[0-9a-f]\+\' -o | '
      '%s | '
      '%s --obj %s -functions=none' %
      (executable, SANITIZE_PCS, SYMBOLIZER, executable))
  proc = subprocess.Popen(
      cmd,
      stdin=subprocess.PIPE,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      cwd=BASE_DIR,
      shell=True,
  )
  output, _ = proc.communicate()
  assert proc.returncode == 0
  return process_symbolizer_output(output)
| 181 |
| 182 |
def merge_instrumented_line_results(exe_list, results):
  """Merge multiprocessing results for all instrumented lines.

  Args:
    exe_list: List of all executable names with absolute paths.
    results: List of results as returned by get_instrumented_lines.

  Returns: Dict to be used as json data as specified on the top of this page.
           The dictionary contains all instrumented lines of all files
           referenced by all executables.
  """
  # Merge all file maps into one mapping of file names to sets of
  # instrumented line numbers. An explicit loop and .items() replace the
  # python-2-only reduce builtin and dict.iteritems, keeping identical
  # behavior while also working under python 3.
  merged = {}
  for result in results:
    for file_name, lines in result.items():
      merged.setdefault(file_name, set()).update(lines)

  # Return data as file->lines mapping. The lines are saved as lists
  # with (line number, test bits (as int)). The test bits are initialized with
  # 0, meaning instrumented, but no coverage.
  # The order of the test bits is given with key 'tests'. For now, these are
  # the executable names. We use a _list_ with two items instead of a tuple to
  # ease merging by allowing mutation of the second item. List comprehensions
  # (not map) guarantee real lists, which json serialization requires.
  return {
    'version': 1,
    'tests': sorted(os.path.basename(exe) for exe in exe_list),
    'files': {f: [[line, 0] for line in sorted(lines)]
              for f, lines in merged.items()},
  }
| 211 |
| 212 |
def write_instrumented(options):
  """Implements the 'all' action of this tool."""
  exe_list = list(executables())
  logging.info('Reading instrumented lines from %d executables.',
               len(exe_list))

  # Extract instrumented lines from all executables in parallel.
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_instrumented_lines, exe_list)
  finally:
    pool.close()

  # Merge multiprocessing results and prepare output data.
  coverage_data = merge_instrumented_line_results(exe_list, results)

  logging.info('Read data from %d executables, which covers %d files.',
               len(coverage_data['tests']), len(coverage_data['files']))
  logging.info('Writing results to %s', options.json_output)

  # Write json output.
  with open(options.json_output, 'w') as out_file:
    json.dump(coverage_data, out_file, sort_keys=True)
| 234 |
| 235 |
def get_covered_lines(args):
  """Return the covered lines of an executable.

  Called through multiprocessing pool. The args are expected to unpack to:
    cov_dir: Folder with sancov files merged by sancov_merger.py.
    executable: The executable that was called to produce the given coverage
                data.
    sancov_file: The merged sancov file with coverage data.

  Returns: A tuple of post-processed llvm output as returned by
           process_symbolizer_output and the executable name.
  """
  cov_dir, executable, sancov_file = args

  # Let the sancov tool print the covered PCs and pipe them through the llvm
  # symbolizer.
  cmd = (
      '%s print %s 2> /dev/null | '
      '%s --obj %s -functions=none' %
      (SANCOV_TOOL,
       os.path.join(cov_dir, sancov_file),
       SYMBOLIZER,
       os.path.join(BUILD_DIR, executable)))
  proc = subprocess.Popen(
      cmd,
      stdin=subprocess.PIPE,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      cwd=BASE_DIR,
      shell=True,
  )
  output, _ = proc.communicate()
  assert proc.returncode == 0
  return process_symbolizer_output(output), executable
| 268 |
| 269 |
def merge_covered_line_results(data, results):
  """Merge multiprocessing results for covered lines.

  The data is mutated, the results are merged into it in place.

  Args:
    data: Existing coverage data from json file containing all instrumented
          lines.
    results: List of results as returned by get_covered_lines.
  """

  # List of executables and mapping to the test bit mask. The number of
  # tests is restricted to 52, to allow javascript JSON parsing of
  # the bitsets encoded as numbers. JS max safe int is (1 << 53) - 1.
  exe_list = data['tests']
  assert len(exe_list) <= 52, 'Max 52 different tests are supported.'
  test_bit_masks = {exe: 1 << i for i, exe in enumerate(exe_list)}

  def merge_lines(old_lines, new_lines, mask, file_name):
    """Merge the coverage data of a list of lines.

    Args:
      old_lines: Lines as list of pairs with line number and test bit mask.
                 The new lines will be merged into the list in place.
      new_lines: List of new (covered) lines (sorted).
      mask: The bit to be set for covered lines. The bit index is the test
            index of the executable that covered the line.
      file_name: Name of the file the lines belong to. Only used to give
                 assertion failures more context.
    """
    i = 0
    # Iterate over old and new lines, both are sorted.
    for line in new_lines:
      while old_lines[i][0] < line:
        # Forward instrumented lines not present in this coverage data.
        i += 1
        assert i < len(old_lines), (
            'Covered line %d of %s not in input file.' % (line, file_name))
      assert old_lines[i][0] == line, (
          'Covered line %d of %s not in input file.' % (line, file_name))

      # Add coverage information to the line.
      old_lines[i][1] |= mask

  def merge_files(data, result):
    """Merge result into data.

    The data is mutated in place.

    Args:
      data: Merged coverage data from the previous merge step.
      result: New result to be merged in. The type is as returned by
              get_covered_lines.
    """
    file_map, executable = result
    files = data['files']
    # .items() instead of the python-2-only iteritems keeps this block
    # working under both python 2 and 3 with identical behavior.
    for file_name, lines in file_map.items():
      merge_lines(files[file_name], lines, test_bit_masks[executable],
                  file_name)
    return data

  # Explicit loop instead of the python-2-only reduce builtin; data is
  # mutated in place.
  for result in results:
    merge_files(data, result)
| 328 |
| 329 |
def merge(options):
  """Implements the 'merge' action of this tool."""

  # Check if folder with coverage output exists.
  assert (os.path.exists(options.coverage_dir) and
          os.path.isdir(options.coverage_dir))

  # Inputs for multiprocessing: tuples of coverage dir, executable name and
  # sancov file name.
  inputs = []
  for file_name in os.listdir(options.coverage_dir):
    match = SANCOV_FILE_RE.match(file_name)
    if match:
      inputs.append((options.coverage_dir, match.group(1), file_name))

  logging.info('Merging %d sancov files into %s',
               len(inputs), options.json_input)

  # Post-process covered lines in parallel.
  pool = Pool(CPUS)
  try:
    results = pool.imap_unordered(get_covered_lines, inputs)
  finally:
    pool.close()

  # Load existing json data file for merging the results.
  with open(options.json_input, 'r') as in_file:
    data = json.load(in_file)

  # Merge multiprocessing results. Mutates data.
  merge_covered_line_results(data, results)

  logging.info('Merged data from %d executables, which covers %d files.',
               len(data['tests']), len(data['files']))
  logging.info('Writing results to %s', options.json_output)

  # Write merged results to file.
  with open(options.json_output, 'w') as out_file:
    json.dump(data, out_file, sort_keys=True)
| 369 |
| 370 |
def main():
  """Parse command-line options and dispatch to the selected action.

  Returns: Process exit code (0 on success, 1 on missing merge options).
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--coverage-dir',
                      help='Path to the sancov output files.')
  parser.add_argument('--json-input',
                      help='Path to an existing json file with coverage data.')
  parser.add_argument('--json-output', required=True,
                      help='Path to a file to write json output to.')
  parser.add_argument('action', choices=['all', 'merge'],
                      help='Action to perform.')

  options = parser.parse_args()
  action = options.action.lower()
  if action == 'all':
    write_instrumented(options)
  elif action == 'merge':
    # Parenthesized single-argument print behaves identically under
    # python 2 and 3; the bare print statements here previously made the
    # file a SyntaxError under python 3.
    if not options.coverage_dir:
      print('--coverage-dir is required')
      return 1
    if not options.json_input:
      print('--json-input is required')
      return 1
    merge(options)
  return 0
| 394 |
| 395 |
# Allow running this file as a standalone script; the exit code of main()
# is propagated to the shell.
if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |