Chromium Code Reviews
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2016 the V8 project authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """ | |
| 7 Script to transform and merge sancov files into human readable json-format. | |
| 8 | |
| 9 The script supports two actions: | |
| 10 all: Writes a json file with all instrumented lines of all executables. | |
| 11 merge: Merges sancov files with coverage output into an existing json file. | |
| 12 | |
| 13 The json data is structured as follows: | |
| 14 { | |
| 15 "version": 1, | |
| 16 "tests": ["executable1", "executable2", ...], | |
| 17 "files": { | |
| 18 "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...], | |
| 19 "file2": [...], | |
| 20 ... | |
| 21 } | |
| 22 } | |
| 23 | |
| 24 The executables are sorted and determine the test bit mask. Their index+1 is | |
| 25 the bit, e.g. executable1 = 1, executable3 = 4, etc. Hence, a line covered by | |
| 26 executable1 and executable3 will have bit_mask == 5. | |
| 27 | |
| 28 The line-number-bit_mask pairs are sorted by line number and don't contain | |
| 29 duplicates. | |
| 30 | |
| 31 The sancov tool is expected to be in the llvm compiler-rt third-party | |
| 32 directory. It's not checked out by default and must be added as a custom deps: | |
| 33 'v8/third_party/llvm/projects/compiler-rt': | |
| 34 'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git' | |
| 35 """ | |
| 36 | |
| 37 import argparse | |
| 38 import json | |
| 39 import os | |
| 40 import re | |
| 41 import subprocess | |
| 42 import sys | |
| 43 | |
| 44 from multiprocessing import Pool, cpu_count | |
| 45 | |
| 46 | |
| 47 # Files to exclude from coverage. Dropping their data early speeds up processing. | |
| 48 # The contained cc files are already excluded from instrumentation, but inlined | |
| 49 # data is referenced through v8's object files. | |
| 50 EXCLUSIONS = [ | |
| 51 'buildtools', | |
| 52 'src/third_party', | |
| 53 'third_party', | |
| 54 'test', | |
| 55 'testing', | |
| 56 ] | |
| 57 | |
| 58 # V8 checkout directory. | |
| 59 BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname( | |
| 60 os.path.abspath(__file__)))) | |
| 61 | |
| 62 # Executable location. TODO(machenbach): Only release is supported for now. | |
| 63 BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release') | |
| 64 | |
| 65 # Path prefix added by the llvm symbolizer including trailing slash. | |
| 66 OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '') | |
| 67 OUTPUT_PATH_OFFSET = len(OUTPUT_PATH_PREFIX) | |
| 68 | |
| 69 # The sancov tool location. | |
| 70 SANCOV_TOOL = os.path.join( | |
| 71 BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt', | |
| 72 'lib', 'sanitizer_common', 'scripts', 'sancov.py') | |
| 73 | |
| 74 # Simple script to sanitize the PCs from objdump. | |
| 75 SANITIZE_PCS = os.path.join(BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py') | |
| 76 | |
| 77 # The llvm symbolizer location. | |
| 78 SYMBOLIZER = os.path.join( | |
| 79 BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin', | |
| 80 'llvm-symbolizer') | |
| 81 | |
| 82 # Number of cpus. | |
| 83 CPUS = cpu_count() | |
| 84 | |
| 85 # Regexp to find sancov files as output by sancov_merger.py. Also grabs the | |
| 86 # executable name in group 1. | |
| 87 SANCOV_FILE_RE = re.compile(r'^(.*)\.result.sancov$') | |
| 88 | |
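Editor's note: an illustration only (the file name below is hypothetical) of what the regexp is meant to match and capture for files produced by sancov_merger.py:

```python
# Hypothetical merged sancov file name; group 1 is the executable name.
match = SANCOV_FILE_RE.match('d8.result.sancov')
assert match and match.group(1) == 'd8'
```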
| 89 | |
| 90 def executables(): | |
| 91 """Iterates over executable files in the build directory.""" | |
| 92 for f in os.listdir(BUILD_DIR): | |
| 93 file_path = os.path.join(BUILD_DIR, f) | |
| 94 if os.path.isfile(file_path) and os.access(file_path, os.X_OK): | |
| 95 yield file_path | |
| 96 | |
| 97 | |
| 98 def process_symbolizer_output(output): | |
| 99 """Post-process llvm symbolizer output. | |
| 100 | |
| 101 Excludes files outside the v8 checkout or given in the exclusion list above | |
| 102 from further processing. Drops the character index in each line. | |
| 103 | |
| 104 Returns: A mapping of file names to lists of lines. The file names have | |
tandrii(chromium), 2016/03/07 15:10:22:
  here and next line: s/lines/line numbers
Michael Achenbach, 2016/03/07 16:07:21:
  Done.
| 105 relative paths to the v8 base directory. The lists of lines | |
| 106 don't contain duplicate lines and are sorted. | |
| 107 """ | |
| 108 # Drop path offset when iterating lines. The path is redundant and takes | |
| 109 # too much space. Drop files outside that path, e.g. generated files in | |
| 110 # the build dir and absolute paths to c++ library headers. | |
| 111 def iter_lines(): | |
| 112 for line in output.strip().splitlines(): | |
| 113 if line.startswith(OUTPUT_PATH_PREFIX): | |
| 114 yield line[OUTPUT_PATH_OFFSET:] | |
tandrii(chromium), 2016/03/07 15:10:22:
  nit: i'd use s/OUTPUT_PATH_OFFSET/len(OUTPUT_PATH_PREFIX)/
Michael Achenbach, 2016/03/07 16:07:21:
  Done.
| 115 | |
| 116 # Map file names to sets of instrumented line numbers. | |
| 117 file_map = {} | |
| 118 for line in iter_lines(): | |
| 119 # Drop the character number; we only care about line numbers. Each line has the | |
| 120 # form: <file name>:<line number>:<character number>. | |
| 121 file_name, number, _ = line.split(':') | |
| 122 file_map.setdefault(file_name, set([])).add(int(number)) | |
| 123 | |
| 124 # Remove exclusion patterns from file map. It's cheaper to do it after the | |
| 125 # mapping, as there are few excluded files and we don't want to do this | |
| 126 # check for numerous lines in ordinary files. | |
| 127 def keep(file_name): | |
| 128 for e in EXCLUSIONS: | |
| 129 if file_name.startswith(e): | |
| 130 return False | |
| 131 return True | |
| 132 | |
| 133 # Return in serializable form and filter. | |
| 134 return {k:sorted(list(file_map[k])) for k in file_map if keep(k)} | |
tandrii(chromium), 2016/03/07 15:10:22:
  nit: space after :
  also, omit "list", sorted works
Michael Achenbach, 2016/03/07 16:07:21:
  Done.
| 135 | |
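Editor's note: a small illustration of the post-processing above, using invented paths and line numbers in place of real symbolizer output:

```python
# Hypothetical symbolizer output; real output has one <file>:<line>:<col>
# entry per instrumented PC.
example_output = '\n'.join([
    OUTPUT_PATH_PREFIX + 'src/api.cc:42:12',
    OUTPUT_PATH_PREFIX + 'src/api.cc:42:30',
    OUTPUT_PATH_PREFIX + 'third_party/icu/foo.cc:7:1',
])
# process_symbolizer_output(example_output) would return
# {'src/api.cc': [42]}: the third_party path is excluded and the two
# entries for line 42 collapse into a single line number.
```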
| 136 | |
| 137 def get_instrumented_lines(executable): | |
| 138 """Return the instrumented lines of an executable. | |
| 139 | |
| 140 Called through a multiprocessing pool. | |
| 141 | |
| 142 Returns: Post-processed llvm output as returned by process_symbolizer_output. | |
| 143 """ | |
| 144 # The first two pipes are from llvm's tool sancov.py with 0x added to the hex | |
| 145 # numbers. The results are piped into the llvm symbolizer, which outputs for | |
| 146 # each PC: <file name with abs path>:<line number>:<character number>. | |
| 147 # We don't call the sancov tool to get more speed. | |
| 148 process = subprocess.Popen( | |
| 149 'objdump -d %s | ' | |
| 150 'grep \'^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ ' | |
| 151 '<__sanitizer_cov\(_with_check\|\)\(@plt\|\)>\' | ' | |
| 152 'grep \'^\s\+[0-9a-f]\+\' -o | ' | |
| 153 '%s | ' | |
| 154 '%s --obj %s -functions=none' % | |
| 155 (executable, SANITIZE_PCS, SYMBOLIZER, executable), | |
| 156 stdout=subprocess.PIPE, | |
| 157 stderr=subprocess.PIPE, | |
| 158 stdin=subprocess.PIPE, | |
| 159 cwd=BASE_DIR, | |
| 160 shell=True, | |
| 161 ) | |
| 162 output, _ = process.communicate() | |
| 163 return process_symbolizer_output(output) | |
| 164 | |
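Editor's note: to make the grep stages concrete, a rough Python equivalent applied to one invented objdump line (illustration only; the script itself uses the shell pipeline above):

```python
import re

# Approximates the two grep stages in the pipeline above.
CALL_RE = re.compile(
    r'^\s+([0-9a-f]+):.*\scall(q?)\s+[0-9a-f]+ '
    r'<__sanitizer_cov(_with_check)?(@plt)?>')

example_line = ('  41a3f0:\te8 4b 2c 00 00\tcallq  '
                '41d040 <__sanitizer_cov@plt>')
match = CALL_RE.match(example_line)
# The captured PC is prefixed with '0x' by sanitize_pcs.py and handed to
# the llvm symbolizer.
assert match and match.group(1) == '41a3f0'
```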
| 165 | |
| 166 def merge_instrumented_line_results(exe_list, results): | |
| 167 """Merge multiprocessing results for all instrumented lines. | |
| 168 | |
| 169 Args: | |
| 170 exe_list: List of all executable names with absolute paths. | |
| 171 results: List of results as returned by get_instrumented_lines. | |
| 172 | |
| 173 Returns: Dict to be used as json data as specified on the top of this page. | |
| 174 The dictionary contains all instrumented lines of all files | |
| 175 referenced by all executables. | |
| 176 """ | |
| 177 def merge_files(x, y): | |
| 178 for file_name, lines in y.iteritems(): | |
| 179 x.setdefault(file_name, set([])).update(lines) | |
| 180 return x | |
| 181 result = reduce(merge_files, results, {}) | |
| 182 | |
| 183 # Return data as file->lines mapping. The lines are saved as lists | |
| 184 # with (line number, test bits (as int)). The test bits are initialized with | |
| 185 # 0, meaning instrumented, but no coverage. | |
| 186 # The order of the test bits is given with key 'tests'. For now, these are | |
| 187 # the executable names. We use a _list_ with two items instead of a tuple to | |
| 188 # ease merging by allowing mutation of the second item. | |
| 189 return { | |
| 190 'version': 1, | |
| 191 'tests': sorted(map(os.path.basename, exe_list)), | |
| 192 'files': {k:map(lambda x: [x, 0], sorted(result[k])) for k in result}, | |
tandrii(chromium), 2016/03/07 15:10:22:
  suggestion: s/k/f and s/x/l (at least that'
Michael Achenbach, 2016/03/07 16:07:21:
  Done.
| 193 } | |
| 194 | |
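Editor's note: a tiny invented input/output pair for this merge step, to show how the results and the executable list combine:

```python
# Instrumented lines reported by two executables (invented data).
example_results = [
    {'src/api.cc': [10, 20]},                     # from executable 'a'
    {'src/api.cc': [20], 'src/objects.cc': [5]},  # from executable 'b'
]
# merge_instrumented_line_results(['/out/Release/a', '/out/Release/b'],
#                                 example_results) would return:
# {
#   'version': 1,
#   'tests': ['a', 'b'],
#   'files': {
#     'src/api.cc': [[10, 0], [20, 0]],
#     'src/objects.cc': [[5, 0]],
#   },
# }
```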
| 195 | |
| 196 def write_instrumented(options): | |
| 197 """Implements the 'all' action of this tool.""" | |
| 198 exe_list = list(executables()) | |
| 199 pool = Pool(CPUS) | |
| 200 try: | |
| 201 results = pool.imap_unordered(get_instrumented_lines, exe_list) | |
| 202 finally: | |
| 203 pool.close() | |
| 204 | |
| 205 # Merge multiprocessing results and prepare output data. | |
| 206 data = merge_instrumented_line_results(exe_list, results) | |
| 207 | |
| 208 # Write json output. | |
| 209 with open(options.json_output, 'w') as f: | |
| 210 json.dump(data, f, sort_keys=True) | |
| 211 | |
| 212 | |
| 213 def get_covered_lines(args): | |
| 214 """Return the covered lines of an executable. | |
| 215 | |
| 216 Called through a multiprocessing pool. The args are expected to unpack to: | |
| 217 cov_dir: Folder with sancov files merged by sancov_merger.py. | |
| 218 executable: The executable that was called to produce the given coverage | |
| 219 data. | |
| 220 sancov_file: The merged sancov file with coverage data. | |
| 221 | |
| 222 Returns: A tuple of post-processed llvm output as returned by | |
| 223 process_symbolizer_output and the executable name. | |
| 224 """ | |
| 225 cov_dir, executable, sancov_file = args | |
| 226 | |
| 227 # Let the sancov tool print the covered PCs and pipe them through the llvm | |
| 228 # symbolizer. | |
| 229 process = subprocess.Popen( | |
| 230 '%s print %s 2> /dev/null | ' | |
| 231 '%s --obj %s -functions=none' % | |
| 232 (SANCOV_TOOL, | |
| 233 os.path.join(cov_dir, sancov_file), | |
| 234 SYMBOLIZER, | |
| 235 os.path.join(BUILD_DIR, executable)), | |
| 236 stdout=subprocess.PIPE, | |
| 237 stderr=subprocess.PIPE, | |
| 238 stdin=subprocess.PIPE, | |
| 239 cwd=BASE_DIR, | |
| 240 shell=True, | |
| 241 ) | |
| 242 output, _ = process.communicate() | |
| 243 return process_symbolizer_output(output), executable | |
| 244 | |
| 245 | |
| 246 def merge_covered_line_results(data, results): | |
| 247 """Merge multiprocessing results for covered lines. | |
| 248 | |
| 249 The data is mutated, the results are merged into it in place. | |
| 250 | |
| 251 Args: | |
| 252 data: Existing coverage data from json file containing all instrumented | |
| 253 lines. | |
| 254 results: List of results as returned by get_covered_lines. | |
| 255 """ | |
| 256 | |
| 257 # List of executables and mapping to the test bit mask. | |
| 258 exe_list = data['tests'] | |
| 259 assert len(exe_list) <= 32, 'Max 32 different tests are supported.' | |
tandrii(chromium), 2016/03/07 15:10:22:
  why 32?
  ftr, python works just fine even with 67
Michael Achenbach, 2016/03/07 16:07:21:
  ok - but not sure if there's a limit on the JS side
| 260 test_bit_masks = {exe:1<<i for i, exe in enumerate(exe_list)} | |
| 261 | |
| 262 def merge_lines(old_lines, new_lines, mask): | |
| 263 """Merge the coverage data of a list of lines. | |
| 264 | |
| 265 Args: | |
| 266 old_lines: Lines as list of pairs with line number and test bit mask. | |
| 267 The new lines will be merged into the list in place. | |
| 268 new_lines: List of new (covered) lines (sorted). | |
| 269 mask: The bit to be set for covered lines. The bit index is the test | |
| 270 index of the executable that covered the line. | |
| 271 """ | |
| 272 i = 0 | |
tandrii(chromium), 2016/03/07 15:10:22:
  small suggestion: I personally avoid using index a
Michael Achenbach, 2016/03/07 16:07:21:
  ok - somewhat more readable, but also more expensive
| 273 # Iterate over old and new lines, both are sorted. | |
| 274 for l in new_lines: | |
| 275 while old_lines[i][0] < l: | |
| 276 # Forward instrumented lines not present in this coverage data. | |
| 277 i += 1 | |
| 278 # TODO: Add more context to the assert message. | |
| 279 assert i < len(old_lines), 'Covered line %d not in input file.' % l | |
| 280 assert old_lines[i][0] == l, 'Covered line %d not in input file.' % l | |
| 281 | |
| 282 # Add coverage information to the line. | |
| 283 old_lines[i][1] |= mask | |
| 284 | |
| 285 def merge_files(data, result): | |
| 286 """Merge result into data. | |
| 287 | |
| 288 The data is mutated in place. | |
| 289 | |
| 290 Args: | |
| 291 data: Merged coverage data from the previous reduce step. | |
| 292 result: New result to be merged in. The type is as returned by | |
| 293 get_covered_lines. | |
| 294 """ | |
| 295 file_map, executable = result | |
| 296 files = data['files'] | |
| 297 for file_name, lines in file_map.iteritems(): | |
| 298 merge_lines(files[file_name], lines, test_bit_masks[executable]) | |
| 299 return data | |
| 300 | |
| 301 reduce(merge_files, results, data) | |
| 302 | |
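Editor's note: a small invented example of how coverage bits are merged into existing data by the function above:

```python
# Instrumented data as written by the 'all' action (invented), plus a
# coverage result for executable 'b' only.
example_data = {
    'version': 1,
    'tests': ['a', 'b'],
    'files': {'src/api.cc': [[10, 0], [20, 0]]},
}
example_results = [({'src/api.cc': [20]}, 'b')]
# merge_covered_line_results(example_data, example_results) mutates
# example_data in place; 'b' has test bit 1 << 1, so line 20 becomes
# [20, 2] while line 10 stays [10, 0].
```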
| 303 | |
| 304 def merge(options): | |
| 305 """Implements the 'merge' action of this tool.""" | |
| 306 | |
| 307 # Check if folder with coverage output exists. | |
| 308 assert (os.path.exists(options.coverage_dir) and | |
| 309 os.path.isdir(options.coverage_dir)) | |
| 310 | |
| 311 # Inputs for multiprocessing. List of tuples of: | |
| 312 # Coverage dir, executable name, sancov file name. | |
| 313 inputs = [] | |
| 314 for f in os.listdir(options.coverage_dir): | |
| 315 match = SANCOV_FILE_RE.match(f) | |
| 316 if match: | |
| 317 inputs.append((options.coverage_dir, match.group(1), f)) | |
| 318 | |
| 319 # Post-process covered lines in parallel. | |
| 320 pool = Pool(CPUS) | |
| 321 try: | |
| 322 results = pool.imap_unordered(get_covered_lines, inputs) | |
| 323 finally: | |
| 324 pool.close() | |
| 325 | |
| 326 # Load existing json data file for merging the results. | |
| 327 with open(options.json_input, 'r') as f: | |
| 328 data = json.load(f) | |
| 329 | |
| 330 # Merge multiprocessing results. Mutates data. | |
| 331 merge_covered_line_results(data, results) | |
| 332 | |
| 333 # Write merged results to file. | |
| 334 with open(options.json_output, 'w') as f: | |
| 335 json.dump(data, f, sort_keys=True) | |
| 336 | |
| 337 | |
| 338 def main(): | |
| 339 parser = argparse.ArgumentParser() | |
| 340 parser.add_argument('--coverage-dir', | |
| 341 help='Path to the sancov output files.') | |
| 342 parser.add_argument('--json-input', | |
| 343 help='Path to an existing json file with coverage data.') | |
| 344 parser.add_argument('--json-output', required=True, | |
| 345 help='Path to a file to write json output to.') | |
| 346 parser.add_argument('action', | |
tandrii(chromium), 2016/03/07 15:10:22:
  add arg
  choices = ('all', 'merge')
  https://docs.
Michael Achenbach, 2016/03/07 16:07:21:
  Done.
| 347 help='Action to perform. One of all|merge.') | |
| 348 | |
| 349 options = parser.parse_args() | |
| 350 if options.action.lower() == 'all': | |
| 351 write_instrumented(options) | |
| 352 elif options.action.lower() == 'merge': | |
| 353 if not options.coverage_dir: | |
| 354 print '--coverage-dir is required' | |
| 355 return 1 | |
| 356 if not options.json_input: | |
| 357 print '--json-input is required' | |
| 358 return 1 | |
| 359 merge(options) | |
| 360 else: | |
| 361 print 'Unsupported action.' | |
| 362 return 1 | |
| 363 return 0 | |
| 364 | |
| 365 | |
| 366 if __name__ == '__main__': | |
| 367 sys.exit(main()) | |
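Editor's note: typical invocations might look as follows; the script's file name is an assumption here, and the flags are as defined in main() above:

```python
# Hypothetical command lines, shown as comments:
#   sancov_formatter.py all --json-output=instrumented.json
#   sancov_formatter.py merge \
#       --coverage-dir=/path/to/merged/sancov/files \
#       --json-input=instrumented.json \
#       --json-output=merged.json
```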