Index: tools/parse_llvm_coverage.py |
diff --git a/tools/parse_llvm_coverage.py b/tools/parse_llvm_coverage.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..2eadddd3cb1f7615758df257d92b5955648462c2 |
--- /dev/null |
+++ b/tools/parse_llvm_coverage.py |
@@ -0,0 +1,201 @@ |
+#!/usr/bin/env python |
+# Copyright (c) 2015 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+ |
+"""Parse an LLVM coverage report to generate useable results.""" |
+ |
+ |
+import argparse |
+import json |
+import os |
+import re |
+import subprocess |
+import sys |
+ |
+ |
+def _fix_filename(filename): |
+ """Return a filename which we can use to identify the file. |
+ |
+ The file paths printed by llvm-cov take the form: |
+ |
+ /path/to/repo/out/dir/../../src/filename.cpp |
+ |
+ And then they're truncated to 22 characters with leading ellipses: |
+ |
+ ...../../src/filename.cpp |
+ |
+  This makes it difficult to determine whether the file actually belongs in |
+ the Skia repo. This function strips out the leading junk so that, if the file |
+ exists in the repo, the returned string matches the end of some relative path |
+ in the repo. This doesn't guarantee correctness, but it's about as close as |
+ we can get. |
+ """ |
+ return filename.split('..')[-1].lstrip('./') |
+ |
+ |
+def _file_in_repo(filename, all_files): |
+ """Return the name of the checked-in file matching the given filename. |
+ |
+ Use suffix matching to determine which checked-in files the given filename |
+ matches. If there are no matches or multiple matches, return None. |
+ """ |
+ new_file = _fix_filename(filename) |
+ matched = [] |
+ for f in all_files: |
+ if f.endswith(new_file): |
+ matched.append(f) |
+ if len(matched) == 1: |
+ return matched[0] |
+ elif len(matched) > 1: |
+ print >> sys.stderr, ('WARNING: multiple matches for %s; skipping:\n\t%s' |
+ % (new_file, '\n\t'.join(matched))) |
+ return None |
+ |
+ |
+def _get_per_file_per_line_coverage(report): |
+ """Return a dict whose keys are file names and values are coverage data. |
+ |
+  Values are lists of (lineno, coverage, code) tuples. |
+ """ |
+ all_files = subprocess.check_output(['git', 'ls-files']).splitlines() |
+ lines = report.splitlines() |
+ current_file = None |
+ file_lines = [] |
+ files = {} |
+ not_checked_in = '%' # Use this as the file name for not-checked-in files. |
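+  # The report is assumed to contain two kinds of lines: a header of the form |
+  # 'path/to/file.cpp:' starting a new per-file section, and entries of the |
+  # form '<count>|<lineno>|<source code>', where <count> is blank for lines |
+  # that don't count toward coverage; separator and header rows are skipped. |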
+ for line in lines: |
+ m = re.match('([a-zA-Z0-9\./_-]+):', line) |
+ if m: |
+ if current_file and current_file != not_checked_in: |
+ files[current_file] = file_lines |
+ match_filename = _file_in_repo(m.groups()[0], all_files) |
+ current_file = match_filename or not_checked_in |
+ file_lines = [] |
+ else: |
+ if current_file != not_checked_in: |
+ skip = re.match('^\s{2}-+$|^\s{2}\|.+$', line) |
+ if line and not skip: |
+ cov, linenum, code = line.split('|', 2) |
+ cov = cov.strip() |
+ if cov: |
+ cov = int(cov) |
+ else: |
+ cov = None # We don't care about coverage for this line. |
+ linenum = int(linenum.strip()) |
+ assert linenum == len(file_lines) + 1 |
+ file_lines.append((linenum, cov, code.decode('utf-8', 'replace'))) |
+ return files |
+ |
+ |
+def _testname(filename): |
+ """Transform the file name into an ingestible test name.""" |
+ return re.sub(r'[^a-zA-Z0-9]', '_', filename) |
+ |
+ |
+def _nanobench_json(results, properties, key): |
+ """Return the results in JSON format like that produced by nanobench.""" |
+ rv = {} |
+  # Copy over the properties first, then set the 'key' and 'results' keys, |
+  # so that they aren't clobbered if the user passes in a properties dict |
+  # which happens to contain those keys. |
+ rv.update(properties) |
+ rv['key'] = key |
+ rv['results'] = { |
+ _testname(f): { |
+ 'coverage': { |
+ 'percent': percent, |
+ 'options': { |
+ 'fullname': f, |
+ 'dir': os.path.dirname(f), |
+ }, |
+ }, |
+ } for percent, f in results |
+ } |
+ return rv |
+ |
+ |
+def _parse_key_value(kv_list): |
+ """Return a dict whose key/value pairs are derived from the given list. |
+ |
+ For example: |
+ |
+ ['k1', 'v1', 'k2', 'v2'] |
+ becomes: |
+ |
+ {'k1': 'v1', |
+ 'k2': 'v2'} |
+ """ |
+ if len(kv_list) % 2 != 0: |
+ raise Exception('Invalid key/value pairs: %s' % kv_list) |
+ |
+ rv = {} |
+ for i in xrange(len(kv_list) / 2): |
+ rv[kv_list[i*2]] = kv_list[i*2+1] |
+ return rv |
+ |
+ |
+def _get_per_file_summaries(line_by_line): |
+ """Summarize the full line-by-line coverage report by file.""" |
+ per_file = [] |
+ for filepath, lines in line_by_line.iteritems(): |
+ total_lines = 0 |
+ covered_lines = 0 |
+ for _, cov, _ in lines: |
+ if cov is not None: |
+ total_lines += 1 |
+ if cov > 0: |
+ covered_lines += 1 |
+ if total_lines > 0: |
+ per_file.append((float(covered_lines)/float(total_lines)*100.0, |
+ filepath)) |
+ return per_file |
+ |
+ |
+def main(): |
+ """Generate useful data from a coverage report.""" |
+ # Parse args. |
+ parser = argparse.ArgumentParser() |
+ parser.add_argument('--report', help='input file; an llvm coverage report.', |
+ required=True) |
+ parser.add_argument('--nanobench', help='output file for nanobench data.') |
+ parser.add_argument( |
+ '--key', metavar='key_or_value', nargs='+', |
+ help='key/value pairs identifying this bot.') |
+ parser.add_argument( |
+ '--properties', metavar='key_or_value', nargs='+', |
+ help='key/value pairs representing properties of this build.') |
+ parser.add_argument('--linebyline', |
+ help='output file for line-by-line JSON data.') |
+ args = parser.parse_args() |
+ |
+ if args.nanobench and not (args.key and args.properties): |
+ raise Exception('--key and --properties are required with --nanobench') |
+ |
+ with open(args.report) as f: |
+ report = f.read() |
+ |
+ line_by_line = _get_per_file_per_line_coverage(report) |
+ |
+ if args.linebyline: |
+ with open(args.linebyline, 'w') as f: |
+ json.dump(line_by_line, f) |
+ |
+ if args.nanobench: |
+ # Parse the key and properties for use in the nanobench JSON output. |
+ key = _parse_key_value(args.key) |
+ properties = _parse_key_value(args.properties) |
+ |
+ # Get per-file summaries. |
+ per_file = _get_per_file_summaries(line_by_line) |
+ |
+ # Write results. |
+ format_results = _nanobench_json(per_file, properties, key) |
+ with open(args.nanobench, 'w') as f: |
+ json.dump(format_results, f) |
+ |
+ |
+if __name__ == '__main__': |
+ main() |