Index: tools/parse_llvm_coverage.py |
diff --git a/tools/parse_llvm_coverage.py b/tools/parse_llvm_coverage.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..2eadddd3cb1f7615758df257d92b5955648462c2 |
--- /dev/null |
+++ b/tools/parse_llvm_coverage.py |
@@ -0,0 +1,201 @@ |
+#!/usr/bin/env python |
+# Copyright (c) 2015 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+ |
+"""Parse an LLVM coverage report to generate useable results.""" |
+ |
+ |
+import argparse |
+import json |
+import os |
+import re |
+import subprocess |
+import sys |
+ |
+ |
+def _fix_filename(filename): |
+ """Return a filename which we can use to identify the file. |
+ |
+ The file paths printed by llvm-cov take the form: |
+ |
+ /path/to/repo/out/dir/../../src/filename.cpp |
+ |
+ And then they're truncated to 22 characters with leading ellipses: |
+ |
+ ...../../src/filename.cpp |
+ |
+  This makes it difficult to determine whether the file actually belongs in |
+ the Skia repo. This function strips out the leading junk so that, if the file |
+ exists in the repo, the returned string matches the end of some relative path |
+ in the repo. This doesn't guarantee correctness, but it's about as close as |
+ we can get. |
+ """ |
+ return filename.split('..')[-1].lstrip('./') |
+ |
+ |
+def _file_in_repo(filename, all_files): |
+ """Return the name of the checked-in file matching the given filename. |
+ |
+ Use suffix matching to determine which checked-in files the given filename |
+ matches. If there are no matches or multiple matches, return None. |
+ """ |
+ new_file = _fix_filename(filename) |
+ matched = [] |
+ for f in all_files: |
+ if f.endswith(new_file): |
+ matched.append(f) |
+ if len(matched) == 1: |
+ return matched[0] |
+ elif len(matched) > 1: |
+ print >> sys.stderr, ('WARNING: multiple matches for %s; skipping:\n\t%s' |
+ % (new_file, '\n\t'.join(matched))) |
+ return None |
+ |
+ |
+def _get_per_file_per_line_coverage(report): |
+ """Return a dict whose keys are file names and values are coverage data. |
+ |
+  Values are lists of (lineno, coverage, code) tuples. |
+ """ |
+ all_files = subprocess.check_output(['git', 'ls-files']).splitlines() |
+ lines = report.splitlines() |
+ current_file = None |
+ file_lines = [] |
+ files = {} |
+ not_checked_in = '%' # Use this as the file name for not-checked-in files. |
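+  # The report is assumed to contain two kinds of lines: a header of the form |
+  # 'path/to/file.cpp:' starting a new per-file section, and entries of the |
+  # form '<count>|<lineno>|<source code>', where <count> is blank for lines |
+  # that don't count toward coverage; separator and header rows are skipped. |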
+ for line in lines: |
+ m = re.match('([a-zA-Z0-9\./_-]+):', line) |
+ if m: |
+ if current_file and current_file != not_checked_in: |
+ files[current_file] = file_lines |
+ match_filename = _file_in_repo(m.groups()[0], all_files) |
+ current_file = match_filename or not_checked_in |
+ file_lines = [] |
+ else: |
+ if current_file != not_checked_in: |
+ skip = re.match('^\s{2}-+$|^\s{2}\|.+$', line) |
+ if line and not skip: |
+ cov, linenum, code = line.split('|', 2) |
+ cov = cov.strip() |
+ if cov: |
+ cov = int(cov) |
+ else: |
+ cov = None # We don't care about coverage for this line. |
+ linenum = int(linenum.strip()) |
+ assert linenum == len(file_lines) + 1 |
+ file_lines.append((linenum, cov, code.decode('utf-8', 'replace'))) |
+ return files |
+ |
+ |
+def _testname(filename): |
+ """Transform the file name into an ingestible test name.""" |
+ return re.sub(r'[^a-zA-Z0-9]', '_', filename) |
+ |
+ |
+def _nanobench_json(results, properties, key): |
+ """Return the results in JSON format like that produced by nanobench.""" |
+ rv = {} |
+  # Copy over the properties first, then set the 'key' and 'results' keys, |
+  # so that they aren't clobbered if the user passes in a properties dict |
+  # which happens to contain those keys. |
+ rv.update(properties) |
+ rv['key'] = key |
+ rv['results'] = { |
+ _testname(f): { |
+ 'coverage': { |
+ 'percent': percent, |
+ 'options': { |
+ 'fullname': f, |
+ 'dir': os.path.dirname(f), |
+ }, |
+ }, |
+ } for percent, f in results |
+ } |
+ return rv |
+ |
+ |
+def _parse_key_value(kv_list): |
+ """Return a dict whose key/value pairs are derived from the given list. |
+ |
+ For example: |
+ |
+ ['k1', 'v1', 'k2', 'v2'] |
+ becomes: |
+ |
+ {'k1': 'v1', |
+ 'k2': 'v2'} |
+ """ |
+ if len(kv_list) % 2 != 0: |
+ raise Exception('Invalid key/value pairs: %s' % kv_list) |
+ |
+ rv = {} |
+ for i in xrange(len(kv_list) / 2): |
+ rv[kv_list[i*2]] = kv_list[i*2+1] |
+ return rv |
+ |
+ |
+def _get_per_file_summaries(line_by_line): |
+ """Summarize the full line-by-line coverage report by file.""" |
+ per_file = [] |
+ for filepath, lines in line_by_line.iteritems(): |
+ total_lines = 0 |
+ covered_lines = 0 |
+ for _, cov, _ in lines: |
+ if cov is not None: |
+ total_lines += 1 |
+ if cov > 0: |
+ covered_lines += 1 |
+ if total_lines > 0: |
+ per_file.append((float(covered_lines)/float(total_lines)*100.0, |
+ filepath)) |
+ return per_file |
+ |
+ |
+def main(): |
+ """Generate useful data from a coverage report.""" |
+ # Parse args. |
+ parser = argparse.ArgumentParser() |
+ parser.add_argument('--report', help='input file; an llvm coverage report.', |
+ required=True) |
+ parser.add_argument('--nanobench', help='output file for nanobench data.') |
+ parser.add_argument( |
+ '--key', metavar='key_or_value', nargs='+', |
+ help='key/value pairs identifying this bot.') |
+ parser.add_argument( |
+ '--properties', metavar='key_or_value', nargs='+', |
+ help='key/value pairs representing properties of this build.') |
+ parser.add_argument('--linebyline', |
+ help='output file for line-by-line JSON data.') |
+ args = parser.parse_args() |
+ |
+ if args.nanobench and not (args.key and args.properties): |
+ raise Exception('--key and --properties are required with --nanobench') |
+ |
+ with open(args.report) as f: |
+ report = f.read() |
+ |
+ line_by_line = _get_per_file_per_line_coverage(report) |
+ |
+ if args.linebyline: |
+ with open(args.linebyline, 'w') as f: |
+ json.dump(line_by_line, f) |
+ |
+ if args.nanobench: |
+ # Parse the key and properties for use in the nanobench JSON output. |
+ key = _parse_key_value(args.key) |
+ properties = _parse_key_value(args.properties) |
+ |
+ # Get per-file summaries. |
+ per_file = _get_per_file_summaries(line_by_line) |
+ |
+ # Write results. |
+ format_results = _nanobench_json(per_file, properties, key) |
+ with open(args.nanobench, 'w') as f: |
+ json.dump(format_results, f) |
+ |
+ |
+if __name__ == '__main__': |
+ main() |