build/android/coverage.py - Issue 1216033009: Updated script to capture useful coverage stats.

Unified Diff: build/android/coverage.py

Issue 1216033009: Updated script to capture useful coverage stats. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Small refactor. Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: build/android/coverage.py

diff --git a/build/android/coverage.py b/build/android/coverage.py

new file mode 100755

index 0000000000000000000000000000000000000000..e7766fc5cddb78473b51ade7509a6a8e42b15f00

--- /dev/null

+++ b/build/android/coverage.py

@@ -0,0 +1,491 @@

+#!/usr/bin/python

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""Generates incremental code coverage reports for Java code in Chromium.

+Usage:

+ build/android/coverage.py -v --out <output file path> --emma-dir

+ <EMMA file directory> --lines-for-coverage-file

+ <path to file containing lines for coverage>

+ Creates a JSON representation of overall and file coverage stats and saves

+ this information to the specified output file.

+"""

+import argparse

+import collections

+import json

+import logging

+import os

+import re

+import sys

+from xml.etree import ElementTree

+from pylib.utils import run_tests_helper

+# Coverage status values stored in LineCoverage.covered_status.
+# NOT_EXECUTABLE is the fallback used when a report row has no CSS class
+# listed in _EmmaHtmlParser._CSS_TO_STATUS.
+NOT_EXECUTABLE = -1
+# Row class 'z' in the EMMA HTML report.
+NOT_COVERED = 0
+# Row class 'c' in the EMMA HTML report.
+COVERED = 1
+# Row class 'p' in the EMMA HTML report; the covered fraction is kept in
+# LineCoverage.fractional_line_coverage.
+PARTIALLY_COVERED = 2
+# Coverage information about a single line of code.
+LineCoverage = collections.namedtuple(
+ 'LineCoverage',
+ ['lineno', 'source', 'covered_status', 'fractional_line_coverage'])

+class _EmmaHtmlParser(object):
+ """Encapsulates HTML file parsing operations.
+ This class contains all operations related to parsing HTML files that were
+ produced using the EMMA code coverage tool. It uses the standard library
+ xml.etree.ElementTree module for parsing.
+ NOTE(review): the example below shows lowercase tag and attribute names,
+ while the selectors and attribute lookups in this class use uppercase names
+ (BODY, TABLE, 'CLASS', 'TITLE', 'HREF') - confirm which case EMMA emits.
+ Example HTML:
+ Package links:
+ <a href="_files/1.html">org.chromium.chrome</a>
+ This is returned by the selector |_XPATH_SELECT_PACKAGE_ELEMENTS|.
+ Class links:
+ <a href="1e.html">DoActivity.java</a>
+ This is returned by the selector |_XPATH_SELECT_CLASS_ELEMENTS|.
+ Line coverage data:
+ <tr class="p">
+ <td class="l" title="78% line coverage (7 out of 9)">108</td>
+ <td title="78% line coverage (7 out of 9 instructions)">
+ if (index < 0 || index >= mSelectors.size()) index = 0;</td>
+ </tr>
+ <tr>
+ <td class="l">109</td>
+ <td> </td>
+ </tr>
+ <tr class="c">
+ <td class="l">110</td>
+ <td> if (mSelectors.get(index) != null) {</td>
+ </tr>
+ <tr class="z">
+ <td class="l">111</td>
+ <td> for (int i = 0; i < mSelectors.size(); i++) {</td>
+ </tr>
+ Each <tr> element is returned by the selector |_XPATH_SELECT_LOC|.
+ We can parse this to get:
+ 1. Line number
+ 2. Line of source code
+ 3. Coverage status (c, z, or p)
+ 4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)
+ """
+ # Selector to match all <a> elements within the rows that are in the table
+ # that displays all of the different packages.
+ _XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'
+ # Selector to match all <a> elements within the rows that are in the table
+ # that displays all of the different classes within a package.
+ _XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'
+ # Selector to match all <tr> elements within the table containing Java source
+ # code in an EMMA HTML file.
+ _XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'
+ # Children of an ElementTree element are accessed by list index. These
+ # constants are the indices of the relevant child elements of a <tr>.
+ # Child 1 carries the percentage covered for a line in its TITLE attribute.
+ _ELEMENT_PERCENT_COVERED = 1
+ # Child 1 contains the original line of source code.
+ _ELEMENT_CONTAINING_SOURCE_CODE = 1
+ # Child 0 contains the line number.
+ _ELEMENT_CONTAINING_LINENO = 0
+ # Maps CSS class names to corresponding coverage constants.
+ _CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}
+ # UTF-8 no break space.
+ _NO_BREAK_SPACE = '\xc2\xa0'

+  def __init__(self, emma_file_base_dir):
+    """Records the locations of the EMMA report files.
+
+    Args:
+      emma_file_base_dir: Path to the directory holding the EMMA report
+        files, i.e. the directory that contains index.html.
+    """
+    base_dir = emma_file_base_dir
+    self._base_dir = base_dir
+    # index.html sits directly in the base directory; the remaining report
+    # pages are looked up under its '_files' subdirectory.
+    self._index_path = os.path.join(base_dir, 'index.html')
+    self._emma_files_path = os.path.join(base_dir, '_files')

+  def GetLineCoverage(self, emma_file_path):
+    """Returns a list of LineCoverage objects for the given EMMA HTML file.
+
+    Args:
+      emma_file_path: String representing the path to the EMMA HTML file.
+
+    Returns:
+      A list of LineCoverage objects, one for each <tr> element matched by
+      |_XPATH_SELECT_LOC|.
+    """
+    line_tr_elements = self._FindElements(
+        emma_file_path, self._XPATH_SELECT_LOC)
+    line_coverage = []
+    for tr in line_tr_elements:
+      # Rows whose CSS class is not c/p/z are treated as not executable.
+      coverage_status = self._CSS_TO_STATUS.get(tr.get('CLASS'), NOT_EXECUTABLE)
+
+      # Only partially covered rows carry a percentage; everything else keeps
+      # the default fraction of 1.0.
+      fractional_coverage = 1.0
+      if coverage_status == PARTIALLY_COVERED:
+        title_attribute = tr[self._ELEMENT_PERCENT_COVERED].get('TITLE')
+        # A missing TITLE would otherwise raise AttributeError on split().
+        if title_attribute:
+          # Parse string that contains percent covered: "83% line coverage ...".
+          percent_covered = title_attribute.split('%')[0]
+          fractional_coverage = int(percent_covered) / 100.0
+
+      lineno_element = tr[self._ELEMENT_CONTAINING_LINENO]
+      # Handles oddly formatted HTML (where there is an extra <a> tag).
+      lineno = int(lineno_element.text or
+                   lineno_element[self._ELEMENT_CONTAINING_LINENO].text)
+
+      # An empty source cell has text None; treat it as an empty line instead
+      # of crashing on None.encode().
+      raw_source = tr[self._ELEMENT_CONTAINING_SOURCE_CODE].text or ''
+      utf8_source = raw_source.encode('UTF-8')
+      source = utf8_source.replace(self._NO_BREAK_SPACE, ' ')
+
+      line = LineCoverage(lineno, source, coverage_status, fractional_coverage)
+      line_coverage.append(line)
+    return line_coverage

+  def GetPackageNameToEmmaFileDict(self):
+    """Returns a dict mapping Java packages to EMMA HTML coverage files.
+
+    Parses the EMMA index.html file to get a list of packages, then parses each
+    package HTML file to get a list of classes for that package, and creates
+    a dict with this info.
+
+    Returns:
+      A dict mapping string representation of Java packages (with class
+      names appended) to the corresponding file paths of EMMA HTML files.
+    """
+    # These <a> elements contain each package name and the path of the file
+    # where all classes within said package are listed.
+    package_link_elements = self._FindElements(
+        self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)
+    # Maps file path of package directory (EMMA generated) to package name.
+    # Example: emma_dir/f.html: org.chromium.chrome.
+    package_links = {
+      os.path.join(self._base_dir, link.attrib['HREF']): link.text
+      for link in package_link_elements if 'HREF' in link.attrib
+    }
+
+    package_to_emma = {}
+    for package_emma_file_path, package_name in package_links.iteritems():
+      # These <a> elements contain each class name in the current package and
+      # the path of the file where the coverage info is stored for each class.
+      coverage_file_link_elements = self._FindElements(
+          package_emma_file_path, self._XPATH_SELECT_CLASS_ELEMENTS)
+      for coverage_file_element in coverage_file_link_elements:
+        # Apply the same filter as for package links: an anchor without an
+        # HREF does not point at a coverage file and would raise KeyError.
+        if 'HREF' not in coverage_file_element.attrib:
+          continue
+        emma_coverage_file_path = os.path.join(
+            self._emma_files_path, coverage_file_element.attrib['HREF'])
+        full_package_name = '%s.%s' % (package_name, coverage_file_element.text)

mikecase (-- gone --) 2015/07/27 18:45:52 coverage_file_element.text is the class names? cor

estevenson1 2015/07/30 02:34:10 Done.

+        package_to_emma[full_package_name] = emma_coverage_file_path

mikecase (-- gone --) 2015/07/27 18:45:52 It seems like it would probably be this way, but a

estevenson1 2015/07/30 02:34:10 I wasn't able to find any cases where multiple emm

+    return package_to_emma

+  def _FindElements(self, file_path, xpath_selector):
+    """Reads a HTML file and performs an XPath match.
+
+    Args:
+      file_path: String representing the path to the HTML file.
+      xpath_selector: String representing xpath search pattern.
+
+    Returns:
+      A list of xml.etree.ElementTree.Element objects matching the given XPath
+      selector. Returns an empty list if there is no match.
+    """
+    # Read raw bytes: the contents are transcoded from ISO-8859-1 to UTF-8
+    # below, which needs undecoded data and no newline translation.
+    with open(file_path, 'rb') as f:
+      file_contents = f.read().decode('ISO-8859-1').encode('UTF-8')
+    root = ElementTree.fromstring(file_contents)
+    return root.findall(xpath_selector)

+class _EmmaCoverageStats(object):

mikecase (-- gone --) 2015/07/27 18:45:52 I still think this interface is a little confusing

estevenson1 2015/07/30 02:34:10 Made some interface changes, let me know if it nee

Made some interface changes, let me know if it needs further tweaking.

+ """Encapsulates coverage operations related to EMMA files.
+ This class provides an API that allows users to capture absolute code coverage
+ and code coverage on a subset of lines for each file.
+ Additionally, this class stores the information needed to correlate EMMA HTML
+ files with Java source files. EMMA XML and plain text reports do not provide
+ line by line coverage data, so HTML reports must be used instead.
+ Unfortunately, the HTML files that are created are given garbage names
+ (e.g. 1.html) so we need to manually correlate EMMA HTML files
+ with the original Java source files.
+ Attributes:
+ _emma_parser: An _EmmaHtmlParser for reading EMMA HTML files.
+ _source_to_emma: A dict mapping Java source file paths to EMMA HTML file
+ paths.
+ """
+ # Regular expression to get package name from Java package statement.

mikecase (-- gone --) 2015/07/27 18:45:53 super nit: two spaces btw "expression" and "to"

estevenson1 2015/07/30 02:34:10 Done.

+ RE_PACKAGE = re.compile(r'package (?P<package>[\w.]*);')
+ # Name of the capture group in RE_PACKAGE that holds the package name.
+ RE_PACKAGE_MATCH_GROUP = 'package'

mikecase (-- gone --) 2015/07/27 18:45:52 If you are going to name the group. I would just r

estevenson1 2015/07/30 02:34:10 Done.

+  def __init__(self, emma_file_base_dir, files_for_coverage):
+    """Builds the parser and the source-file-to-report-file lookup.
+
+    Args:
+      emma_file_base_dir: String representing the path to the base directory
+        where EMMA HTML coverage files are stored, i.e. parent of index.html.
+      files_for_coverage: A list of Java file paths to get EMMA coverage for.
+    """
+    parser = _EmmaHtmlParser(emma_file_base_dir)
+    # The parser must be stored first: _GetSourceFileToEmmaFileDict reads
+    # self._emma_parser.
+    self._emma_parser = parser
+    source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)
+    self._source_to_emma = source_to_emma

+  def GetCoverageDictForFiles(self, lines_for_coverage):
+    """Returns a dict containing detailed coverage info for all files.
+
+    Files for which no EMMA coverage data is available are left out of the
+    result.
+
+    Args:
+      lines_for_coverage: A dict mapping Java source file paths to lists of

mikecase (-- gone --) 2015/07/27 18:45:53 This wording is confusing. I would just simplify

estevenson1 2015/07/30 02:34:10 Done.

+        integers representing a set of lines in the file. In addition to
+        overall coverage stats, _EmmaCoverageStats will perform coverage
+        analysis on this subset of files/lines separately.

mikecase (-- gone --) 2015/07/27 18:45:52 About, In addition to overall coverage st

estevenson1 2015/07/30 02:34:10 This method doesn't actually compute coverage for

+
+    Returns:
+      A dict containing coverage stats for the given dict of files and lines.
+      Contains absolute coverage stats for each file, coverage stats for each
+      file's lines specified in |lines_for_coverage|, and overall coverage
+      stats for the lines specified in |lines_for_coverage|.
+    """
+    stats = {}
+    for file_name, lines in lines_for_coverage.items():
+      # Get a list of LineCoverage objects for the current file.
+      line_coverage = self._GetCoverageStatusForFile(file_name)
+      # None means there is no EMMA report for this file (a warning has
+      # already been logged); passing it on would raise TypeError.
+      if line_coverage is None:
+        continue
+      stats[file_name] = self.GetCoverageReportForLines(line_coverage, lines)
+
+    # Sum the per-file incremental numbers into patch-wide totals.
+    statuses = [s['incremental'] for s in stats.values()]
+    covered = sum(s['covered'] for s in statuses)
+    total = sum(s['total'] for s in statuses)
+    return {
+      'files': stats,
+      'patch': {
+        'incremental': {
+          'covered': covered,
+          'total': total
+        }
+      }
+    }

+  def GetCoverageReportForLines(self, line_coverage, lines):
+    """Builds the coverage report for one file.
+
+    Args:
+      line_coverage: A list of LineCoverage objects holding coverage state
+        of each line.
+      lines: A list of integer line numbers to retrieve additional stats for.
+
+    Returns:
+      A dict with three keys: 'absolute' (covered/total over every line),
+      'incremental' (covered/total over |lines| only) and 'source' (one entry
+      per line with its text, coverage status and whether it is in |lines|).
+    """
+    absolute_covered, absolute_total = self.GetStatsForLines(line_coverage)
+    changed_covered, changed_total = self.GetStatsForLines(
+        line_coverage, lines)
+
+    annotated_source = []
+    for entry in line_coverage:
+      annotated_source.append({
+        'line': entry.source,
+        'coverage': entry.covered_status,
+        'changed': entry.lineno in lines,
+      })
+
+    return {
+      'absolute': {
+        'covered': absolute_covered,
+        'total': absolute_total
+      },
+      'incremental': {
+        'covered': changed_covered,
+        'total': changed_total
+      },
+      'source': annotated_source,
+    }

+  def GetStatsForLines(self, line_coverage, line_numbers=None):
+    """Gets coverage stats for a list of LineCoverage objects and line numbers.
+
+    Args:
+      line_coverage: A list of LineCoverage objects representing the coverage
+        status of each line.
+      line_numbers: An optional list of integers representing changed lines.
+        If None, the method returns coverage stats for all lines. An empty
+        list selects no lines.
+
+    Returns:
+      A tuple (covered, total). |total| is the number of selected executable
+      lines; |covered| counts each covered line as 1 and each partially
+      covered line as its fractional coverage.
+    """
+    # Only None means "every line". Treating an empty list the same way would
+    # report whole-file numbers as the incremental coverage of a file with no
+    # changed lines. A set keeps the membership test below constant time.
+    selected = None if line_numbers is None else set(line_numbers)
+
+    partially_covered_sum = 0
+    totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
+    for line in line_coverage:
+      status = line.covered_status
+      if status == NOT_EXECUTABLE:
+        continue
+      if selected is not None and line.lineno not in selected:
+        continue
+      totals[status] += 1
+      if status == PARTIALLY_COVERED:
+        partially_covered_sum += line.fractional_line_coverage
+
+    total_covered = totals[COVERED] + partially_covered_sum
+    total_lines = sum(totals.values())
+    return total_covered, total_lines

+  def _GetCoverageStatusForFile(self, file_path):
+    """Gets a list of LineCoverage objects for the given file path.

mikecase (-- gone --) 2015/07/27 18:45:53 nit: s/"Gets a list"/"Gets a list of"

estevenson1 2015/07/30 02:34:10 Done.

+
+    Args:
+      file_path: String representing the path to the Java source file.
+
+    Returns:
+      A list of LineCoverage objects, or None if there is no EMMA file
+      for the given Java source file.
+    """
+    # _source_to_emma may hold None for a source file whose package has no
+    # EMMA report, so a membership test alone is not enough.
+    emma_file = self._source_to_emma.get(file_path)
+    if not emma_file:
+      logging.warning(
+          'No code coverage data for %s, skipping.', file_path)
+      return None
+    return self._emma_parser.GetLineCoverage(emma_file)

+  def _GetSourceFileToEmmaFileDict(self, files):
+    """Correlates Java source files with their EMMA HTML report files.
+
+    Args:
+      files: A list of file names for which coverage information is desired.
+
+    Returns:
+      A dict mapping Java source file paths to EMMA HTML file paths. The value
+      is None when the EMMA report has no entry for the file's package name.
+    """
+    # Step 1: Java source file path -> package name (file name appended).
+    # Example: /usr/code/file.java -> org.chromium.file.java.
+    package_by_source = {}
+    for source_path in files:
+      package_name = self.GetPackageNameFromFile(source_path)
+      if not package_name:
+        logging.warning('Skipping %s because it doesn\'t have a package '
+                        'statement.', source_path)
+        continue
+      package_by_source[source_path] = package_name
+
+    # Step 2: package name -> EMMA report HTML file.
+    # Example: org.chromium.file.java -> out/coverage/1a.html.
+    emma_by_package = self._emma_parser.GetPackageNameToEmmaFileDict()
+
+    # Step 3: join the two to get Java source file path -> EMMA report file.
+    # Example: /usr/code/file.java -> out/coverage/1a.html.
+    emma_by_source = {}
+    for source_path, package_name in package_by_source.iteritems():
+      emma_by_source[source_path] = emma_by_package.get(package_name)
+    return emma_by_source

+  @staticmethod
+  def NeedsCoverage(file_path):
+    """Decides whether a file should be analyzed for code coverage.
+
+    Args:
+      file_path: A string representing path to the file.
+
+    Returns:
+      True for Java files that exist, False for all others.
+    """
+    extension = os.path.splitext(file_path)[1]
+    if extension != '.java':
+      return False

mikecase (-- gone --) 2015/07/27 18:45:52 Might want to add a logging.debug statement if som

estevenson1 2015/07/30 02:34:10 Done.

+    return os.path.exists(file_path)

+  @staticmethod
+  def GetPackageNameFromFile(file_path):
+    """Gets the full package name including the file name for a given file path.
+
+    Args:
+      file_path: String representing the path to the Java source file.
+
+    Returns:
+      A string representing the full package name, or None if there is no
+      package statement in the file.
+      Example: org.chromium.chrome.Activity.java.

mikecase (-- gone --) 2015/07/27 18:45:53 super nit: Everywhere else you write "Example:" fo

estevenson1 2015/07/30 02:34:10 Done.

+    """
+    with open(file_path) as f:
+      contents = f.read()
+    # Referenced through the class name because a static method has no self.
+    match = _EmmaCoverageStats.RE_PACKAGE.search(contents)

mikecase (-- gone --) 2015/07/27 18:45:53 nit: I would prefer self.RE_PACKAGE and self.RE_PA

estevenson1 2015/07/30 02:34:10 I did this because this is a static method (has no

+    if match is None:
+      return None
+    package = match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
+    return '%s.%s' % (package, os.path.basename(file_path))

+def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
+  """Generates a coverage report for a given set of lines.
+
+  Writes the results of the coverage analysis to the file specified by
+  |out_file_path|. If none of the files listed in |line_coverage_file| is an
+  existing Java file, logs a message and returns without writing a report.
+
+  Args:
+    line_coverage_file: The path to a file which contains a dict mapping file
+      names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
+      that we should compute coverage information on lines 1 - 3 for file1.
+      Coverage reports will contain overall coverage (i.e. for all lines) and
+      coverage for the lines specified in |line_coverage_file|.
+    out_file_path: A string representing the location to write the JSON report.
+    coverage_dir: A string representing the file path where the EMMA
+      HTML coverage files are located (i.e. folder where index.html is located).
+
+  Raises:
+    IOError: |line_coverage_file| could not be opened.
+    ValueError: |line_coverage_file| does not contain valid JSON.

estevenson1 2015/07/27 17:10:10 Need to update this docstring. Function doesn't ra

+  """
+  with open(line_coverage_file) as f:
+    files_for_coverage = json.load(f)
+
+  # Keep only entries that are existing Java files.
+  files_for_coverage = {f: lines
+                        for f, lines in files_for_coverage.iteritems()
+                        if _EmmaCoverageStats.NeedsCoverage(f)}
+  if not files_for_coverage:
+    logging.info('No Java files requiring coverage were included in %s.',
+                 line_coverage_file)
+    # Return instead of calling sys.exit() so that code importing this
+    # function is not terminated; main() still exits with status 0.
+    return

mikecase (-- gone --) 2015/07/27 18:45:52 I would probably just return here instead of sys.e

estevenson1 2015/07/30 02:34:10 Done.

+
+  code_coverage = _EmmaCoverageStats(coverage_dir, files_for_coverage.keys())
+  coverage_results = code_coverage.GetCoverageDictForFiles(files_for_coverage)
+
+  # Log summary and save stats to file.
+  covered = coverage_results['patch']['incremental']['covered']
+  total = coverage_results['patch']['incremental']['total']
+  # Guard against division by zero when no executable line was selected.
+  percent = (covered / float(total)) * 100 if total else 0
+  logging.info('Covered %s out of %s lines (%.2f%%).',
+               covered, total, round(percent, 2))
+
+  with open(out_file_path, 'w+') as out_status_file:
+    json.dump(coverage_results, out_status_file)

+def main():
+  """Parses the command line and generates the coverage report."""
+  argparser = argparse.ArgumentParser()
+  argparser.add_argument('--out', required=True, type=str,
+                         help='Report output file path.')
+  argparser.add_argument('--emma-dir', required=True, type=str,
+                         help='EMMA HTML report directory.')
+  argparser.add_argument('--lines-for-coverage-file', required=True, type=str,
+                         help='File containing a JSON object. Should contain a '
+                         'dict mapping file names to lists of line numbers of '
+                         'code for which coverage information is desired.')
+  argparser.add_argument('-v', '--verbose', action='count',
+                         help='Print verbose log information.')
+  args = argparser.parse_args()
+  run_tests_helper.SetLogLevel(args.verbose)
+  # argparse derives the attribute name from the full long option, so
+  # --lines-for-coverage-file is stored as args.lines_for_coverage_file.
+  GenerateCoverageReport(args.lines_for_coverage_file, args.out, args.emma_dir)

+# Run main() only when executed as a script; its return value is passed to
+# sys.exit() as the exit status.
+if __name__ == '__main__':
+ sys.exit(main())

« no previous file with comments | « no previous file | build/android/coverage_test.py » ('j') | no next file with comments »