Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(669)

Side by Side Diff: build/android/coverage.py

Issue 1216033009: Updated script to capture useful coverage stats. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Small refactor. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | build/android/coverage_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Generates incremental code coverage reports for Java code in Chromium.
7
8 Usage:
9
10 build/android/coverage.py -v --out <output file path> --emma-dir
11 <EMMA file directory> --lines-for-coverage-file
12 <path to file containing lines for coverage>
13
14 Creates a JSON representation of overall and file coverage stats and saves
15 this information to the specified output file.
16 """
17
18 import argparse
19 import collections
20 import json
21 import logging
22 import os
23 import re
24 import sys
25 from xml.etree import ElementTree
26
27 from pylib.utils import run_tests_helper
28
# Coverage states a single source line can be in. NOT_EXECUTABLE marks lines
# that carry no executable code (blank lines, comments, ...).
NOT_EXECUTABLE = -1
NOT_COVERED = 0
COVERED = 1
PARTIALLY_COVERED = 2

# Record describing one line of source: its line number, its text, one of the
# status constants above, and the fraction of the line that was covered.
LineCoverage = collections.namedtuple(
    'LineCoverage',
    'lineno source covered_status fractional_line_coverage')
38
39
class _EmmaHtmlParser(object):
  """Reads the HTML reports produced by the EMMA code coverage tool.

  Every HTML parsing operation needed by this script lives in this class.
  Files are parsed with xml.etree.ElementTree.

  Example HTML:

  Package links:
    <a href="_files/1.html">org.chromium.chrome</a>
    This is returned by the selector |_XPATH_SELECT_PACKAGE_ELEMENTS|.

  Class links:
    <a href="1e.html">DoActivity.java</a>
    This is returned by the selector |_XPATH_SELECT_CLASS_ELEMENTS|.

  Line coverage data:
    <tr class="p">
       <td class="l" title="78% line coverage (7 out of 9)">108</td>
       <td title="78% line coverage (7 out of 9 instructions)">
         if (index < 0 || index >= mSelectors.size()) index = 0;</td>
    </tr>
    <tr>
       <td class="l">109</td>
       <td> </td>
    </tr>
    <tr class="c">
       <td class="l">110</td>
       <td>        if (mSelectors.get(index) != null) {</td>
    </tr>
    <tr class="z">
       <td class="l">111</td>
       <td>            for (int i = 0; i < mSelectors.size(); i++) {</td>
    </tr>
    Each <tr> element is returned by the selector |_XPATH_SELECT_LOC|.

    From each row we extract:
      1. Line number
      2. Line of source code
      3. Coverage status (c, z, or p)
      4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)
  """
  # Matches the <a> elements in the rows of the index page table that lists
  # every package.
  _XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'

  # Matches the <a> elements in the rows of a package page table that lists
  # every class (source file) in that package.
  _XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'

  # Matches the <tr> elements of the table holding the Java source code in an
  # EMMA HTML file.
  _XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'

  # Child elements are accessed by index. The constants below are the indices
  # of the children we care about within a source-line <tr>.

  # Child 1 carries the percentage covered for a line (in its TITLE).
  _ELEMENT_PERCENT_COVERED = 1

  # Child 1 carries the original line of source code.
  _ELEMENT_CONTAINING_SOURCE_CODE = 1

  # Child 0 carries the line number.
  _ELEMENT_CONTAINING_LINENO = 0

  # CSS class of a source-line <tr> -> coverage constant.
  _CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}

  # UTF-8 encoding of the no-break space character.
  _NO_BREAK_SPACE = '\xc2\xa0'

  def __init__(self, emma_file_base_dir):
    """Initializes _EmmaHtmlParser.

    Args:
      emma_file_base_dir: Path to the directory holding the EMMA report,
        i.e. the directory that contains index.html.
    """
    self._base_dir = emma_file_base_dir
    self._emma_files_path = os.path.join(emma_file_base_dir, '_files')
    self._index_path = os.path.join(emma_file_base_dir, 'index.html')

  def GetLineCoverage(self, emma_file_path):
    """Extracts per-line coverage from one EMMA HTML source page.

    Args:
      emma_file_path: String representing the path to the EMMA HTML file.

    Returns:
      A list of LineCoverage objects, one per <tr> matched by
      |_XPATH_SELECT_LOC|.
    """
    rows = self._FindElements(emma_file_path, self._XPATH_SELECT_LOC)
    results = []
    for row in rows:
      # Rows without a recognized CSS class hold no executable code.
      status = self._CSS_TO_STATUS.get(row.get('CLASS'), NOT_EXECUTABLE)

      fraction = 1.0
      if status == PARTIALLY_COVERED:
        # The title looks like "83% line coverage ..."; keep the number
        # before the percent sign.
        title = row[self._ELEMENT_PERCENT_COVERED].get('TITLE')
        fraction = int(title.split('%')[0]) / 100.0

      # Some rows wrap the line number in an extra <a> tag, in which case the
      # cell itself has no text and the number lives in its first child.
      number_cell = row[self._ELEMENT_CONTAINING_LINENO]
      number_text = number_cell.text
      if not number_text:
        number_text = number_cell[self._ELEMENT_CONTAINING_LINENO].text
      lineno = int(number_text)

      # Recover the Java source text, turning no-break spaces into plain ones.
      source_cell = row[self._ELEMENT_CONTAINING_SOURCE_CODE]
      encoded_source = source_cell.text.encode('UTF-8')
      source = encoded_source.replace(self._NO_BREAK_SPACE, ' ')

      results.append(LineCoverage(lineno, source, status, fraction))

    return results

  def GetPackageNameToEmmaFileDict(self):
    """Builds a dict from Java package/class names to EMMA HTML files.

    Reads index.html to discover the package pages, then reads each package
    page to discover the per-class coverage pages.

    Returns:
      A dict mapping string representation of Java packages (with class
        names appended) to the corresponding file paths of EMMA HTML files.
    """
    # Each <a> on the index page names a package and links to the page that
    # lists the classes of that package.
    package_anchors = self._FindElements(
        self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)

    # Package page path (EMMA generated) -> package name.
    # Example: emma_dir/f.html: org.chromium.chrome.
    package_links = {}
    for anchor in package_anchors:
      if 'HREF' in anchor.attrib:
        page_path = os.path.join(self._base_dir, anchor.attrib['HREF'])
        package_links[page_path] = anchor.text

    package_to_emma = {}
    for package_page, package_name in package_links.iteritems():
      # Each <a> on a package page names a class and links to the page holding
      # that class's line coverage.
      class_anchors = self._FindElements(
          package_page, self._XPATH_SELECT_CLASS_ELEMENTS)
      for class_anchor in class_anchors:
        coverage_page = os.path.join(
            self._emma_files_path, class_anchor.attrib['HREF'])
        qualified_name = '%s.%s' % (package_name, class_anchor.text)
        package_to_emma[qualified_name] = coverage_page

    return package_to_emma

  def _FindElements(self, file_path, xpath_selector):
    """Parses an HTML file and returns the elements matching a selector.

    Args:
      file_path: String representing the path to the HTML file.
      xpath_selector: String representing xpath search pattern.

    Returns:
      A list of ElementTree elements matching the given XPath selector.
        Returns an empty list if there is no match.
    """
    with open(file_path) as html_file:
      raw_contents = html_file.read()
    # The file is read as ISO-8859-1 and handed to the parser as UTF-8.
    utf8_contents = raw_contents.decode('ISO-8859-1').encode('UTF-8')
    document = ElementTree.fromstring(utf8_contents)
    return document.findall(xpath_selector)
214
215
class _EmmaCoverageStats(object):
  """Encapsulates coverage operations related to EMMA files.

  This class provides an API that allows users to capture absolute code coverage
  and code coverage on a subset of lines for each file.

  Additionally, this class stores the information needed to correlate EMMA HTML
  files with Java source files. EMMA XML and plain text reports do not provide
  line by line coverage data, so HTML reports must be used instead.
  Unfortunately, the HTML files that are created are given garbage names
  (i.e 1.html) so we need to manually correlate EMMA HTML files
  with the original Java source files.

  Attributes:
    _emma_parser: An _EmmaHtmlParser for reading EMMA HTML files.
    _source_to_emma: A dict mapping Java source files to EMMA HTML files. A
      value is None when no EMMA file was found for the source file.
  """
  # Regular expression to get package name from Java package statement.
  RE_PACKAGE = re.compile(r'package (?P<package>[\w.]*);')
  # Name of the group in |RE_PACKAGE| that captures the package name.
  RE_PACKAGE_MATCH_GROUP = 'package'

  def __init__(self, emma_file_base_dir, files_for_coverage):
    """Initialize _EmmaCoverageStats.

    Args:
      emma_file_base_dir: String representing the path to the base directory
        where EMMA HTML coverage files are stored, i.e. parent of index.html.
      files_for_coverage: A list of Java file paths to get EMMA coverage for.
    """
    self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
    self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)

  def GetCoverageDictForFiles(self, lines_for_coverage):
    """Returns a dict containing detailed coverage info for all files.

    Files for which no EMMA coverage data is available are left out of the
    result (a warning is logged for each of them).

    Args:
      lines_for_coverage: A dict mapping Java source file paths to lists of
        line numbers.

    Returns:
      A dict containing coverage stats for the given dict of files and lines.
        Contains absolute coverage stats for each file, coverage stats for each
        file's lines specified in |lines_for_coverage|, and overall coverage
        stats for the lines specified in |lines_for_coverage|.
    """
    stats = {}
    for file_name, lines in lines_for_coverage.items():
      # Get a list of LineCoverage objects for the current file.
      line_coverage = self._GetCoverageStatusForFile(file_name)
      if line_coverage is None:
        # No EMMA data for this file (already logged); nothing to report.
        continue
      stats[file_name] = self.GetCoverageReportForLines(line_coverage, lines)

    incremental_stats = [s['incremental'] for s in stats.values()]
    covered = sum(s['covered'] for s in incremental_stats)
    total = sum(s['total'] for s in incremental_stats)
    return {
      'files': stats,
      'patch': {
        'incremental': {
          'covered': covered,
          'total': total
        }
      }
    }

  def GetCoverageReportForLines(self, line_coverage, lines):
    """Gets code coverage stats for a given set of LineCoverage objects.

    Args:
      line_coverage: A list of LineCoverage objects holding coverage state
        of each line.
      lines: A list of integer line numbers to retrieve additional stats for.

    Returns:
      A dict containing absolute, incremental, and line by line coverage for
        a file.
    """
    # Build the lookup set once instead of scanning |lines| for every line.
    changed_lines = frozenset(lines)
    line_by_line_coverage = [
      {
        'line': line.source,
        'coverage': line.covered_status,
        'changed': line.lineno in changed_lines,
      }
      for line in line_coverage
    ]
    total_covered_lines, total_lines = self.GetStatsForLines(line_coverage)
    incremental_covered_lines, incremental_total_lines = (
        self.GetStatsForLines(line_coverage, lines))

    file_coverage_stats = {
      'absolute': {
        'covered': total_covered_lines,
        'total': total_lines
      },
      'incremental': {
        'covered': incremental_covered_lines,
        'total': incremental_total_lines
      },
      'source': line_by_line_coverage,
    }
    return file_coverage_stats

  def GetStatsForLines(self, line_coverage, line_numbers=None):
    """Gets coverage stats for a list of LineCoverage objects and line numbers.

    Args:
      line_coverage: A list of LineCoverage objects representing the coverage
        status of each line.
      line_numbers: An optional list of integers representing changed lines.
        If None, coverage stats are computed over all lines. An empty list
        selects no lines and yields (0, 0).

    Returns:
      A tuple (total_covered, total_lines). |total_lines| is the number of
        selected executable lines. |total_covered| is the number of fully
        covered lines plus the fractional coverage of each partially covered
        line, so it may be a float.
    """
    # None means "no filter". Distinguish it from an explicit empty selection,
    # which must not be widened to the whole file.
    selected = None if line_numbers is None else frozenset(line_numbers)
    partially_covered_sum = 0
    totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
    for line in line_coverage:
      status = line.covered_status
      if status == NOT_EXECUTABLE:
        continue
      if selected is not None and line.lineno not in selected:
        continue
      totals[status] += 1
      if status == PARTIALLY_COVERED:
        partially_covered_sum += line.fractional_line_coverage

    total_covered = totals[COVERED] + partially_covered_sum
    total_lines = sum(totals.values())
    return total_covered, total_lines

  def _GetCoverageStatusForFile(self, file_path):
    """Gets a list of LineCoverage objects corresponding to the given file path.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A list of LineCoverage objects, or None if there is no EMMA file
        for the given Java source file.
    """
    # The mapping may hold None for sources without a matching EMMA file, so
    # a plain membership test is not enough.
    emma_file = self._source_to_emma.get(file_path)
    if emma_file is None:
      logging.warning(
          'No code coverage data for %s, skipping.', file_path)
      return None
    return self._emma_parser.GetLineCoverage(emma_file)

  def _GetSourceFileToEmmaFileDict(self, files):
    """Gets a dict used to correlate Java source files with EMMA HTML files.

    Args:
      files: A list of file names for which coverage information is desired.

    Returns:
      A dict mapping Java source file paths to EMMA HTML file paths. The value
        is None for a source file whose package/class has no EMMA HTML file.
    """
    # Maps Java source file paths to package names.
    # Example: /usr/code/file.java -> org.chromium.file.java.
    source_to_package = {}
    for file_path in files:
      package = self.GetPackageNameFromFile(file_path)
      if package:
        source_to_package[file_path] = package
      else:
        logging.warning('Skipping %s because it doesn\'t have a package '
                        'statement.', file_path)

    # Maps package names to EMMA report HTML files.
    # Example: org.chromium.file.java -> out/coverage/1a.html.
    package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
    # Finally, we have a dict mapping Java file paths to EMMA report files.
    # Example: /usr/code/file.java -> out/coverage/1a.html.
    source_to_emma = {source: package_to_emma.get(package)
                      for source, package in source_to_package.items()}
    return source_to_emma

  @staticmethod
  def NeedsCoverage(file_path):
    """Checks to see if the file needs to be analyzed for code coverage.

    Args:
      file_path: A string representing path to the file.

    Returns:
      True for Java files that exist, False for all others.
    """
    if os.path.splitext(file_path)[1] != '.java':
      logging.debug('Skipping %s because it is not a Java file.', file_path)
      return False
    if not os.path.exists(file_path):
      logging.debug('Skipping %s because it does not exist.', file_path)
      return False
    return True

  @staticmethod
  def GetPackageNameFromFile(file_path):
    """Gets the full package name including the file name for a given file path.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A string representing the full package name, or None if there is no
        package statement in the file.
        Example: org.chromium.chrome.Activity.java.
    """
    with open(file_path) as f:
      file_content = f.read()
    # This is a static method, so the class attributes are reached through the
    # class name rather than through an instance.
    package_match = _EmmaCoverageStats.RE_PACKAGE.search(file_content)
    if not package_match:
      return None
    package = package_match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
    file_name = os.path.basename(file_path)
    return '%s.%s' % (package, file_name)
429
430
def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
  """Generates a coverage report for a given set of lines.

  Writes the results of the coverage analysis to the file specified by
  |out_file_path|. If |line_coverage_file| lists no existing Java files, an
  informational message is logged and no report is written.

  Args:
    line_coverage_file: The path to a file which contains a dict mapping file
      names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
      that we should compute coverage information on lines 1 - 3 for file1.
      Coverage reports will contain overall coverage (i.e. for all lines) and
      coverage for the lines specified in |line_coverage_file|.
    out_file_path: A string representing the location to write the JSON report.
    coverage_dir: A string representing the file path where the EMMA
      HTML coverage files are located (i.e. folder where index.html is located).

  Raises:
    IOError: Propagated from open() when |line_coverage_file| does not exist.
    ValueError: Propagated from json.load() when |line_coverage_file| does not
      contain valid JSON.
  """
  with open(line_coverage_file) as f:
    requested_lines = json.load(f)
  # Keep only the entries that refer to Java files present on disk.
  files_for_coverage = {path: lines
                        for path, lines in requested_lines.items()
                        if _EmmaCoverageStats.NeedsCoverage(path)}
  if not files_for_coverage:
    logging.info('No Java files requiring coverage were included in %s.',
                 line_coverage_file)
    # Return rather than exit: terminating the process is the caller's call.
    return

  code_coverage = _EmmaCoverageStats(coverage_dir, list(files_for_coverage))
  coverage_results = code_coverage.GetCoverageDictForFiles(files_for_coverage)
  # Log summary and save stats to file.
  covered = coverage_results['patch']['incremental']['covered']
  total = coverage_results['patch']['incremental']['total']
  # Guard against division by zero when no executable line was selected.
  percent = (covered / float(total)) * 100 if total else 0
  logging.info('Covered %s out of %s lines (%.2f%%).',
               covered, total, percent)
  with open(out_file_path, 'w') as out_status_file:
    json.dump(coverage_results, out_status_file)
471
472
def main():
  """Parses the command line and generates the coverage report.

  See the module docstring for usage.
  """
  argparser = argparse.ArgumentParser()
  argparser.add_argument('--out', required=True, type=str,
                         help='Report output file path.')
  argparser.add_argument('--emma-dir', required=True, type=str,
                         help='EMMA HTML report directory.')
  argparser.add_argument('--lines-for-coverage-file', required=True, type=str,
                         help='File containing a JSON object. Should contain a '
                         'dict mapping file names to lists of line numbers of '
                         'code for which coverage information is desired.')
  # default=0 so that the verbosity is always an integer, even without -v.
  argparser.add_argument('-v', '--verbose', action='count', default=0,
                         help='Print verbose log information.')
  args = argparser.parse_args()
  run_tests_helper.SetLogLevel(args.verbose)
  # argparse derives the attribute name from the full option string, so
  # --lines-for-coverage-file is stored as |lines_for_coverage_file|.
  GenerateCoverageReport(args.lines_for_coverage_file, args.out, args.emma_dir)


if __name__ == '__main__':
  sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | build/android/coverage_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698