OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
2 # Copyright 2015 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """Generates incremental code coverage reports for Java code in Chromium. | |
7 | |
8 Usage: | |
9 | |
10 build/android/coverage.py -v --out <output file path> --emma-dir | |
11 <EMMA file directory> --lines-for-coverage-file | |
12 <path to file containing lines for coverage> | |
13 | |
14 Creates a JSON representation of overall and file coverage stats and saves | |
15 this information to the specified output file. | |
16 """ | |
17 | |
18 import argparse | |
19 import collections | |
20 import json | |
21 import logging | |
22 import os | |
23 import re | |
24 import sys | |
25 from xml.etree import ElementTree | |
26 | |
27 from pylib.utils import run_tests_helper | |
28 | |
# Coverage status values assigned to each line of a Java source file.
NOT_EXECUTABLE = -1  # Line has no coverage class in the EMMA report.
NOT_COVERED = 0
COVERED = 1
PARTIALLY_COVERED = 2

# Coverage information about a single line of code.
_LINE_COVERAGE_FIELDS = (
    'lineno',
    'source',
    'covered_status',
    'fractional_line_coverage',
)
LineCoverage = collections.namedtuple('LineCoverage', _LINE_COVERAGE_FIELDS)
38 | |
39 | |
class _EmmaHtmlParser(object):
  """Encapsulates HTML file parsing operations.

  This class contains all operations related to parsing HTML files that were
  produced using the EMMA code coverage tool. Files are parsed with
  xml.etree.ElementTree.

  Example HTML:

  Package links:
    <a href="_files/1.html">org.chromium.chrome</a>
    This is returned by the selector |XPATH_SELECT_PACKAGE_ELEMENTS|.

  Class links:
    <a href="1e.html">DoActivity.java</a>
    This is returned by the selector |XPATH_SELECT_CLASS_ELEMENTS|.

  Line coverage data:
    <tr class="p">
       <td class="l" title="78% line coverage (7 out of 9)">108</td>
       <td title="78% line coverage (7 out of 9 instructions)">
         if (index < 0 || index >= mSelectors.size()) index = 0;</td>
    </tr>
    <tr>
       <td class="l">109</td>
       <td> </td>
    </tr>
    <tr class="c">
       <td class="l">110</td>
       <td>        if (mSelectors.get(index) != null) {</td>
    </tr>
    <tr class="z">
       <td class="l">111</td>
       <td>          for (int i = 0; i < mSelectors.size(); i++) {</td>
    </tr>
    Each <tr> element is returned by the selector |XPATH_SELECT_LOC|.

    We can parse this to get:
      1. Line number
      2. Line of source code
      3. Coverage status (c, z, or p)
      4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)

  Note that the selectors and attribute lookups in this class use upper-case
  tag and attribute names (TR, TD, A, CLASS, TITLE, HREF).
  """
  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different packages.
  _XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'

  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different classes within a package.
  _XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'

  # Selector to match all <tr> elements within the table containing Java source
  # code in an EMMA HTML file.
  _XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'

  # Children of an element are accessed by index on the element itself. These
  # constants are the indices of the relevant child elements of a <tr>.

  # Child 1 contains percentage covered for a line.
  _ELEMENT_PERCENT_COVERED = 1

  # Child 1 contains the original line of source code.
  _ELEMENT_CONTAINING_SOURCE_CODE = 1

  # Child 0 contains the line number.
  _ELEMENT_CONTAINING_LINENO = 0

  # Maps CSS class names to corresponding coverage constants.
  _CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}

  # No-break space as a single (unicode) character.
  _NO_BREAK_SPACE = u'\xa0'

  def __init__(self, emma_file_base_dir):
    """Initializes _EmmaHtmlParser.

    Args:
      emma_file_base_dir: Path to the location where EMMA report files are
        stored. Should be where index.html is stored.
    """
    self._base_dir = emma_file_base_dir
    self._emma_files_path = os.path.join(self._base_dir, '_files')
    self._index_path = os.path.join(self._base_dir, 'index.html')

  def GetLineCoverage(self, emma_file_path):
    """Returns a list of LineCoverage objects for the given EMMA HTML file.

    Args:
      emma_file_path: String representing the path to the EMMA HTML file.

    Returns:
      A list of LineCoverage objects, one per <tr> matched by
      |_XPATH_SELECT_LOC|.
    """
    line_coverage = []
    for tr in self._FindElements(emma_file_path, self._XPATH_SELECT_LOC):
      # Rows without a recognized CSS class are treated as not executable.
      coverage_status = self._CSS_TO_STATUS.get(tr.get('CLASS'), NOT_EXECUTABLE)

      if coverage_status == PARTIALLY_COVERED:
        title_attribute = tr[self._ELEMENT_PERCENT_COVERED].get('TITLE')
        # Parse string that contains percent covered: "83% line coverage ...".
        percent_covered = title_attribute.split('%')[0]
        fractional_coverage = int(percent_covered) / 100.0
      else:
        fractional_coverage = 1.0

      lineno_element = tr[self._ELEMENT_CONTAINING_LINENO]
      # Handles oddly formatted HTML (where there is an extra <a> tag): fall
      # back to the text of the first child when the cell has no text itself.
      lineno = int(lineno_element.text or
                   lineno_element[self._ELEMENT_CONTAINING_LINENO].text)

      # An empty source cell has |text| set to None; treat it as an empty line.
      raw_source = tr[self._ELEMENT_CONTAINING_SOURCE_CODE].text or u''
      source = raw_source.replace(self._NO_BREAK_SPACE, u' ')

      line_coverage.append(
          LineCoverage(lineno, source, coverage_status, fractional_coverage))

    return line_coverage

  def GetPackageNameToEmmaFileDict(self):
    """Returns a dict mapping Java packages to EMMA HTML coverage files.

    Parses the EMMA index.html file to get a list of packages, then parses each
    package HTML file to get a list of classes for that package, and creates
    a dict with this info.

    Returns:
      A dict mapping string representation of Java packages (with class
      names appended) to the corresponding file paths of EMMA HTML files.
    """
    # These <a> elements contain each package name and the path of the file
    # where all classes within said package are listed.
    package_link_elements = self._FindElements(
        self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)
    # Maps file path of package directory (EMMA generated) to package name.
    # Example: emma_dir/f.html: org.chromium.chrome.
    package_links = {
        os.path.join(self._base_dir, link.attrib['HREF']): link.text
        for link in package_link_elements if 'HREF' in link.attrib
    }

    package_to_emma = {}
    for package_emma_file_path, package_name in package_links.items():
      # These <a> elements contain each class name in the current package and
      # the path of the file where the coverage info is stored for each class.
      coverage_file_link_elements = self._FindElements(
          package_emma_file_path, self._XPATH_SELECT_CLASS_ELEMENTS)

      for coverage_file_element in coverage_file_link_elements:
        # Skip anchors that are not links, as is done for package links above.
        if 'HREF' not in coverage_file_element.attrib:
          continue
        emma_coverage_file_path = os.path.join(
            self._emma_files_path, coverage_file_element.attrib['HREF'])
        # The link text is the file name of the class (e.g. DoActivity.java).
        full_package_name = '%s.%s' % (package_name, coverage_file_element.text)
        package_to_emma[full_package_name] = emma_coverage_file_path

    return package_to_emma

  def _FindElements(self, file_path, xpath_selector):
    """Reads a HTML file and performs an XPath match.

    Args:
      file_path: String representing the path to the HTML file.
      xpath_selector: String representing xpath search pattern.

    Returns:
      A list of xml.etree.ElementTree.Element objects matching the given XPath
      selector. Returns an empty list if there is no match.
    """
    # Read raw bytes so that the decoding below is explicit and behaves the
    # same regardless of the interpreter's default text encoding.
    with open(file_path, 'rb') as f:
      file_contents = f.read().decode('ISO-8859-1').encode('UTF-8')
    root = ElementTree.fromstring(file_contents)
    return root.findall(xpath_selector)
214 | |
215 | |
class _EmmaCoverageStats(object):
  """Encapsulates coverage operations related to EMMA files.

  This class provides an API that allows users to capture absolute code coverage
  and code coverage on a subset of lines for each file.

  Additionally, this class stores the information needed to correlate EMMA HTML
  files with Java source files. EMMA XML and plain text reports do not provide
  line by line coverage data, so HTML reports must be used instead.
  Unfortunately, the HTML files that are created are given garbage names
  (i.e 1.html) so we need to manually correlate EMMA HTML files
  with the original Java source files.

  Attributes:
    _emma_parser: An _EmmaHtmlParser for reading EMMA HTML files.
    _source_to_emma: A dict mapping Java source files to EMMA HTML files.
  """
  # Regular expression to get package name from Java package statement.
  RE_PACKAGE = re.compile(r'package (?P<package>[\w.]*);')
  # Name of the group in |RE_PACKAGE| that captures the package name.
  RE_PACKAGE_MATCH_GROUP = 'package'

  def __init__(self, emma_file_base_dir, files_for_coverage):
    """Initialize _EmmaCoverageStats.

    Args:
      emma_file_base_dir: String representing the path to the base directory
        where EMMA HTML coverage files are stored, i.e. parent of index.html.
      files_for_coverage: A list of Java file paths to get EMMA coverage for.
    """
    self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
    self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)

  def GetCoverageDictForFiles(self, lines_for_coverage):
    """Returns a dict containing detailed coverage info for all files.

    Files for which no EMMA coverage data is available are left out of the
    result (a warning is logged for each of them).

    Args:
      lines_for_coverage: A dict mapping Java source file paths to lists of
        line numbers.

    Returns:
      A dict containing coverage stats for the given dict of files and lines.
      Contains absolute coverage stats for each file, coverage stats for each
      file's lines specified in |lines_for_coverage|, and overall coverage
      stats for the lines specified in |lines_for_coverage|:
        {'files': {path: <per-file report>},
         'patch': {'incremental': {'covered': ..., 'total': ...}}}
    """
    stats = {}
    for file_name, lines in lines_for_coverage.items():
      # Get a list of LineCoverage objects for the current file.
      line_coverage = self._GetCoverageStatusForFile(file_name)
      if line_coverage is None:
        # No EMMA data for this file; the warning was logged by the helper.
        continue
      stats[file_name] = self.GetCoverageReportForLines(line_coverage, lines)

    statuses = [s['incremental'] for s in stats.values()]
    covered = sum(s['covered'] for s in statuses)
    total = sum(s['total'] for s in statuses)
    return {
        'files': stats,
        'patch': {
            'incremental': {
                'covered': covered,
                'total': total
            }
        }
    }

  def GetCoverageReportForLines(self, line_coverage, lines):
    """Gets code coverage stats for a given set of LineCoverage objects.

    Args:
      line_coverage: A list of LineCoverage objects holding coverage state
        of each line.
      lines: A list of integer line numbers to retrieve additional stats for.

    Returns:
      A dict containing absolute, incremental, and line by line coverage for
      a file.
    """
    # Build the set once so each membership test below is O(1).
    changed_lines = set(lines)
    line_by_line_coverage = [
        {
            'line': line.source,
            'coverage': line.covered_status,
            'changed': line.lineno in changed_lines,
        }
        for line in line_coverage
    ]
    total_covered_lines, total_lines = self.GetStatsForLines(line_coverage)
    incremental_covered_lines, incremental_total_lines = (
        self.GetStatsForLines(line_coverage, changed_lines))

    file_coverage_stats = {
        'absolute': {
            'covered': total_covered_lines,
            'total': total_lines
        },
        'incremental': {
            'covered': incremental_covered_lines,
            'total': incremental_total_lines
        },
        'source': line_by_line_coverage,
    }
    return file_coverage_stats

  def GetStatsForLines(self, line_coverage, line_numbers=None):
    """Gets coverage stats for a list of LineCoverage objects and line numbers.

    Lines whose status is NOT_EXECUTABLE are never counted. A partially covered
    line contributes its fractional coverage to the covered total.

    Args:
      line_coverage: A list of LineCoverage objects representing the coverage
        status of each line.
      line_numbers: An optional collection of integers representing changed
        lines. If None, the method will return coverage stats for all lines.
        An empty collection selects no lines.

    Returns:
      A tuple (total covered lines, total executable lines) computed over the
      selected lines.
    """
    # None means "every line"; an explicit (possibly empty) collection restricts
    # the stats to exactly those line numbers.
    selected = None if line_numbers is None else set(line_numbers)
    partially_covered_sum = 0
    totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
    for line in line_coverage:
      status = line.covered_status
      if status == NOT_EXECUTABLE:
        continue
      if selected is not None and line.lineno not in selected:
        continue
      totals[status] += 1
      if status == PARTIALLY_COVERED:
        partially_covered_sum += line.fractional_line_coverage

    total_covered = totals[COVERED] + partially_covered_sum
    total_lines = sum(totals.values())
    return total_covered, total_lines

  def _GetCoverageStatusForFile(self, file_path):
    """Gets a list of LineCoverage objects corresponding to the given file path.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A list of LineCoverage objects, or None if there is no EMMA file
      for the given Java source file.
    """
    # The dict may map a source file to None when its package has no EMMA
    # report, so a missing key and a None value are handled the same way.
    emma_file = self._source_to_emma.get(file_path)
    if emma_file is None:
      logging.warning(
          'No code coverage data for %s, skipping.', file_path)
      return None
    return self._emma_parser.GetLineCoverage(emma_file)

  def _GetSourceFileToEmmaFileDict(self, files):
    """Gets a dict used to correlate Java source files with EMMA HTML files.

    Args:
      files: A list of file names for which coverage information is desired.

    Returns:
      A dict mapping Java source file paths to EMMA HTML file paths. The value
      is None for a source file whose package has no EMMA HTML file.
    """
    # Maps Java source file paths to package names.
    # Example: /usr/code/file.java -> org.chromium.file.java.
    source_to_package = {}
    for file_path in files:
      package = self.GetPackageNameFromFile(file_path)
      if package:
        source_to_package[file_path] = package
      else:
        logging.warning('Skipping %s because it doesn\'t have a package '
                        'statement.', file_path)

    # Maps package names to EMMA report HTML files.
    # Example: org.chromium.file.java -> out/coverage/1a.html.
    package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
    # Finally, we have a dict mapping Java file paths to EMMA report files.
    # Example: /usr/code/file.java -> out/coverage/1a.html.
    source_to_emma = {source: package_to_emma.get(package)
                      for source, package in source_to_package.items()}
    return source_to_emma

  @staticmethod
  def NeedsCoverage(file_path):
    """Checks to see if the file needs to be analyzed for code coverage.

    Args:
      file_path: A string representing path to the file.

    Returns:
      True for Java files that exist, False for all others.
    """
    if os.path.splitext(file_path)[1] != '.java':
      logging.debug('Skipping %s because it is not a Java file.', file_path)
      return False
    if not os.path.exists(file_path):
      logging.debug('Skipping %s because it does not exist.', file_path)
      return False
    return True

  @staticmethod
  def GetPackageNameFromFile(file_path):
    """Gets the full package name including the file name for a given file path.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A string representing the full package name, or None if there is no
      package statement in the file.
      Example: org.chromium.chrome.Activity.java.
    """
    with open(file_path) as f:
      file_content = f.read()
    # The class is referenced by name because a static method has no |self|.
    package_match = _EmmaCoverageStats.RE_PACKAGE.search(file_content)
    if not package_match:
      return None
    package = package_match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
    file_name = os.path.basename(file_path)
    return '%s.%s' % (package, file_name)
429 | |
430 | |
def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
  """Generates a coverage report for a given set of lines.

  Writes the results of the coverage analysis to the file specified by
  |out_file_path|. If none of the files listed in |line_coverage_file| needs
  coverage, nothing is written and the function simply returns.

  Args:
    line_coverage_file: The path to a file which contains a dict mapping file
      names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
      that we should compute coverage information on lines 1 - 3 for file1.
      Coverage reports will contain overall coverage (i.e. for all lines) and
      coverage for the lines specified in |line_coverage_file|.
    out_file_path: A string representing the location to write the JSON report.
    coverage_dir: A string representing the file path where the EMMA
      HTML coverage files are located (i.e. folder where index.html is located).

  Raises:
    IOError: A non existent |line_coverage_file| was supplied.
    ValueError: An improperly formatted |line_coverage_file| was supplied.
  """
  with open(line_coverage_file) as f:
    requested_lines = json.load(f)
  # Only keep the files that coverage can be computed for.
  files_for_coverage = {
      path: lines for path, lines in requested_lines.items()
      if _EmmaCoverageStats.NeedsCoverage(path)
  }
  if not files_for_coverage:
    logging.info('No Java files requiring coverage were included in %s.',
                 line_coverage_file)
    # Return instead of exiting so that callers other than main() keep running.
    return

  code_coverage = _EmmaCoverageStats(coverage_dir, list(files_for_coverage))
  coverage_results = code_coverage.GetCoverageDictForFiles(files_for_coverage)
  # Log summary and save stats to file.
  covered = coverage_results['patch']['incremental']['covered']
  total = coverage_results['patch']['incremental']['total']
  # Guard against division by zero when no executable lines were selected.
  percent = (covered / float(total)) * 100 if total else 0
  logging.info('Covered %s out of %s lines (%.2f%%).',
               covered, total, round(percent, 2))
  with open(out_file_path, 'w+') as out_status_file:
    json.dump(coverage_results, out_status_file)
471 | |
472 | |
def main():
  """Parses the command line arguments and generates the coverage report."""
  argparser = argparse.ArgumentParser()
  argparser.add_argument('--out', required=True, type=str,
                         help='Report output file path.')
  argparser.add_argument('--emma-dir', required=True, type=str,
                         help='EMMA HTML report directory.')
  argparser.add_argument('--lines-for-coverage-file', required=True, type=str,
                         help='File containing a JSON object. Should contain a '
                         'dict mapping file names to lists of line numbers of '
                         'code for which coverage information is desired.')
  # Default to 0 so that |args.verbose| is always an integer, even when the
  # flag is not passed.
  argparser.add_argument('-v', '--verbose', action='count', default=0,
                         help='Print verbose log information.')
  args = argparser.parse_args()
  run_tests_helper.SetLogLevel(args.verbose)
  # argparse derives the attribute name from the full long option string, so
  # '--lines-for-coverage-file' is stored as |lines_for_coverage_file|.
  GenerateCoverageReport(args.lines_for_coverage_file, args.out, args.emma_dir)


if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |