OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
2 # Copyright 2015 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """Generates incremental code coverage reports for Java code in Chromium. | |
7 | |
8 Usage: | |
9 | |
10 build/android/coverage.py -v --out <output file path> --emma-dir | |
11 <EMMA file directory> --lines-for-coverage | |
12 <path to file containing lines for coverage> | |
13 | |
14 Creates a JSON representation of overall and file coverage stats and saves | |
15 this information to the specified output file. | |
16 """ | |
17 | |
18 import argparse | |
19 import json | |
20 import logging | |
21 import os | |
22 import re | |
23 import sys | |
24 from lxml import html | |
25 | |
26 from pylib.utils import run_tests_helper | |
27 | |
28 | |
class LineCoverage(object):
  """Holds the coverage details EMMA reported for one line of source.

  The class-level constants enumerate the possible values of
  |covered_status|.
  """

  # The line holds no executable code (e.g. a blank line or a comment).
  NOT_EXECUTABLE = -1
  # The line is executable but none of it ran.
  NOT_COVERED = 0
  # The line is executable and all of it ran.
  COVERED = 1
  # The line is executable and only some of its instructions ran.
  PARTIALLY_COVERED = 2

  def __init__(self, lineno, source, covered_status, fractional_line_coverage):
    """Stores the per-line coverage details.

    Args:
      lineno: Integer line number.
      source: String holding the original line of source code.
      covered_status: One of the status constants defined on this class.
      fractional_line_coverage: Fraction of the line's instructions that were
        executed, as a floating point number in the range [0.0, 1.0].
    """
    self.lineno, self.source = lineno, source
    self.covered_status = covered_status
    self.fractional_line_coverage = fractional_line_coverage
52 | |
53 | |
class _EmmaHtmlParser(object):
  """Encapsulates HTML file parsing operations.

  This class contains all operations related to parsing HTML files that were
  produced using the EMMA code coverage tool. It uses the lxml module for
  parsing.

  Example HTML:

  To get package links:
    <a href="_files/1.html">org.chromium.chrome</a>
  This is returned by the selector |_XPATH_SELECT_PACKAGE_ELEMENTS|.

  To get class links:
    <a href="1e.html">DoActivity.java</a>
  This is returned by the selector |_XPATH_SELECT_CLASS_ELEMENTS|.

  To get coverage information:
    <tr class="p">
      <td class="l" title="78% line coverage (7 out of 9)">108</td>
      <td title="78% line coverage (7 out of 9 instructions)">
        if (index < 0 || index = mSelectors.size()) index = 0;</td>
    </tr>
    <tr>
      <td class="l">109</td>
      <td> </td>
    </tr>
    <tr class="c">
      <td class="l">110</td>
      <td> if (mSelectors.get(index) != null) {</td>
    </tr>
    <tr class="z">
      <td class="l">111</td>
      <td> for (int i = 0; i < mSelectors.size(); i++) {</td>
    </tr>
  Each <tr> element is returned by the selector |_XPATH_SELECT_LOC|.

  We can parse this to get:
    1. Line number
    2. Line of source code
    3. Coverage status (c, z, or p)
    4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)
  """
  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different packages.
  _XPATH_SELECT_PACKAGE_ELEMENTS = '/html/body/table[4]/tr[*]/td/a'

  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different classes within a package.
  _XPATH_SELECT_CLASS_ELEMENTS = '/html/body/table[3]/tr[*]/td/a'

  # Selector to match all <tr> elements within the table containing Java source
  # code in an EMMA HTML file.
  _XPATH_SELECT_LOC = '/html/body/table[4]/tr'

  # Children of HTML elements are represented as a list in lxml. These constants
  # represent list indices corresponding to relevant child elements.

  # Technically both <td> children carry the percentage covered for a line.
  _ELEMENT_PERCENT_COVERED = 1

  # The second child contains the original line of source code.
  _ELEMENT_CONTAINING_SOURCE_CODE = 1

  # The first child contains the line number.
  _ELEMENT_CONTAINING_LINENO = 0

  # Maps CSS class names to corresponding coverage constants.
  _CSS_TO_STATUS = {'c': LineCoverage.COVERED,
                    'p': LineCoverage.PARTIALLY_COVERED,
                    'z': LineCoverage.NOT_COVERED}

  # UTF-8 no break space.
  _NO_BREAK_SPACE = '\xc2\xa0'

  def __init__(self, emma_file_base_dir):
    """Initializes _EmmaHtmlParser.

    Args:
      emma_file_base_dir: Path to the location where EMMA report files are
        stored. Should be where index.html is stored.
    """
    self._base_dir = emma_file_base_dir
    self._emma_files_path = os.path.join(self._base_dir, '_files')
    self._index_path = os.path.join(self._base_dir, 'index.html')

  def GetLineCoverage(self, emma_file_path):
    """Returns a list of LineCoverage objects for the given EMMA HTML file.

    Args:
      emma_file_path: String representing the path to the EMMA HTML file.

    Returns:
      A list of LineCoverage objects, one per <tr> matched by
      |_XPATH_SELECT_LOC|. The list is empty if the file cannot be read or
      nothing matches.
    """
    def get_status(tr_element):
      """Returns coverage status for a <tr> element containing coverage info."""
      # Rows without a (known) CSS class hold non-executable lines.
      return self._CSS_TO_STATUS.get(
          tr_element.attrib.get('class'), LineCoverage.NOT_EXECUTABLE)

    def get_fractional_line_coverage(tr_element, status):
      """Returns coverage value for a <tr> element containing coverage info."""
      # Only partially covered lines carry a percentage in the <td> title;
      # every other status reports 1.0.
      if status != LineCoverage.PARTIALLY_COVERED:
        return 1.0
      title_attribute = (
          tr_element[self._ELEMENT_PERCENT_COVERED].attrib['title'])
      # Parse string that contains percent covered: "83% line coverage ...".
      percent_covered = title_attribute.split('%')[0]
      return int(percent_covered) / 100.0

    def get_lineno(tr_element):
      """Returns line number for a <tr> element containing coverage info."""
      lineno_element = tr_element[self._ELEMENT_CONTAINING_LINENO]
      # Handles oddly formatted HTML (where there is an extra <a> tag).
      return int(lineno_element.text or
                 lineno_element[self._ELEMENT_CONTAINING_LINENO].text)

    def get_source_code(tr_element):
      """Returns Java source for a <tr> element containing coverage info."""
      raw_source = tr_element[self._ELEMENT_CONTAINING_SOURCE_CODE].text
      # lxml reports the text of an empty <td> as None; treat that as an
      # empty source line instead of failing on None.encode().
      utf8_source = (raw_source or '').encode('UTF-8')
      return utf8_source.replace(self._NO_BREAK_SPACE, ' ')

    line_tr_elements = self._FindElements(emma_file_path,
                                          _path=self._XPATH_SELECT_LOC)
    line_coverage = []
    for tr in line_tr_elements:
      coverage_status = get_status(tr)
      fractional_coverage = get_fractional_line_coverage(tr, coverage_status)
      lineno = get_lineno(tr)
      source = get_source_code(tr)
      line_coverage.append(
          LineCoverage(lineno, source, coverage_status, fractional_coverage))

    return line_coverage

  def GetPackageNameToEmmaFileDict(self):
    """Returns a dict mapping Java packages to EMMA HTML coverage files.

    Parses the EMMA index.html file to get a list of packages, then parses each
    package HTML file to get a list of classes for that package, and creates
    a dict with this info.

    Returns:
      A dict mapping string representation of Java packages (with class
      names appended) to the corresponding file paths of EMMA HTML files.
    """
    # The <a> elements that contain each package name and the path of the file
    # where all classes within said package are listed.
    package_a_elements = self._FindElements(
        self._index_path, _path=self._XPATH_SELECT_PACKAGE_ELEMENTS)
    # Maps file path of package directory (EMMA generated) to package name.
    # Ex. emma_dir/f.html: org.chromium.chrome.
    package_links = {
        os.path.join(self._base_dir, link.attrib['href']): link.text
        for link in package_a_elements if 'href' in link.attrib}

    package_to_emma = {}
    for package_emma_file_path, package_name in package_links.items():
      # The <a> elements that contain each class name in the current package and
      # the path of the file where the coverage info is stored for each class.
      coverage_file_a_elements = self._FindElements(
          package_emma_file_path,
          _path=self._XPATH_SELECT_CLASS_ELEMENTS)

      for coverage_file_element in coverage_file_a_elements:
        # Skip anchors without a target, mirroring the package link handling
        # above, rather than raising KeyError on a single odd row.
        if 'href' not in coverage_file_element.attrib:
          continue
        emma_file_path = os.path.join(self._emma_files_path,
                                      coverage_file_element.attrib['href'])
        full_package_name = package_name + '.' + coverage_file_element.text
        package_to_emma[full_package_name] = emma_file_path

    return package_to_emma

  def _FindElements(self, file_path, **kwargs):
    """Reads a HTML file and performs an XPath match.

    Args:
      file_path: String representing the path to the HTML file.
      **kwargs: Keyword arguments for XPath match.

    Returns:
      A list of lxml.html.HtmlElements matching the given XPath selector.
      Returns an empty list if there is no match or the file cannot be read.
    """
    try:
      # Read raw bytes so that the explicit ISO-8859-1 decode is the only
      # decoding step applied to the file contents.
      with open(file_path, 'rb') as f:
        file_contents = f.read().decode('ISO-8859-1')
    except IOError:
      return []
    root = html.fromstring(file_contents)
    return root.xpath(**kwargs)
255 | |
256 | |
class _EmmaCoverageStats(object):
  """Encapsulates coverage operations related to EMMA files.

  This class provides an API that allows users to capture absolute code coverage
  and code coverage on a set of lines for each file.

  Additionally, this class stores the information needed to correlate EMMA HTML
  files with Java source files. EMMA XML and plain text reports do not provide
  line by line coverage data, so HTML reports must be used instead.
  Unfortunately, the HTML files that are created are given garbage names
  (i.e 1.html) so we need to manually correlate EMMA HTML files
  with the original Java source files.

  Attributes:
    _emma_parser: An _EmmaHtmlParser for reading EMMA HTML files.
    _source_to_emma: A dict mapping Java source files to EMMA HTML files.
  """
  # Regular expression to get package name from Java package statement.
  RE_PACKAGE = re.compile(r'package ([\w.]*);')

  def __init__(self, emma_file_base_dir, files_for_coverage):
    """Initialize _EmmaCoverageStats.

    Args:
      emma_file_base_dir: String representing the path to the base directory
        where EMMA HTML coverage files are stored, i.e. parent of index.html.
      files_for_coverage: A list of Java file paths to get EMMA coverage for.
    """
    self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
    self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)

  def GetCoverageDictForFiles(self, lines_for_coverage):
    """Returns a dict containing detailed coverage info for all files.

    Args:
      lines_for_coverage: A dict mapping Java source file paths to lists of
        Integers representing a set of lines to find additional coverage
        stats for.

    Returns:
      A dict containing coverage stats for the given dict of files and lines.
      Contains absolute coverage stats for each file, coverage stats for each
      file's lines specified in |lines_for_coverage|, and overall coverage
      stats for the lines specified in |lines_for_coverage|.
    """
    stats = {}
    for file_name, lines in lines_for_coverage.items():
      # Get a list of LineCoverage objects for the current file.
      line_coverage = self._GetCoverageStatusForFile(file_name)
      # If there was an error with EMMA file generation, |line_coverage| could
      # be None (or empty).
      # TODO(estevenson): Find a better way to handle EMMA errors.
      if not line_coverage:
        continue
      stats[file_name] = self.GetCoverageReportForLines(line_coverage, lines)

    statuses = [s['incremental'] for s in stats.values()]
    covered = sum(s['covered'] for s in statuses)
    total = sum(s['total'] for s in statuses)
    return {
        'files': stats,
        'patch': {
            'incremental': {
                'covered': covered,
                'total': total
            }
        }
    }

  def GetCoverageReportForLines(self, line_coverage, lines):
    """Gets code coverage stats for a given set of LineCoverage objects.

    Args:
      line_coverage: A list of LineCoverage objects holding coverage state
        of each line.
      lines: A list of lines to retrieve additional stats for.

    Returns:
      A dict containing absolute, incremental, and line by line coverage for
      a file.
    """
    # Build the lookup set once instead of scanning |lines| for every row.
    changed_lines = set(lines)
    line_by_line_coverage = [
        {
            'line': line.source,
            'coverage': line.covered_status,
            'changed': line.lineno in changed_lines,
        }
        for line in line_coverage
    ]
    (total_covered_lines, total_lines) = self._GetStatsForLines(line_coverage)
    (incremental_covered_lines, incremental_total_lines) = (
        self._GetStatsForLines(line_coverage, lines))

    file_coverage_stats = {
        'absolute': {
            'covered': total_covered_lines,
            'total': total_lines
        },
        'incremental': {
            'covered': incremental_covered_lines,
            'total': incremental_total_lines
        },
        'source': line_by_line_coverage,
    }
    return file_coverage_stats

  def _GetCoverageStatusForFile(self, file_path):
    """Gets a list LineCoverage objects corresponding to the given file.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A list of LineCoverage objects, or None if there is no EMMA file
      for the given file.
    """
    # A missing key and a None value both mean "no EMMA report"; never hand
    # None to the parser as if it were a path.
    emma_file = self._source_to_emma.get(file_path)
    if emma_file is None:
      logging.warning(
          'No code coverage data for %s, skipping.', file_path)
      return None
    return self._emma_parser.GetLineCoverage(emma_file)

  def _GetSourceFileToEmmaFileDict(self, files):
    """Gets a dict used to correlate Java source files with EMMA HTML files.

    Args:
      files: A list of file names for which coverage information is desired.

    Returns:
      A dict mapping Java source file paths to EMMA HTML file paths. Source
      files that are missing, have no package statement, or have no matching
      EMMA HTML file are left out.
    """
    source_to_package = {}
    for file_path in files:
      # Check for existence first: reading the package statement opens the
      # file, which would raise for a missing path before the warning below
      # could be reached.
      package = None
      if os.path.exists(file_path):
        package = self.GetPackageNameFromFile(file_path)
      if package:
        source_to_package[file_path] = package
      else:
        logging.warning('Skipping %s because it doesn\'t have a package '
                        'statement or it doesn\'t exist.', file_path)
    package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
    # Only keep sources that EMMA actually produced a report for.
    source_to_emma = {source: package_to_emma[package]
                      for source, package in source_to_package.items()
                      if package in package_to_emma}
    return source_to_emma

  def _GetStatsForLines(self, line_coverage, line_numbers=None):
    """Gets coverage stats for a list of LineCoverage objects and line numbers.

    Args:
      line_coverage: A list of LineCoverage objects representing the coverage
        status of each line.
      line_numbers: An optional list of Integers representing changed lines.
        If None, the method returns coverage stats for all lines. An empty
        list selects no lines.

    Returns:
      A tuple (covered, total). |covered| counts fully covered lines plus the
      fractional coverage of partially covered lines; |total| counts the
      executable lines considered. Non-executable lines are ignored.
    """
    # None means "every line"; an explicitly empty list must not silently
    # widen to the whole file.
    selected = None if line_numbers is None else set(line_numbers)
    covered_count = 0
    not_covered_count = 0
    partially_covered_count = 0
    partially_covered_sum = 0
    for line in line_coverage:
      if selected is not None and line.lineno not in selected:
        continue
      if line.covered_status == LineCoverage.COVERED:
        covered_count += 1
      elif line.covered_status == LineCoverage.NOT_COVERED:
        not_covered_count += 1
      elif line.covered_status == LineCoverage.PARTIALLY_COVERED:
        partially_covered_count += 1
        partially_covered_sum += line.fractional_line_coverage

    total_covered = covered_count + partially_covered_sum
    total_lines = covered_count + partially_covered_count + not_covered_count
    return (total_covered, total_lines)

  @staticmethod
  def NeedsCoverage(file_path):
    """Checks to see if the file needs to be analyzed for code coverage.

    Args:
      file_path: A string representing path to the file.

    Returns:
      True for Java files that exist, False for all others.
    """
    return (os.path.splitext(file_path)[1] == '.java'
            and os.path.exists(file_path))

  @staticmethod
  def GetPackageNameFromFile(file_path):
    """Gets the full package name including the file name for a given file path.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      string representing the full package name
      ex. org.chromium.chrome.Activity.java or None if there is no package
      statement in the file.
    """
    with open(file_path) as f:
      file_content = f.read()
    package_match = _EmmaCoverageStats.RE_PACKAGE.search(file_content)
    if not package_match:
      return None
    return package_match.group(1) + '.' + os.path.basename(file_path)
471 | |
472 | |
def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
  """Generates a coverage report for a given set of lines.

  Writes the results of the coverage analysis to the file specified by
  |out_file_path|. Exits the process with status 0, without writing anything,
  if no existing Java file is listed in |line_coverage_file|.

  Args:
    line_coverage_file: The path to a file which contains a JSON dict mapping
      file names to lists of line numbers:
      ex. {file1: [1, 2, 3], ...} means that we should compute coverage
      information on lines 1 - 3 for file1.
    out_file_path: A string representing the file path to write the status
      JSON file.
    coverage_dir: A string representing the file path where the EMMA
      HTML coverage files are located (i.e. folder where index.html is located).

  Raises:
    IOError: A non existent |line_coverage_file| was supplied.
  """
  if not os.path.exists(line_coverage_file):
    # Interpolate the path; the message used to be raised with a bare '%s'.
    raise IOError('The line coverage file: %s does not exist.'
                  % line_coverage_file)

  with open(line_coverage_file) as f:
    requested_lines = json.load(f)
  # Only existing .java files can be correlated with EMMA reports.
  files_for_coverage = {path: lines
                        for path, lines in requested_lines.items()
                        if _EmmaCoverageStats.NeedsCoverage(path)}
  if not files_for_coverage:
    logging.info('No Java files requiring coverage stats were included in %s.',
                 line_coverage_file)
    sys.exit(0)

  code_coverage = _EmmaCoverageStats(coverage_dir, list(files_for_coverage))
  coverage_results = code_coverage.GetCoverageDictForFiles(files_for_coverage)
  # Log summary and save stats to file.
  covered = coverage_results['patch']['incremental']['covered']
  total = coverage_results['patch']['incremental']['total']
  # Guard against dividing by zero when no executable line was selected.
  percent = float(covered) / float(total) * 100 if total else 0
  logging.info('Covered %s out of %s lines (%.2f%%).',
               covered, total, round(percent, 2))
  with open(out_file_path, 'w+') as out_status_file:
    json.dump(coverage_results, out_status_file)
513 | |
514 | |
def main():
  """Parses the command line flags and generates the coverage report.

  The flags --out, --emma-dir and --lines-for-coverage are all required;
  -v/--verbose may be repeated and its count is passed to
  run_tests_helper.SetLogLevel.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--out', required=True, type=str,
      help='Report output file path.')
  parser.add_argument(
      '--emma-dir', required=True, type=str,
      help='EMMA HTML report directory.')
  parser.add_argument(
      '--lines-for-coverage', required=True, type=str,
      help='File containing a JSON object. Should contain a '
           'dict mapping file names to lists of line numbers of '
           'code for which coverage information is desired.')
  parser.add_argument(
      '-v', '--verbose', action='count',
      help='Print verbose log information.')
  options = parser.parse_args()

  run_tests_helper.SetLogLevel(options.verbose)
  GenerateCoverageReport(
      options.lines_for_coverage, options.out, options.emma_dir)


if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |