Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(69)

Side by Side Diff: third_party/coverage/parser.py

Issue 63813002: Add python coverage 3.7 to depot tools. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/coverage/misc.py ('k') | third_party/coverage/phystokens.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 """Code parsing for Coverage."""
2
3 import dis, re, sys, token, tokenize
4
5 from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
6 from coverage.backward import open_source, range # pylint: disable=W0622
7 from coverage.backward import reversed # pylint: disable=W0622
8 from coverage.backward import bytes_to_ints
9 from coverage.bytecode import ByteCodes, CodeObjects
10 from coverage.misc import nice_pair, expensive, join_regex
11 from coverage.misc import CoverageException, NoSource, NotPython
12
13
class CodeParser(object):
    """Parse code to find executable lines, excluded lines, etc."""

    def __init__(self, text=None, filename=None, exclude=None):
        """
        Source can be provided as `text`, the text itself, or `filename`, from
        which the text will be read. Excluded lines are those that match
        `exclude`, a regex.

        """
        assert text or filename, "CodeParser needs either text or filename"
        self.filename = filename or "<code>"
        self.text = text
        if not self.text:
            try:
                sourcef = open_source(self.filename)
                try:
                    self.text = sourcef.read()
                finally:
                    sourcef.close()
            except IOError:
                # sys.exc_info() instead of "except IOError as err" keeps
                # this compatible with very old Pythons.
                _, err, _ = sys.exc_info()
                raise NoSource(
                    "No source for code: '%s': %s" % (self.filename, err)
                )

        # Scrap the BOM if it exists.
        if self.text and ord(self.text[0]) == 0xfeff:
            self.text = self.text[1:]

        # The regex (or None) matching lines to exclude from coverage.
        self.exclude = exclude

        # Debugging aid: when True, _raw_parse prints each token it sees.
        self.show_tokens = False

        # The text lines of the parsed code.
        self.lines = self.text.split('\n')

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # The line numbers of class definitions.
        self.classdefs = set()

        # A dict mapping line numbers to (lo,hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

        # Lazily-created ByteParser
        self._byte_parser = None

    def _get_byte_parser(self):
        """Create a ByteParser on demand."""
        if not self._byte_parser:
            self._byte_parser = \
                ByteParser(text=self.text, filename=self.filename)
        return self._byte_parser
    byte_parser = property(_get_byte_parser)

    def lines_matching(self, *regexes):
        """Find the lines matching one of a list of regexes.

        Returns a set of line numbers, the lines that contain a match for one
        of the regexes in `regexes`.  The entire line needn't match, just a
        part of it.

        """
        regex_c = re.compile(join_regex(regexes))
        matches = set()
        for i, ltext in enumerate(self.lines):
            if regex_c.search(ltext):
                # Line numbers are 1-based, enumerate is 0-based.
                matches.add(i+1)
        return matches

    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of member fields are updated: `excluded`, `classdefs`,
        `docstrings`, `multiline`, and `statement_starts`.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0                  # Current indentation depth.
        exclude_indent = 0          # Depth at which an excluded suite began.
        excluding = False           # Are we currently inside an excluded suite?
        prev_toktype = token.INDENT
        first_line = None           # First line of the statement in progress.
        empty = True                # True until a real (non-trivia) token is seen.

        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:                # pragma: not covered
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                # Class definitions look like branches in the byte code, so
                # we need to exclude them.  The simplest way is to note the
                # lines with the 'class' keyword.
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.)  This works for
                # 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                self.docstrings.update(range(slineno, elineno+1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno+1):
                        self.multiline[l] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                empty = False
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.  An empty file (only
        # comments/whitespace) has no statements at all.
        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Return the first line number of the statement including `line`."""
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Skip any line mentioned in `ignore`.

        Returns a sorted list of the first lines.

        """
        ignore = ignore or []
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)
        return sorted(lset)

    def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.

        Return values are 1) a sorted list of executable line numbers, and
        2) a sorted list of excluded line numbers.

        Reported line numbers are normalized to the first line of multi-line
        statements.

        Raises `NotPython` if the text can't be tokenized.

        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            # Old-Python-compatible spelling of "except ... as tokerr".
            _, tokerr, _ = sys.exc_info()
            msg, lineno = tokerr.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                (self.filename, msg, lineno)
            )

        excluded_lines = self.first_lines(self.excluded)
        # Docstring lines are never executable, so exclude them too.
        ignore = excluded_lines + list(self.docstrings)
        lines = self.first_lines(self.statement_starts, ignore)

        return lines, excluded_lines

    def arcs(self):
        """Get information about the arcs available in the code.

        Returns a sorted list of line number pairs.  Line numbers have been
        normalized to the first line of multiline statements.

        """
        all_arcs = []
        for l1, l2 in self.byte_parser._all_arcs():
            fl1 = self.first_line(l1)
            fl2 = self.first_line(l2)
            if fl1 != fl2:
                all_arcs.append((fl1, fl2))
        return sorted(all_arcs)
    arcs = expensive(arcs)

    def exit_counts(self):
        """Get a mapping from line numbers to count of exits from that line.

        Excluded lines are excluded.

        """
        excluded_lines = self.first_lines(self.excluded)
        exit_counts = {}
        for l1, l2 in self.arcs():
            if l1 < 0:
                # Don't ever report -1 as a line number
                continue
            if l1 in excluded_lines:
                # Don't report excluded lines as line numbers.
                continue
            if l2 in excluded_lines:
                # Arcs to excluded lines shouldn't count.
                continue
            if l1 not in exit_counts:
                exit_counts[l1] = 0
            exit_counts[l1] += 1

        # Class definitions have one extra exit, so remove one for each:
        for l in self.classdefs:
            # Ensure key is there: classdefs can include excluded lines.
            if l in exit_counts:
                exit_counts[l] -= 1

        return exit_counts
    exit_counts = expensive(exit_counts)
269
270
271 ## Opcodes that guide the ByteParser.
272
273 def _opcode(name):
274 """Return the opcode by name from the dis module."""
275 return dis.opmap[name]
276
277 def _opcode_set(*names):
278 """Return a set of opcodes by the names in `names`."""
279 s = set()
280 for name in names:
281 try:
282 s.add(_opcode(name))
283 except KeyError:
284 pass
285 return s
286
# Opcodes that leave the code object.
OPS_CODE_END = _opcode_set('RETURN_VALUE')

# Opcodes that unconditionally end the code chunk.
OPS_CHUNK_END = _opcode_set(
    'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
    'BREAK_LOOP', 'CONTINUE_LOOP',
)

# Opcodes that unconditionally begin a new code chunk.  By starting new chunks
# with unconditional jump instructions, we neatly deal with jumps to jumps
# properly.
OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')

# Opcodes that push a block on the block stack.  (SETUP_WITH may be absent on
# older Pythons; _opcode_set tolerates missing names.)
OPS_PUSH_BLOCK = _opcode_set(
    'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
)

# Block types for exception handling.
OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')

# Opcodes that pop a block from the block stack.
OPS_POP_BLOCK = _opcode_set('POP_BLOCK')

# Opcodes that have a jump destination, but aren't really a jump.
OPS_NO_JUMP = OPS_PUSH_BLOCK

# Individual opcodes we need below.  These use _opcode directly (not
# _opcode_set) and so will raise KeyError on a Python that lacks them.
OP_BREAK_LOOP = _opcode('BREAK_LOOP')
OP_END_FINALLY = _opcode('END_FINALLY')
OP_COMPARE_OP = _opcode('COMPARE_OP')
# COMPARE_OP argument meaning "exception match"; CPython doesn't export a
# named constant for it, so the value is hard-coded here.
COMPARE_EXCEPTION = 10  # just have to get this const from the code.
OP_LOAD_CONST = _opcode('LOAD_CONST')
OP_RETURN_VALUE = _opcode('RETURN_VALUE')
322
323
class ByteParser(object):
    """Parse byte codes to understand the structure of code."""

    def __init__(self, code=None, text=None, filename=None):
        """
        The source is given as a compiled `code` object, or as the source
        `text` itself, or as a `filename` to read the text from.  When only
        text is available it is compiled here; `NotPython` is raised if it
        isn't valid Python source.
        """
        if code:
            self.code = code
            self.text = text
        else:
            if not text:
                assert filename, "If no code or text, need a filename"
                sourcef = open_source(filename)
                try:
                    text = sourcef.read()
                finally:
                    sourcef.close()
            self.text = text

            try:
                # Python 2.3 and 2.4 don't like partial last lines, so be sure
                # the text ends nicely for them.
                self.code = compile(text + '\n', filename, "exec")
            except SyntaxError:
                # Old-Python-compatible spelling of "except ... as synerr".
                _, synerr, _ = sys.exc_info()
                raise NotPython(
                    "Couldn't parse '%s' as Python source: '%s' at line %d" %
                    (filename, synerr.msg, synerr.lineno)
                )

        # Alternative Python implementations don't always provide all the
        # attributes on code objects that we need to do the analysis.
        for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
            if not hasattr(self.code, attr):
                raise CoverageException(
                    "This implementation of Python doesn't support code "
                    "analysis.\n"
                    "Run coverage.py under CPython for this command."
                )

    def child_parsers(self):
        """Iterate over all the code objects nested within this one.

        The iteration includes `self` as its first value.

        """
        children = CodeObjects(self.code)
        return [ByteParser(code=c, text=self.text) for c in children]

    def _bytes_lines(self):
        """Map byte offsets to line numbers in `code`.

        Uses co_lnotab described in Python/compile.c to map byte offsets to
        line numbers.  Produces a sequence: (b0, l0), (b1, l1), ...

        Only byte offsets that correspond to line numbers are included in the
        results.

        """
        # Adapted from dis.py in the standard library.
        # co_lnotab is pairs of bytes: (byte-offset delta, line-number delta).
        byte_increments = bytes_to_ints(self.code.co_lnotab[0::2])
        line_increments = bytes_to_ints(self.code.co_lnotab[1::2])

        last_line_num = None
        line_num = self.code.co_firstlineno
        byte_num = 0
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                # A new byte offset: emit the pending line if it's new.
                if line_num != last_line_num:
                    yield (byte_num, line_num)
                    last_line_num = line_num
                byte_num += byte_incr
            line_num += line_incr
        # Emit the final accumulated line, if any.
        if line_num != last_line_num:
            yield (byte_num, line_num)

    def _find_statements(self):
        """Find the statements in `self.code`.

        Produce a sequence of line numbers that start statements.  Recurses
        into all code objects reachable from `self.code`.

        """
        for bp in self.child_parsers():
            # Get all of the lineno information from this code.
            for _, l in bp._bytes_lines():
                yield l

    def _block_stack_repr(self, block_stack):
        """Get a string version of `block_stack`, for debugging."""
        blocks = ", ".join(
            ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
        )
        return "[" + blocks + "]"

    def _split_into_chunks(self):
        """Split the code object into a list of `Chunk` objects.

        Each chunk is only entered at its first instruction, though there can
        be many exits from a chunk.

        Returns a list of `Chunk` objects.

        """
        # The list of chunks so far, and the one we're working on.
        chunks = []
        chunk = None

        # A dict mapping byte offsets of line starts to the line numbers.
        bytes_lines_map = dict(self._bytes_lines())

        # The block stack: loops and try blocks get pushed here for the
        # implicit jumps that can occur.
        # Each entry is a tuple: (block type, destination)
        block_stack = []

        # Some op codes are followed by branches that should be ignored.  This
        # is a count of how many ignores are left.
        ignore_branch = 0

        # We have to handle the last two bytecodes specially.
        ult = penult = None

        # Get a set of all of the jump-to points.
        jump_to = set()
        for bc in ByteCodes(self.code.co_code):
            if bc.jump_to >= 0:
                jump_to.add(bc.jump_to)

        chunk_lineno = 0

        # Walk the byte codes building chunks.
        for bc in ByteCodes(self.code.co_code):
            # Maybe have to start a new chunk
            start_new_chunk = False
            first_chunk = False
            if bc.offset in bytes_lines_map:
                # Start a new chunk for each source line number.
                start_new_chunk = True
                chunk_lineno = bytes_lines_map[bc.offset]
                first_chunk = True
            elif bc.offset in jump_to:
                # To make chunks have a single entrance, we have to make a new
                # chunk when we get to a place some bytecode jumps to.
                start_new_chunk = True
            elif bc.op in OPS_CHUNK_BEGIN:
                # Jumps deserve their own unnumbered chunk.  This fixes
                # problems with jumps to jumps getting confused.
                start_new_chunk = True

            if not chunk or start_new_chunk:
                if chunk:
                    # Falling off the end of the old chunk enters the new one.
                    chunk.exits.add(bc.offset)
                chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
                chunks.append(chunk)

            # Look at the opcode
            if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
                if ignore_branch:
                    # Someone earlier wanted us to ignore this branch.
                    ignore_branch -= 1
                else:
                    # The opcode has a jump, it's an exit for this chunk.
                    chunk.exits.add(bc.jump_to)

            if bc.op in OPS_CODE_END:
                # The opcode can exit the code object.  Negative exit values
                # encode "leave the code object" (see Chunk's docstring).
                chunk.exits.add(-self.code.co_firstlineno)
            if bc.op in OPS_PUSH_BLOCK:
                # The opcode adds a block to the block_stack.
                block_stack.append((bc.op, bc.jump_to))
            if bc.op in OPS_POP_BLOCK:
                # The opcode pops a block from the block stack.
                block_stack.pop()
            if bc.op in OPS_CHUNK_END:
                # This opcode forces the end of the chunk.
                if bc.op == OP_BREAK_LOOP:
                    # A break is implicit: jump where the top of the
                    # block_stack points.
                    chunk.exits.add(block_stack[-1][1])
                chunk = None
            if bc.op == OP_END_FINALLY:
                # For the finally clause we need to find the closest exception
                # block, and use its jump target as an exit.
                for block in reversed(block_stack):
                    if block[0] in OPS_EXCEPT_BLOCKS:
                        chunk.exits.add(block[1])
                        break
            if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                # This is an except clause.  We want to overlook the next
                # branch, so that except's don't count as branches.
                ignore_branch += 1

            # Remember the last two bytecodes for the "return None" check.
            penult = ult
            ult = bc

        if chunks:
            # The last two bytecodes could be a dummy "return None" that
            # shouldn't be counted as real code.  Every Python code object
            # seems to end with a return, and a "return None" is inserted if
            # there isn't an explicit return in the source.
            if ult and penult:
                if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
                    if self.code.co_consts[penult.arg] is None:
                        # This is "return None", but is it dummy?  A real line
                        # would be a last chunk all by itself.
                        if chunks[-1].byte != penult.offset:
                            ex = -self.code.co_firstlineno
                            # Split the last chunk
                            last_chunk = chunks[-1]
                            last_chunk.exits.remove(ex)
                            last_chunk.exits.add(penult.offset)
                            chunk = Chunk(
                                penult.offset, last_chunk.line, False
                            )
                            chunk.exits.add(ex)
                            chunks.append(chunk)

            # Give all the chunks a length.  `bc` is deliberately the last
            # bytecode from the loop above, hence the W0631 suppression.
            chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
            for i in range(len(chunks)-1):
                chunks[i].length = chunks[i+1].byte - chunks[i].byte

        #self.validate_chunks(chunks)
        return chunks

    def validate_chunks(self, chunks):
        """Validate the rule that chunks have a single entrance."""
        # starts is the entrances to the chunks
        starts = set([ch.byte for ch in chunks])
        for ch in chunks:
            assert all([(ex in starts or ex < 0) for ex in ch.exits])

    def _arcs(self):
        """Find the executable arcs in the code.

        Yields pairs: (from,to).  From and to are integer line numbers.  If
        from is < 0, then the arc is an entrance into the code object.  If to
        is < 0, the arc is an exit from the code object.

        """
        chunks = self._split_into_chunks()

        # A map from byte offsets to chunks jumped into.
        byte_chunks = dict([(c.byte, c) for c in chunks])

        # There's always an entrance at the first chunk.
        yield (-1, byte_chunks[0].line)

        # Traverse from the first chunk in each line, and yield arcs where
        # the trace function will be invoked.
        for chunk in chunks:
            if not chunk.first:
                continue

            chunks_considered = set()
            chunks_to_consider = [chunk]
            while chunks_to_consider:
                # Get the chunk we're considering, and make sure we don't
                # consider it again
                this_chunk = chunks_to_consider.pop()
                chunks_considered.add(this_chunk)

                # For each exit, add the line number if the trace function
                # would be triggered, or add the chunk to those being
                # considered if not.
                for ex in this_chunk.exits:
                    if ex < 0:
                        # A negative exit means leaving the code object.
                        yield (chunk.line, ex)
                    else:
                        next_chunk = byte_chunks[ex]
                        if next_chunk in chunks_considered:
                            continue

                        # The trace function is invoked if visiting the first
                        # bytecode in a line, or if the transition is a
                        # backward jump.
                        backward_jump = next_chunk.byte < this_chunk.byte
                        if next_chunk.first or backward_jump:
                            if next_chunk.line != chunk.line:
                                yield (chunk.line, next_chunk.line)
                        else:
                            chunks_to_consider.append(next_chunk)

    def _all_chunks(self):
        """Returns a list of `Chunk` objects for this code and its children.

        See `_split_into_chunks` for details.

        """
        chunks = []
        for bp in self.child_parsers():
            chunks.extend(bp._split_into_chunks())

        return chunks

    def _all_arcs(self):
        """Get the set of all arcs in this code object and its children.

        See `_arcs` for details.

        """
        arcs = set()
        for bp in self.child_parsers():
            arcs.update(bp._arcs())

        return arcs
629
630
class Chunk(object):
    """A sequence of byte codes with a single entrance.

    To analyze byte code, we have to divide it into chunks, sequences of byte
    codes such that each chunk has only one entrance, the first instruction in
    the block.

    This is almost the CS concept of `basic block`_, except that we're willing
    to have many exits from a chunk, and "basic block" is a more cumbersome
    term.

    .. _basic block: http://en.wikipedia.org/wiki/Basic_block

    `line` is the source line number containing this chunk.

    `first` is true if this is the first chunk in the source line.

    An exit < 0 means the chunk can leave the code (return).  The exit is
    the negative of the starting line number of the code block.

    """
    def __init__(self, byte, line, first):
        # Byte offset where this chunk begins.
        self.byte = byte
        # Source line number containing this chunk.
        self.line = line
        # Whether this is the first chunk on its source line.
        self.first = first
        # Length in bytes; filled in after all chunks are built.
        self.length = 0
        # Byte offsets (or negative line numbers) this chunk can exit to.
        self.exits = set()

    def __repr__(self):
        # A "!" marks a chunk that starts its source line.
        marker = ""
        if self.first:
            marker = "!"
        return "<%d+%d @%d%s %r>" % (
            self.byte, self.length, self.line, marker, list(self.exits)
        )
OLDNEW
« no previous file with comments | « third_party/coverage/misc.py ('k') | third_party/coverage/phystokens.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698