Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(69)

Side by Side Diff: third_party/coverage/parser.py

Issue 63813002: Add python coverage 3.7 to depot tools. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/coverage/misc.py ('k') | third_party/coverage/phystokens.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 """Code parsing for Coverage."""
2
3 import dis, re, sys, token, tokenize
4
5 from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
6 from coverage.backward import open_source, range # pylint: disable=W0622
7 from coverage.backward import reversed # pylint: disable=W0622
8 from coverage.backward import bytes_to_ints
9 from coverage.bytecode import ByteCodes, CodeObjects
10 from coverage.misc import nice_pair, expensive, join_regex
11 from coverage.misc import CoverageException, NoSource, NotPython
12
13
class CodeParser(object):
    """Parse code to find executable lines, excluded lines, etc."""

    def __init__(self, text=None, filename=None, exclude=None):
        """
        Source can be provided as `text`, the text itself, or `filename`, from
        which the text will be read. Excluded lines are those that match
        `exclude`, a regex.

        """
        assert text or filename, "CodeParser needs either text or filename"
        self.filename = filename or "<code>"
        self.text = text
        if not self.text:
            try:
                sourcef = open_source(self.filename)
                try:
                    self.text = sourcef.read()
                finally:
                    sourcef.close()
            except IOError:
                # sys.exc_info() instead of "except IOError as err" keeps
                # this compatible with very old Pythons.
                _, err, _ = sys.exc_info()
                raise NoSource(
                    "No source for code: '%s': %s" % (self.filename, err)
                )

        # Scrap the BOM if it exists.
        if self.text and ord(self.text[0]) == 0xfeff:
            self.text = self.text[1:]

        # The regex (or None) matching lines to exclude from coverage.
        self.exclude = exclude

        # Debugging aid: when True, _raw_parse prints each token it sees.
        self.show_tokens = False

        # The text lines of the parsed code.
        self.lines = self.text.split('\n')

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # The line numbers of class definitions.
        self.classdefs = set()

        # A dict mapping line numbers to (lo,hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

        # Lazily-created ByteParser
        self._byte_parser = None

    def _get_byte_parser(self):
        """Create a ByteParser on demand."""
        if not self._byte_parser:
            self._byte_parser = \
                ByteParser(text=self.text, filename=self.filename)
        return self._byte_parser
    byte_parser = property(_get_byte_parser)

    def lines_matching(self, *regexes):
        """Find the lines matching one of a list of regexes.

        Returns a set of line numbers, the lines that contain a match for one
        of the regexes in `regexes`.  The entire line needn't match, just a
        part of it.

        """
        regex_c = re.compile(join_regex(regexes))
        matches = set()
        for i, ltext in enumerate(self.lines):
            if regex_c.search(ltext):
                # Line numbers are 1-based, enumerate is 0-based.
                matches.add(i+1)
        return matches

    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of member fields are updated: `excluded`, `classdefs`,
        `docstrings`, `multiline`, and `statement_starts`.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0                  # Current indentation depth.
        exclude_indent = 0          # Depth at which an excluded suite began.
        excluding = False           # Are we currently inside an excluded suite?
        prev_toktype = token.INDENT
        first_line = None           # First line of the statement in progress.
        empty = True                # True until a real (non-trivia) token is seen.

        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:                # pragma: not covered
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                # Class definitions look like branches in the byte code, so
                # we need to exclude them.  The simplest way is to note the
                # lines with the 'class' keyword.
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.)  This works for
                # 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                self.docstrings.update(range(slineno, elineno+1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno+1):
                        self.multiline[l] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                empty = False
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.  An empty file (only
        # comments/whitespace) has no statements at all.
        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Return the first line number of the statement including `line`."""
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Skip any line mentioned in `ignore`.

        Returns a sorted list of the first lines.

        """
        ignore = ignore or []
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)
        return sorted(lset)

    def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.

        Return values are 1) a sorted list of executable line numbers, and
        2) a sorted list of excluded line numbers.

        Reported line numbers are normalized to the first line of multi-line
        statements.

        Raises `NotPython` if the text can't be tokenized.

        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            # Old-Python-compatible spelling of "except ... as tokerr".
            _, tokerr, _ = sys.exc_info()
            msg, lineno = tokerr.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                (self.filename, msg, lineno)
            )

        excluded_lines = self.first_lines(self.excluded)
        # Docstring lines are never executable, so exclude them too.
        ignore = excluded_lines + list(self.docstrings)
        lines = self.first_lines(self.statement_starts, ignore)

        return lines, excluded_lines

    def arcs(self):
        """Get information about the arcs available in the code.

        Returns a sorted list of line number pairs.  Line numbers have been
        normalized to the first line of multiline statements.

        """
        all_arcs = []
        for l1, l2 in self.byte_parser._all_arcs():
            fl1 = self.first_line(l1)
            fl2 = self.first_line(l2)
            if fl1 != fl2:
                all_arcs.append((fl1, fl2))
        return sorted(all_arcs)
    arcs = expensive(arcs)

    def exit_counts(self):
        """Get a mapping from line numbers to count of exits from that line.

        Excluded lines are excluded.

        """
        excluded_lines = self.first_lines(self.excluded)
        exit_counts = {}
        for l1, l2 in self.arcs():
            if l1 < 0:
                # Don't ever report -1 as a line number
                continue
            if l1 in excluded_lines:
                # Don't report excluded lines as line numbers.
                continue
            if l2 in excluded_lines:
                # Arcs to excluded lines shouldn't count.
                continue
            if l1 not in exit_counts:
                exit_counts[l1] = 0
            exit_counts[l1] += 1

        # Class definitions have one extra exit, so remove one for each:
        for l in self.classdefs:
            # Ensure key is there: classdefs can include excluded lines.
            if l in exit_counts:
                exit_counts[l] -= 1

        return exit_counts
    exit_counts = expensive(exit_counts)
269
270
271 ## Opcodes that guide the ByteParser.
272
273 def _opcode(name):
274 """Return the opcode by name from the dis module."""
275 return dis.opmap[name]
276
277 def _opcode_set(*names):
278 """Return a set of opcodes by the names in `names`."""
279 s = set()
280 for name in names:
281 try:
282 s.add(_opcode(name))
283 except KeyError:
284 pass
285 return s
286
# Opcodes that leave the code object.
OPS_CODE_END = _opcode_set('RETURN_VALUE')

# Opcodes that unconditionally end the code chunk.
OPS_CHUNK_END = _opcode_set(
    'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
    'BREAK_LOOP', 'CONTINUE_LOOP',
)

# Opcodes that unconditionally begin a new code chunk.  By starting new chunks
# with unconditional jump instructions, we neatly deal with jumps to jumps
# properly.
OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')

# Opcodes that push a block on the block stack.  (SETUP_WITH may be absent on
# older Pythons; _opcode_set tolerates missing names.)
OPS_PUSH_BLOCK = _opcode_set(
    'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
)

# Block types for exception handling.
OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')

# Opcodes that pop a block from the block stack.
OPS_POP_BLOCK = _opcode_set('POP_BLOCK')

# Opcodes that have a jump destination, but aren't really a jump.
OPS_NO_JUMP = OPS_PUSH_BLOCK

# Individual opcodes we need below.  These use _opcode directly (not
# _opcode_set) and so will raise KeyError on a Python that lacks them.
OP_BREAK_LOOP = _opcode('BREAK_LOOP')
OP_END_FINALLY = _opcode('END_FINALLY')
OP_COMPARE_OP = _opcode('COMPARE_OP')
# COMPARE_OP argument meaning "exception match"; CPython doesn't export a
# named constant for it, so the value is hard-coded here.
COMPARE_EXCEPTION = 10  # just have to get this const from the code.
OP_LOAD_CONST = _opcode('LOAD_CONST')
OP_RETURN_VALUE = _opcode('RETURN_VALUE')
322
323
class ByteParser(object):
    """Parse byte codes to understand the structure of code."""

    def __init__(self, code=None, text=None, filename=None):
        """
        The source is given as a compiled `code` object, or as the source
        `text` itself, or as a `filename` to read the text from.  When only
        text is available it is compiled here; `NotPython` is raised if it
        isn't valid Python source.
        """
        if code:
            self.code = code
            self.text = text
        else:
            if not text:
                assert filename, "If no code or text, need a filename"
                sourcef = open_source(filename)
                try:
                    text = sourcef.read()
                finally:
                    sourcef.close()
            self.text = text

            try:
                # Python 2.3 and 2.4 don't like partial last lines, so be sure
                # the text ends nicely for them.
                self.code = compile(text + '\n', filename, "exec")
            except SyntaxError:
                # Old-Python-compatible spelling of "except ... as synerr".
                _, synerr, _ = sys.exc_info()
                raise NotPython(
                    "Couldn't parse '%s' as Python source: '%s' at line %d" %
                    (filename, synerr.msg, synerr.lineno)
                )

        # Alternative Python implementations don't always provide all the
        # attributes on code objects that we need to do the analysis.
        for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
            if not hasattr(self.code, attr):
                raise CoverageException(
                    "This implementation of Python doesn't support code "
                    "analysis.\n"
                    "Run coverage.py under CPython for this command."
                )

    def child_parsers(self):
        """Iterate over all the code objects nested within this one.

        The iteration includes `self` as its first value.

        """
        children = CodeObjects(self.code)
        return [ByteParser(code=c, text=self.text) for c in children]

    def _bytes_lines(self):
        """Map byte offsets to line numbers in `code`.

        Uses co_lnotab described in Python/compile.c to map byte offsets to
        line numbers.  Produces a sequence: (b0, l0), (b1, l1), ...

        Only byte offsets that correspond to line numbers are included in the
        results.

        """
        # Adapted from dis.py in the standard library.
        # co_lnotab is pairs of bytes: (byte-offset delta, line-number delta).
        byte_increments = bytes_to_ints(self.code.co_lnotab[0::2])
        line_increments = bytes_to_ints(self.code.co_lnotab[1::2])

        last_line_num = None
        line_num = self.code.co_firstlineno
        byte_num = 0
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                # A new byte offset: emit the pending line if it's new.
                if line_num != last_line_num:
                    yield (byte_num, line_num)
                    last_line_num = line_num
                byte_num += byte_incr
            line_num += line_incr
        # Emit the final accumulated line, if any.
        if line_num != last_line_num:
            yield (byte_num, line_num)

    def _find_statements(self):
        """Find the statements in `self.code`.

        Produce a sequence of line numbers that start statements.  Recurses
        into all code objects reachable from `self.code`.

        """
        for bp in self.child_parsers():
            # Get all of the lineno information from this code.
            for _, l in bp._bytes_lines():
                yield l

    def _block_stack_repr(self, block_stack):
        """Get a string version of `block_stack`, for debugging."""
        blocks = ", ".join(
            ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
        )
        return "[" + blocks + "]"

    def _split_into_chunks(self):
        """Split the code object into a list of `Chunk` objects.

        Each chunk is only entered at its first instruction, though there can
        be many exits from a chunk.

        Returns a list of `Chunk` objects.

        """
        # The list of chunks so far, and the one we're working on.
        chunks = []
        chunk = None

        # A dict mapping byte offsets of line starts to the line numbers.
        bytes_lines_map = dict(self._bytes_lines())

        # The block stack: loops and try blocks get pushed here for the
        # implicit jumps that can occur.
        # Each entry is a tuple: (block type, destination)
        block_stack = []

        # Some op codes are followed by branches that should be ignored.  This
        # is a count of how many ignores are left.
        ignore_branch = 0

        # We have to handle the last two bytecodes specially.
        ult = penult = None

        # Get a set of all of the jump-to points.
        jump_to = set()
        for bc in ByteCodes(self.code.co_code):
            if bc.jump_to >= 0:
                jump_to.add(bc.jump_to)

        chunk_lineno = 0

        # Walk the byte codes building chunks.
        for bc in ByteCodes(self.code.co_code):
            # Maybe have to start a new chunk
            start_new_chunk = False
            first_chunk = False
            if bc.offset in bytes_lines_map:
                # Start a new chunk for each source line number.
                start_new_chunk = True
                chunk_lineno = bytes_lines_map[bc.offset]
                first_chunk = True
            elif bc.offset in jump_to:
                # To make chunks have a single entrance, we have to make a new
                # chunk when we get to a place some bytecode jumps to.
                start_new_chunk = True
            elif bc.op in OPS_CHUNK_BEGIN:
                # Jumps deserve their own unnumbered chunk.  This fixes
                # problems with jumps to jumps getting confused.
                start_new_chunk = True

            if not chunk or start_new_chunk:
                if chunk:
                    # Falling off the end of the old chunk enters the new one.
                    chunk.exits.add(bc.offset)
                chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
                chunks.append(chunk)

            # Look at the opcode
            if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
                if ignore_branch:
                    # Someone earlier wanted us to ignore this branch.
                    ignore_branch -= 1
                else:
                    # The opcode has a jump, it's an exit for this chunk.
                    chunk.exits.add(bc.jump_to)

            if bc.op in OPS_CODE_END:
                # The opcode can exit the code object.  Negative exit values
                # encode "leave the code object" (see Chunk's docstring).
                chunk.exits.add(-self.code.co_firstlineno)
            if bc.op in OPS_PUSH_BLOCK:
                # The opcode adds a block to the block_stack.
                block_stack.append((bc.op, bc.jump_to))
            if bc.op in OPS_POP_BLOCK:
                # The opcode pops a block from the block stack.
                block_stack.pop()
            if bc.op in OPS_CHUNK_END:
                # This opcode forces the end of the chunk.
                if bc.op == OP_BREAK_LOOP:
                    # A break is implicit: jump where the top of the
                    # block_stack points.
                    chunk.exits.add(block_stack[-1][1])
                chunk = None
            if bc.op == OP_END_FINALLY:
                # For the finally clause we need to find the closest exception
                # block, and use its jump target as an exit.
                for block in reversed(block_stack):
                    if block[0] in OPS_EXCEPT_BLOCKS:
                        chunk.exits.add(block[1])
                        break
            if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                # This is an except clause.  We want to overlook the next
                # branch, so that except's don't count as branches.
                ignore_branch += 1

            # Remember the last two bytecodes for the "return None" check.
            penult = ult
            ult = bc

        if chunks:
            # The last two bytecodes could be a dummy "return None" that
            # shouldn't be counted as real code.  Every Python code object
            # seems to end with a return, and a "return None" is inserted if
            # there isn't an explicit return in the source.
            if ult and penult:
                if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
                    if self.code.co_consts[penult.arg] is None:
                        # This is "return None", but is it dummy?  A real line
                        # would be a last chunk all by itself.
                        if chunks[-1].byte != penult.offset:
                            ex = -self.code.co_firstlineno
                            # Split the last chunk
                            last_chunk = chunks[-1]
                            last_chunk.exits.remove(ex)
                            last_chunk.exits.add(penult.offset)
                            chunk = Chunk(
                                penult.offset, last_chunk.line, False
                            )
                            chunk.exits.add(ex)
                            chunks.append(chunk)

            # Give all the chunks a length.  `bc` is deliberately the last
            # bytecode from the loop above, hence the W0631 suppression.
            chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
            for i in range(len(chunks)-1):
                chunks[i].length = chunks[i+1].byte - chunks[i].byte

        #self.validate_chunks(chunks)
        return chunks

    def validate_chunks(self, chunks):
        """Validate the rule that chunks have a single entrance."""
        # starts is the entrances to the chunks
        starts = set([ch.byte for ch in chunks])
        for ch in chunks:
            assert all([(ex in starts or ex < 0) for ex in ch.exits])

    def _arcs(self):
        """Find the executable arcs in the code.

        Yields pairs: (from,to).  From and to are integer line numbers.  If
        from is < 0, then the arc is an entrance into the code object.  If to
        is < 0, the arc is an exit from the code object.

        """
        chunks = self._split_into_chunks()

        # A map from byte offsets to chunks jumped into.
        byte_chunks = dict([(c.byte, c) for c in chunks])

        # There's always an entrance at the first chunk.
        yield (-1, byte_chunks[0].line)

        # Traverse from the first chunk in each line, and yield arcs where
        # the trace function will be invoked.
        for chunk in chunks:
            if not chunk.first:
                continue

            chunks_considered = set()
            chunks_to_consider = [chunk]
            while chunks_to_consider:
                # Get the chunk we're considering, and make sure we don't
                # consider it again
                this_chunk = chunks_to_consider.pop()
                chunks_considered.add(this_chunk)

                # For each exit, add the line number if the trace function
                # would be triggered, or add the chunk to those being
                # considered if not.
                for ex in this_chunk.exits:
                    if ex < 0:
                        # A negative exit means leaving the code object.
                        yield (chunk.line, ex)
                    else:
                        next_chunk = byte_chunks[ex]
                        if next_chunk in chunks_considered:
                            continue

                        # The trace function is invoked if visiting the first
                        # bytecode in a line, or if the transition is a
                        # backward jump.
                        backward_jump = next_chunk.byte < this_chunk.byte
                        if next_chunk.first or backward_jump:
                            if next_chunk.line != chunk.line:
                                yield (chunk.line, next_chunk.line)
                        else:
                            chunks_to_consider.append(next_chunk)

    def _all_chunks(self):
        """Returns a list of `Chunk` objects for this code and its children.

        See `_split_into_chunks` for details.

        """
        chunks = []
        for bp in self.child_parsers():
            chunks.extend(bp._split_into_chunks())

        return chunks

    def _all_arcs(self):
        """Get the set of all arcs in this code object and its children.

        See `_arcs` for details.

        """
        arcs = set()
        for bp in self.child_parsers():
            arcs.update(bp._arcs())

        return arcs
629
630
class Chunk(object):
    """A sequence of byte codes with a single entrance.

    To analyze byte code, we have to divide it into chunks, sequences of byte
    codes such that each chunk has only one entrance, the first instruction in
    the block.

    This is almost the CS concept of `basic block`_, except that we're willing
    to have many exits from a chunk, and "basic block" is a more cumbersome
    term.

    .. _basic block: http://en.wikipedia.org/wiki/Basic_block

    `line` is the source line number containing this chunk.

    `first` is true if this is the first chunk in the source line.

    An exit < 0 means the chunk can leave the code (return).  The exit is
    the negative of the starting line number of the code block.

    """
    def __init__(self, byte, line, first):
        # Byte offset where this chunk begins.
        self.byte = byte
        # Source line number containing this chunk.
        self.line = line
        # Whether this is the first chunk on its source line.
        self.first = first
        # Length in bytes; filled in after all chunks are built.
        self.length = 0
        # Byte offsets (or negative line numbers) this chunk can exit to.
        self.exits = set()

    def __repr__(self):
        # A "!" marks a chunk that starts its source line.
        marker = ""
        if self.first:
            marker = "!"
        return "<%d+%d @%d%s %r>" % (
            self.byte, self.length, self.line, marker, list(self.exits)
        )
OLDNEW
« no previous file with comments | « third_party/coverage/misc.py ('k') | third_party/coverage/phystokens.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698