third_party/coverage-3.6/coverage/parser.py - Issue 14988009: First cut of testing infrastructure for recipes.

Side by Side Diff: third_party/coverage-3.6/coverage/parser.py

Issue 14988009: First cut of testing infrastructure for recipes. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build

Patch Set: nitfixen Created 7 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 """Code parsing for Coverage."""

	2

	3 import opcode, re, sys, token, tokenize

	4

	5 from coverage.backward import set, sorted, StringIO # pylint: disable=W0622

	6 from coverage.backward import open_source

	7 from coverage.bytecode import ByteCodes, CodeObjects

	8 from coverage.misc import nice_pair, expensive, join_regex

	9 from coverage.misc import CoverageException, NoSource, NotPython

	10

	11

	12 class CodeParser(object):

	13 """Parse code to find executable lines, excluded lines, etc."""

	14

	15 def __init__(self, text=None, filename=None, exclude=None):

	16 """

	17 Source can be provided as `text`, the text itself, or `filename`, from

	18 which the text will be read. Excluded lines are those that match

	19 `exclude`, a regex.

	20

	21 """

	22 assert text or filename, "CodeParser needs either text or filename"

	23 self.filename = filename or "<code>"

	24 self.text = text

	25 if not self.text:

	26 try:

	27 sourcef = open_source(self.filename)

	28 try:

	29 self.text = sourcef.read()

	30 finally:

	31 sourcef.close()

	32 except IOError:

	33 _, err, _ = sys.exc_info()

	34 raise NoSource(

	35 "No source for code: '%s': %s" % (self.filename, err)

	36 )

	37

	38 # Scrap the BOM if it exists.

	39 if self.text and ord(self.text[0]) == 0xfeff:

	40 self.text = self.text[1:]

	41

	42 self.exclude = exclude

	43

	44 self.show_tokens = False

	45

	46 # The text lines of the parsed code.

	47 self.lines = self.text.split('\n')

	48

	49 # The line numbers of excluded lines of code.

	50 self.excluded = set()

	51

	52 # The line numbers of docstring lines.

	53 self.docstrings = set()

	54

	55 # The line numbers of class definitions.

	56 self.classdefs = set()

	57

	58 # A dict mapping line numbers to (lo,hi) for multi-line statements.

	59 self.multiline = {}

	60

	61 # The line numbers that start statements.

	62 self.statement_starts = set()

	63

	64 # Lazily-created ByteParser

	65 self._byte_parser = None

	66

	67 def _get_byte_parser(self):

	68 """Create a ByteParser on demand."""

	69 if not self._byte_parser:

	70 self._byte_parser = \

	71 ByteParser(text=self.text, filename=self.filename)

	72 return self._byte_parser

	73 byte_parser = property(_get_byte_parser)

	74

	75 def lines_matching(self, *regexes):

	76 """Find the lines matching one of a list of regexes.

	77

	78 Returns a set of line numbers, the lines that contain a match for one

	79 of the regexes in `regexes`. The entire line needn't match, just a

	80 part of it.

	81

	82 """

	83 regex_c = re.compile(join_regex(regexes))

	84 matches = set()

	85 for i, ltext in enumerate(self.lines):

	86 if regex_c.search(ltext):

	87 matches.add(i+1)

	88 return matches

	89

	90 def _raw_parse(self):

	91 """Parse the source to find the interesting facts about its lines.

	92

	93 A handful of member fields are updated.

	94

	95 """

	96 # Find lines which match an exclusion pattern.

	97 if self.exclude:

	98 self.excluded = self.lines_matching(self.exclude)

	99

	100 # Tokenize, to find excluded suites, to find docstrings, and to find

	101 # multi-line statements.

	102 indent = 0

	103 exclude_indent = 0

	104 excluding = False

	105 prev_toktype = token.INDENT

	106 first_line = None

	107 empty = True

	108

	109 tokgen = tokenize.generate_tokens(StringIO(self.text).readline)

	110 for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:

	111 if self.show_tokens: # pragma: not covered

	112 print("%10s %5s %-20r %r" % (

	113 tokenize.tok_name.get(toktype, toktype),

	114 nice_pair((slineno, elineno)), ttext, ltext

	115 ))

	116 if toktype == token.INDENT:

	117 indent += 1

	118 elif toktype == token.DEDENT:

	119 indent -= 1

	120 elif toktype == token.NAME and ttext == 'class':

	121 # Class definitions look like branches in the byte code, so

	122 # we need to exclude them. The simplest way is to note the

	123 # lines with the 'class' keyword.

	124 self.classdefs.add(slineno)

	125 elif toktype == token.OP and ttext == ':':

	126 if not excluding and elineno in self.excluded:

	127 # Start excluding a suite. We trigger off of the colon

	128 # token so that the #pragma comment will be recognized on

	129 # the same line as the colon.

	130 exclude_indent = indent

	131 excluding = True

	132 elif toktype == token.STRING and prev_toktype == token.INDENT:

	133 # Strings that are first on an indented line are docstrings.

	134 # (a trick from trace.py in the stdlib.) This works for

	135 # 99.9999% of cases. For the rest (!) see:

	136 # http://stackoverflow.com/questions/1769332/x/1769794#1769794

	137 for i in range(slineno, elineno+1):

	138 self.docstrings.add(i)

	139 elif toktype == token.NEWLINE:

	140 if first_line is not None and elineno != first_line:

	141 # We're at the end of a line, and we've ended on a

	142 # different line than the first line of the statement,

	143 # so record a multi-line range.

	144 rng = (first_line, elineno)

	145 for l in range(first_line, elineno+1):

	146 self.multiline[l] = rng

	147 first_line = None

	148

	149 if ttext.strip() and toktype != tokenize.COMMENT:

	150 # A non-whitespace token.

	151 empty = False

	152 if first_line is None:

	153 # The token is not whitespace, and is the first in a

	154 # statement.

	155 first_line = slineno

	156 # Check whether to end an excluded suite.

	157 if excluding and indent <= exclude_indent:

	158 excluding = False

	159 if excluding:

	160 self.excluded.add(elineno)

	161

	162 prev_toktype = toktype

	163

	164 # Find the starts of the executable statements.

	165 if not empty:

	166 self.statement_starts.update(self.byte_parser._find_statements())

	167

	168 def first_line(self, line):

	169 """Return the first line number of the statement including `line`."""

	170 rng = self.multiline.get(line)

	171 if rng:

	172 first_line = rng[0]

	173 else:

	174 first_line = line

	175 return first_line

	176

	177 def first_lines(self, lines, ignore=None):

	178 """Map the line numbers in `lines` to the correct first line of the

	179 statement.

	180

	181 Skip any line mentioned in `ignore`.

	182

	183 Returns a sorted list of the first lines.

	184

	185 """

	186 ignore = ignore or []

	187 lset = set()

	188 for l in lines:

	189 if l in ignore:

	190 continue

	191 new_l = self.first_line(l)

	192 if new_l not in ignore:

	193 lset.add(new_l)

	194 return sorted(lset)

	195

	196 def parse_source(self):

	197 """Parse source text to find executable lines, excluded lines, etc.

	198

	199 Return values are 1) a sorted list of executable line numbers, and

	200 2) a sorted list of excluded line numbers.

	201

	202 Reported line numbers are normalized to the first line of multi-line

	203 statements.

	204

	205 """

	206 try:

	207 self._raw_parse()

	208 except (tokenize.TokenError, IndentationError):

	209 _, tokerr, _ = sys.exc_info()

	210 msg, lineno = tokerr.args

	211 raise NotPython(

	212 "Couldn't parse '%s' as Python source: '%s' at %s" %

	213 (self.filename, msg, lineno)

	214 )

	215

	216 excluded_lines = self.first_lines(self.excluded)

	217 ignore = excluded_lines + list(self.docstrings)

	218 lines = self.first_lines(self.statement_starts, ignore)

	219

	220 return lines, excluded_lines

	221

	222 def arcs(self):

	223 """Get information about the arcs available in the code.

	224

	225 Returns a sorted list of line number pairs. Line numbers have been

	226 normalized to the first line of multiline statements.

	227

	228 """

	229 all_arcs = []

	230 for l1, l2 in self.byte_parser._all_arcs():

	231 fl1 = self.first_line(l1)

	232 fl2 = self.first_line(l2)

	233 if fl1 != fl2:

	234 all_arcs.append((fl1, fl2))

	235 return sorted(all_arcs)

	236 arcs = expensive(arcs)

	237

	238 def exit_counts(self):

	239 """Get a mapping from line numbers to count of exits from that line.

	240

	241 Excluded lines are excluded.

	242

	243 """

	244 excluded_lines = self.first_lines(self.excluded)

	245 exit_counts = {}

	246 for l1, l2 in self.arcs():

	247 if l1 < 0:

	248 # Don't ever report -1 as a line number

	249 continue

	250 if l1 in excluded_lines:

	251 # Don't report excluded lines as line numbers.

	252 continue

	253 if l2 in excluded_lines:

	254 # Arcs to excluded lines shouldn't count.

	255 continue

	256 if l1 not in exit_counts:

	257 exit_counts[l1] = 0

	258 exit_counts[l1] += 1

	259

	260 # Class definitions have one extra exit, so remove one for each:

	261 for l in self.classdefs:

	262 # Ensure key is there: classdefs can include excluded lines.

	263 if l in exit_counts:

	264 exit_counts[l] -= 1

	265

	266 return exit_counts

	267 exit_counts = expensive(exit_counts)

	268

	269

	270 ## Opcodes that guide the ByteParser.

	271

	272 def _opcode(name):

	273 """Return the opcode by name from the opcode module."""

	274 return opcode.opmap[name]

	275

	276 def _opcode_set(*names):

	277 """Return a set of opcodes by the names in `names`."""

	278 s = set()

	279 for name in names:

	280 try:

	281 s.add(_opcode(name))

	282 except KeyError:

	283 pass

	284 return s

	285

	286 # Opcodes that leave the code object.

	287 OPS_CODE_END = _opcode_set('RETURN_VALUE')

	288

	289 # Opcodes that unconditionally end the code chunk.

	290 OPS_CHUNK_END = _opcode_set(

	291 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',

	292 'BREAK_LOOP', 'CONTINUE_LOOP',

	293 )

	294

	295 # Opcodes that unconditionally begin a new code chunk. By starting new chunks

	296 # with unconditional jump instructions, we neatly deal with jumps to jumps

	297 # properly.

	298 OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')

	299

	300 # Opcodes that push a block on the block stack.

	301 OPS_PUSH_BLOCK = _opcode_set(

	302 'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'

	303 )

	304

	305 # Block types for exception handling.

	306 OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')

	307

	308 # Opcodes that pop a block from the block stack.

	309 OPS_POP_BLOCK = _opcode_set('POP_BLOCK')

	310

	311 # Opcodes that have a jump destination, but aren't really a jump.

	312 OPS_NO_JUMP = OPS_PUSH_BLOCK

	313

	314 # Individual opcodes we need below.

	315 OP_BREAK_LOOP = _opcode('BREAK_LOOP')

	316 OP_END_FINALLY = _opcode('END_FINALLY')

	317 OP_COMPARE_OP = _opcode('COMPARE_OP')

	318 COMPARE_EXCEPTION = 10 # just have to get this const from the code.

	319 OP_LOAD_CONST = _opcode('LOAD_CONST')

	320 OP_RETURN_VALUE = _opcode('RETURN_VALUE')

	321

	322

	323 class ByteParser(object):

	324 """Parse byte codes to understand the structure of code."""

	325

	326 def __init__(self, code=None, text=None, filename=None):

	327 if code:

	328 self.code = code

	329 self.text = text

	330 else:

	331 if not text:

	332 assert filename, "If no code or text, need a filename"

	333 sourcef = open_source(filename)

	334 try:

	335 text = sourcef.read()

	336 finally:

	337 sourcef.close()

	338 self.text = text

	339

	340 try:

	341 # Python 2.3 and 2.4 don't like partial last lines, so be sure

	342 # the text ends nicely for them.

	343 self.code = compile(text + '\n', filename, "exec")

	344 except SyntaxError:

	345 _, synerr, _ = sys.exc_info()

	346 raise NotPython(

	347 "Couldn't parse '%s' as Python source: '%s' at line %d" %

	348 (filename, synerr.msg, synerr.lineno)

	349 )

	350

	351 # Alternative Python implementations don't always provide all the

	352 # attributes on code objects that we need to do the analysis.

	353 for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:

	354 if not hasattr(self.code, attr):

	355 raise CoverageException(

	356 "This implementation of Python doesn't support code "

	357 "analysis.\n"

	358 "Run coverage.py under CPython for this command."

	359 )

	360

	361 def child_parsers(self):

	362 """Iterate over all the code objects nested within this one.

	363

	364 The iteration includes `self` as its first value.

	365

	366 """

	367 children = CodeObjects(self.code)

	368 return [ByteParser(code=c, text=self.text) for c in children]

	369

	370 # Getting numbers from the lnotab value changed in Py3.0.

	371 if sys.version_info >= (3, 0):

	372 def _lnotab_increments(self, lnotab):

	373 """Return a list of ints from the lnotab bytes in 3.x"""

	374 return list(lnotab)

	375 else:

	376 def _lnotab_increments(self, lnotab):

	377 """Return a list of ints from the lnotab string in 2.x"""

	378 return [ord(c) for c in lnotab]

	379

	380 def _bytes_lines(self):

	381 """Map byte offsets to line numbers in `code`.

	382

	383 Uses co_lnotab described in Python/compile.c to map byte offsets to

	384 line numbers. Returns a list: [(b0, l0), (b1, l1), ...]

	385

	386 """

	387 # Adapted from dis.py in the standard library.

	388 byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])

	389 line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])

	390

	391 bytes_lines = []

	392 last_line_num = None

	393 line_num = self.code.co_firstlineno

	394 byte_num = 0

	395 for byte_incr, line_incr in zip(byte_increments, line_increments):

	396 if byte_incr:

	397 if line_num != last_line_num:

	398 bytes_lines.append((byte_num, line_num))

	399 last_line_num = line_num

	400 byte_num += byte_incr

	401 line_num += line_incr

	402 if line_num != last_line_num:

	403 bytes_lines.append((byte_num, line_num))

	404 return bytes_lines

	405

	406 def _find_statements(self):

	407 """Find the statements in `self.code`.

	408

	409 Return a set of line numbers that start statements. Recurses into all

	410 code objects reachable from `self.code`.

	411

	412 """

	413 stmts = set()

	414 for bp in self.child_parsers():

	415 # Get all of the lineno information from this code.

	416 for _, l in bp._bytes_lines():

	417 stmts.add(l)

	418 return stmts

	419

	420 def _split_into_chunks(self):

	421 """Split the code object into a list of `Chunk` objects.

	422

	423 Each chunk is only entered at its first instruction, though there can

	424 be many exits from a chunk.

	425

	426 Returns a list of `Chunk` objects.

	427

	428 """

	429

	430 # The list of chunks so far, and the one we're working on.

	431 chunks = []

	432 chunk = None

	433 bytes_lines_map = dict(self._bytes_lines())

	434

	435 # The block stack: loops and try blocks get pushed here for the

	436 # implicit jumps that can occur.

	437 # Each entry is a tuple: (block type, destination)

	438 block_stack = []

	439

	440 # Some op codes are followed by branches that should be ignored. This

	441 # is a count of how many ignores are left.

	442 ignore_branch = 0

	443

	444 # We have to handle the last two bytecodes specially.

	445 ult = penult = None

	446

	447 for bc in ByteCodes(self.code.co_code):

	448 # Maybe have to start a new chunk

	449 if bc.offset in bytes_lines_map:

	450 # Start a new chunk for each source line number.

	451 if chunk:

	452 chunk.exits.add(bc.offset)

	453 chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])

	454 chunks.append(chunk)

	455 elif bc.op in OPS_CHUNK_BEGIN:

	456 # Jumps deserve their own unnumbered chunk. This fixes

	457 # problems with jumps to jumps getting confused.

	458 if chunk:

	459 chunk.exits.add(bc.offset)

	460 chunk = Chunk(bc.offset)

	461 chunks.append(chunk)

	462

	463 if not chunk:

	464 chunk = Chunk(bc.offset)

	465 chunks.append(chunk)

	466

	467 # Look at the opcode

	468 if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:

	469 if ignore_branch:

	470 # Someone earlier wanted us to ignore this branch.

	471 ignore_branch -= 1

	472 else:

	473 # The opcode has a jump, it's an exit for this chunk.

	474 chunk.exits.add(bc.jump_to)

	475

	476 if bc.op in OPS_CODE_END:

	477 # The opcode can exit the code object.

	478 chunk.exits.add(-self.code.co_firstlineno)

	479 if bc.op in OPS_PUSH_BLOCK:

	480 # The opcode adds a block to the block_stack.

	481 block_stack.append((bc.op, bc.jump_to))

	482 if bc.op in OPS_POP_BLOCK:

	483 # The opcode pops a block from the block stack.

	484 block_stack.pop()

	485 if bc.op in OPS_CHUNK_END:

	486 # This opcode forces the end of the chunk.

	487 if bc.op == OP_BREAK_LOOP:

	488 # A break is implicit: jump where the top of the

	489 # block_stack points.

	490 chunk.exits.add(block_stack[-1][1])

	491 chunk = None

	492 if bc.op == OP_END_FINALLY:

	493 if block_stack:

	494 # A break that goes through a finally will jump to whatever

	495 # block is on top of the stack.

	496 chunk.exits.add(block_stack[-1][1])

	497 # For the finally clause we need to find the closest exception

	498 # block, and use its jump target as an exit.

	499 for iblock in range(len(block_stack)-1, -1, -1):

	500 if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:

	501 chunk.exits.add(block_stack[iblock][1])

	502 break

	503 if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:

	504 # This is an except clause. We want to overlook the next

	505 # branch, so that except's don't count as branches.

	506 ignore_branch += 1

	507

	508 penult = ult

	509 ult = bc

	510

	511 if chunks:

	512 # The last two bytecodes could be a dummy "return None" that

	513 # shouldn't be counted as real code. Every Python code object seems

	514 # to end with a return, and a "return None" is inserted if there

	515 # isn't an explicit return in the source.

	516 if ult and penult:

	517 if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:

	518 if self.code.co_consts[penult.arg] is None:

	519 # This is "return None", but is it dummy? A real line

	520 # would be a last chunk all by itself.

	521 if chunks[-1].byte != penult.offset:

	522 ex = -self.code.co_firstlineno

	523 # Split the last chunk

	524 last_chunk = chunks[-1]

	525 last_chunk.exits.remove(ex)

	526 last_chunk.exits.add(penult.offset)

	527 chunk = Chunk(penult.offset)

	528 chunk.exits.add(ex)

	529 chunks.append(chunk)

	530

	531 # Give all the chunks a length.

	532 chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disab le=W0631,C0301

	533 for i in range(len(chunks)-1):

	534 chunks[i].length = chunks[i+1].byte - chunks[i].byte

	535

	536 return chunks

	537

	538 def _arcs(self):

	539 """Find the executable arcs in the code.

	540

	541 Returns a set of pairs, (from,to). From and to are integer line

	542 numbers. If from is < 0, then the arc is an entrance into the code

	543 object. If to is < 0, the arc is an exit from the code object.

	544

	545 """

	546 chunks = self._split_into_chunks()

	547

	548 # A map from byte offsets to chunks jumped into.

	549 byte_chunks = dict([(c.byte, c) for c in chunks])

	550

	551 # Build a map from byte offsets to actual lines reached.

	552 byte_lines = {}

	553 bytes_to_add = set([c.byte for c in chunks])

	554

	555 while bytes_to_add:

	556 byte_to_add = bytes_to_add.pop()

	557 if byte_to_add in byte_lines or byte_to_add < 0:

	558 continue

	559

	560 # Which lines does this chunk lead to?

	561 bytes_considered = set()

	562 bytes_to_consider = [byte_to_add]

	563 lines = set()

	564

	565 while bytes_to_consider:

	566 byte = bytes_to_consider.pop()

	567 bytes_considered.add(byte)

	568

	569 # Find chunk for byte

	570 try:

	571 ch = byte_chunks[byte]

	572 except KeyError:

	573 for ch in chunks:

	574 if ch.byte <= byte < ch.byte+ch.length:

	575 break

	576 else:

	577 # No chunk for this byte!

	578 raise Exception("Couldn't find chunk @ %d" % byte)

	579 byte_chunks[byte] = ch # pylint: disable=W0631

	580

	581 if ch.line:

	582 lines.add(ch.line)

	583 else:

	584 for ex in ch.exits:

	585 if ex < 0:

	586 lines.add(ex)

	587 elif ex not in bytes_considered:

	588 bytes_to_consider.append(ex)

	589

	590 bytes_to_add.update(ch.exits)

	591

	592 byte_lines[byte_to_add] = lines

	593

	594 # Figure out for each chunk where the exits go.

	595 arcs = set()

	596 for chunk in chunks:

	597 if chunk.line:

	598 for ex in chunk.exits:

	599 if ex < 0:

	600 exit_lines = [ex]

	601 else:

	602 exit_lines = byte_lines[ex]

	603 for exit_line in exit_lines:

	604 if chunk.line != exit_line:

	605 arcs.add((chunk.line, exit_line))

	606 for line in byte_lines[0]:

	607 arcs.add((-1, line))

	608

	609 return arcs

	610

	611 def _all_chunks(self):

	612 """Returns a list of `Chunk` objects for this code and its children.

	613

	614 See `_split_into_chunks` for details.

	615

	616 """

	617 chunks = []

	618 for bp in self.child_parsers():

	619 chunks.extend(bp._split_into_chunks())

	620

	621 return chunks

	622

	623 def _all_arcs(self):

	624 """Get the set of all arcs in this code object and its children.

	625

	626 See `_arcs` for details.

	627

	628 """

	629 arcs = set()

	630 for bp in self.child_parsers():

	631 arcs.update(bp._arcs())

	632

	633 return arcs

	634

	635

	636 class Chunk(object):

	637 """A sequence of bytecodes with a single entrance.

	638

	639 To analyze byte code, we have to divide it into chunks, sequences of byte

	640 codes such that each basic block has only one entrance, the first

	641 instruction in the block.

	642

	643 This is almost the CS concept of `basic block`_, except that we're willing

	644 to have many exits from a chunk, and "basic block" is a more cumbersome

	645 term.

	646

	647 .. _basic block: http://en.wikipedia.org/wiki/Basic_block

	648

	649 An exit < 0 means the chunk can leave the code (return). The exit is

	650 the negative of the starting line number of the code block.

	651

	652 """

	653 def __init__(self, byte, line=0):

	654 self.byte = byte

	655 self.line = line

	656 self.length = 0

	657 self.exits = set()

	658

	659 def __repr__(self):

	660 return "<%d+%d @%d %r>" % (

	661 self.byte, self.length, self.line, list(self.exits)

	662 )

OLD	NEW

« no previous file with comments | « third_party/coverage-3.6/coverage/misc.py ('k') | third_party/coverage-3.6/coverage/phystokens.py » ('j') | no next file with comments »