Index: tools/telemetry/third_party/coverage/coverage/parser.py
diff --git a/third_party/pycoverage/coverage/parser.py b/tools/telemetry/third_party/coverage/coverage/parser.py
similarity index 78%
copy from third_party/pycoverage/coverage/parser.py
copy to tools/telemetry/third_party/coverage/coverage/parser.py
index 7a145a2a5346cc806848813566998bdee854d370..882c972b4e78f45bfb74ec9388987dc7d2ab8da9 100644
--- a/third_party/pycoverage/coverage/parser.py
+++ b/tools/telemetry/third_party/coverage/coverage/parser.py
@@ -1,19 +1,26 @@
-"""Code parsing for Coverage."""
+# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
+# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
-import dis, re, sys, token, tokenize
+"""Code parsing for coverage.py."""
-from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
-from coverage.backward import open_source, range # pylint: disable=W0622
-from coverage.backward import reversed # pylint: disable=W0622
+import collections
+import dis
+import re
+import token
+import tokenize
+
+from coverage.backward import range # pylint: disable=redefined-builtin
 from coverage.backward import bytes_to_ints
 from coverage.bytecode import ByteCodes, CodeObjects
-from coverage.misc import nice_pair, expensive, join_regex
+from coverage.misc import contract, nice_pair, expensive, join_regex
 from coverage.misc import CoverageException, NoSource, NotPython
+from coverage.phystokens import compile_unicode, generate_tokens
-class CodeParser(object):
+class PythonParser(object):
     """Parse code to find executable lines, excluded lines, etc."""
+    @contract(text='unicode|None')
     def __init__(self, text=None, filename=None, exclude=None):
         """
         Source can be provided as `text`, the text itself, or `filename`, from
@@ -21,25 +28,17 @@ class CodeParser(object):
         `exclude`, a regex.
         """
-        assert text or filename, "CodeParser needs either text or filename"
+        assert text or filename, "PythonParser needs either text or filename"
         self.filename = filename or "<code>"
         self.text = text
         if not self.text:
+            from coverage.python import get_python_source
             try:
-                sourcef = open_source(self.filename)
-                try:
-                    self.text = sourcef.read()
-                finally:
-                    sourcef.close()
-            except IOError:
-                _, err, _ = sys.exc_info()
+                self.text = get_python_source(self.filename)
+            except IOError as err:
                 raise NoSource(
                     "No source for code: '%s': %s" % (self.filename, err)
-                    )
-
-        # Scrap the BOM if it exists.
-        if self.text and ord(self.text[0]) == 0xfeff:
-            self.text = self.text[1:]
+                )
         self.exclude = exclude
@@ -63,16 +62,16 @@ class CodeParser(object):
         # The line numbers that start statements.
         self.statement_starts = set()
-        # Lazily-created ByteParser
+        # Lazily-created ByteParser and arc data.
         self._byte_parser = None
+        self._all_arcs = None
-    def _get_byte_parser(self):
+    @property
+    def byte_parser(self):
         """Create a ByteParser on demand."""
         if not self._byte_parser:
-            self._byte_parser = \
-                ByteParser(text=self.text, filename=self.filename)
+            self._byte_parser = ByteParser(self.text, filename=self.filename)
         return self._byte_parser
-    byte_parser = property(_get_byte_parser)
     def lines_matching(self, *regexes):
         """Find the lines matching one of a list of regexes.
@@ -84,9 +83,9 @@ class CodeParser(object):
         """
         regex_c = re.compile(join_regex(regexes))
         matches = set()
-        for i, ltext in enumerate(self.lines):
+        for i, ltext in enumerate(self.lines, start=1):
             if regex_c.search(ltext):
-                matches.add(i+1)
+                matches.add(i)
         return matches
     def _raw_parse(self):
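The `lines_matching` hunk above switches to `enumerate(..., start=1)` so the 1-based line numbers fall out of the iteration directly. As a rough standalone sketch of the behavior (illustrative only, not part of this patch):

    import re

    def lines_matching(lines, *regexes):
        # Join the regexes into one alternation; collect the 1-based
        # numbers of the source lines whose text matches it.
        regex_c = re.compile("|".join("(%s)" % r for r in regexes))
        return set(
            i for i, ltext in enumerate(lines, start=1)
            if regex_c.search(ltext)
        )

    # lines_matching(["x = 1", "y = 2  # pragma: no cover"], "no cover")
    # returns {2}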
@@ -114,7 +113,7 @@ class CodeParser(object):
                 print("%10s %5s %-20r %r" % (
                     tokenize.tok_name.get(toktype, toktype),
                     nice_pair((slineno, elineno)), ttext, ltext
-                    ))
+                ))
             if toktype == token.INDENT:
                 indent += 1
             elif toktype == token.DEDENT:
@@ -142,9 +141,8 @@ class CodeParser(object):
                     # We're at the end of a line, and we've ended on a
                     # different line than the first line of the statement,
                     # so record a multi-line range.
-                    rng = (first_line, elineno)
                     for l in range(first_line, elineno+1):
-                        self.multiline[l] = rng
+                        self.multiline[l] = first_line
                first_line = None
             if ttext.strip() and toktype != tokenize.COMMENT:
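After this hunk, `self.multiline` maps each line of a multi-line statement straight to its first line number instead of to a `(first, last)` tuple; the last line was never used. A minimal sketch of the resulting lookup, with hypothetical data:

    # A statement spanning lines 3-5 is now recorded as:
    multiline = {3: 3, 4: 3, 5: 3}

    def first_line(line):
        # Single-line statements are absent, so fall back to the line itself.
        return multiline.get(line) or line

    assert first_line(4) == 3
    assert first_line(9) == 9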
@@ -168,34 +166,33 @@ class CodeParser(object):
     def first_line(self, line):
         """Return the first line number of the statement including `line`."""
-        rng = self.multiline.get(line)
-        if rng:
-            first_line = rng[0]
+        first_line = self.multiline.get(line)
+        if first_line:
+            return first_line
         else:
-            first_line = line
-        return first_line
+            return line
-    def first_lines(self, lines, *ignores):
+    def first_lines(self, lines):
         """Map the line numbers in `lines` to the correct first line of the
         statement.
-        Skip any line mentioned in any of the sequences in `ignores`.
-
         Returns a set of the first lines.
         """
-        ignore = set()
-        for ign in ignores:
-            ignore.update(ign)
-        lset = set()
-        for l in lines:
-            if l in ignore:
-                continue
-            new_l = self.first_line(l)
-            if new_l not in ignore:
-                lset.add(new_l)
-        return lset
+        return set(self.first_line(l) for l in lines)
+
+    def translate_lines(self, lines):
+        """Implement `FileReporter.translate_lines`."""
+        return self.first_lines(lines)
+
+    def translate_arcs(self, arcs):
+        """Implement `FileReporter.translate_arcs`."""
+        return [
+            (self.first_line(a), self.first_line(b))
+            for (a, b) in arcs
+        ]
+    @expensive
     def parse_source(self):
         """Parse source text to find executable lines, excluded lines, etc.
@@ -208,47 +205,51 @@ class CodeParser(object):
         """
         try:
             self._raw_parse()
-        except (tokenize.TokenError, IndentationError):
-            _, tokerr, _ = sys.exc_info()
-            msg, lineno = tokerr.args
+        except (tokenize.TokenError, IndentationError) as err:
+            if hasattr(err, "lineno"):
+                lineno = err.lineno     # IndentationError
+            else:
+                lineno = err.args[1][0] # TokenError
             raise NotPython(
-                "Couldn't parse '%s' as Python source: '%s' at %s" %
-                    (self.filename, msg, lineno)
+                "Couldn't parse '%s' as Python source: '%s' at line %d" % (
+                    self.filename, err.args[0], lineno
                 )
+            )
         excluded_lines = self.first_lines(self.excluded)
-        lines = self.first_lines(
-            self.statement_starts,
-            excluded_lines,
-            self.docstrings
-        )
+        ignore = set()
+        ignore.update(excluded_lines)
+        ignore.update(self.docstrings)
+        starts = self.statement_starts - ignore
+        lines = self.first_lines(starts)
+        lines -= ignore
         return lines, excluded_lines
     def arcs(self):
         """Get information about the arcs available in the code.
-        Returns a sorted list of line number pairs. Line numbers have been
-        normalized to the first line of multiline statements.
+        Returns a set of line number pairs. Line numbers have been normalized
+        to the first line of multi-line statements.
         """
-        all_arcs = []
-        for l1, l2 in self.byte_parser._all_arcs():
-            fl1 = self.first_line(l1)
-            fl2 = self.first_line(l2)
-            if fl1 != fl2:
-                all_arcs.append((fl1, fl2))
-        return sorted(all_arcs)
-    arcs = expensive(arcs)
+        if self._all_arcs is None:
+            self._all_arcs = set()
+            for l1, l2 in self.byte_parser._all_arcs():
+                fl1 = self.first_line(l1)
+                fl2 = self.first_line(l2)
+                if fl1 != fl2:
+                    self._all_arcs.add((fl1, fl2))
+        return self._all_arcs
     def exit_counts(self):
-        """Get a mapping from line numbers to count of exits from that line.
+        """Get a count of exits from each line.
         Excluded lines are excluded.
         """
         excluded_lines = self.first_lines(self.excluded)
-        exit_counts = {}
+        exit_counts = collections.defaultdict(int)
         for l1, l2 in self.arcs():
             if l1 < 0:
                 # Don't ever report -1 as a line number
@@ -259,18 +260,15 @@ class CodeParser(object):
             if l2 in excluded_lines:
                 # Arcs to excluded lines shouldn't count.
                 continue
-            if l1 not in exit_counts:
-                exit_counts[l1] = 0
             exit_counts[l1] += 1
         # Class definitions have one extra exit, so remove one for each:
         for l in self.classdefs:
-            # Ensure key is there: classdefs can include excluded lines.
+            # Ensure key is there: class definitions can include excluded lines.
             if l in exit_counts:
                 exit_counts[l] -= 1
         return exit_counts
-    exit_counts = expensive(exit_counts)
 ## Opcodes that guide the ByteParser.
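The rewritten `exit_counts` above is a histogram over the starting lines of the arcs, with `collections.defaultdict(int)` replacing the manual key initialization. A self-contained sketch of the same reduction, on made-up arcs:

    import collections

    arcs = {(-1, 1), (1, 2), (2, 3), (2, 5), (3, -1), (5, -1)}
    excluded = {5}

    exit_counts = collections.defaultdict(int)
    for l1, l2 in arcs:
        # Skip the entry arc and any arc touching an excluded line.
        if l1 < 0 or l1 in excluded or l2 in excluded:
            continue
        exit_counts[l1] += 1

    # dict(exit_counts) == {1: 1, 2: 1, 3: 1}: line 2 has two exits, but
    # one leads to an excluded line, so only one is counted.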
@@ -279,6 +277,7 @@ def _opcode(name):
     """Return the opcode by name from the dis module."""
     return dis.opmap[name]
+
 def _opcode_set(*names):
     """Return a set of opcodes by the names in `names`."""
     s = set()
@@ -296,7 +295,7 @@ OPS_CODE_END = _opcode_set('RETURN_VALUE')
 OPS_CHUNK_END = _opcode_set(
     'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
     'BREAK_LOOP', 'CONTINUE_LOOP',
-    )
+)
 # Opcodes that unconditionally begin a new code chunk. By starting new chunks
 # with unconditional jump instructions, we neatly deal with jumps to jumps
@@ -306,7 +305,7 @@ OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')
 # Opcodes that push a block on the block stack.
 OPS_PUSH_BLOCK = _opcode_set(
     'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
-    )
+)
 # Block types for exception handling.
 OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
@@ -321,7 +320,7 @@ OPS_NO_JUMP = OPS_PUSH_BLOCK
 OP_BREAK_LOOP = _opcode('BREAK_LOOP')
 OP_END_FINALLY = _opcode('END_FINALLY')
 OP_COMPARE_OP = _opcode('COMPARE_OP')
-COMPARE_EXCEPTION = 10 # just have to get this const from the code.
+COMPARE_EXCEPTION = 10 # just have to get this constant from the code.
 OP_LOAD_CONST = _opcode('LOAD_CONST')
 OP_RETURN_VALUE = _opcode('RETURN_VALUE')
@@ -329,40 +328,29 @@ OP_RETURN_VALUE = _opcode('RETURN_VALUE')
 class ByteParser(object):
     """Parse byte codes to understand the structure of code."""
-    def __init__(self, code=None, text=None, filename=None):
+    @contract(text='unicode')
+    def __init__(self, text, code=None, filename=None):
+        self.text = text
         if code:
             self.code = code
-            self.text = text
         else:
-            if not text:
-                assert filename, "If no code or text, need a filename"
-                sourcef = open_source(filename)
-                try:
-                    text = sourcef.read()
-                finally:
-                    sourcef.close()
-            self.text = text
-
             try:
-                # Python 2.3 and 2.4 don't like partial last lines, so be sure
-                # the text ends nicely for them.
-                self.code = compile(text + '\n', filename, "exec")
-            except SyntaxError:
-                _, synerr, _ = sys.exc_info()
+                self.code = compile_unicode(text, filename, "exec")
+            except SyntaxError as synerr:
                 raise NotPython(
-                    "Couldn't parse '%s' as Python source: '%s' at line %d" %
-                        (filename, synerr.msg, synerr.lineno)
+                    "Couldn't parse '%s' as Python source: '%s' at line %d" % (
+                        filename, synerr.msg, synerr.lineno
                     )
+                )
         # Alternative Python implementations don't always provide all the
         # attributes on code objects that we need to do the analysis.
         for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
             if not hasattr(self.code, attr):
                 raise CoverageException(
-                    "This implementation of Python doesn't support code "
-                    "analysis.\n"
+                    "This implementation of Python doesn't support code analysis.\n"
                     "Run coverage.py under CPython for this command."
-                    )
+                )
     def child_parsers(self):
         """Iterate over all the code objects nested within this one.
@@ -371,7 +359,7 @@ class ByteParser(object):
         """
         children = CodeObjects(self.code)
-        return [ByteParser(code=c, text=self.text) for c in children]
+        return (ByteParser(self.text, code=c) for c in children)
     def _bytes_lines(self):
         """Map byte offsets to line numbers in `code`.
@@ -412,10 +400,10 @@ class ByteParser(object):
             for _, l in bp._bytes_lines():
                 yield l
-    def _block_stack_repr(self, block_stack):
+    def _block_stack_repr(self, block_stack): # pragma: debugging
         """Get a string version of `block_stack`, for debugging."""
         blocks = ", ".join(
-            ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
+            "(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack
         )
         return "[" + blocks + "]"
@@ -458,7 +446,7 @@ class ByteParser(object):
         # Walk the byte codes building chunks.
         for bc in bytecodes:
-            # Maybe have to start a new chunk
+            # Maybe have to start a new chunk.
             start_new_chunk = False
             first_chunk = False
             if bc.offset in bytes_lines_map:
@@ -479,9 +467,13 @@
                 if chunk:
                     chunk.exits.add(bc.offset)
                 chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
+                if not chunks:
+                    # The very first chunk of a code object is always an
+                    # entrance.
+                    chunk.entrance = True
                 chunks.append(chunk)
-            # Look at the opcode
+            # Look at the opcode.
             if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
                 if ignore_branch:
                     # Someone earlier wanted us to ignore this branch.
@@ -544,19 +536,19 @@ class ByteParser(object):
             chunks.append(chunk)
         # Give all the chunks a length.
-        chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
+        chunks[-1].length = bc.next_offset - chunks[-1].byte
         for i in range(len(chunks)-1):
             chunks[i].length = chunks[i+1].byte - chunks[i].byte
         #self.validate_chunks(chunks)
         return chunks
-    def validate_chunks(self, chunks):
+    def validate_chunks(self, chunks): # pragma: debugging
         """Validate the rule that chunks have a single entrance."""
         # starts is the entrances to the chunks
-        starts = set([ch.byte for ch in chunks])
+        starts = set(ch.byte for ch in chunks)
         for ch in chunks:
-            assert all([(ex in starts or ex < 0) for ex in ch.exits])
+            assert all((ex in starts or ex < 0) for ex in ch.exits)
     def _arcs(self):
         """Find the executable arcs in the code.
@@ -568,15 +560,15 @@ class ByteParser(object):
         """
         chunks = self._split_into_chunks()
-        # A map from byte offsets to chunks jumped into.
-        byte_chunks = dict([(c.byte, c) for c in chunks])
-
-        # There's always an entrance at the first chunk.
-        yield (-1, byte_chunks[0].line)
+        # A map from byte offsets to the chunk starting at that offset.
+        byte_chunks = dict((c.byte, c) for c in chunks)
         # Traverse from the first chunk in each line, and yield arcs where
         # the trace function will be invoked.
         for chunk in chunks:
+            if chunk.entrance:
+                yield (-1, chunk.line)
+
             if not chunk.first:
                 continue
@@ -584,7 +576,7 @@ class ByteParser(object):
             chunks_to_consider = [chunk]
             while chunks_to_consider:
                 # Get the chunk we're considering, and make sure we don't
-                # consider it again
+                # consider it again.
                 this_chunk = chunks_to_consider.pop()
                 chunks_considered.add(this_chunk)
@@ -647,6 +639,8 @@ class Chunk(object):
     .. _basic block: http://en.wikipedia.org/wiki/Basic_block
+    `byte` is the offset to the bytecode starting this chunk.
+
     `line` is the source line number containing this chunk.
     `first` is true if this is the first chunk in the source line.
@@ -654,47 +648,24 @@ class Chunk(object):
     An exit < 0 means the chunk can leave the code (return). The exit is
     the negative of the starting line number of the code block.
+    The `entrance` attribute is a boolean indicating whether the code object
+    can be entered at this chunk.
+
     """
     def __init__(self, byte, line, first):
         self.byte = byte
         self.line = line
         self.first = first
         self.length = 0
+        self.entrance = False
         self.exits = set()
     def __repr__(self):
-        if self.first:
-            bang = "!"
-        else:
-            bang = ""
-        return "<%d+%d @%d%s %r>" % (
-            self.byte, self.length, self.line, bang, list(self.exits)
-            )
-
-
-class CachedTokenizer(object):
-    """A one-element cache around tokenize.generate_tokens.
-
-    When reporting, coverage.py tokenizes files twice, once to find the
-    structure of the file, and once to syntax-color it. Tokenizing is
-    expensive, and easily cached.
-
-    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
-    actually tokenize twice.
-
-    """
-    def __init__(self):
-        self.last_text = None
-        self.last_tokens = None
-
-    def generate_tokens(self, text):
-        """A stand-in for `tokenize.generate_tokens`."""
-        if text != self.last_text:
-            self.last_text = text
-            self.last_tokens = list(
-                tokenize.generate_tokens(StringIO(text).readline)
-            )
-        return self.last_tokens
-
-# Create our generate_tokens cache as a callable replacement function.
-generate_tokens = CachedTokenizer().generate_tokens
+        return "<%d+%d @%d%s%s %r>" % (
+            self.byte,
+            self.length,
+            self.line,
+            "!" if self.first else "",
+            "v" if self.entrance else "",
+            list(self.exits),
+        )
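Taken as a whole, the renamed PythonParser keeps the old CodeParser workflow: construct, parse, then query lines and arcs. A minimal usage sketch, assuming the coverage.py 4.x internals this copy tracks (these are internal classes, not a public API, and `text` must be unicode per the new `@contract` decorators):

    from coverage.parser import PythonParser

    parser = PythonParser(text=u"a = 1\nif a:\n    b = 2\n")
    statements, excluded = parser.parse_source()
    # statements is the set of executable lines, e.g. {1, 2, 3};
    # excluded is empty here because no exclude regex was given.
    print(sorted(parser.arcs()))       # normalized (from_line, to_line) pairs
    print(dict(parser.exit_counts()))  # line number -> count of exits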