| Index: tools/telemetry/third_party/coverage/coverage/parser.py
|
| diff --git a/third_party/pycoverage/coverage/parser.py b/tools/telemetry/third_party/coverage/coverage/parser.py
|
| similarity index 78%
|
| copy from third_party/pycoverage/coverage/parser.py
|
| copy to tools/telemetry/third_party/coverage/coverage/parser.py
|
| index 7a145a2a5346cc806848813566998bdee854d370..882c972b4e78f45bfb74ec9388987dc7d2ab8da9 100644
|
| --- a/third_party/pycoverage/coverage/parser.py
|
| +++ b/tools/telemetry/third_party/coverage/coverage/parser.py
|
| @@ -1,19 +1,26 @@
|
| -"""Code parsing for Coverage."""
|
| +# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
|
| +# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
|
|
|
| -import dis, re, sys, token, tokenize
|
| +"""Code parsing for coverage.py."""
|
|
|
| -from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
|
| -from coverage.backward import open_source, range # pylint: disable=W0622
|
| -from coverage.backward import reversed # pylint: disable=W0622
|
| +import collections
|
| +import dis
|
| +import re
|
| +import token
|
| +import tokenize
|
| +
|
| +from coverage.backward import range # pylint: disable=redefined-builtin
|
| from coverage.backward import bytes_to_ints
|
| from coverage.bytecode import ByteCodes, CodeObjects
|
| -from coverage.misc import nice_pair, expensive, join_regex
|
| +from coverage.misc import contract, nice_pair, expensive, join_regex
|
| from coverage.misc import CoverageException, NoSource, NotPython
|
| +from coverage.phystokens import compile_unicode, generate_tokens
|
|
|
|
|
| -class CodeParser(object):
|
| +class PythonParser(object):
|
| """Parse code to find executable lines, excluded lines, etc."""
|
|
|
| + @contract(text='unicode|None')
|
| def __init__(self, text=None, filename=None, exclude=None):
|
| """
|
| Source can be provided as `text`, the text itself, or `filename`, from
|
| @@ -21,25 +28,17 @@ class CodeParser(object):
|
| `exclude`, a regex.
|
|
|
| """
|
| - assert text or filename, "CodeParser needs either text or filename"
|
| + assert text or filename, "PythonParser needs either text or filename"
|
| self.filename = filename or "<code>"
|
| self.text = text
|
| if not self.text:
|
| + from coverage.python import get_python_source
|
| try:
|
| - sourcef = open_source(self.filename)
|
| - try:
|
| - self.text = sourcef.read()
|
| - finally:
|
| - sourcef.close()
|
| - except IOError:
|
| - _, err, _ = sys.exc_info()
|
| + self.text = get_python_source(self.filename)
|
| + except IOError as err:
|
| raise NoSource(
|
| "No source for code: '%s': %s" % (self.filename, err)
|
| - )
|
| -
|
| - # Scrap the BOM if it exists.
|
| - if self.text and ord(self.text[0]) == 0xfeff:
|
| - self.text = self.text[1:]
|
| + )
|
|
|
| self.exclude = exclude
|
|
|
| @@ -63,16 +62,16 @@ class CodeParser(object):
|
| # The line numbers that start statements.
|
| self.statement_starts = set()
|
|
|
| - # Lazily-created ByteParser
|
| + # Lazily-created ByteParser and arc data.
|
| self._byte_parser = None
|
| + self._all_arcs = None
|
|
|
| - def _get_byte_parser(self):
|
| + @property
|
| + def byte_parser(self):
|
| """Create a ByteParser on demand."""
|
| if not self._byte_parser:
|
| - self._byte_parser = \
|
| - ByteParser(text=self.text, filename=self.filename)
|
| + self._byte_parser = ByteParser(self.text, filename=self.filename)
|
| return self._byte_parser
|
| - byte_parser = property(_get_byte_parser)
|
|
|
| def lines_matching(self, *regexes):
|
| """Find the lines matching one of a list of regexes.
|
| @@ -84,9 +83,9 @@ class CodeParser(object):
|
| """
|
| regex_c = re.compile(join_regex(regexes))
|
| matches = set()
|
| - for i, ltext in enumerate(self.lines):
|
| + for i, ltext in enumerate(self.lines, start=1):
|
| if regex_c.search(ltext):
|
| - matches.add(i+1)
|
| + matches.add(i)
|
| return matches
|
|
|
| def _raw_parse(self):
|
| @@ -114,7 +113,7 @@ class CodeParser(object):
|
| print("%10s %5s %-20r %r" % (
|
| tokenize.tok_name.get(toktype, toktype),
|
| nice_pair((slineno, elineno)), ttext, ltext
|
| - ))
|
| + ))
|
| if toktype == token.INDENT:
|
| indent += 1
|
| elif toktype == token.DEDENT:
|
| @@ -142,9 +141,8 @@ class CodeParser(object):
|
| # We're at the end of a line, and we've ended on a
|
| # different line than the first line of the statement,
|
| # so record a multi-line range.
|
| - rng = (first_line, elineno)
|
| for l in range(first_line, elineno+1):
|
| - self.multiline[l] = rng
|
| + self.multiline[l] = first_line
|
| first_line = None
|
|
|
| if ttext.strip() and toktype != tokenize.COMMENT:
|
| @@ -168,34 +166,33 @@ class CodeParser(object):
|
|
|
| def first_line(self, line):
|
| """Return the first line number of the statement including `line`."""
|
| - rng = self.multiline.get(line)
|
| - if rng:
|
| - first_line = rng[0]
|
| + first_line = self.multiline.get(line)
|
| + if first_line:
|
| + return first_line
|
| else:
|
| - first_line = line
|
| - return first_line
|
| + return line
|
|
|
| - def first_lines(self, lines, *ignores):
|
| + def first_lines(self, lines):
|
| """Map the line numbers in `lines` to the correct first line of the
|
| statement.
|
|
|
| - Skip any line mentioned in any of the sequences in `ignores`.
|
| -
|
| Returns a set of the first lines.
|
|
|
| """
|
| - ignore = set()
|
| - for ign in ignores:
|
| - ignore.update(ign)
|
| - lset = set()
|
| - for l in lines:
|
| - if l in ignore:
|
| - continue
|
| - new_l = self.first_line(l)
|
| - if new_l not in ignore:
|
| - lset.add(new_l)
|
| - return lset
|
| + return set(self.first_line(l) for l in lines)
|
| +
|
| + def translate_lines(self, lines):
|
| + """Implement `FileReporter.translate_lines`."""
|
| + return self.first_lines(lines)
|
| +
|
| + def translate_arcs(self, arcs):
|
| + """Implement `FileReporter.translate_arcs`."""
|
| + return [
|
| + (self.first_line(a), self.first_line(b))
|
| + for (a, b) in arcs
|
| + ]
|
|
|
| + @expensive
|
| def parse_source(self):
|
| """Parse source text to find executable lines, excluded lines, etc.
|
|
|
| @@ -208,47 +205,51 @@ class CodeParser(object):
|
| """
|
| try:
|
| self._raw_parse()
|
| - except (tokenize.TokenError, IndentationError):
|
| - _, tokerr, _ = sys.exc_info()
|
| - msg, lineno = tokerr.args
|
| + except (tokenize.TokenError, IndentationError) as err:
|
| + if hasattr(err, "lineno"):
|
| + lineno = err.lineno # IndentationError
|
| + else:
|
| + lineno = err.args[1][0] # TokenError
|
| raise NotPython(
|
| - "Couldn't parse '%s' as Python source: '%s' at %s" %
|
| - (self.filename, msg, lineno)
|
| + "Couldn't parse '%s' as Python source: '%s' at line %d" % (
|
| + self.filename, err.args[0], lineno
|
| )
|
| + )
|
|
|
| excluded_lines = self.first_lines(self.excluded)
|
| - lines = self.first_lines(
|
| - self.statement_starts,
|
| - excluded_lines,
|
| - self.docstrings
|
| - )
|
| + ignore = set()
|
| + ignore.update(excluded_lines)
|
| + ignore.update(self.docstrings)
|
| + starts = self.statement_starts - ignore
|
| + lines = self.first_lines(starts)
|
| + lines -= ignore
|
|
|
| return lines, excluded_lines
|
|
|
| def arcs(self):
|
| """Get information about the arcs available in the code.
|
|
|
| - Returns a sorted list of line number pairs. Line numbers have been
|
| - normalized to the first line of multiline statements.
|
| + Returns a set of line number pairs. Line numbers have been normalized
|
| + to the first line of multi-line statements.
|
|
|
| """
|
| - all_arcs = []
|
| - for l1, l2 in self.byte_parser._all_arcs():
|
| - fl1 = self.first_line(l1)
|
| - fl2 = self.first_line(l2)
|
| - if fl1 != fl2:
|
| - all_arcs.append((fl1, fl2))
|
| - return sorted(all_arcs)
|
| - arcs = expensive(arcs)
|
| + if self._all_arcs is None:
|
| + self._all_arcs = set()
|
| + for l1, l2 in self.byte_parser._all_arcs():
|
| + fl1 = self.first_line(l1)
|
| + fl2 = self.first_line(l2)
|
| + if fl1 != fl2:
|
| + self._all_arcs.add((fl1, fl2))
|
| + return self._all_arcs
|
|
|
| def exit_counts(self):
|
| - """Get a mapping from line numbers to count of exits from that line.
|
| + """Get a count of exits from each line.
|
|
|
| Excluded lines are excluded.
|
|
|
| """
|
| excluded_lines = self.first_lines(self.excluded)
|
| - exit_counts = {}
|
| + exit_counts = collections.defaultdict(int)
|
| for l1, l2 in self.arcs():
|
| if l1 < 0:
|
| # Don't ever report -1 as a line number
|
| @@ -259,18 +260,15 @@ class CodeParser(object):
|
| if l2 in excluded_lines:
|
| # Arcs to excluded lines shouldn't count.
|
| continue
|
| - if l1 not in exit_counts:
|
| - exit_counts[l1] = 0
|
| exit_counts[l1] += 1
|
|
|
| # Class definitions have one extra exit, so remove one for each:
|
| for l in self.classdefs:
|
| - # Ensure key is there: classdefs can include excluded lines.
|
| + # Ensure key is there: class definitions can include excluded lines.
|
| if l in exit_counts:
|
| exit_counts[l] -= 1
|
|
|
| return exit_counts
|
| - exit_counts = expensive(exit_counts)
|
|
|
|
|
| ## Opcodes that guide the ByteParser.
|
| @@ -279,6 +277,7 @@ def _opcode(name):
|
| """Return the opcode by name from the dis module."""
|
| return dis.opmap[name]
|
|
|
| +
|
| def _opcode_set(*names):
|
| """Return a set of opcodes by the names in `names`."""
|
| s = set()
|
| @@ -296,7 +295,7 @@ OPS_CODE_END = _opcode_set('RETURN_VALUE')
|
| OPS_CHUNK_END = _opcode_set(
|
| 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
|
| 'BREAK_LOOP', 'CONTINUE_LOOP',
|
| - )
|
| +)
|
|
|
| # Opcodes that unconditionally begin a new code chunk. By starting new chunks
|
| # with unconditional jump instructions, we neatly deal with jumps to jumps
|
| @@ -306,7 +305,7 @@ OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')
|
| # Opcodes that push a block on the block stack.
|
| OPS_PUSH_BLOCK = _opcode_set(
|
| 'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
|
| - )
|
| +)
|
|
|
| # Block types for exception handling.
|
| OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
|
| @@ -321,7 +320,7 @@ OPS_NO_JUMP = OPS_PUSH_BLOCK
|
| OP_BREAK_LOOP = _opcode('BREAK_LOOP')
|
| OP_END_FINALLY = _opcode('END_FINALLY')
|
| OP_COMPARE_OP = _opcode('COMPARE_OP')
|
| -COMPARE_EXCEPTION = 10 # just have to get this const from the code.
|
| +COMPARE_EXCEPTION = 10 # just have to get this constant from the code.
|
| OP_LOAD_CONST = _opcode('LOAD_CONST')
|
| OP_RETURN_VALUE = _opcode('RETURN_VALUE')
|
|
|
| @@ -329,40 +328,29 @@ OP_RETURN_VALUE = _opcode('RETURN_VALUE')
|
| class ByteParser(object):
|
| """Parse byte codes to understand the structure of code."""
|
|
|
| - def __init__(self, code=None, text=None, filename=None):
|
| + @contract(text='unicode')
|
| + def __init__(self, text, code=None, filename=None):
|
| + self.text = text
|
| if code:
|
| self.code = code
|
| - self.text = text
|
| else:
|
| - if not text:
|
| - assert filename, "If no code or text, need a filename"
|
| - sourcef = open_source(filename)
|
| - try:
|
| - text = sourcef.read()
|
| - finally:
|
| - sourcef.close()
|
| - self.text = text
|
| -
|
| try:
|
| - # Python 2.3 and 2.4 don't like partial last lines, so be sure
|
| - # the text ends nicely for them.
|
| - self.code = compile(text + '\n', filename, "exec")
|
| - except SyntaxError:
|
| - _, synerr, _ = sys.exc_info()
|
| + self.code = compile_unicode(text, filename, "exec")
|
| + except SyntaxError as synerr:
|
| raise NotPython(
|
| - "Couldn't parse '%s' as Python source: '%s' at line %d" %
|
| - (filename, synerr.msg, synerr.lineno)
|
| + "Couldn't parse '%s' as Python source: '%s' at line %d" % (
|
| + filename, synerr.msg, synerr.lineno
|
| )
|
| + )
|
|
|
| # Alternative Python implementations don't always provide all the
|
| # attributes on code objects that we need to do the analysis.
|
| for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
|
| if not hasattr(self.code, attr):
|
| raise CoverageException(
|
| - "This implementation of Python doesn't support code "
|
| - "analysis.\n"
|
| + "This implementation of Python doesn't support code analysis.\n"
|
| "Run coverage.py under CPython for this command."
|
| - )
|
| + )
|
|
|
| def child_parsers(self):
|
| """Iterate over all the code objects nested within this one.
|
| @@ -371,7 +359,7 @@ class ByteParser(object):
|
|
|
| """
|
| children = CodeObjects(self.code)
|
| - return [ByteParser(code=c, text=self.text) for c in children]
|
| + return (ByteParser(self.text, code=c) for c in children)
|
|
|
| def _bytes_lines(self):
|
| """Map byte offsets to line numbers in `code`.
|
| @@ -412,10 +400,10 @@ class ByteParser(object):
|
| for _, l in bp._bytes_lines():
|
| yield l
|
|
|
| - def _block_stack_repr(self, block_stack):
|
| + def _block_stack_repr(self, block_stack): # pragma: debugging
|
| """Get a string version of `block_stack`, for debugging."""
|
| blocks = ", ".join(
|
| - ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
|
| + "(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack
|
| )
|
| return "[" + blocks + "]"
|
|
|
| @@ -458,7 +446,7 @@ class ByteParser(object):
|
|
|
| # Walk the byte codes building chunks.
|
| for bc in bytecodes:
|
| - # Maybe have to start a new chunk
|
| + # Maybe have to start a new chunk.
|
| start_new_chunk = False
|
| first_chunk = False
|
| if bc.offset in bytes_lines_map:
|
| @@ -479,9 +467,13 @@ class ByteParser(object):
|
| if chunk:
|
| chunk.exits.add(bc.offset)
|
| chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
|
| + if not chunks:
|
| + # The very first chunk of a code object is always an
|
| + # entrance.
|
| + chunk.entrance = True
|
| chunks.append(chunk)
|
|
|
| - # Look at the opcode
|
| + # Look at the opcode.
|
| if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
|
| if ignore_branch:
|
| # Someone earlier wanted us to ignore this branch.
|
| @@ -544,19 +536,19 @@ class ByteParser(object):
|
| chunks.append(chunk)
|
|
|
| # Give all the chunks a length.
|
| - chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
|
| + chunks[-1].length = bc.next_offset - chunks[-1].byte
|
| for i in range(len(chunks)-1):
|
| chunks[i].length = chunks[i+1].byte - chunks[i].byte
|
|
|
| #self.validate_chunks(chunks)
|
| return chunks
|
|
|
| - def validate_chunks(self, chunks):
|
| + def validate_chunks(self, chunks): # pragma: debugging
|
| """Validate the rule that chunks have a single entrance."""
|
| # starts is the entrances to the chunks
|
| - starts = set([ch.byte for ch in chunks])
|
| + starts = set(ch.byte for ch in chunks)
|
| for ch in chunks:
|
| - assert all([(ex in starts or ex < 0) for ex in ch.exits])
|
| + assert all((ex in starts or ex < 0) for ex in ch.exits)
|
|
|
| def _arcs(self):
|
| """Find the executable arcs in the code.
|
| @@ -568,15 +560,15 @@ class ByteParser(object):
|
| """
|
| chunks = self._split_into_chunks()
|
|
|
| - # A map from byte offsets to chunks jumped into.
|
| - byte_chunks = dict([(c.byte, c) for c in chunks])
|
| -
|
| - # There's always an entrance at the first chunk.
|
| - yield (-1, byte_chunks[0].line)
|
| + # A map from byte offsets to the chunk starting at that offset.
|
| + byte_chunks = dict((c.byte, c) for c in chunks)
|
|
|
| # Traverse from the first chunk in each line, and yield arcs where
|
| # the trace function will be invoked.
|
| for chunk in chunks:
|
| + if chunk.entrance:
|
| + yield (-1, chunk.line)
|
| +
|
| if not chunk.first:
|
| continue
|
|
|
| @@ -584,7 +576,7 @@ class ByteParser(object):
|
| chunks_to_consider = [chunk]
|
| while chunks_to_consider:
|
| # Get the chunk we're considering, and make sure we don't
|
| - # consider it again
|
| + # consider it again.
|
| this_chunk = chunks_to_consider.pop()
|
| chunks_considered.add(this_chunk)
|
|
|
| @@ -647,6 +639,8 @@ class Chunk(object):
|
|
|
| .. _basic block: http://en.wikipedia.org/wiki/Basic_block
|
|
|
| + `byte` is the offset to the bytecode starting this chunk.
|
| +
|
| `line` is the source line number containing this chunk.
|
|
|
| `first` is true if this is the first chunk in the source line.
|
| @@ -654,47 +648,24 @@ class Chunk(object):
|
| An exit < 0 means the chunk can leave the code (return). The exit is
|
| the negative of the starting line number of the code block.
|
|
|
| + The `entrance` attribute is a boolean indicating whether the code object
|
| + can be entered at this chunk.
|
| +
|
| """
|
| def __init__(self, byte, line, first):
|
| self.byte = byte
|
| self.line = line
|
| self.first = first
|
| self.length = 0
|
| + self.entrance = False
|
| self.exits = set()
|
|
|
| def __repr__(self):
|
| - if self.first:
|
| - bang = "!"
|
| - else:
|
| - bang = ""
|
| - return "<%d+%d @%d%s %r>" % (
|
| - self.byte, self.length, self.line, bang, list(self.exits)
|
| - )
|
| -
|
| -
|
| -class CachedTokenizer(object):
|
| - """A one-element cache around tokenize.generate_tokens.
|
| -
|
| - When reporting, coverage.py tokenizes files twice, once to find the
|
| - structure of the file, and once to syntax-color it. Tokenizing is
|
| - expensive, and easily cached.
|
| -
|
| - This is a one-element cache so that our twice-in-a-row tokenizing doesn't
|
| - actually tokenize twice.
|
| -
|
| - """
|
| - def __init__(self):
|
| - self.last_text = None
|
| - self.last_tokens = None
|
| -
|
| - def generate_tokens(self, text):
|
| - """A stand-in for `tokenize.generate_tokens`."""
|
| - if text != self.last_text:
|
| - self.last_text = text
|
| - self.last_tokens = list(
|
| - tokenize.generate_tokens(StringIO(text).readline)
|
| - )
|
| - return self.last_tokens
|
| -
|
| -# Create our generate_tokens cache as a callable replacement function.
|
| -generate_tokens = CachedTokenizer().generate_tokens
|
| + return "<%d+%d @%d%s%s %r>" % (
|
| + self.byte,
|
| + self.length,
|
| + self.line,
|
| + "!" if self.first else "",
|
| + "v" if self.entrance else "",
|
| + list(self.exits),
|
| + )
|
|
|