Index: tools/telemetry/third_party/coverage/coverage/phystokens.py
diff --git a/third_party/pycoverage/coverage/phystokens.py b/tools/telemetry/third_party/coverage/coverage/phystokens.py
similarity index 61%
copy from third_party/pycoverage/coverage/phystokens.py
copy to tools/telemetry/third_party/coverage/coverage/phystokens.py
index 99b1d5ba0c79771e43338cc8a37ce09e7085d7e2..7092d39e2b055b1bb875041c590c59508e8c9f33 100644
--- a/third_party/pycoverage/coverage/phystokens.py
+++ b/tools/telemetry/third_party/coverage/coverage/phystokens.py
@@ -1,8 +1,17 @@
+# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
+# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
+
"""Better tokenizing for coverage.py."""
-import codecs, keyword, re, sys, token, tokenize
-from coverage.backward import set # pylint: disable=W0622
-from coverage.parser import generate_tokens
+import codecs
+import keyword
+import re
+import token
+import tokenize
+
+from coverage import env
+from coverage.backward import iternext
+from coverage.misc import contract
def phys_tokens(toks):
@@ -43,7 +52,7 @@ def phys_tokens(toks):
inject_backslash = False
elif ttype == token.STRING:
if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':
- # It's a multiline string and the first line ends with
+ # It's a multi-line string and the first line ends with
# a backslash, so we don't need to inject another.
inject_backslash = False
if inject_backslash:
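
As a hedged illustration of the backslash handling in this hunk (assuming the patched file is importable as coverage.phystokens), phys_tokens() re-yields a synthetic token for the backslash-newline that tokenize itself drops:

    from coverage.phystokens import generate_tokens, phys_tokens

    source = u"total = 1 + \\\n    2\n"
    for ttype, ttext, _, _, _ in phys_tokens(generate_tokens(source)):
        print(ttype, repr(ttext))
    # Alongside the ordinary tokens, one extra token carrying '\\\n' is
    # emitted for the continuation, so every physical line is represented.
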
@@ -61,6 +70,7 @@ def phys_tokens(toks):
last_lineno = elineno
+@contract(source='unicode')
def source_token_lines(source):
"""Generate a series of lines, one for each line in `source`.
@@ -76,11 +86,15 @@ def source_token_lines(source):
is indistinguishable from a final line with a newline.
"""
+
ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
line = []
col = 0
- source = source.expandtabs(8).replace('\r\n', '\n')
+
+ # The \f is because of http://bugs.python.org/issue19035
+ source = source.expandtabs(8).replace('\r\n', '\n').replace('\f', ' ')
tokgen = generate_tokens(source)
+
for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
mark_start = True
for part in re.split('(\n)', ttext):
@@ -95,7 +109,7 @@ def source_token_lines(source):
mark_end = False
else:
if mark_start and scol > col:
- line.append(("ws", " " * (scol - col)))
+ line.append(("ws", u" " * (scol - col)))
mark_start = False
tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
if ttype == token.NAME and keyword.iskeyword(ttext):
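
A small sketch of what source_token_lines() yields (import path assumed as above): one list per source line, each a sequence of (token_class, text) pairs used for syntax coloring.

    from coverage.phystokens import source_token_lines

    for line in source_token_lines(u"if x:\n    return 1\n"):
        print(line)
    # Roughly:
    #   [('key', 'if'), ('ws', ' '), ('nam', 'x'), ('op', ':')]
    #   [('ws', '    '), ('key', 'return'), ('ws', ' '), ('num', '1')]
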
@@ -109,22 +123,52 @@ def source_token_lines(source):
if line:
yield line
-def source_encoding(source):
- """Determine the encoding for `source` (a string), according to PEP 263.
- Returns a string, the name of the encoding.
+class CachedTokenizer(object):
+ """A one-element cache around tokenize.generate_tokens.
+
+ When reporting, coverage.py tokenizes files twice, once to find the
+ structure of the file, and once to syntax-color it. Tokenizing is
+ expensive, and easily cached.
+
+ This is a one-element cache so that our twice-in-a-row tokenizing doesn't
+ actually tokenize twice.
"""
- # Note: this function should never be called on Python 3, since py3 has
- # built-in tools to do this.
- assert sys.version_info < (3, 0)
+ def __init__(self):
+ self.last_text = None
+ self.last_tokens = None
+
+ @contract(text='unicode')
+ def generate_tokens(self, text):
+ """A stand-in for `tokenize.generate_tokens`."""
+ if text != self.last_text:
+ self.last_text = text
+ readline = iternext(text.splitlines(True))
+ self.last_tokens = list(tokenize.generate_tokens(readline))
+ return self.last_tokens
+
+# Create our generate_tokens cache as a callable replacement function.
+generate_tokens = CachedTokenizer().generate_tokens
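
A minimal sketch of the one-element cache this hunk introduces (same assumed import path): asking for the tokens of the same text twice in a row hands back the identical list without re-tokenizing.

    from coverage.phystokens import generate_tokens

    text = u"a = 1\nb = 2\n"
    first = generate_tokens(text)
    second = generate_tokens(text)
    assert first is second  # cache hit: the very same token list is reused
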
- # This is mostly code adapted from Py3.2's tokenize module.
- cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
+COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE)
+
+@contract(source='bytes')
+def _source_encoding_py2(source):
+ """Determine the encoding for `source`, according to PEP 263.
+
+ `source` is a byte string, the text of the program.
+
+ Returns a string, the name of the encoding.
+
+ """
+ assert isinstance(source, bytes)
# Do this so the detect_encode code we copied will work.
- readline = iter(source.splitlines(True)).next
+ readline = iternext(source.splitlines(True))
+
+ # This is mostly code adapted from Py3.2's tokenize module.
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
@@ -137,19 +181,14 @@ def source_encoding(source):
return orig_enc
# From detect_encode():
- # It detects the encoding from the presence of a utf-8 bom or an encoding
- # cookie as specified in pep-0263. If both a bom and a cookie are present,
+ # It detects the encoding from the presence of a UTF-8 BOM or an encoding
+ # cookie as specified in PEP-0263. If both a BOM and a cookie are present,
# but disagree, a SyntaxError will be raised. If the encoding cookie is an
- # invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
+ # invalid charset, raise a SyntaxError. Note that if a UTF-8 BOM is found,
# 'utf-8-sig' is returned.
- # If no encoding is specified, then the default will be returned. The
- # default varied with version.
-
- if sys.version_info <= (2, 4):
- default = 'iso-8859-1'
- else:
- default = 'ascii'
+ # If no encoding is specified, then the default will be returned.
+ default = 'ascii'
bom_found = False
encoding = None
@@ -168,21 +207,21 @@ def source_encoding(source):
except UnicodeDecodeError:
return None
- matches = cookie_re.findall(line_string)
+ matches = COOKIE_RE.findall(line_string)
if not matches:
return None
encoding = _get_normal_name(matches[0])
try:
codec = codecs.lookup(encoding)
except LookupError:
- # This behaviour mimics the Python interpreter
+ # This behavior mimics the Python interpreter
raise SyntaxError("unknown encoding: " + encoding)
if bom_found:
# codecs in 2.3 were raw tuples of functions, assume the best.
codec_name = getattr(codec, 'name', encoding)
if codec_name != 'utf-8':
- # This behaviour mimics the Python interpreter
+ # This behavior mimics the Python interpreter
raise SyntaxError('encoding problem: utf-8')
encoding += '-sig'
return encoding
@@ -208,3 +247,57 @@ def source_encoding(source):
return encoding
return default
+
+
+@contract(source='bytes')
+def _source_encoding_py3(source):
+ """Determine the encoding for `source`, according to PEP 263.
+
+ `source` is a byte string: the text of the program.
+
+ Returns a string, the name of the encoding.
+
+ """
+ readline = iternext(source.splitlines(True))
+ return tokenize.detect_encoding(readline)[0]
+
+
+if env.PY3:
+ source_encoding = _source_encoding_py3
+else:
+ source_encoding = _source_encoding_py2
+
+
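With the dispatch above, source_encoding() takes the program text as bytes on either major Python version and reports the PEP 263 encoding. A hedged sketch (import path assumed):

    from coverage.phystokens import source_encoding

    print(source_encoding(b"# -*- coding: utf-8 -*-\nx = 1\n"))  # 'utf-8'
    # Without a cookie or BOM, the default is returned: 'ascii' from the
    # Python 2 path above, 'utf-8' from tokenize.detect_encoding() on Python 3.
    print(source_encoding(b"x = 1\n"))
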
+@contract(source='unicode')
+def compile_unicode(source, filename, mode):
+ """Just like the `compile` builtin, but works on any Unicode string.
+
+ Python 2's compile() builtin has a stupid restriction: if the source string
+ is Unicode, then it may not have an encoding declaration in it. Why not?
+ Who knows!
+
+ This function catches that exception, neuters the coding declaration, and
+ compiles it anyway.
+
+ """
+ try:
+ code = compile(source, filename, mode)
+ except SyntaxError as synerr:
+ if "coding declaration in unicode string" not in synerr.args[0].lower():
+ raise
+ source = neuter_encoding_declaration(source)
+ code = compile(source, filename, mode)
+
+ return code
+
+
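A hedged usage sketch of compile_unicode() (import path assumed): a Unicode source that carries a coding declaration compiles on both Python versions, because the Python 2 SyntaxError is caught and the declaration neutered.

    from coverage.phystokens import compile_unicode

    src = u"# coding: utf-8\nx = 1\n"
    code = compile_unicode(src, "<string>", "exec")
    namespace = {}
    exec(code, namespace)
    assert namespace["x"] == 1
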
+@contract(source='unicode', returns='unicode')
+def neuter_encoding_declaration(source):
+ """Return `source`, with any encoding declaration neutered.
+
+ This function will only ever be called on `source` that has an encoding
+ declaration, so some edge cases can be ignored.
+
+ """
+ source = COOKIE_RE.sub("# (deleted declaration)", source)
+ return source
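
And a short sketch of the neutering itself (import path assumed): COOKIE_RE rewrites the coding cookie so compile() no longer objects.

    from coverage.phystokens import neuter_encoding_declaration

    print(neuter_encoding_declaration(u"# coding: utf-8\nx = 1\n"))
    # -> "# (deleted declaration)\nx = 1\n"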