Chromium Code Reviews

Unified Diff: tools/telemetry/third_party/coverage/coverage/phystokens.py

Issue 1366913004: Add coverage
Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 3 months ago
Index: tools/telemetry/third_party/coverage/coverage/phystokens.py
diff --git a/third_party/pycoverage/coverage/phystokens.py b/tools/telemetry/third_party/coverage/coverage/phystokens.py
similarity index 61%
copy from third_party/pycoverage/coverage/phystokens.py
copy to tools/telemetry/third_party/coverage/coverage/phystokens.py
index 99b1d5ba0c79771e43338cc8a37ce09e7085d7e2..7092d39e2b055b1bb875041c590c59508e8c9f33 100644
--- a/third_party/pycoverage/coverage/phystokens.py
+++ b/tools/telemetry/third_party/coverage/coverage/phystokens.py
@@ -1,8 +1,17 @@
+# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
+# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
+
"""Better tokenizing for coverage.py."""
-import codecs, keyword, re, sys, token, tokenize
-from coverage.backward import set # pylint: disable=W0622
-from coverage.parser import generate_tokens
+import codecs
+import keyword
+import re
+import token
+import tokenize
+
+from coverage import env
+from coverage.backward import iternext
+from coverage.misc import contract
def phys_tokens(toks):
@@ -43,7 +52,7 @@ def phys_tokens(toks):
inject_backslash = False
elif ttype == token.STRING:
if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':
- # It's a multiline string and the first line ends with
+ # It's a multi-line string and the first line ends with
# a backslash, so we don't need to inject another.
inject_backslash = False
if inject_backslash:
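
For context on the backslash handling in this hunk, a minimal sketch (not part of this CL; plain stdlib tokenize, Python 3 spelling assumed) of the gap phys_tokens fills: the standard tokenizer emits no token for a backslash-newline continuation, so phys_tokens synthesizes one.

    import io
    import tokenize

    # The "+" token ends at column 7 of line 1 and the "2" starts at column 4
    # of line 2; no stdlib token covers the physical backslash-newline between
    # them. phys_tokens yields a synthesized token for that gap.
    src = "a = 1 + \\\n    2\n"
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tok)
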
@@ -61,6 +70,7 @@ def phys_tokens(toks):
last_lineno = elineno
+@contract(source='unicode')
def source_token_lines(source):
"""Generate a series of lines, one for each line in `source`.
@@ -76,11 +86,15 @@ def source_token_lines(source):
is indistinguishable from a final line with a newline.
"""
+
ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
line = []
col = 0
- source = source.expandtabs(8).replace('\r\n', '\n')
+
+ # The \f is because of http://bugs.python.org/issue19035
+ source = source.expandtabs(8).replace('\r\n', '\n').replace('\f', ' ')
tokgen = generate_tokens(source)
+
for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
mark_start = True
for part in re.split('(\n)', ttext):
@@ -95,7 +109,7 @@ def source_token_lines(source):
mark_end = False
else:
if mark_start and scol > col:
- line.append(("ws", " " * (scol - col)))
+ line.append(("ws", u" " * (scol - col)))
mark_start = False
tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
if ttype == token.NAME and keyword.iskeyword(ttext):
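
A hedged sketch of the output shape implied by the code above: each yielded line is a list of (class, text) pairs, where the class is "key" for keywords and otherwise the first three letters of the lowercased token name.

    for line in source_token_lines(u"def f():\n    return 1\n"):
        print(line)
    # Roughly:
    # [('key', u'def'), ('ws', u' '), ('nam', u'f'), ('op', u'('), ('op', u')'), ('op', u':')]
    # [('ws', u'    '), ('key', u'return'), ('ws', u' '), ('num', u'1')]
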
@@ -109,22 +123,52 @@ def source_token_lines(source):
if line:
yield line
-def source_encoding(source):
- """Determine the encoding for `source` (a string), according to PEP 263.
- Returns a string, the name of the encoding.
+class CachedTokenizer(object):
+ """A one-element cache around tokenize.generate_tokens.
+
+ When reporting, coverage.py tokenizes files twice, once to find the
+ structure of the file, and once to syntax-color it. Tokenizing is
+ expensive, and easily cached.
+
+ This is a one-element cache so that our twice-in-a-row tokenizing doesn't
+ actually tokenize twice.
"""
- # Note: this function should never be called on Python 3, since py3 has
- # built-in tools to do this.
- assert sys.version_info < (3, 0)
+ def __init__(self):
+ self.last_text = None
+ self.last_tokens = None
+
+ @contract(text='unicode')
+ def generate_tokens(self, text):
+ """A stand-in for `tokenize.generate_tokens`."""
+ if text != self.last_text:
+ self.last_text = text
+ readline = iternext(text.splitlines(True))
+ self.last_tokens = list(tokenize.generate_tokens(readline))
+ return self.last_tokens
+
+# Create our generate_tokens cache as a callable replacement function.
+generate_tokens = CachedTokenizer().generate_tokens
- # This is mostly code adapted from Py3.2's tokenize module.
- cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
+COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE)
+
+@contract(source='bytes')
+def _source_encoding_py2(source):
+ """Determine the encoding for `source`, according to PEP 263.
+
+ `source` is a byte string, the text of the program.
+
+ Returns a string, the name of the encoding.
+
+ """
+ assert isinstance(source, bytes)
# Do this so the detect_encode code we copied will work.
- readline = iter(source.splitlines(True)).next
+ readline = iternext(source.splitlines(True))
+
+ # This is mostly code adapted from Py3.2's tokenize module.
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
@@ -137,19 +181,14 @@ def source_encoding(source):
return orig_enc
# From detect_encode():
- # It detects the encoding from the presence of a utf-8 bom or an encoding
- # cookie as specified in pep-0263. If both a bom and a cookie are present,
+ # It detects the encoding from the presence of a UTF-8 BOM or an encoding
+ # cookie as specified in PEP-0263. If both a BOM and a cookie are present,
# but disagree, a SyntaxError will be raised. If the encoding cookie is an
- # invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
+ # invalid charset, raise a SyntaxError. Note that if a UTF-8 BOM is found,
# 'utf-8-sig' is returned.
- # If no encoding is specified, then the default will be returned. The
- # default varied with version.
-
- if sys.version_info <= (2, 4):
- default = 'iso-8859-1'
- else:
- default = 'ascii'
+ # If no encoding is specified, then the default will be returned.
+ default = 'ascii'
bom_found = False
encoding = None
@@ -168,21 +207,21 @@ def source_encoding(source):
except UnicodeDecodeError:
return None
- matches = cookie_re.findall(line_string)
+ matches = COOKIE_RE.findall(line_string)
if not matches:
return None
encoding = _get_normal_name(matches[0])
try:
codec = codecs.lookup(encoding)
except LookupError:
- # This behaviour mimics the Python interpreter
+ # This behavior mimics the Python interpreter
raise SyntaxError("unknown encoding: " + encoding)
if bom_found:
# codecs in 2.3 were raw tuples of functions, assume the best.
codec_name = getattr(codec, 'name', encoding)
if codec_name != 'utf-8':
- # This behaviour mimics the Python interpreter
+ # This behavior mimics the Python interpreter
raise SyntaxError('encoding problem: utf-8')
encoding += '-sig'
return encoding
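
Hedged examples of the cookie/BOM behavior documented in the comments above; the Python 2 path here and the Python 3 path added at the end of the file should agree on all three:

    source_encoding(b"# coding: latin-1\npass\n")      # -> 'iso-8859-1' (normalized name)
    source_encoding(b"\xef\xbb\xbfpass\n")             # UTF-8 BOM -> 'utf-8-sig'
    source_encoding(b"# coding: not-a-codec\npass\n")  # raises SyntaxError
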
@@ -208,3 +247,57 @@ def source_encoding(source):
return encoding
return default
+
+
+@contract(source='bytes')
+def _source_encoding_py3(source):
+ """Determine the encoding for `source`, according to PEP 263.
+
+ `source` is a byte string: the text of the program.
+
+ Returns a string, the name of the encoding.
+
+ """
+ readline = iternext(source.splitlines(True))
+ return tokenize.detect_encoding(readline)[0]
+
+
+if env.PY3:
+ source_encoding = _source_encoding_py3
+else:
+ source_encoding = _source_encoding_py2
+
+
+@contract(source='unicode')
+def compile_unicode(source, filename, mode):
+ """Just like the `compile` builtin, but works on any Unicode string.
+
+ Python 2's compile() builtin has a stupid restriction: if the source string
+ is Unicode, then it may not have an encoding declaration in it. Why not?
+ Who knows!
+
+ This function catches that exception, neuters the coding declaration, and
+ compiles it anyway.
+
+ """
+ try:
+ code = compile(source, filename, mode)
+ except SyntaxError as synerr:
+ if "coding declaration in unicode string" not in synerr.args[0].lower():
+ raise
+ source = neuter_encoding_declaration(source)
+ code = compile(source, filename, mode)
+
+ return code
+
+
+@contract(source='unicode', returns='unicode')
+def neuter_encoding_declaration(source):
+ """Return `source`, with any encoding declaration neutered.
+
+ This function will only ever be called on `source` that has an encoding
+ declaration, so some edge cases can be ignored.
+
+ """
+ source = COOKIE_RE.sub("# (deleted declaration)", source)
+ return source
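
A usage sketch of the two new helpers, under the semantics described in their docstrings (on Python 3 the first compile() simply succeeds; on Python 2 the except branch strips the cookie and retries):

    src = u"# coding: utf-8\nx = 1\n"
    code = compile_unicode(src, "<string>", "exec")  # works on both 2 and 3
    print(neuter_encoding_declaration(src))
    # -> u"# (deleted declaration)\nx = 1\n"
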