third_party/coverage-3.6/coverage/phystokens.py - Issue 225633007: Upgrade to coverage 3.7.1 and have it auto-build itself on first use.

Unified Diff: third_party/coverage-3.6/coverage/phystokens.py

Issue 225633007: Upgrade to coverage 3.7.1 and have it auto-build itself on first use. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build

Patch Set: sigh our imports are a mess Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/coverage-3.6/coverage/phystokens.py

diff --git a/third_party/coverage-3.6/coverage/phystokens.py b/third_party/coverage-3.6/coverage/phystokens.py

deleted file mode 100644

index 166020e1fac0bd4afe41b1ba2d96e80fd54a1769..0000000000000000000000000000000000000000

--- a/third_party/coverage-3.6/coverage/phystokens.py

+++ /dev/null

@@ -1,206 +0,0 @@

-"""Better tokenizing for coverage.py."""

-import codecs, keyword, re, sys, token, tokenize

-from coverage.backward import StringIO # pylint: disable=W0622

-def phys_tokens(toks):

- """Return all physical tokens, even line continuations.

- tokenize.generate_tokens() doesn't return a token for the backslash that

- continues lines. This wrapper provides those tokens so that we can

- re-create a faithful representation of the original source.

- Returns the same values as generate_tokens()

- """

- last_line = None

- last_lineno = -1

- last_ttype = None

- for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:

- if last_lineno != elineno:

- if last_line and last_line[-2:] == "\\\n":

- # We are at the beginning of a new line, and the last line

- # ended with a backslash. We probably have to inject a

- # backslash token into the stream. Unfortunately, there's more

- # to figure out. This code::

- #

- # usage = """\

- # HEY THERE

- # """

- #

- # triggers this condition, but the token text is::

- #

- # '"""\\\nHEY THERE\n"""'

- #

- # so we need to figure out if the backslash is already in the

- # string token or not.

- inject_backslash = True

- if last_ttype == tokenize.COMMENT:

- # Comments like this \

- # should never result in a new token.

- inject_backslash = False

- elif ttype == token.STRING:

- if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':

- # It's a multiline string and the first line ends with

- # a backslash, so we don't need to inject another.

- inject_backslash = False

- if inject_backslash:

- # Figure out what column the backslash is in.

- ccol = len(last_line.split("\n")[-2]) - 1

- # Yield the token, with a fake token type.

- yield (

- 99999, "\\\n",

- (slineno, ccol), (slineno, ccol+2),

- last_line

- )

- last_line = ltext

- last_ttype = ttype

- yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext

- last_lineno = elineno

-def source_token_lines(source):

- """Generate a series of lines, one for each line in `source`.

- Each line is a list of pairs, each pair is a token::

- [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

- Each pair has a token class, and the token text.

- If you concatenate all the token texts, and then join them with newlines,

- you should have your original `source` back, with two differences:

- trailing whitespace is not preserved, and a final line with no newline

- is indistinguishable from a final line with a newline.

- """

- ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]

- line = []

- col = 0

- source = source.expandtabs(8).replace('\r\n', '\n')

- tokgen = tokenize.generate_tokens(StringIO(source).readline)

- for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):

- mark_start = True

- for part in re.split('(\n)', ttext):

- if part == '\n':

- yield line

- line = []

- col = 0

- mark_end = False

- elif part == '':

- mark_end = False

- elif ttype in ws_tokens:

- mark_end = False

- else:

- if mark_start and scol > col:

- line.append(("ws", " " * (scol - col)))

- mark_start = False

- tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]

- if ttype == token.NAME and keyword.iskeyword(ttext):

- tok_class = "key"

- line.append((tok_class, part))

- mark_end = True

- scol = 0

- if mark_end:

- col = ecol

- if line:

- yield line

-def source_encoding(source):

- """Determine the encoding for `source` (a string), according to PEP 263.

- Returns a string, the name of the encoding.

- """

- # Note: this function should never be called on Python 3, since py3 has

- # built-in tools to do this.

- assert sys.version_info < (3, 0)

- # This is mostly code adapted from Py3.2's tokenize module.

- cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")

- # Do this so the detect_encode code we copied will work.

- readline = iter(source.splitlines()).next

- def _get_normal_name(orig_enc):

- """Imitates get_normal_name in tokenizer.c."""

- # Only care about the first 12 characters.

- enc = orig_enc[:12].lower().replace("_", "-")

- if re.match(r"^utf-8($|-)", enc):

- return "utf-8"

- if re.match(r"^(latin-1|iso-8859-1|iso-latin-1)($|-)", enc):

- return "iso-8859-1"

- return orig_enc

- # From detect_encode():

- # It detects the encoding from the presence of a utf-8 bom or an encoding

- # cookie as specified in pep-0263. If both a bom and a cookie are present,

- # but disagree, a SyntaxError will be raised. If the encoding cookie is an

- # invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,

- # 'utf-8-sig' is returned.

- # If no encoding is specified, then the default will be returned. The

- # default varied with version.

- if sys.version_info <= (2, 4):

- default = 'iso-8859-1'

- else:

- default = 'ascii'

- bom_found = False

- encoding = None

- def read_or_stop():

- """Get the next source line, or ''."""

- try:

- return readline()

- except StopIteration:

- return ''

- def find_cookie(line):

- """Find an encoding cookie in `line`."""

- try:

- line_string = line.decode('ascii')

- except UnicodeDecodeError:

- return None

- matches = cookie_re.findall(line_string)

- if not matches:

- return None

- encoding = _get_normal_name(matches[0])

- try:

- codec = codecs.lookup(encoding)

- except LookupError:

- # This behaviour mimics the Python interpreter

- raise SyntaxError("unknown encoding: " + encoding)

- if bom_found:

- if codec.name != 'utf-8':

- # This behaviour mimics the Python interpreter

- raise SyntaxError('encoding problem: utf-8')

- encoding += '-sig'

- return encoding

- first = read_or_stop()

- if first.startswith(codecs.BOM_UTF8):

- bom_found = True

- first = first[3:]

- default = 'utf-8-sig'

- if not first:

- return default

- encoding = find_cookie(first)

- if encoding:

- return encoding

- second = read_or_stop()

- if not second:

- return default

- encoding = find_cookie(second)

- if encoding:

- return encoding

- return default

« no previous file with comments | « third_party/coverage-3.6/coverage/parser.py ('k') | third_party/coverage-3.6/coverage/report.py » ('j') | no next file with comments »