third_party/coverage/phystokens.py - Issue 63813002: Add python coverage 3.7 to depot tools.

Side by Side Diff: third_party/coverage/phystokens.py

Issue 63813002: Add python coverage 3.7 to depot tools. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 """Better tokenizing for coverage.py."""

	2

	3 import codecs, keyword, re, sys, token, tokenize

	4 from coverage.backward import StringIO # pylint: disable=W0622

	5

	6 def phys_tokens(toks):

	7 """Return all physical tokens, even line continuations.

	8

	9 tokenize.generate_tokens() doesn't return a token for the backslash that

	10 continues lines. This wrapper provides those tokens so that we can

	11 re-create a faithful representation of the original source.

	12

	13 Returns the same values as generate_tokens()

	14

	15 """

	16 last_line = None

	17 last_lineno = -1

	18 last_ttype = None

	19 for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:

	20 if last_lineno != elineno:

	21 if last_line and last_line[-2:] == "\\\n":

	22 # We are at the beginning of a new line, and the last line

	23 # ended with a backslash. We probably have to inject a

	24 # backslash token into the stream. Unfortunately, there's more

	25 # to figure out. This code::

	26 #

	27 # usage = """\

	28 # HEY THERE

	29 # """

	30 #

	31 # triggers this condition, but the token text is::

	32 #

	33 # '"""\\\nHEY THERE\n"""'

	34 #

	35 # so we need to figure out if the backslash is already in the

	36 # string token or not.

	37 inject_backslash = True

	38 if last_ttype == tokenize.COMMENT:

	39 # Comments like this \

	40 # should never result in a new token.

	41 inject_backslash = False

	42 elif ttype == token.STRING:

	43 if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':

	44 # It's a multiline string and the first line ends with

	45 # a backslash, so we don't need to inject another.

	46 inject_backslash = False

	47 if inject_backslash:

	48 # Figure out what column the backslash is in.

	49 ccol = len(last_line.split("\n")[-2]) - 1

	50 # Yield the token, with a fake token type.

	51 yield (

	52 99999, "\\\n",

	53 (slineno, ccol), (slineno, ccol+2),

	54 last_line

	55 )

	56 last_line = ltext

	57 last_ttype = ttype

	58 yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext

	59 last_lineno = elineno

	60

	61

	62 def source_token_lines(source):

	63 """Generate a series of lines, one for each line in `source`.

	64

	65 Each line is a list of pairs, each pair is a token::

	66

	67 [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

	68

	69 Each pair has a token class, and the token text.

	70

	71 If you concatenate all the token texts, and then join them with newlines,

	72 you should have your original `source` back, with two differences:

	73 trailing whitespace is not preserved, and a final line with no newline

	74 is indistinguishable from a final line with a newline.

	75

	76 """

	77 ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]

	78 line = []

	79 col = 0

	80 source = source.expandtabs(8).replace('\r\n', '\n')

	81 tokgen = tokenize.generate_tokens(StringIO(source).readline)

	82 for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):

	83 mark_start = True

	84 for part in re.split('(\n)', ttext):

	85 if part == '\n':

	86 yield line

	87 line = []

	88 col = 0

	89 mark_end = False

	90 elif part == '':

	91 mark_end = False

	92 elif ttype in ws_tokens:

	93 mark_end = False

	94 else:

	95 if mark_start and scol > col:

	96 line.append(("ws", " " * (scol - col)))

	97 mark_start = False

	98 tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]

	99 if ttype == token.NAME and keyword.iskeyword(ttext):

	100 tok_class = "key"

	101 line.append((tok_class, part))

	102 mark_end = True

	103 scol = 0

	104 if mark_end:

	105 col = ecol

	106

	107 if line:

	108 yield line

	109

	110 def source_encoding(source):

	111 """Determine the encoding for `source` (a string), according to PEP 263.

	112

	113 Returns a string, the name of the encoding.

	114

	115 """

	116 # Note: this function should never be called on Python 3, since py3 has

	117 # built-in tools to do this.

	118 assert sys.version_info < (3, 0)

	119

	120 # This is mostly code adapted from Py3.2's tokenize module.

	121

	122 cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")

	123

	124 # Do this so the detect_encode code we copied will work.

	125 readline = iter(source.splitlines(True)).next

	126

	127 def _get_normal_name(orig_enc):

	128 """Imitates get_normal_name in tokenizer.c."""

	129 # Only care about the first 12 characters.

	130 enc = orig_enc[:12].lower().replace("_", "-")

	131 if re.match(r"^utf-8($\|-)", enc):

	132 return "utf-8"

	133 if re.match(r"^(latin-1\|iso-8859-1\|iso-latin-1)($\|-)", enc):

	134 return "iso-8859-1"

	135 return orig_enc

	136

	137 # From detect_encode():

	138 # It detects the encoding from the presence of a utf-8 bom or an encoding

	139 # cookie as specified in pep-0263. If both a bom and a cookie are present,

	140 # but disagree, a SyntaxError will be raised. If the encoding cookie is an

	141 # invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,

	142 # 'utf-8-sig' is returned.

	143

	144 # If no encoding is specified, then the default will be returned. The

	145 # default varied with version.

	146

	147 if sys.version_info <= (2, 4):

	148 default = 'iso-8859-1'

	149 else:

	150 default = 'ascii'

	151

	152 bom_found = False

	153 encoding = None

	154

	155 def read_or_stop():

	156 """Get the next source line, or ''."""

	157 try:

	158 return readline()

	159 except StopIteration:

	160 return ''

	161

	162 def find_cookie(line):

	163 """Find an encoding cookie in `line`."""

	164 try:

	165 line_string = line.decode('ascii')

	166 except UnicodeDecodeError:

	167 return None

	168

	169 matches = cookie_re.findall(line_string)

	170 if not matches:

	171 return None

	172 encoding = _get_normal_name(matches[0])

	173 try:

	174 codec = codecs.lookup(encoding)

	175 except LookupError:

	176 # This behaviour mimics the Python interpreter

	177 raise SyntaxError("unknown encoding: " + encoding)

	178

	179 if bom_found:

	180 # codecs in 2.3 were raw tuples of functions, assume the best.

	181 codec_name = getattr(codec, 'name', encoding)

	182 if codec_name != 'utf-8':

	183 # This behaviour mimics the Python interpreter

	184 raise SyntaxError('encoding problem: utf-8')

	185 encoding += '-sig'

	186 return encoding

	187

	188 first = read_or_stop()

	189 if first.startswith(codecs.BOM_UTF8):

	190 bom_found = True

	191 first = first[3:]

	192 default = 'utf-8-sig'

	193 if not first:

	194 return default

	195

	196 encoding = find_cookie(first)

	197 if encoding:

	198 return encoding

	199

	200 second = read_or_stop()

	201 if not second:

	202 return default

	203

	204 encoding = find_cookie(second)

	205 if encoding:

	206 return encoding

	207

	208 return default

OLD	NEW

« no previous file with comments | « third_party/coverage/parser.py ('k') | third_party/coverage/report.py » ('j') | no next file with comments »