third_party/pycoverage/coverage/phystokens.py - Issue 727003004: Add python coverage module to third_party

Side by Side Diff: third_party/pycoverage/coverage/phystokens.py

Issue 727003004: Add python coverage module to third_party (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 6 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 """Better tokenizing for coverage.py."""

	2

	3 import codecs, keyword, re, sys, token, tokenize

	4 from coverage.backward import set # pylint: disable=W0622

	5 from coverage.parser import generate_tokens

	6

	7

	8 def phys_tokens(toks):

	9 """Return all physical tokens, even line continuations.

	10

	11 tokenize.generate_tokens() doesn't return a token for the backslash that

	12 continues lines. This wrapper provides those tokens so that we can

	13 re-create a faithful representation of the original source.

	14

	15 Returns the same values as generate_tokens()

	16

	17 """

	18 last_line = None

	19 last_lineno = -1

	20 last_ttype = None

	21 for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:

	22 if last_lineno != elineno:

	23 if last_line and last_line.endswith("\\\n"):

	24 # We are at the beginning of a new line, and the last line

	25 # ended with a backslash. We probably have to inject a

	26 # backslash token into the stream. Unfortunately, there's more

	27 # to figure out. This code::

	28 #

	29 # usage = """\

	30 # HEY THERE

	31 # """

	32 #

	33 # triggers this condition, but the token text is::

	34 #

	35 # '"""\\\nHEY THERE\n"""'

	36 #

	37 # so we need to figure out if the backslash is already in the

	38 # string token or not.

	39 inject_backslash = True

	40 if last_ttype == tokenize.COMMENT:

	41 # Comments like this \

	42 # should never result in a new token.

	43 inject_backslash = False

	44 elif ttype == token.STRING:

	45 if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':

	46 # It's a multiline string and the first line ends with

	47 # a backslash, so we don't need to inject another.

	48 inject_backslash = False

	49 if inject_backslash:

	50 # Figure out what column the backslash is in.

	51 ccol = len(last_line.split("\n")[-2]) - 1

	52 # Yield the token, with a fake token type.

	53 yield (

	54 99999, "\\\n",

	55 (slineno, ccol), (slineno, ccol+2),

	56 last_line

	57 )

	58 last_line = ltext

	59 last_ttype = ttype

	60 yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext

	61 last_lineno = elineno

	62

	63

	64 def source_token_lines(source):

	65 """Generate a series of lines, one for each line in `source`.

	66

	67 Each line is a list of pairs, each pair is a token::

	68

	69 [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

	70

	71 Each pair has a token class, and the token text.

	72

	73 If you concatenate all the token texts, and then join them with newlines,

	74 you should have your original `source` back, with two differences:

	75 trailing whitespace is not preserved, and a final line with no newline

	76 is indistinguishable from a final line with a newline.

	77

	78 """

	79 ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])

	80 line = []

	81 col = 0

	82 source = source.expandtabs(8).replace('\r\n', '\n')

	83 tokgen = generate_tokens(source)

	84 for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):

	85 mark_start = True

	86 for part in re.split('(\n)', ttext):

	87 if part == '\n':

	88 yield line

	89 line = []

	90 col = 0

	91 mark_end = False

	92 elif part == '':

	93 mark_end = False

	94 elif ttype in ws_tokens:

	95 mark_end = False

	96 else:

	97 if mark_start and scol > col:

	98 line.append(("ws", " " * (scol - col)))

	99 mark_start = False

	100 tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]

	101 if ttype == token.NAME and keyword.iskeyword(ttext):

	102 tok_class = "key"

	103 line.append((tok_class, part))

	104 mark_end = True

	105 scol = 0

	106 if mark_end:

	107 col = ecol

	108

	109 if line:

	110 yield line

	111

	112 def source_encoding(source):

	113 """Determine the encoding for `source` (a string), according to PEP 263.

	114

	115 Returns a string, the name of the encoding.

	116

	117 """

	118 # Note: this function should never be called on Python 3, since py3 has

	119 # built-in tools to do this.

	120 assert sys.version_info < (3, 0)

	121

	122 # This is mostly code adapted from Py3.2's tokenize module.

	123

	124 cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")

	125

	126 # Do this so the detect_encode code we copied will work.

	127 readline = iter(source.splitlines(True)).next

	128

	129 def _get_normal_name(orig_enc):

	130 """Imitates get_normal_name in tokenizer.c."""

	131 # Only care about the first 12 characters.

	132 enc = orig_enc[:12].lower().replace("_", "-")

	133 if re.match(r"^utf-8($\|-)", enc):

	134 return "utf-8"

	135 if re.match(r"^(latin-1\|iso-8859-1\|iso-latin-1)($\|-)", enc):

	136 return "iso-8859-1"

	137 return orig_enc

	138

	139 # From detect_encode():

	140 # It detects the encoding from the presence of a utf-8 bom or an encoding

	141 # cookie as specified in pep-0263. If both a bom and a cookie are present,

	142 # but disagree, a SyntaxError will be raised. If the encoding cookie is an

	143 # invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,

	144 # 'utf-8-sig' is returned.

	145

	146 # If no encoding is specified, then the default will be returned. The

	147 # default varied with version.

	148

	149 if sys.version_info <= (2, 4):

	150 default = 'iso-8859-1'

	151 else:

	152 default = 'ascii'

	153

	154 bom_found = False

	155 encoding = None

	156

	157 def read_or_stop():

	158 """Get the next source line, or ''."""

	159 try:

	160 return readline()

	161 except StopIteration:

	162 return ''

	163

	164 def find_cookie(line):

	165 """Find an encoding cookie in `line`."""

	166 try:

	167 line_string = line.decode('ascii')

	168 except UnicodeDecodeError:

	169 return None

	170

	171 matches = cookie_re.findall(line_string)

	172 if not matches:

	173 return None

	174 encoding = _get_normal_name(matches[0])

	175 try:

	176 codec = codecs.lookup(encoding)

	177 except LookupError:

	178 # This behaviour mimics the Python interpreter

	179 raise SyntaxError("unknown encoding: " + encoding)

	180

	181 if bom_found:

	182 # codecs in 2.3 were raw tuples of functions, assume the best.

	183 codec_name = getattr(codec, 'name', encoding)

	184 if codec_name != 'utf-8':

	185 # This behaviour mimics the Python interpreter

	186 raise SyntaxError('encoding problem: utf-8')

	187 encoding += '-sig'

	188 return encoding

	189

	190 first = read_or_stop()

	191 if first.startswith(codecs.BOM_UTF8):

	192 bom_found = True

	193 first = first[3:]

	194 default = 'utf-8-sig'

	195 if not first:

	196 return default

	197

	198 encoding = find_cookie(first)

	199 if encoding:

	200 return encoding

	201

	202 second = read_or_stop()

	203 if not second:

	204 return default

	205

	206 encoding = find_cookie(second)

	207 if encoding:

	208 return encoding

	209

	210 return default

OLD	NEW

« no previous file with comments | « third_party/pycoverage/coverage/parser.py ('k') | third_party/pycoverage/coverage/report.py » ('j') | no next file with comments »