Unified Diff: third_party/coverage-3.7.1/coverage/phystokens.py

Issue 225633007: Upgrade to coverage 3.7.1 and have it auto-build itself on first use. (Closed)
Base URL: svn://svn.chromium.org/chrome/trunk/tools/build
Patch Set: "sigh our imports are a mess" (created 6 years, 8 months ago)
1 """Better tokenizing for coverage.py.""" 1 """Better tokenizing for coverage.py."""
2 2
3 import codecs, keyword, re, sys, token, tokenize 3 import codecs, keyword, re, sys, token, tokenize
4 from coverage.backward import StringIO # pylint: disable=W0622 4 from coverage.backward import set # pylint: disable=W0622
5 from coverage.parser import generate_tokens
6
5 7
6 def phys_tokens(toks): 8 def phys_tokens(toks):
7 """Return all physical tokens, even line continuations. 9 """Return all physical tokens, even line continuations.
8 10
9 tokenize.generate_tokens() doesn't return a token for the backslash that 11 tokenize.generate_tokens() doesn't return a token for the backslash that
10 continues lines. This wrapper provides those tokens so that we can 12 continues lines. This wrapper provides those tokens so that we can
11 re-create a faithful representation of the original source. 13 re-create a faithful representation of the original source.
12 14
13 Returns the same values as generate_tokens() 15 Returns the same values as generate_tokens()
14 16
15 """ 17 """
16 last_line = None 18 last_line = None
17 last_lineno = -1 19 last_lineno = -1
18 last_ttype = None 20 last_ttype = None
19 for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks: 21 for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
20 if last_lineno != elineno: 22 if last_lineno != elineno:
21 if last_line and last_line[-2:] == "\\\n": 23 if last_line and last_line.endswith("\\\n"):
22 # We are at the beginning of a new line, and the last line 24 # We are at the beginning of a new line, and the last line
23 # ended with a backslash. We probably have to inject a 25 # ended with a backslash. We probably have to inject a
24 # backslash token into the stream. Unfortunately, there's more 26 # backslash token into the stream. Unfortunately, there's more
25 # to figure out. This code:: 27 # to figure out. This code::
26 # 28 #
27 # usage = """\ 29 # usage = """\
28 # HEY THERE 30 # HEY THERE
29 # """ 31 # """
30 # 32 #
31 # triggers this condition, but the token text is:: 33 # triggers this condition, but the token text is::
(...skipping 35 matching lines...)
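Why phys_tokens() exists, in miniature: the stdlib tokenizer emits no token for the backslash that continues a line, so naively joining token texts would not round-trip the source. A standalone sketch of the problem, written for a modern Python (io.StringIO stands in for the coverage.backward.StringIO the old code imported):

import tokenize
from io import StringIO

source = "total = 1 + \\\n    2\n"
texts = [tok[1] for tok in tokenize.generate_tokens(StringIO(source).readline)]
# The continuation backslash shows up in no token's text; phys_tokens()
# compensates by injecting a synthetic backslash token into the stream.
assert "\\" not in "".join(texts)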
         [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

     Each pair has a token class, and the token text.

     If you concatenate all the token texts, and then join them with newlines,
     you should have your original `source` back, with two differences:
     trailing whitespace is not preserved, and a final line with no newline
     is indistinguishable from a final line with a newline.

     """
-    ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]
+    ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
     line = []
     col = 0
     source = source.expandtabs(8).replace('\r\n', '\n')
-    tokgen = tokenize.generate_tokens(StringIO(source).readline)
+    tokgen = generate_tokens(source)
     for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
         mark_start = True
         for part in re.split('(\n)', ttext):
             if part == '\n':
                 yield line
                 line = []
                 col = 0
                 mark_end = False
             elif part == '':
                 mark_end = False
(...skipping 23 matching lines...)
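Two things change in the hunk above: ws_tokens becomes a set for constant-time membership tests (imported from coverage.backward so it exists even on pre-2.4 Pythons, hence the pylint redefined-builtin suppression), and tokenization goes through coverage.parser.generate_tokens, which per the new import takes the source text directly rather than a readline callable. For reference, a usage sketch of the surrounding generator; its def line falls in the skipped region, so the name source_token_lines is taken from the coverage 3.7.1 sources rather than from this page:

from coverage.phystokens import source_token_lines

source = 'def hello():\n    print("hi")\n'
for line in source_token_lines(source):
    # Each yielded line is a list of (token_class, token_text) pairs, e.g.
    # [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ...]
    print("".join(text for _, text in line))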
115 """ 117 """
116 # Note: this function should never be called on Python 3, since py3 has 118 # Note: this function should never be called on Python 3, since py3 has
117 # built-in tools to do this. 119 # built-in tools to do this.
118 assert sys.version_info < (3, 0) 120 assert sys.version_info < (3, 0)
119 121
120 # This is mostly code adapted from Py3.2's tokenize module. 122 # This is mostly code adapted from Py3.2's tokenize module.
121 123
122 cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)") 124 cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
123 125
124 # Do this so the detect_encode code we copied will work. 126 # Do this so the detect_encode code we copied will work.
125 readline = iter(source.splitlines()).next 127 readline = iter(source.splitlines(True)).next
126 128
127 def _get_normal_name(orig_enc): 129 def _get_normal_name(orig_enc):
128 """Imitates get_normal_name in tokenizer.c.""" 130 """Imitates get_normal_name in tokenizer.c."""
129 # Only care about the first 12 characters. 131 # Only care about the first 12 characters.
130 enc = orig_enc[:12].lower().replace("_", "-") 132 enc = orig_enc[:12].lower().replace("_", "-")
131 if re.match(r"^utf-8($|-)", enc): 133 if re.match(r"^utf-8($|-)", enc):
132 return "utf-8" 134 return "utf-8"
133 if re.match(r"^(latin-1|iso-8859-1|iso-latin-1)($|-)", enc): 135 if re.match(r"^(latin-1|iso-8859-1|iso-latin-1)($|-)", enc):
134 return "iso-8859-1" 136 return "iso-8859-1"
135 return orig_enc 137 return orig_enc
(...skipping 34 matching lines...)
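What _get_normal_name() is for: the C tokenizer accepts many spellings of the common encoding names, and this helper folds them down to canonical ones before the codecs lookup. A standalone copy of the logic shown above, exercised on a few inputs:

import re

def _get_normal_name(orig_enc):
    # Only the first 12 characters matter, per tokenizer.c.
    enc = orig_enc[:12].lower().replace("_", "-")
    if re.match(r"^utf-8($|-)", enc):
        return "utf-8"
    if re.match(r"^(latin-1|iso-8859-1|iso-latin-1)($|-)", enc):
        return "iso-8859-1"
    return orig_enc

assert _get_normal_name("UTF_8") == "utf-8"
assert _get_normal_name("Latin-1") == "iso-8859-1"
assert _get_normal_name("mac-roman") == "mac-roman"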
         if not matches:
             return None
         encoding = _get_normal_name(matches[0])
         try:
             codec = codecs.lookup(encoding)
         except LookupError:
             # This behaviour mimics the Python interpreter
             raise SyntaxError("unknown encoding: " + encoding)

         if bom_found:
-            if codec.name != 'utf-8':
+            # codecs in 2.3 were raw tuples of functions, assume the best.
+            codec_name = getattr(codec, 'name', encoding)
+            if codec_name != 'utf-8':
                 # This behaviour mimics the Python interpreter
                 raise SyntaxError('encoding problem: utf-8')
             encoding += '-sig'
         return encoding

     first = read_or_stop()
     if first.startswith(codecs.BOM_UTF8):
         bom_found = True
         first = first[3:]
         default = 'utf-8-sig'
     if not first:
         return default

     encoding = find_cookie(first)
     if encoding:
         return encoding

     second = read_or_stop()
     if not second:
         return default

     encoding = find_cookie(second)
     if encoding:
         return encoding

     return default
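And a quick sanity check of the PEP 263 cookie matching that drives the lookup above, using the cookie_re shown earlier in the file (standalone; the enclosing detection function's def line falls in a skipped region):

import codecs
import re

cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")

for line, expected in [
    ("# -*- coding: utf-8 -*-", "utf-8"),
    ("# coding=latin_1", "latin_1"),
    ("x = 1", None),
]:
    matches = cookie_re.findall(line)
    assert (matches[0] if matches else None) == expected

# The BOM branch strips exactly these three bytes before reading line 1.
assert codecs.BOM_UTF8 == b"\xef\xbb\xbf"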
