OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 """ | 2 """ |
3 jinja2.lexer | 3 jinja2.lexer |
4 ~~~~~~~~~~~~ | 4 ~~~~~~~~~~~~ |
5 | 5 |
6 This module implements a Jinja / Python combination lexer. The | 6 This module implements a Jinja / Python combination lexer. The |
7 `Lexer` class provided by this module is used to do some preprocessing | 7 `Lexer` class provided by this module is used to do some preprocessing |
8 for Jinja. | 8 for Jinja. |
9 | 9 |
10 On the one hand it filters out invalid operators like the bitshift | 10 On the one hand it filters out invalid operators like the bitshift |
11 operators we don't allow in templates. On the other hand it separates | 11 operators we don't allow in templates. On the other hand it separates |
12 template code and python code in expressions. | 12 template code and python code in expressions. |
13 | 13 |
14 :copyright: (c) 2010 by the Jinja Team. | 14 :copyright: (c) 2010 by the Jinja Team. |
15 :license: BSD, see LICENSE for more details. | 15 :license: BSD, see LICENSE for more details. |
16 """ | 16 """ |
17 import re | 17 import re |
| 18 |
18 from operator import itemgetter | 19 from operator import itemgetter |
19 from collections import deque | 20 from collections import deque |
20 from jinja2.exceptions import TemplateSyntaxError | 21 from jinja2.exceptions import TemplateSyntaxError |
21 from jinja2.utils import LRUCache, next | 22 from jinja2.utils import LRUCache |
| 23 from jinja2._compat import next, iteritems, implements_iterator, text_type, \ |
| 24 intern |
22 | 25 |
23 | 26 |
24 # cache for the lexers. Exists in order to be able to have multiple | 27 # cache for the lexers. Exists in order to be able to have multiple |
25 # environments with the same lexer | 28 # environments with the same lexer |
26 _lexer_cache = LRUCache(50) | 29 _lexer_cache = LRUCache(50) |
27 | 30 |
28 # static regular expressions | 31 # static regular expressions |
29 whitespace_re = re.compile(r'\s+', re.U) | 32 whitespace_re = re.compile(r'\s+', re.U) |
30 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" | 33 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" |
31 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) | 34 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) |
(...skipping 87 matching lines...)
119 '<': TOKEN_LT, | 122 '<': TOKEN_LT, |
120 '<=': TOKEN_LTEQ, | 123 '<=': TOKEN_LTEQ, |
121 '=': TOKEN_ASSIGN, | 124 '=': TOKEN_ASSIGN, |
122 '.': TOKEN_DOT, | 125 '.': TOKEN_DOT, |
123 ':': TOKEN_COLON, | 126 ':': TOKEN_COLON, |
124 '|': TOKEN_PIPE, | 127 '|': TOKEN_PIPE, |
125 ',': TOKEN_COMMA, | 128 ',': TOKEN_COMMA, |
126 ';': TOKEN_SEMICOLON | 129 ';': TOKEN_SEMICOLON |
127 } | 130 } |
128 | 131 |
129 reverse_operators = dict([(v, k) for k, v in operators.iteritems()]) | 132 reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) |
130 assert len(operators) == len(reverse_operators), 'operators dropped' | 133 assert len(operators) == len(reverse_operators), 'operators dropped' |
131 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in | 134 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in |
132 sorted(operators, key=lambda x: -len(x)))) | 135 sorted(operators, key=lambda x: -len(x)))) |
133 | 136 |
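Sorting the alternatives longest-first matters because Python's `|` tries branches left to right and stops at the first one that matches, not the longest; a sketch with a few hypothetical operators:

    import re

    ops = ['<', '<=', '==', '=']
    unsorted_re = re.compile('|'.join(re.escape(x) for x in ops))
    sorted_re = re.compile('|'.join(re.escape(x) for x in
                                    sorted(ops, key=lambda x: -len(x))))
    print(unsorted_re.match('<=').group())  # '<'  -- stops too early
    print(sorted_re.match('<=').group())    # '<=' -- longest operator wins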
134 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, | 137 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, |
135 TOKEN_COMMENT_END, TOKEN_WHITESPACE, | 138 TOKEN_COMMENT_END, TOKEN_WHITESPACE, |
136 TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN, | 139 TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN, |
137 TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT]) | 140 TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT]) |
138 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, | 141 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, |
139 TOKEN_COMMENT, TOKEN_LINECOMMENT]) | 142 TOKEN_COMMENT, TOKEN_LINECOMMENT]) |
(...skipping 50 matching lines...)
190 (len(environment.comment_start_string), 'comment', | 193 (len(environment.comment_start_string), 'comment', |
191 e(environment.comment_start_string)), | 194 e(environment.comment_start_string)), |
192 (len(environment.block_start_string), 'block', | 195 (len(environment.block_start_string), 'block', |
193 e(environment.block_start_string)), | 196 e(environment.block_start_string)), |
194 (len(environment.variable_start_string), 'variable', | 197 (len(environment.variable_start_string), 'variable', |
195 e(environment.variable_start_string)) | 198 e(environment.variable_start_string)) |
196 ] | 199 ] |
197 | 200 |
198 if environment.line_statement_prefix is not None: | 201 if environment.line_statement_prefix is not None: |
199 rules.append((len(environment.line_statement_prefix), 'linestatement', | 202 rules.append((len(environment.line_statement_prefix), 'linestatement', |
200 r'^\s*' + e(environment.line_statement_prefix))) | 203 r'^[ \t\v]*' + e(environment.line_statement_prefix))) |
201 if environment.line_comment_prefix is not None: | 204 if environment.line_comment_prefix is not None: |
202 rules.append((len(environment.line_comment_prefix), 'linecomment', | 205 rules.append((len(environment.line_comment_prefix), 'linecomment', |
203 r'(?:^|(?<=\S))[^\S\r\n]*' + | 206 r'(?:^|(?<=\S))[^\S\r\n]*' + |
204 e(environment.line_comment_prefix))) | 207 e(environment.line_comment_prefix))) |
205 | 208 |
206 return [x[1:] for x in sorted(rules, reverse=True)] | 209 return [x[1:] for x in sorted(rules, reverse=True)] |
207 | 210 |
208 | 211 |
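The 'linecomment' rule built above allows the prefix either at the start of a line or directly after non-whitespace; a sketch assuming '##' as the configured line_comment_prefix:

    import re

    # [^\S\r\n] is any whitespace except newlines, so the prefix may be
    # indented or padded but never pulled across a line break
    linecomment_re = re.compile(r'(?:^|(?<=\S))[^\S\r\n]*' + re.escape('##'))

    print(bool(linecomment_re.search('value  ## note')))  # True
    print(bool(linecomment_re.search('## whole line')))   # True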
209 class Failure(object): | 212 class Failure(object): |
210 """Class that raises a `TemplateSyntaxError` if called. | 213 """Class that raises a `TemplateSyntaxError` if called. |
(...skipping 44 matching lines...)
255 return False | 258 return False |
256 | 259 |
257 def __repr__(self): | 260 def __repr__(self): |
258 return 'Token(%r, %r, %r)' % ( | 261 return 'Token(%r, %r, %r)' % ( |
259 self.lineno, | 262 self.lineno, |
260 self.type, | 263 self.type, |
261 self.value | 264 self.value |
262 ) | 265 ) |
263 | 266 |
264 | 267 |
| 268 @implements_iterator |
265 class TokenStreamIterator(object): | 269 class TokenStreamIterator(object): |
266 """The iterator for tokenstreams. Iterate over the stream | 270 """The iterator for tokenstreams. Iterate over the stream |
267 until the eof token is reached. | 271 until the eof token is reached. |
268 """ | 272 """ |
269 | 273 |
270 def __init__(self, stream): | 274 def __init__(self, stream): |
271 self.stream = stream | 275 self.stream = stream |
272 | 276 |
273 def __iter__(self): | 277 def __iter__(self): |
274 return self | 278 return self |
275 | 279 |
276 def next(self): | 280 def __next__(self): |
277 token = self.stream.current | 281 token = self.stream.current |
278 if token.type is TOKEN_EOF: | 282 if token.type is TOKEN_EOF: |
279 self.stream.close() | 283 self.stream.close() |
280 raise StopIteration() | 284 raise StopIteration() |
281 next(self.stream) | 285 next(self.stream) |
282 return token | 286 return token |
283 | 287 |
284 | 288 |
| 289 @implements_iterator |
285 class TokenStream(object): | 290 class TokenStream(object): |
286 """A token stream is an iterable that yields :class:`Token`\s. The | 291 """A token stream is an iterable that yields :class:`Token`\s. The |
287 parser however does not iterate over it but calls :meth:`next` to go | 292 parser however does not iterate over it but calls :meth:`next` to go |
288 one token ahead. The current active token is stored as :attr:`current`. | 293 one token ahead. The current active token is stored as :attr:`current`. |
289 """ | 294 """ |
290 | 295 |
291 def __init__(self, generator, name, filename): | 296 def __init__(self, generator, name, filename): |
292 self._next = iter(generator).next | 297 self._iter = iter(generator) |
293 self._pushed = deque() | 298 self._pushed = deque() |
294 self.name = name | 299 self.name = name |
295 self.filename = filename | 300 self.filename = filename |
296 self.closed = False | 301 self.closed = False |
297 self.current = Token(1, TOKEN_INITIAL, '') | 302 self.current = Token(1, TOKEN_INITIAL, '') |
298 next(self) | 303 next(self) |
299 | 304 |
300 def __iter__(self): | 305 def __iter__(self): |
301 return TokenStreamIterator(self) | 306 return TokenStreamIterator(self) |
302 | 307 |
303 def __nonzero__(self): | 308 def __bool__(self): |
304 return bool(self._pushed) or self.current.type is not TOKEN_EOF | 309 return bool(self._pushed) or self.current.type is not TOKEN_EOF |
| 310 __nonzero__ = __bool__ # py2 |
305 | 311 |
306 eos = property(lambda x: not x, doc="Are we at the end of the stream?") | 312 eos = property(lambda x: not x, doc="Are we at the end of the stream?") |
307 | 313 |
308 def push(self, token): | 314 def push(self, token): |
309 """Push a token back to the stream.""" | 315 """Push a token back to the stream.""" |
310 self._pushed.append(token) | 316 self._pushed.append(token) |
311 | 317 |
312 def look(self): | 318 def look(self): |
313 """Look at the next token.""" | 319 """Look at the next token.""" |
314 old_token = next(self) | 320 old_token = next(self) |
315 result = self.current | 321 result = self.current |
316 self.push(result) | 322 self.push(result) |
317 self.current = old_token | 323 self.current = old_token |
318 return result | 324 return result |
319 | 325 |
320 def skip(self, n=1): | 326 def skip(self, n=1): |
321 """Got n tokens ahead.""" | 327 """Got n tokens ahead.""" |
322 for x in xrange(n): | 328 for x in range(n): |
323 next(self) | 329 next(self) |
324 | 330 |
325 def next_if(self, expr): | 331 def next_if(self, expr): |
326 """Perform the token test and return the token if it matched. | 332 """Perform the token test and return the token if it matched. |
327 Otherwise the return value is `None`. | 333 Otherwise the return value is `None`. |
328 """ | 334 """ |
329 if self.current.test(expr): | 335 if self.current.test(expr): |
330 return next(self) | 336 return next(self) |
331 | 337 |
332 def skip_if(self, expr): | 338 def skip_if(self, expr): |
333 """Like :meth:`next_if` but only returns `True` or `False`.""" | 339 """Like :meth:`next_if` but only returns `True` or `False`.""" |
334 return self.next_if(expr) is not None | 340 return self.next_if(expr) is not None |
335 | 341 |
336 def next(self): | 342 def __next__(self): |
337 """Go one token ahead and return the old one""" | 343 """Go one token ahead and return the old one""" |
338 rv = self.current | 344 rv = self.current |
339 if self._pushed: | 345 if self._pushed: |
340 self.current = self._pushed.popleft() | 346 self.current = self._pushed.popleft() |
341 elif self.current.type is not TOKEN_EOF: | 347 elif self.current.type is not TOKEN_EOF: |
342 try: | 348 try: |
343 self.current = self._next() | 349 self.current = next(self._iter) |
344 except StopIteration: | 350 except StopIteration: |
345 self.close() | 351 self.close() |
346 return rv | 352 return rv |
347 | 353 |
348 def close(self): | 354 def close(self): |
349 """Close the stream.""" | 355 """Close the stream.""" |
350 self.current = Token(self.current.lineno, TOKEN_EOF, '') | 356 self.current = Token(self.current.lineno, TOKEN_EOF, '') |
351 self._next = None | 357 self._iter = None |
352 self.closed = True | 358 self.closed = True |
353 | 359 |
354 def expect(self, expr): | 360 def expect(self, expr): |
355 """Expect a given token type and return it. This accepts the same | 361 """Expect a given token type and return it. This accepts the same |
356 argument as :meth:`jinja2.lexer.Token.test`. | 362 argument as :meth:`jinja2.lexer.Token.test`. |
357 """ | 363 """ |
358 if not self.current.test(expr): | 364 if not self.current.test(expr): |
359 expr = describe_token_expr(expr) | 365 expr = describe_token_expr(expr) |
360 if self.current.type is TOKEN_EOF: | 366 if self.current.type is TOKEN_EOF: |
361 raise TemplateSyntaxError('unexpected end of template, ' | 367 raise TemplateSyntaxError('unexpected end of template, ' |
(...skipping 14 matching lines...)
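A usage sketch for the stream helpers above (expect, look, skip, skip_if), assuming a default Environment; token types are plain strings:

    from jinja2 import Environment

    env = Environment()
    stream = env.lexer.tokenize(u'{{ foo + 1 }}')
    stream.expect('variable_begin')          # consume or raise
    print(stream.look().type)                # 'add' -- peek past current
    name = stream.expect('name')
    print(name.value, stream.current.type)   # foo add
    stream.skip(2)                           # step over '+' and the integer
    print(stream.skip_if('variable_end'))    # True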
376 """Return a lexer which is probably cached.""" | 382 """Return a lexer which is probably cached.""" |
377 key = (environment.block_start_string, | 383 key = (environment.block_start_string, |
378 environment.block_end_string, | 384 environment.block_end_string, |
379 environment.variable_start_string, | 385 environment.variable_start_string, |
380 environment.variable_end_string, | 386 environment.variable_end_string, |
381 environment.comment_start_string, | 387 environment.comment_start_string, |
382 environment.comment_end_string, | 388 environment.comment_end_string, |
383 environment.line_statement_prefix, | 389 environment.line_statement_prefix, |
384 environment.line_comment_prefix, | 390 environment.line_comment_prefix, |
385 environment.trim_blocks, | 391 environment.trim_blocks, |
386 environment.newline_sequence) | 392 environment.lstrip_blocks, |
| 393 environment.newline_sequence, |
| 394 environment.keep_trailing_newline) |
387 lexer = _lexer_cache.get(key) | 395 lexer = _lexer_cache.get(key) |
388 if lexer is None: | 396 if lexer is None: |
389 lexer = Lexer(environment) | 397 lexer = Lexer(environment) |
390 _lexer_cache[key] = lexer | 398 _lexer_cache[key] = lexer |
391 return lexer | 399 return lexer |
392 | 400 |
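The cache key now covers every setting the lexer depends on, so environments that agree on all of them share one Lexer instance; a quick sketch:

    from jinja2 import Environment

    a = Environment()
    b = Environment()
    c = Environment(block_start_string='<%', block_end_string='%>')
    print(a.lexer is b.lexer)  # True  -- identical key, cached instance
    print(a.lexer is c.lexer)  # False -- different delimiters, new lexer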
393 | 401 |
394 class Lexer(object): | 402 class Lexer(object): |
395 """Class that implements a lexer for a given environment. Automatically | 403 """Class that implements a lexer for a given environment. Automatically |
396 created by the environment class, usually you don't have to do that. | 404 created by the environment class, usually you don't have to do that. |
(...skipping 10 matching lines...)
407 # lexing rules for tags | 415 # lexing rules for tags |
408 tag_rules = [ | 416 tag_rules = [ |
409 (whitespace_re, TOKEN_WHITESPACE, None), | 417 (whitespace_re, TOKEN_WHITESPACE, None), |
410 (float_re, TOKEN_FLOAT, None), | 418 (float_re, TOKEN_FLOAT, None), |
411 (integer_re, TOKEN_INTEGER, None), | 419 (integer_re, TOKEN_INTEGER, None), |
412 (name_re, TOKEN_NAME, None), | 420 (name_re, TOKEN_NAME, None), |
413 (string_re, TOKEN_STRING, None), | 421 (string_re, TOKEN_STRING, None), |
414 (operator_re, TOKEN_OPERATOR, None) | 422 (operator_re, TOKEN_OPERATOR, None) |
415 ] | 423 ] |
416 | 424 |
417 # assamble the root lexing rule. because "|" is ungreedy | 425 # assemble the root lexing rule. because "|" is ungreedy |
418 # we have to sort by length so that the lexer continues working | 426 # we have to sort by length so that the lexer continues working |
419 # as expected when we have parsing rules like <% for block and | 427 # as expected when we have parsing rules like <% for block and |
420 # <%= for variables. (if someone wants asp like syntax) | 428 # <%= for variables. (if someone wants asp like syntax) |
421 # variables are just part of the rules if variable processing | 429 # variables are just part of the rules if variable processing |
422 # is required. | 430 # is required. |
423 root_tag_rules = compile_rules(environment) | 431 root_tag_rules = compile_rules(environment) |
424 | 432 |
425 # block suffix if trimming is enabled | 433 # block suffix if trimming is enabled |
426 block_suffix_re = environment.trim_blocks and '\\n?' or '' | 434 block_suffix_re = environment.trim_blocks and '\\n?' or '' |
427 | 435 |
| 436 # strip leading spaces if lstrip_blocks is enabled |
| 437 prefix_re = {} |
| 438 if environment.lstrip_blocks: |
| 439 # use '{%+' to manually disable lstrip_blocks behavior |
| 440 no_lstrip_re = e('+') |
| 441 # detect overlap between block and variable or comment strings |
| 442 block_diff = c(r'^%s(.*)' % e(environment.block_start_string)) |
| 443 # make sure we don't mistake a block for a variable or a comment |
| 444 m = block_diff.match(environment.comment_start_string) |
| 445 no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' |
| 446 m = block_diff.match(environment.variable_start_string) |
| 447 no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' |
| 448 |
| 449 # detect overlap between comment and variable strings |
| 450 comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string)) |
| 451 m = comment_diff.match(environment.variable_start_string) |
| 452 no_variable_re = m and r'(?!%s)' % e(m.group(1)) or '' |
| 453 |
| 454 lstrip_re = r'^[ \t]*' |
| 455 block_prefix_re = r'%s%s(?!%s)|%s\+?' % ( |
| 456 lstrip_re, |
| 457 e(environment.block_start_string), |
| 458 no_lstrip_re, |
| 459 e(environment.block_start_string), |
| 460 ) |
| 461 comment_prefix_re = r'%s%s%s|%s\+?' % ( |
| 462 lstrip_re, |
| 463 e(environment.comment_start_string), |
| 464 no_variable_re, |
| 465 e(environment.comment_start_string), |
| 466 ) |
| 467 prefix_re['block'] = block_prefix_re |
| 468 prefix_re['comment'] = comment_prefix_re |
| 469 else: |
| 470 block_prefix_re = '%s' % e(environment.block_start_string) |
| 471 |
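Observable effect of the prefix regexes built above, as a sketch: lstrip_blocks strips indentation before a block tag, and '{%+' opts a single tag back out:

    from jinja2 import Environment

    env = Environment(lstrip_blocks=True, trim_blocks=True)
    t = env.from_string('    {% if True %}\n    hi\n    {% endif %}\n')
    print(repr(t.render()))     # '    hi\n' -- tag indentation is gone

    keep = env.from_string('    {%+ if True %}x{% endif %}')
    print(repr(keep.render()))  # '    x'    -- '+' disables lstrip here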
428 self.newline_sequence = environment.newline_sequence | 472 self.newline_sequence = environment.newline_sequence |
| 473 self.keep_trailing_newline = environment.keep_trailing_newline |
429 | 474 |
430 # global lexing rules | 475 # global lexing rules |
431 self.rules = { | 476 self.rules = { |
432 'root': [ | 477 'root': [ |
433 # directives | 478 # directives |
434 (c('(.*?)(?:%s)' % '|'.join( | 479 (c('(.*?)(?:%s)' % '|'.join( |
435 [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % ( | 480 [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % ( |
436 e(environment.block_start_string), | 481 e(environment.block_start_string), |
437 e(environment.block_start_string), | 482 block_prefix_re, |
438 e(environment.block_end_string), | 483 e(environment.block_end_string), |
439 e(environment.block_end_string) | 484 e(environment.block_end_string) |
440 )] + [ | 485 )] + [ |
441 r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, r) | 486 r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r)) |
442 for n, r in root_tag_rules | 487 for n, r in root_tag_rules |
443 ])), (TOKEN_DATA, '#bygroup'), '#bygroup'), | 488 ])), (TOKEN_DATA, '#bygroup'), '#bygroup'), |
444 # data | 489 # data |
445 (c('.+'), TOKEN_DATA, None) | 490 (c('.+'), TOKEN_DATA, None) |
446 ], | 491 ], |
447 # comments | 492 # comments |
448 TOKEN_COMMENT_BEGIN: [ | 493 TOKEN_COMMENT_BEGIN: [ |
449 (c(r'(.*?)((?:\-%s\s*|%s)%s)' % ( | 494 (c(r'(.*?)((?:\-%s\s*|%s)%s)' % ( |
450 e(environment.comment_end_string), | 495 e(environment.comment_end_string), |
451 e(environment.comment_end_string), | 496 e(environment.comment_end_string), |
(...skipping 13 matching lines...)
465 TOKEN_VARIABLE_BEGIN: [ | 510 TOKEN_VARIABLE_BEGIN: [ |
466 (c('\-%s\s*|%s' % ( | 511 (c('\-%s\s*|%s' % ( |
467 e(environment.variable_end_string), | 512 e(environment.variable_end_string), |
468 e(environment.variable_end_string) | 513 e(environment.variable_end_string) |
469 )), TOKEN_VARIABLE_END, '#pop') | 514 )), TOKEN_VARIABLE_END, '#pop') |
470 ] + tag_rules, | 515 ] + tag_rules, |
471 # raw block | 516 # raw block |
472 TOKEN_RAW_BEGIN: [ | 517 TOKEN_RAW_BEGIN: [ |
473 (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % ( | 518 (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % ( |
474 e(environment.block_start_string), | 519 e(environment.block_start_string), |
475 e(environment.block_start_string), | 520 block_prefix_re, |
476 e(environment.block_end_string), | 521 e(environment.block_end_string), |
477 e(environment.block_end_string), | 522 e(environment.block_end_string), |
478 block_suffix_re | 523 block_suffix_re |
479 )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'), | 524 )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'), |
480 (c('(.)'), (Failure('Missing end of raw directive'),), None) | 525 (c('(.)'), (Failure('Missing end of raw directive'),), None) |
481 ], | 526 ], |
482 # line statements | 527 # line statements |
483 TOKEN_LINESTATEMENT_BEGIN: [ | 528 TOKEN_LINESTATEMENT_BEGIN: [ |
484 (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop') | 529 (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop') |
485 ] + tag_rules, | 530 ] + tag_rules, |
486 # line comments | 531 # line comments |
487 TOKEN_LINECOMMENT_BEGIN: [ | 532 TOKEN_LINECOMMENT_BEGIN: [ |
488 (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT, | 533 (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT, |
489 TOKEN_LINECOMMENT_END), '#pop') | 534 TOKEN_LINECOMMENT_END), '#pop') |
490 ] | 535 ] |
491 } | 536 } |
492 | 537 |
493 def _normalize_newlines(self, value): | 538 def _normalize_newlines(self, value): |
494 """Called for strings and template data to normlize it to unicode.""" | 539 """Called for strings and template data to normalize it to unicode.""" |
495 return newline_re.sub(self.newline_sequence, value) | 540 return newline_re.sub(self.newline_sequence, value) |
496 | 541 |
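newline_re is defined earlier in this module as re.compile(r'(\r\n|\r|\n)'), so every newline flavor collapses to the configured sequence:

    import re

    newline_re = re.compile(r'(\r\n|\r|\n)')
    print(repr(newline_re.sub('\n', 'a\r\nb\rc\nd')))  # 'a\nb\nc\nd'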
497 def tokenize(self, source, name=None, filename=None, state=None): | 542 def tokenize(self, source, name=None, filename=None, state=None): |
498 """Calls tokeniter + tokenize and wraps it in a token stream. | 543 """Calls tokeniter + tokenize and wraps it in a token stream. |
499 """ | 544 """ |
500 stream = self.tokeniter(source, name, filename, state) | 545 stream = self.tokeniter(source, name, filename, state) |
501 return TokenStream(self.wrap(stream, name, filename), name, filename) | 546 return TokenStream(self.wrap(stream, name, filename), name, filename) |
502 | 547 |
503 def wrap(self, stream, name=None, filename=None): | 548 def wrap(self, stream, name=None, filename=None): |
504 """This is called with the stream as returned by `tokenize` and wraps | 549 """This is called with the stream as returned by `tokenize` and wraps |
(...skipping 14 matching lines...)
519 elif token == 'keyword': | 564 elif token == 'keyword': |
520 token = value | 565 token = value |
521 elif token == 'name': | 566 elif token == 'name': |
522 value = str(value) | 567 value = str(value) |
523 elif token == 'string': | 568 elif token == 'string': |
524 # try to unescape string | 569 # try to unescape string |
525 try: | 570 try: |
526 value = self._normalize_newlines(value[1:-1]) \ | 571 value = self._normalize_newlines(value[1:-1]) \ |
527 .encode('ascii', 'backslashreplace') \ | 572 .encode('ascii', 'backslashreplace') \ |
528 .decode('unicode-escape') | 573 .decode('unicode-escape') |
529 except Exception, e: | 574 except Exception as e: |
530 msg = str(e).split(':')[-1].strip() | 575 msg = str(e).split(':')[-1].strip() |
531 raise TemplateSyntaxError(msg, lineno, name, filename) | 576 raise TemplateSyntaxError(msg, lineno, name, filename) |
532 # if we can express it as bytestring (ascii only) | 577 # if we can express it as bytestring (ascii only) |
533 # we do that for support of semi broken APIs | 578 # we do that for support of semi broken APIs |
534 # as datetime.datetime.strftime. On python 3 this | 579 # as datetime.datetime.strftime. On python 3 this |
535 # call becomes a noop thanks to 2to3 | 580 # call becomes a noop thanks to 2to3 |
536 try: | 581 try: |
537 value = str(value) | 582 value = str(value) |
538 except UnicodeError: | 583 except UnicodeError: |
539 pass | 584 pass |
540 elif token == 'integer': | 585 elif token == 'integer': |
541 value = int(value) | 586 value = int(value) |
542 elif token == 'float': | 587 elif token == 'float': |
543 value = float(value) | 588 value = float(value) |
544 elif token == 'operator': | 589 elif token == 'operator': |
545 token = operators[value] | 590 token = operators[value] |
546 yield Token(lineno, token, value) | 591 yield Token(lineno, token, value) |
547 | 592 |
548 def tokeniter(self, source, name, filename=None, state=None): | 593 def tokeniter(self, source, name, filename=None, state=None): |
549 """This method tokenizes the text and returns the tokens in a | 594 """This method tokenizes the text and returns the tokens in a |
550 generator. Use this method if you just want to tokenize a template. | 595 generator. Use this method if you just want to tokenize a template. |
551 """ | 596 """ |
552 source = '\n'.join(unicode(source).splitlines()) | 597 source = text_type(source) |
| 598 lines = source.splitlines() |
| 599 if self.keep_trailing_newline and source: |
| 600 for newline in ('\r\n', '\r', '\n'): |
| 601 if source.endswith(newline): |
| 602 lines.append('') |
| 603 break |
| 604 source = '\n'.join(lines) |
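Net effect of the new keep_trailing_newline handling, sketched at the Environment level:

    from jinja2 import Environment

    default = Environment().from_string('hello\n')
    keeping = Environment(keep_trailing_newline=True).from_string('hello\n')
    print(repr(default.render()))  # 'hello'   -- one trailing newline dropped
    print(repr(keeping.render()))  # 'hello\n' -- preserved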
553 pos = 0 | 605 pos = 0 |
554 lineno = 1 | 606 lineno = 1 |
555 stack = ['root'] | 607 stack = ['root'] |
556 if state is not None and state != 'root': | 608 if state is not None and state != 'root': |
557 assert state in ('variable', 'block'), 'invalid state' | 609 assert state in ('variable', 'block'), 'invalid state' |
558 stack.append(state + '_begin') | 610 stack.append(state + '_begin') |
559 else: | 611 else: |
560 state = 'root' | 612 state = 'root' |
561 statetokens = self.rules[stack[-1]] | 613 statetokens = self.rules[stack[-1]] |
562 source_length = len(source) | 614 source_length = len(source) |
563 | 615 |
564 balancing_stack = [] | 616 balancing_stack = [] |
565 | 617 |
566 while 1: | 618 while 1: |
567 # tokenizer loop | 619 # tokenizer loop |
568 for regex, tokens, new_state in statetokens: | 620 for regex, tokens, new_state in statetokens: |
569 m = regex.match(source, pos) | 621 m = regex.match(source, pos) |
570 # if no match we try again with the next rule | 622 # if no match we try again with the next rule |
571 if m is None: | 623 if m is None: |
572 continue | 624 continue |
573 | 625 |
574 # we only match blocks and variables if brances / parentheses | 626 # we only match blocks and variables if braces / parentheses |
575 # are balanced. continue parsing with the lower rule which | 627 # are balanced. continue parsing with the lower rule which |
576 # is the operator rule. do this only if the end tags look | 628 # is the operator rule. do this only if the end tags look |
577 # like operators | 629 # like operators |
578 if balancing_stack and \ | 630 if balancing_stack and \ |
579 tokens in ('variable_end', 'block_end', | 631 tokens in ('variable_end', 'block_end', |
580 'linestatement_end'): | 632 'linestatement_end'): |
581 continue | 633 continue |
582 | 634 |
583 # tuples support more options | 635 # tuples support more options |
584 if isinstance(tokens, tuple): | 636 if isinstance(tokens, tuple): |
585 for idx, token in enumerate(tokens): | 637 for idx, token in enumerate(tokens): |
586 # failure group | 638 # failure group |
587 if token.__class__ is Failure: | 639 if token.__class__ is Failure: |
588 raise token(lineno, filename) | 640 raise token(lineno, filename) |
589 # bygroup is a bit more complex, in that case we | 641 # bygroup is a bit more complex, in that case we |
590 # yield for the current token the first named | 642 # yield for the current token the first named |
591 # group that matched | 643 # group that matched |
592 elif token == '#bygroup': | 644 elif token == '#bygroup': |
593 for key, value in m.groupdict().iteritems(): | 645 for key, value in iteritems(m.groupdict()): |
594 if value is not None: | 646 if value is not None: |
595 yield lineno, key, value | 647 yield lineno, key, value |
596 lineno += value.count('\n') | 648 lineno += value.count('\n') |
597 break | 649 break |
598 else: | 650 else: |
599 raise RuntimeError('%r wanted to resolve ' | 651 raise RuntimeError('%r wanted to resolve ' |
600 'the token dynamically' | 652 'the token dynamically' |
601 ' but no group matched' | 653 ' but no group matched' |
602 % regex) | 654 % regex) |
603 # normal group | 655 # normal group |
(...skipping 36 matching lines...)
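How '#bygroup' resolution works: the combined root regex names a group per delimiter, and groupdict() reveals which branch matched; a toy version with the default delimiters:

    import re

    root_re = re.compile(r'(.*?)(?:(?P<comment_begin>\{#)'
                         r'|(?P<block_begin>\{%)'
                         r'|(?P<variable_begin>\{\{))')
    m = root_re.match('hello {{ name }}')
    print([k for k, v in m.groupdict().items() if v is not None])
    # ['variable_begin']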
640 # in an infinite loop | 692 # in an infinite loop |
641 pos2 = m.end() | 693 pos2 = m.end() |
642 | 694 |
643 # handle state changes | 695 # handle state changes |
644 if new_state is not None: | 696 if new_state is not None: |
645 # remove the uppermost state | 697 # remove the uppermost state |
646 if new_state == '#pop': | 698 if new_state == '#pop': |
647 stack.pop() | 699 stack.pop() |
648 # resolve the new state by group checking | 700 # resolve the new state by group checking |
649 elif new_state == '#bygroup': | 701 elif new_state == '#bygroup': |
650 for key, value in m.groupdict().iteritems(): | 702 for key, value in iteritems(m.groupdict()): |
651 if value is not None: | 703 if value is not None: |
652 stack.append(key) | 704 stack.append(key) |
653 break | 705 break |
654 else: | 706 else: |
655 raise RuntimeError('%r wanted to resolve the ' | 707 raise RuntimeError('%r wanted to resolve the ' |
656 'new state dynamically but' | 708 'new state dynamically but' |
657 ' no group matched' % | 709 ' no group matched' % |
658 regex) | 710 regex) |
659 # direct state name given | 711 # direct state name given |
660 else: | 712 else: |
661 stack.append(new_state) | 713 stack.append(new_state) |
662 statetokens = self.rules[stack[-1]] | 714 statetokens = self.rules[stack[-1]] |
663 # we are still at the same position and no stack change. | 715 # we are still at the same position and no stack change. |
664 # this means a loop without break condition, avoid that and | 716 # this means a loop without break condition, avoid that and |
665 # raise error | 717 # raise error |
666 elif pos2 == pos: | 718 elif pos2 == pos: |
667 raise RuntimeError('%r yielded empty string without ' | 719 raise RuntimeError('%r yielded empty string without ' |
668 'stack change' % regex) | 720 'stack change' % regex) |
669 # publish new function and start again | 721 # publish new function and start again |
670 pos = pos2 | 722 pos = pos2 |
671 break | 723 break |
672 # if loop terminated without break we havn't found a single match | 724 # if loop terminated without break we haven't found a single match |
673 # either we are at the end of the file or we have a problem | 725 # either we are at the end of the file or we have a problem |
674 else: | 726 else: |
675 # end of text | 727 # end of text |
676 if pos >= source_length: | 728 if pos >= source_length: |
677 return | 729 return |
678 # something went wrong | 730 # something went wrong |
679 raise TemplateSyntaxError('unexpected char %r at %d' % | 731 raise TemplateSyntaxError('unexpected char %r at %d' % |
680 (source[pos], pos), lineno, | 732 (source[pos], pos), lineno, |
681 name, filename) | 733 name, filename) |