| OLD | NEW |
| 1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
| 2 """ | 2 """ |
| 3 jinja2.lexer | 3 jinja2.lexer |
| 4 ~~~~~~~~~~~~ | 4 ~~~~~~~~~~~~ |
| 5 | 5 |
| 6 This module implements a Jinja / Python combination lexer. The | 6 This module implements a Jinja / Python combination lexer. The |
| 7 `Lexer` class provided by this module is used to do some preprocessing | 7 `Lexer` class provided by this module is used to do some preprocessing |
| 8 for Jinja. | 8 for Jinja. |
| 9 | 9 |
| 10 On the one hand it filters out invalid operators like the bitshift | 10 On the one hand it filters out invalid operators like the bitshift |
| 11 operators we don't allow in templates. On the other hand it separates | 11 operators we don't allow in templates. On the other hand it separates |
| 12 template code and python code in expressions. | 12 template code and python code in expressions. |
| 13 | 13 |
| 14 :copyright: (c) 2010 by the Jinja Team. | 14 :copyright: (c) 2010 by the Jinja Team. |
| 15 :license: BSD, see LICENSE for more details. | 15 :license: BSD, see LICENSE for more details. |
| 16 """ | 16 """ |
| 17 import re | 17 import re |
| 18 |
| 18 from operator import itemgetter | 19 from operator import itemgetter |
| 19 from collections import deque | 20 from collections import deque |
| 20 from jinja2.exceptions import TemplateSyntaxError | 21 from jinja2.exceptions import TemplateSyntaxError |
| 21 from jinja2.utils import LRUCache, next | 22 from jinja2.utils import LRUCache |
| 23 from jinja2._compat import next, iteritems, implements_iterator, text_type, \ |
| 24 intern |
| 22 | 25 |
| 23 | 26 |
| 24 # cache for the lexers. Exists in order to be able to have multiple | 27 # cache for the lexers. Exists in order to be able to have multiple |
| 25 # environments with the same lexer | 28 # environments with the same lexer |
| 26 _lexer_cache = LRUCache(50) | 29 _lexer_cache = LRUCache(50) |
| 27 | 30 |
| 28 # static regular expressions | 31 # static regular expressions |
| 29 whitespace_re = re.compile(r'\s+', re.U) | 32 whitespace_re = re.compile(r'\s+', re.U) |
| 30 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" | 33 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" |
| 31 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) | 34 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) |
| (...skipping 87 matching lines...) |
| 119 '<': TOKEN_LT, | 122 '<': TOKEN_LT, |
| 120 '<=': TOKEN_LTEQ, | 123 '<=': TOKEN_LTEQ, |
| 121 '=': TOKEN_ASSIGN, | 124 '=': TOKEN_ASSIGN, |
| 122 '.': TOKEN_DOT, | 125 '.': TOKEN_DOT, |
| 123 ':': TOKEN_COLON, | 126 ':': TOKEN_COLON, |
| 124 '|': TOKEN_PIPE, | 127 '|': TOKEN_PIPE, |
| 125 ',': TOKEN_COMMA, | 128 ',': TOKEN_COMMA, |
| 126 ';': TOKEN_SEMICOLON | 129 ';': TOKEN_SEMICOLON |
| 127 } | 130 } |
| 128 | 131 |
| 129 reverse_operators = dict([(v, k) for k, v in operators.iteritems()]) | 132 reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) |
| 130 assert len(operators) == len(reverse_operators), 'operators dropped' | 133 assert len(operators) == len(reverse_operators), 'operators dropped' |
| 131 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in | 134 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in |
| 132 sorted(operators, key=lambda x: -len(x)))) | 135 sorted(operators, key=lambda x: -len(x)))) |
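Note: the longest-first sort above matters because Python's regex alternation is first-match, not longest-match; without it a two-character operator would be split in half. A minimal illustration over a hypothetical subset of the operator table:

    import re

    ops = ['*', '/', '**', '//']
    sorted_re = re.compile('|'.join(
        re.escape(x) for x in sorted(ops, key=lambda x: -len(x))))
    naive_re = re.compile('|'.join(re.escape(x) for x in ops))

    assert sorted_re.match('**').group() == '**'
    assert naive_re.match('**').group() == '*'   # '*' wins before '**' is tried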
| 133 | 136 |
| 134 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, | 137 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, |
| 135 TOKEN_COMMENT_END, TOKEN_WHITESPACE, | 138 TOKEN_COMMENT_END, TOKEN_WHITESPACE, |
| 136 TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN, | 139 TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN, |
| 137 TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT]) | 140 TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT]) |
| 138 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, | 141 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, |
| 139 TOKEN_COMMENT, TOKEN_LINECOMMENT]) | 142 TOKEN_COMMENT, TOKEN_LINECOMMENT]) |
| (...skipping 50 matching lines...) |
| 190 (len(environment.comment_start_string), 'comment', | 193 (len(environment.comment_start_string), 'comment', |
| 191 e(environment.comment_start_string)), | 194 e(environment.comment_start_string)), |
| 192 (len(environment.block_start_string), 'block', | 195 (len(environment.block_start_string), 'block', |
| 193 e(environment.block_start_string)), | 196 e(environment.block_start_string)), |
| 194 (len(environment.variable_start_string), 'variable', | 197 (len(environment.variable_start_string), 'variable', |
| 195 e(environment.variable_start_string)) | 198 e(environment.variable_start_string)) |
| 196 ] | 199 ] |
| 197 | 200 |
| 198 if environment.line_statement_prefix is not None: | 201 if environment.line_statement_prefix is not None: |
| 199 rules.append((len(environment.line_statement_prefix), 'linestatement', | 202 rules.append((len(environment.line_statement_prefix), 'linestatement', |
| 200 r'^\s*' + e(environment.line_statement_prefix))) | 203 r'^[ \t\v]*' + e(environment.line_statement_prefix))) |
| 201 if environment.line_comment_prefix is not None: | 204 if environment.line_comment_prefix is not None: |
| 202 rules.append((len(environment.line_comment_prefix), 'linecomment', | 205 rules.append((len(environment.line_comment_prefix), 'linecomment', |
| 203 r'(?:^|(?<=\S))[^\S\r\n]*' + | 206 r'(?:^|(?<=\S))[^\S\r\n]*' + |
| 204 e(environment.line_comment_prefix))) | 207 e(environment.line_comment_prefix))) |
| 205 | 208 |
| 206 return [x[1:] for x in sorted(rules, reverse=True)] | 209 return [x[1:] for x in sorted(rules, reverse=True)] |
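Note: compile_rules applies the same first-match logic to the delimiters themselves. Each rule carries its delimiter length, and the reverse sort puts longer delimiters first before the length is stripped off, so e.g. an ASP-style '<%=' variable marker is tried before '<%'. A sketch with hypothetical delimiters:

    rules = [(2, 'block', r'<\%'), (3, 'variable', r'<\%=')]
    assert [x[1:] for x in sorted(rules, reverse=True)] == \
        [('variable', r'<\%='), ('block', r'<\%')]

The ^\s* to ^[ \t\v]* change looks like more than cosmetics: these fragments later land in patterns compiled via the c() helper used below, which (as I read the lexer) is multiline, so \s* could swallow preceding blank lines into a line statement.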
| 207 | 210 |
| 208 | 211 |
| 209 class Failure(object): | 212 class Failure(object): |
| 210 """Class that raises a `TemplateSyntaxError` if called. | 213 """Class that raises a `TemplateSyntaxError` if called. |
| (...skipping 44 matching lines...) |
| 255 return False | 258 return False |
| 256 | 259 |
| 257 def __repr__(self): | 260 def __repr__(self): |
| 258 return 'Token(%r, %r, %r)' % ( | 261 return 'Token(%r, %r, %r)' % ( |
| 259 self.lineno, | 262 self.lineno, |
| 260 self.type, | 263 self.type, |
| 261 self.value | 264 self.value |
| 262 ) | 265 ) |
| 263 | 266 |
| 264 | 267 |
| 268 @implements_iterator |
| 265 class TokenStreamIterator(object): | 269 class TokenStreamIterator(object): |
| 266 """The iterator for tokenstreams. Iterate over the stream | 270 """The iterator for tokenstreams. Iterate over the stream |
| 267 until the eof token is reached. | 271 until the eof token is reached. |
| 268 """ | 272 """ |
| 269 | 273 |
| 270 def __init__(self, stream): | 274 def __init__(self, stream): |
| 271 self.stream = stream | 275 self.stream = stream |
| 272 | 276 |
| 273 def __iter__(self): | 277 def __iter__(self): |
| 274 return self | 278 return self |
| 275 | 279 |
| 276 def next(self): | 280 def __next__(self): |
| 277 token = self.stream.current | 281 token = self.stream.current |
| 278 if token.type is TOKEN_EOF: | 282 if token.type is TOKEN_EOF: |
| 279 self.stream.close() | 283 self.stream.close() |
| 280 raise StopIteration() | 284 raise StopIteration() |
| 281 next(self.stream) | 285 next(self.stream) |
| 282 return token | 286 return token |
| 283 | 287 |
| 284 | 288 |
| 289 @implements_iterator |
| 285 class TokenStream(object): | 290 class TokenStream(object): |
| 286 """A token stream is an iterable that yields :class:`Token`\s. The | 291 """A token stream is an iterable that yields :class:`Token`\s. The |
| 287 parser however does not iterate over it but calls :meth:`next` to go | 292 parser however does not iterate over it but calls :meth:`next` to go |
| 288 one token ahead. The current active token is stored as :attr:`current`. | 293 one token ahead. The current active token is stored as :attr:`current`. |
| 289 """ | 294 """ |
| 290 | 295 |
| 291 def __init__(self, generator, name, filename): | 296 def __init__(self, generator, name, filename): |
| 292 self._next = iter(generator).next | 297 self._iter = iter(generator) |
| 293 self._pushed = deque() | 298 self._pushed = deque() |
| 294 self.name = name | 299 self.name = name |
| 295 self.filename = filename | 300 self.filename = filename |
| 296 self.closed = False | 301 self.closed = False |
| 297 self.current = Token(1, TOKEN_INITIAL, '') | 302 self.current = Token(1, TOKEN_INITIAL, '') |
| 298 next(self) | 303 next(self) |
| 299 | 304 |
| 300 def __iter__(self): | 305 def __iter__(self): |
| 301 return TokenStreamIterator(self) | 306 return TokenStreamIterator(self) |
| 302 | 307 |
| 303 def __nonzero__(self): | 308 def __bool__(self): |
| 304 return bool(self._pushed) or self.current.type is not TOKEN_EOF | 309 return bool(self._pushed) or self.current.type is not TOKEN_EOF |
| 310 __nonzero__ = __bool__ # py2 |
| 305 | 311 |
| 306 eos = property(lambda x: not x, doc="Are we at the end of the stream?") | 312 eos = property(lambda x: not x, doc="Are we at the end of the stream?") |
| 307 | 313 |
| 308 def push(self, token): | 314 def push(self, token): |
| 309 """Push a token back to the stream.""" | 315 """Push a token back to the stream.""" |
| 310 self._pushed.append(token) | 316 self._pushed.append(token) |
| 311 | 317 |
| 312 def look(self): | 318 def look(self): |
| 313 """Look at the next token.""" | 319 """Look at the next token.""" |
| 314 old_token = next(self) | 320 old_token = next(self) |
| 315 result = self.current | 321 result = self.current |
| 316 self.push(result) | 322 self.push(result) |
| 317 self.current = old_token | 323 self.current = old_token |
| 318 return result | 324 return result |
| 319 | 325 |
| 320 def skip(self, n=1): | 326 def skip(self, n=1): |
| 321 """Got n tokens ahead.""" | 327 """Got n tokens ahead.""" |
| 322 for x in xrange(n): | 328 for x in range(n): |
| 323 next(self) | 329 next(self) |
| 324 | 330 |
| 325 def next_if(self, expr): | 331 def next_if(self, expr): |
| 326 """Perform the token test and return the token if it matched. | 332 """Perform the token test and return the token if it matched. |
| 327 Otherwise the return value is `None`. | 333 Otherwise the return value is `None`. |
| 328 """ | 334 """ |
| 329 if self.current.test(expr): | 335 if self.current.test(expr): |
| 330 return next(self) | 336 return next(self) |
| 331 | 337 |
| 332 def skip_if(self, expr): | 338 def skip_if(self, expr): |
| 333 """Like :meth:`next_if` but only returns `True` or `False`.""" | 339 """Like :meth:`next_if` but only returns `True` or `False`.""" |
| 334 return self.next_if(expr) is not None | 340 return self.next_if(expr) is not None |
| 335 | 341 |
| 336 def next(self): | 342 def __next__(self): |
| 337 """Go one token ahead and return the old one""" | 343 """Go one token ahead and return the old one""" |
| 338 rv = self.current | 344 rv = self.current |
| 339 if self._pushed: | 345 if self._pushed: |
| 340 self.current = self._pushed.popleft() | 346 self.current = self._pushed.popleft() |
| 341 elif self.current.type is not TOKEN_EOF: | 347 elif self.current.type is not TOKEN_EOF: |
| 342 try: | 348 try: |
| 343 self.current = self._next() | 349 self.current = next(self._iter) |
| 344 except StopIteration: | 350 except StopIteration: |
| 345 self.close() | 351 self.close() |
| 346 return rv | 352 return rv |
| 347 | 353 |
| 348 def close(self): | 354 def close(self): |
| 349 """Close the stream.""" | 355 """Close the stream.""" |
| 350 self.current = Token(self.current.lineno, TOKEN_EOF, '') | 356 self.current = Token(self.current.lineno, TOKEN_EOF, '') |
| 351 self._next = None | 357 self._iter = None |
| 352 self.closed = True | 358 self.closed = True |
| 353 | 359 |
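Note: the stream API above is easiest to see driven by hand. A minimal sketch that feeds a TokenStream directly from the classes in this hunk (token values chosen arbitrarily):

    from jinja2.lexer import Token, TokenStream, TOKEN_EOF

    tokens = [Token(1, 'name', 'foo'), Token(1, 'operator', '+'),
              Token(1, 'integer', 1), Token(1, TOKEN_EOF, '')]
    stream = TokenStream(iter(tokens), 'demo', None)

    assert stream.current.value == 'foo'    # primed on construction
    assert stream.look().value == '+'       # peek; current is restored
    assert next(stream).value == 'foo'      # advance, returning the old token
    assert stream.skip_if('operator:+')     # conditional consume
    assert stream.current.value == 1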
| 354 def expect(self, expr): | 360 def expect(self, expr): |
| 355 """Expect a given token type and return it. This accepts the same | 361 """Expect a given token type and return it. This accepts the same |
| 356 argument as :meth:`jinja2.lexer.Token.test`. | 362 argument as :meth:`jinja2.lexer.Token.test`. |
| 357 """ | 363 """ |
| 358 if not self.current.test(expr): | 364 if not self.current.test(expr): |
| 359 expr = describe_token_expr(expr) | 365 expr = describe_token_expr(expr) |
| 360 if self.current.type is TOKEN_EOF: | 366 if self.current.type is TOKEN_EOF: |
| 361 raise TemplateSyntaxError('unexpected end of template, ' | 367 raise TemplateSyntaxError('unexpected end of template, ' |
| (...skipping 14 matching lines...) |
| 376 """Return a lexer which is probably cached.""" | 382 """Return a lexer which is probably cached.""" |
| 377 key = (environment.block_start_string, | 383 key = (environment.block_start_string, |
| 378 environment.block_end_string, | 384 environment.block_end_string, |
| 379 environment.variable_start_string, | 385 environment.variable_start_string, |
| 380 environment.variable_end_string, | 386 environment.variable_end_string, |
| 381 environment.comment_start_string, | 387 environment.comment_start_string, |
| 382 environment.comment_end_string, | 388 environment.comment_end_string, |
| 383 environment.line_statement_prefix, | 389 environment.line_statement_prefix, |
| 384 environment.line_comment_prefix, | 390 environment.line_comment_prefix, |
| 385 environment.trim_blocks, | 391 environment.trim_blocks, |
| 386 environment.newline_sequence) | 392 environment.lstrip_blocks, |
| 393 environment.newline_sequence, |
| 394 environment.keep_trailing_newline) |
| 387 lexer = _lexer_cache.get(key) | 395 lexer = _lexer_cache.get(key) |
| 388 if lexer is None: | 396 if lexer is None: |
| 389 lexer = Lexer(environment) | 397 lexer = Lexer(environment) |
| 390 _lexer_cache[key] = lexer | 398 _lexer_cache[key] = lexer |
| 391 return lexer | 399 return lexer |
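Note: both new environment settings join the cache key here; forgetting one would hand a stale lexer to an environment with different syntax. Two environments share a lexer exactly when every keyed setting matches. A quick check, assuming default delimiters:

    from jinja2 import Environment

    a = Environment(trim_blocks=True)
    b = Environment(trim_blocks=True)
    c = Environment(trim_blocks=True, lstrip_blocks=True)

    assert a.lexer is b.lexer       # identical key -> cached instance
    assert a.lexer is not c.lexer   # lstrip_blocks is now part of the key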
| 392 | 400 |
| 393 | 401 |
| 394 class Lexer(object): | 402 class Lexer(object): |
| 395 """Class that implements a lexer for a given environment. Automatically | 403 """Class that implements a lexer for a given environment. Automatically |
| 396 created by the environment class, usually you don't have to do that. | 404 created by the environment class, usually you don't have to do that. |
| (...skipping 10 matching lines...) |
| 407 # lexing rules for tags | 415 # lexing rules for tags |
| 408 tag_rules = [ | 416 tag_rules = [ |
| 409 (whitespace_re, TOKEN_WHITESPACE, None), | 417 (whitespace_re, TOKEN_WHITESPACE, None), |
| 410 (float_re, TOKEN_FLOAT, None), | 418 (float_re, TOKEN_FLOAT, None), |
| 411 (integer_re, TOKEN_INTEGER, None), | 419 (integer_re, TOKEN_INTEGER, None), |
| 412 (name_re, TOKEN_NAME, None), | 420 (name_re, TOKEN_NAME, None), |
| 413 (string_re, TOKEN_STRING, None), | 421 (string_re, TOKEN_STRING, None), |
| 414 (operator_re, TOKEN_OPERATOR, None) | 422 (operator_re, TOKEN_OPERATOR, None) |
| 415 ] | 423 ] |
| 416 | 424 |
| 417 # assamble the root lexing rule. because "|" is ungreedy | 425 # assemble the root lexing rule. because "|" is ungreedy |
| 418 # we have to sort by length so that the lexer continues working | 426 # we have to sort by length so that the lexer continues working |
| 419 # as expected when we have parsing rules like <% for block and | 427 # as expected when we have parsing rules like <% for block and |
| 420 # <%= for variables. (if someone wants asp like syntax) | 428 # <%= for variables. (if someone wants asp like syntax) |
| 421 # variables are just part of the rules if variable processing | 429 # variables are just part of the rules if variable processing |
| 422 # is required. | 430 # is required. |
| 423 root_tag_rules = compile_rules(environment) | 431 root_tag_rules = compile_rules(environment) |
| 424 | 432 |
| 425 # block suffix if trimming is enabled | 433 # block suffix if trimming is enabled |
| 426 block_suffix_re = environment.trim_blocks and '\\n?' or '' | 434 block_suffix_re = environment.trim_blocks and '\\n?' or '' |
| 427 | 435 |
| 436 # strip leading spaces if lstrip_blocks is enabled |
| 437 prefix_re = {} |
| 438 if environment.lstrip_blocks: |
| 439 # use '{%+' to manually disable lstrip_blocks behavior |
| 440 no_lstrip_re = e('+') |
| 441 # detect overlap between block and variable or comment strings |
| 442 block_diff = c(r'^%s(.*)' % e(environment.block_start_string)) |
| 443 # make sure we don't mistake a block for a variable or a comment |
| 444 m = block_diff.match(environment.comment_start_string) |
| 445 no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' |
| 446 m = block_diff.match(environment.variable_start_string) |
| 447 no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' |
| 448 |
| 449 # detect overlap between comment and variable strings |
| 450 comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string)) |
| 451 m = comment_diff.match(environment.variable_start_string) |
| 452 no_variable_re = m and r'(?!%s)' % e(m.group(1)) or '' |
| 453 |
| 454 lstrip_re = r'^[ \t]*' |
| 455 block_prefix_re = r'%s%s(?!%s)|%s\+?' % ( |
| 456 lstrip_re, |
| 457 e(environment.block_start_string), |
| 458 no_lstrip_re, |
| 459 e(environment.block_start_string), |
| 460 ) |
| 461 comment_prefix_re = r'%s%s%s|%s\+?' % ( |
| 462 lstrip_re, |
| 463 e(environment.comment_start_string), |
| 464 no_variable_re, |
| 465 e(environment.comment_start_string), |
| 466 ) |
| 467 prefix_re['block'] = block_prefix_re |
| 468 prefix_re['comment'] = comment_prefix_re |
| 469 else: |
| 470 block_prefix_re = '%s' % e(environment.block_start_string) |
| 471 |
| 428 self.newline_sequence = environment.newline_sequence | 472 self.newline_sequence = environment.newline_sequence |
| 473 self.keep_trailing_newline = environment.keep_trailing_newline |
| 429 | 474 |
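Note on the prefix regexes assembled above: with lstrip_blocks enabled, whitespace from the start of a line up to a block or comment tag is consumed by the tag itself, while a '+' immediately after the delimiter opts a single tag back out; the overlap checks keep a '{{' or '{#' from being mistaken for a stripped '{%' when delimiters share a prefix. Behaviorally (the documented semantics, not a test of these exact patterns):

    from jinja2 import Environment

    env = Environment(lstrip_blocks=True, trim_blocks=True)
    tmpl = env.from_string('    {% if True %}\n    hi\n    {% endif %}\n')
    assert tmpl.render() == '    hi\n'

    # '{%+' disables the stripping for that one tag
    keep = Environment(lstrip_blocks=True)
    assert keep.from_string('    {%+ if True %}x{% endif %}').render() == '    x'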
| 430 # global lexing rules | 475 # global lexing rules |
| 431 self.rules = { | 476 self.rules = { |
| 432 'root': [ | 477 'root': [ |
| 433 # directives | 478 # directives |
| 434 (c('(.*?)(?:%s)' % '|'.join( | 479 (c('(.*?)(?:%s)' % '|'.join( |
| 435 [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % ( | 480 [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % ( |
| 436 e(environment.block_start_string), | 481 e(environment.block_start_string), |
| 437 e(environment.block_start_string), | 482 block_prefix_re, |
| 438 e(environment.block_end_string), | 483 e(environment.block_end_string), |
| 439 e(environment.block_end_string) | 484 e(environment.block_end_string) |
| 440 )] + [ | 485 )] + [ |
| 441 r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, r) | 486 r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r)) |
| 442 for n, r in root_tag_rules | 487 for n, r in root_tag_rules |
| 443 ])), (TOKEN_DATA, '#bygroup'), '#bygroup'), | 488 ])), (TOKEN_DATA, '#bygroup'), '#bygroup'), |
| 444 # data | 489 # data |
| 445 (c('.+'), TOKEN_DATA, None) | 490 (c('.+'), TOKEN_DATA, None) |
| 446 ], | 491 ], |
| 447 # comments | 492 # comments |
| 448 TOKEN_COMMENT_BEGIN: [ | 493 TOKEN_COMMENT_BEGIN: [ |
| 449 (c(r'(.*?)((?:\-%s\s*|%s)%s)' % ( | 494 (c(r'(.*?)((?:\-%s\s*|%s)%s)' % ( |
| 450 e(environment.comment_end_string), | 495 e(environment.comment_end_string), |
| 451 e(environment.comment_end_string), | 496 e(environment.comment_end_string), |
| (...skipping 13 matching lines...) |
| 465 TOKEN_VARIABLE_BEGIN: [ | 510 TOKEN_VARIABLE_BEGIN: [ |
| 466 (c('\-%s\s*|%s' % ( | 511 (c('\-%s\s*|%s' % ( |
| 467 e(environment.variable_end_string), | 512 e(environment.variable_end_string), |
| 468 e(environment.variable_end_string) | 513 e(environment.variable_end_string) |
| 469 )), TOKEN_VARIABLE_END, '#pop') | 514 )), TOKEN_VARIABLE_END, '#pop') |
| 470 ] + tag_rules, | 515 ] + tag_rules, |
| 471 # raw block | 516 # raw block |
| 472 TOKEN_RAW_BEGIN: [ | 517 TOKEN_RAW_BEGIN: [ |
| 473 (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % ( | 518 (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % ( |
| 474 e(environment.block_start_string), | 519 e(environment.block_start_string), |
| 475 e(environment.block_start_string), | 520 block_prefix_re, |
| 476 e(environment.block_end_string), | 521 e(environment.block_end_string), |
| 477 e(environment.block_end_string), | 522 e(environment.block_end_string), |
| 478 block_suffix_re | 523 block_suffix_re |
| 479 )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'), | 524 )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'), |
| 480 (c('(.)'), (Failure('Missing end of raw directive'),), None) | 525 (c('(.)'), (Failure('Missing end of raw directive'),), None) |
| 481 ], | 526 ], |
| 482 # line statements | 527 # line statements |
| 483 TOKEN_LINESTATEMENT_BEGIN: [ | 528 TOKEN_LINESTATEMENT_BEGIN: [ |
| 484 (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop') | 529 (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop') |
| 485 ] + tag_rules, | 530 ] + tag_rules, |
| 486 # line comments | 531 # line comments |
| 487 TOKEN_LINECOMMENT_BEGIN: [ | 532 TOKEN_LINECOMMENT_BEGIN: [ |
| 488 (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT, | 533 (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT, |
| 489 TOKEN_LINECOMMENT_END), '#pop') | 534 TOKEN_LINECOMMENT_END), '#pop') |
| 490 ] | 535 ] |
| 491 } | 536 } |
| 492 | 537 |
| 493 def _normalize_newlines(self, value): | 538 def _normalize_newlines(self, value): |
| 494 """Called for strings and template data to normlize it to unicode.""" | 539 """Called for strings and template data to normalize it to unicode.""" |
| 495 return newline_re.sub(self.newline_sequence, value) | 540 return newline_re.sub(self.newline_sequence, value) |
| 496 | 541 |
| 497 def tokenize(self, source, name=None, filename=None, state=None): | 542 def tokenize(self, source, name=None, filename=None, state=None): |
| 498 """Calls tokeniter + tokenize and wraps it in a token stream. | 543 """Calls tokeniter + tokenize and wraps it in a token stream. |
| 499 """ | 544 """ |
| 500 stream = self.tokeniter(source, name, filename, state) | 545 stream = self.tokeniter(source, name, filename, state) |
| 501 return TokenStream(self.wrap(stream, name, filename), name, filename) | 546 return TokenStream(self.wrap(stream, name, filename), name, filename) |
| 502 | 547 |
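Note: end to end, tokeniter produces raw (lineno, type, value) tuples, wrap drops whitespace/comment noise and maps operator text onto named token types, and TokenStream adds the lookahead layer. A sketch, assuming a default environment and its lexer attribute:

    from jinja2 import Environment

    env = Environment()
    stream = env.lexer.tokenize('{{ user.name }}', name='demo')
    assert [t.type for t in stream] == \
        ['variable_begin', 'name', 'dot', 'name', 'variable_end']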
| 503 def wrap(self, stream, name=None, filename=None): | 548 def wrap(self, stream, name=None, filename=None): |
| 504 """This is called with the stream as returned by `tokenize` and wraps | 549 """This is called with the stream as returned by `tokenize` and wraps |
| (...skipping 14 matching lines...) |
| 519 elif token == 'keyword': | 564 elif token == 'keyword': |
| 520 token = value | 565 token = value |
| 521 elif token == 'name': | 566 elif token == 'name': |
| 522 value = str(value) | 567 value = str(value) |
| 523 elif token == 'string': | 568 elif token == 'string': |
| 524 # try to unescape string | 569 # try to unescape string |
| 525 try: | 570 try: |
| 526 value = self._normalize_newlines(value[1:-1]) \ | 571 value = self._normalize_newlines(value[1:-1]) \ |
| 527 .encode('ascii', 'backslashreplace') \ | 572 .encode('ascii', 'backslashreplace') \ |
| 528 .decode('unicode-escape') | 573 .decode('unicode-escape') |
| 529 except Exception, e: | 574 except Exception as e: |
| 530 msg = str(e).split(':')[-1].strip() | 575 msg = str(e).split(':')[-1].strip() |
| 531 raise TemplateSyntaxError(msg, lineno, name, filename) | 576 raise TemplateSyntaxError(msg, lineno, name, filename) |
| 532 # if we can express it as bytestring (ascii only) | 577 # if we can express it as bytestring (ascii only) |
| 533 # we do that for support of semi broken APIs | 578 # we do that for support of semi broken APIs |
| 534 # as datetime.datetime.strftime. On python 3 this | 579 # as datetime.datetime.strftime. On python 3 this |
| 535 # call becomes a noop thanks to 2to3 | 580 # call becomes a noop thanks to 2to3 |
| 536 try: | 581 try: |
| 537 value = str(value) | 582 value = str(value) |
| 538 except UnicodeError: | 583 except UnicodeError: |
| 539 pass | 584 pass |
| 540 elif token == 'integer': | 585 elif token == 'integer': |
| 541 value = int(value) | 586 value = int(value) |
| 542 elif token == 'float': | 587 elif token == 'float': |
| 543 value = float(value) | 588 value = float(value) |
| 544 elif token == 'operator': | 589 elif token == 'operator': |
| 545 token = operators[value] | 590 token = operators[value] |
| 546 yield Token(lineno, token, value) | 591 yield Token(lineno, token, value) |
| 547 | 592 |
| 548 def tokeniter(self, source, name, filename=None, state=None): | 593 def tokeniter(self, source, name, filename=None, state=None): |
| 549 """This method tokenizes the text and returns the tokens in a | 594 """This method tokenizes the text and returns the tokens in a |
| 550 generator. Use this method if you just want to tokenize a template. | 595 generator. Use this method if you just want to tokenize a template. |
| 551 """ | 596 """ |
| 552 source = '\n'.join(unicode(source).splitlines()) | 597 source = text_type(source) |
| 598 lines = source.splitlines() |
| 599 if self.keep_trailing_newline and source: |
| 600 for newline in ('\r\n', '\r', '\n'): |
| 601 if source.endswith(newline): |
| 602 lines.append('') |
| 603 break |
| 604 source = '\n'.join(lines) |
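Note: the keep_trailing_newline branch above closes a long-standing papercut: splitlines() always ate one final newline, so a template could never round-trip a file verbatim. With the new flag (default off, preserving the old behavior):

    from jinja2 import Environment

    assert Environment().from_string('hi\n').render() == 'hi'
    assert Environment(keep_trailing_newline=True) \
        .from_string('hi\n').render() == 'hi\n'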
| 553 pos = 0 | 605 pos = 0 |
| 554 lineno = 1 | 606 lineno = 1 |
| 555 stack = ['root'] | 607 stack = ['root'] |
| 556 if state is not None and state != 'root': | 608 if state is not None and state != 'root': |
| 557 assert state in ('variable', 'block'), 'invalid state' | 609 assert state in ('variable', 'block'), 'invalid state' |
| 558 stack.append(state + '_begin') | 610 stack.append(state + '_begin') |
| 559 else: | 611 else: |
| 560 state = 'root' | 612 state = 'root' |
| 561 statetokens = self.rules[stack[-1]] | 613 statetokens = self.rules[stack[-1]] |
| 562 source_length = len(source) | 614 source_length = len(source) |
| 563 | 615 |
| 564 balancing_stack = [] | 616 balancing_stack = [] |
| 565 | 617 |
| 566 while 1: | 618 while 1: |
| 567 # tokenizer loop | 619 # tokenizer loop |
| 568 for regex, tokens, new_state in statetokens: | 620 for regex, tokens, new_state in statetokens: |
| 569 m = regex.match(source, pos) | 621 m = regex.match(source, pos) |
| 570 # if no match we try again with the next rule | 622 # if no match we try again with the next rule |
| 571 if m is None: | 623 if m is None: |
| 572 continue | 624 continue |
| 573 | 625 |
| 574 # we only match blocks and variables if brances / parentheses | 626 # we only match blocks and variables if braces / parentheses |
| 575 # are balanced. continue parsing with the lower rule which | 627 # are balanced. continue parsing with the lower rule which |
| 576 # is the operator rule. do this only if the end tags look | 628 # is the operator rule. do this only if the end tags look |
| 577 # like operators | 629 # like operators |
| 578 if balancing_stack and \ | 630 if balancing_stack and \ |
| 579 tokens in ('variable_end', 'block_end', | 631 tokens in ('variable_end', 'block_end', |
| 580 'linestatement_end'): | 632 'linestatement_end'): |
| 581 continue | 633 continue |
| 582 | 634 |
| 583 # tuples support more options | 635 # tuples support more options |
| 584 if isinstance(tokens, tuple): | 636 if isinstance(tokens, tuple): |
| 585 for idx, token in enumerate(tokens): | 637 for idx, token in enumerate(tokens): |
| 586 # failure group | 638 # failure group |
| 587 if token.__class__ is Failure: | 639 if token.__class__ is Failure: |
| 588 raise token(lineno, filename) | 640 raise token(lineno, filename) |
| 589 # bygroup is a bit more complex, in that case we | 641 # bygroup is a bit more complex, in that case we |
| 590 # yield for the current token the first named | 642 # yield for the current token the first named |
| 591 # group that matched | 643 # group that matched |
| 592 elif token == '#bygroup': | 644 elif token == '#bygroup': |
| 593 for key, value in m.groupdict().iteritems(): | 645 for key, value in iteritems(m.groupdict()): |
| 594 if value is not None: | 646 if value is not None: |
| 595 yield lineno, key, value | 647 yield lineno, key, value |
| 596 lineno += value.count('\n') | 648 lineno += value.count('\n') |
| 597 break | 649 break |
| 598 else: | 650 else: |
| 599 raise RuntimeError('%r wanted to resolve ' | 651 raise RuntimeError('%r wanted to resolve ' |
| 600 'the token dynamically' | 652 'the token dynamically' |
| 601 ' but no group matched' | 653 ' but no group matched' |
| 602 % regex) | 654 % regex) |
| 603 # normal group | 655 # normal group |
| (...skipping 36 matching lines...) |
| 640 # in an infinite loop | 692 # in an infinite loop |
| 641 pos2 = m.end() | 693 pos2 = m.end() |
| 642 | 694 |
| 643 # handle state changes | 695 # handle state changes |
| 644 if new_state is not None: | 696 if new_state is not None: |
| 645 # remove the uppermost state | 697 # remove the uppermost state |
| 646 if new_state == '#pop': | 698 if new_state == '#pop': |
| 647 stack.pop() | 699 stack.pop() |
| 648 # resolve the new state by group checking | 700 # resolve the new state by group checking |
| 649 elif new_state == '#bygroup': | 701 elif new_state == '#bygroup': |
| 650 for key, value in m.groupdict().iteritems(): | 702 for key, value in iteritems(m.groupdict()): |
| 651 if value is not None: | 703 if value is not None: |
| 652 stack.append(key) | 704 stack.append(key) |
| 653 break | 705 break |
| 654 else: | 706 else: |
| 655 raise RuntimeError('%r wanted to resolve the ' | 707 raise RuntimeError('%r wanted to resolve the ' |
| 656 'new state dynamically but' | 708 'new state dynamically but' |
| 657 ' no group matched' % | 709 ' no group matched' % |
| 658 regex) | 710 regex) |
| 659 # direct state name given | 711 # direct state name given |
| 660 else: | 712 else: |
| 661 stack.append(new_state) | 713 stack.append(new_state) |
| 662 statetokens = self.rules[stack[-1]] | 714 statetokens = self.rules[stack[-1]] |
| 663 # we are still at the same position and no stack change. | 715 # we are still at the same position and no stack change. |
| 664 # this means a loop without break condition, avoid that and | 716 # this means a loop without break condition, avoid that and |
| 665 # raise error | 717 # raise error |
| 666 elif pos2 == pos: | 718 elif pos2 == pos: |
| 667 raise RuntimeError('%r yielded empty string without ' | 719 raise RuntimeError('%r yielded empty string without ' |
| 668 'stack change' % regex) | 720 'stack change' % regex) |
| 669 # publish new function and start again | 721 # publish new function and start again |
| 670 pos = pos2 | 722 pos = pos2 |
| 671 break | 723 break |
| 672 # if loop terminated without break we havn't found a single match | 724 # if loop terminated without break we haven't found a single match |
| 673 # either we are at the end of the file or we have a problem | 725 # either we are at the end of the file or we have a problem |
| 674 else: | 726 else: |
| 675 # end of text | 727 # end of text |
| 676 if pos >= source_length: | 728 if pos >= source_length: |
| 677 return | 729 return |
| 678 # something went wrong | 730 # something went wrong |
| 679 raise TemplateSyntaxError('unexpected char %r at %d' % | 731 raise TemplateSyntaxError('unexpected char %r at %d' % |
| 680 (source[pos], pos), lineno, | 732 (source[pos], pos), lineno, |
| 681 name, filename) | 733 name, filename) |
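A closing note on the state argument threaded through tokenize and tokeniter: seeding the stack with 'variable_begin' or 'block_begin' lets a caller lex a bare expression as though it already sat between the corresponding delimiters. A sketch, assuming a default environment:

    from jinja2 import Environment

    env = Environment()
    stream = env.lexer.tokenize('foo + 1', state='variable')
    assert [t.type for t in stream] == ['name', 'add', 'integer']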