| OLD | NEW |
| 1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
| 2 """ | 2 """ |
| 3 jinja2.lexer | 3 jinja2.lexer |
| 4 ~~~~~~~~~~~~ | 4 ~~~~~~~~~~~~ |
| 5 | 5 |
| 6 This module implements a Jinja / Python combination lexer. The | 6 This module implements a Jinja / Python combination lexer. The |
| 7 `Lexer` class provided by this module is used to do some preprocessing | 7 `Lexer` class provided by this module is used to do some preprocessing |
| 8 for Jinja. | 8 for Jinja. |
| 9 | 9 |
| 10 On the one hand it filters out invalid operators like the bitshift | 10 On the one hand it filters out invalid operators like the bitshift |
| 11 operators we don't allow in templates. On the other hand it separates | 11 operators we don't allow in templates. On the other hand it separates |
| 12 template code and python code in expressions. | 12 template code and python code in expressions. |
| 13 | 13 |
| 14 :copyright: (c) 2010 by the Jinja Team. | 14 :copyright: (c) 2010 by the Jinja Team. |
| 15 :license: BSD, see LICENSE for more details. | 15 :license: BSD, see LICENSE for more details. |
| 16 """ | 16 """ |
| 17 import re | 17 import re |
| 18 | 18 |
| 19 from operator import itemgetter | 19 from operator import itemgetter |
| 20 from collections import deque | 20 from collections import deque |
| 21 from jinja2.exceptions import TemplateSyntaxError | 21 from jinja2.exceptions import TemplateSyntaxError |
| 22 from jinja2.utils import LRUCache | 22 from jinja2.utils import LRUCache |
| 23 from jinja2._compat import next, iteritems, implements_iterator, text_type, \ | 23 from jinja2._compat import iteritems, implements_iterator, text_type, \ |
| 24 intern | 24 intern, PY2 |
| 25 | 25 |
| 26 | 26 |
| 27 # cache for the lexers. Exists in order to be able to have multiple | 27 # cache for the lexers. Exists in order to be able to have multiple |
| 28 # environments with the same lexer | 28 # environments with the same lexer |
| 29 _lexer_cache = LRUCache(50) | 29 _lexer_cache = LRUCache(50) |
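The cache lookup itself is outside this hunk; as a rough sketch of the usual pattern (the real `get_lexer` keys on every environment option that affects lexing, the two fields below are just examples):

```python
# Illustrative only: key the cache on the environment's lexer-relevant
# settings so environments with identical settings share one Lexer.
def get_lexer(environment):
    key = (environment.block_start_string,
           environment.block_end_string)  # ...plus the remaining options
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer
```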
| 30 | 30 |
| 31 # static regular expressions | 31 # static regular expressions |
| 32 whitespace_re = re.compile(r'\s+', re.U) | 32 whitespace_re = re.compile(r'\s+', re.U) |
| 33 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" | 33 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" |
| 34 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) | 34 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) |
| (...skipping 94 matching lines...) |
| 129 ';': TOKEN_SEMICOLON | 129 ';': TOKEN_SEMICOLON |
| 130 } | 130 } |
| 131 | 131 |
| 132 reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) | 132 reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) |
| 133 assert len(operators) == len(reverse_operators), 'operators dropped' | 133 assert len(operators) == len(reverse_operators), 'operators dropped' |
| 134 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in | 134 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in |
| 135 sorted(operators, key=lambda x: -len(x)))) | 135 sorted(operators, key=lambda x: -len(x)))) |
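The sort by descending length is what makes the alternation longest-match-first. A small self-contained demonstration (the operator table below is a made-up subset, not the real one):

```python
import re

# Hypothetical subset of the operators table, for illustration only.
operators = {'+': 'add', '=': 'assign', '==': 'eq', '/': 'div', '//': 'floordiv'}

operator_re = re.compile('(%s)' % '|'.join(
    re.escape(x) for x in sorted(operators, key=lambda x: -len(x))))

# Without the length sort, '=' could win and '==' would lex as two tokens.
assert operator_re.match('== 1').group() == '=='
assert operator_re.match('// 2').group() == '//'
```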
| 136 | 136 |
| 137 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, | 137 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, |
| 138 TOKEN_COMMENT_END, TOKEN_WHITESPACE, | 138 TOKEN_COMMENT_END, TOKEN_WHITESPACE, |
| 139 TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN, | 139 TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END, |
| 140 TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT]) | 140 TOKEN_LINECOMMENT]) |
| 141 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, | 141 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, |
| 142 TOKEN_COMMENT, TOKEN_LINECOMMENT]) | 142 TOKEN_COMMENT, TOKEN_LINECOMMENT]) |
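These two sets drive post-processing of the raw token stream: `ignored_tokens` never surface at all, while `ignore_if_empty` tokens are dropped only when their text is empty. Roughly (an illustrative filter, not the actual `Lexer.tokenize` body):

```python
# Sketch of how the two sets are consulted while re-emitting tokens.
def filter_tokens(stream):
    for lineno, token, value in stream:
        if token in ignored_tokens:
            continue                          # comments/whitespace vanish
        if token in ignore_if_empty and not value:
            continue                          # e.g. empty data between tags
        yield lineno, token, value
```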
| 143 | 143 |
| 144 | 144 |
| 145 def _describe_token_type(token_type): | 145 def _describe_token_type(token_type): |
| 146 if token_type in reverse_operators: | 146 if token_type in reverse_operators: |
| 147 return reverse_operators[token_type] | 147 return reverse_operators[token_type] |
| 148 return { | 148 return { |
| 149 TOKEN_COMMENT_BEGIN: 'begin of comment', | 149 TOKEN_COMMENT_BEGIN: 'begin of comment', |
| 150 TOKEN_COMMENT_END: 'end of comment', | 150 TOKEN_COMMENT_END: 'end of comment', |
| (...skipping 420 matching lines...) |
| 571 value = self._normalize_newlines(value[1:-1]) \ | 571 value = self._normalize_newlines(value[1:-1]) \ |
| 572 .encode('ascii', 'backslashreplace') \ | 572 .encode('ascii', 'backslashreplace') \ |
| 573 .decode('unicode-escape') | 573 .decode('unicode-escape') |
| 574 except Exception as e: | 574 except Exception as e: |
| 575 msg = str(e).split(':')[-1].strip() | 575 msg = str(e).split(':')[-1].strip() |
| 576 raise TemplateSyntaxError(msg, lineno, name, filename) | 576 raise TemplateSyntaxError(msg, lineno, name, filename) |
| 577 # if we can express it as bytestring (ascii only) | 577 # if we can express it as bytestring (ascii only) |
| 578 # we do that for support of semi broken APIs | 578 # we do that for support of semi broken APIs |
| 579 # as datetime.datetime.strftime. On python 3 this | 579 # as datetime.datetime.strftime. On python 3 this |
| 580 # call becomes a noop thanks to 2to3 | 580 # call becomes a noop thanks to 2to3 |
| 581 try: | 581 if PY2: |
| 582 value = str(value) | 582 try: |
| 583 except UnicodeError: | 583 value = value.encode('ascii') |
| 584 pass | 584 except UnicodeError: |
| | 585 pass |
| 585 elif token == 'integer': | 586 elif token == 'integer': |
| 586 value = int(value) | 587 value = int(value) |
| 587 elif token == 'float': | 588 elif token == 'float': |
| 588 value = float(value) | 589 value = float(value) |
| 589 elif token == 'operator': | 590 elif token == 'operator': |
| 590 token = operators[value] | 591 token = operators[value] |
| 591 yield Token(lineno, token, value) | 592 yield Token(lineno, token, value) |
| 592 | 593 |
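The new hunk replaces the implicit `str()` coercion, which relied on a 2to3 rewrite to become a no-op on Python 3, with an explicit Python-2-only `encode('ascii')`. Its behavior, sketched standalone (`PY2` below stands in for `jinja2._compat.PY2`):

```python
import sys

PY2 = sys.version_info[0] == 2  # stand-in for jinja2._compat.PY2

def coerce_string(value):
    if PY2:
        try:
            # ASCII-only text becomes a native bytestring so that
            # byte-expecting APIs such as datetime.strftime keep working.
            value = value.encode('ascii')
        except UnicodeError:
            pass  # non-ASCII text stays unicode
    return value  # on Python 3 the value is always the unmodified str
```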
| 593 def tokeniter(self, source, name, filename=None, state=None): | 594 def tokeniter(self, source, name, filename=None, state=None): |
| 594 """This method tokenizes the text and returns the tokens in a | 595 """This method tokenizes the text and returns the tokens in a |
| (...skipping 129 matching lines...) |
| 724 # if loop terminated without break we haven't found a single match | 725 # if loop terminated without break we haven't found a single match |
| 725 # either we are at the end of the file or we have a problem | 726 # either we are at the end of the file or we have a problem |
| 726 else: | 727 else: |
| 727 # end of text | 728 # end of text |
| 728 if pos >= source_length: | 729 if pos >= source_length: |
| 729 return | 730 return |
| 730 # something went wrong | 731 # something went wrong |
| 731 raise TemplateSyntaxError('unexpected char %r at %d' % | 732 raise TemplateSyntaxError('unexpected char %r at %d' % |
| 732 (source[pos], pos), lineno, | 733 (source[pos], pos), lineno, |
| 733 name, filename) | 734 name, filename) |
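The hunk above is the `else` clause of the scanning loop's inner `for`: it runs only when no rule matched at the current position. Reduced to a runnable skeleton (names are stand-ins for the real `tokeniter` locals; the end-of-source check lives in the `while` condition here, where the original keeps it inside the `else`):

```python
import re
from jinja2.exceptions import TemplateSyntaxError

def scan(source, rules, lineno=1, name=None, filename=None):
    pos = 0
    while pos < len(source):
        for regex in rules:
            m = regex.match(source, pos)
            if m is not None:
                pos = m.end()
                break  # a rule matched; resume scanning at the new pos
        else:
            # no rule matched and we are not at the end of the input
            raise TemplateSyntaxError('unexpected char %r at %d' %
                                      (source[pos], pos), lineno,
                                      name, filename)

scan('ab', [re.compile('a'), re.compile('b')])  # consumes the input cleanly
```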