OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 """ | 2 """ |
3 jinja2.lexer | 3 jinja2.lexer |
4 ~~~~~~~~~~~~ | 4 ~~~~~~~~~~~~ |
5 | 5 |
6 This module implements a Jinja / Python combination lexer. The | 6 This module implements a Jinja / Python combination lexer. The |
7 `Lexer` class provided by this module is used to do some preprocessing | 7 `Lexer` class provided by this module is used to do some preprocessing |
8 for Jinja. | 8 for Jinja. |
9 | 9 |
10 On the one hand it filters out invalid operators like the bitshift | 10 On the one hand it filters out invalid operators like the bitshift |
11 operators we don't allow in templates. On the other hand it separates | 11 operators we don't allow in templates. On the other hand it separates |
12 template code and python code in expressions. | 12 template code and python code in expressions. |
13 | 13 |
14 :copyright: (c) 2010 by the Jinja Team. | 14 :copyright: (c) 2010 by the Jinja Team. |
15 :license: BSD, see LICENSE for more details. | 15 :license: BSD, see LICENSE for more details. |
16 """ | 16 """ |
17 import re | 17 import re |
18 | 18 |
19 from operator import itemgetter | 19 from operator import itemgetter |
20 from collections import deque | 20 from collections import deque |
21 from jinja2.exceptions import TemplateSyntaxError | 21 from jinja2.exceptions import TemplateSyntaxError |
22 from jinja2.utils import LRUCache | 22 from jinja2.utils import LRUCache |
23 from jinja2._compat import next, iteritems, implements_iterator, text_type, \ | 23 from jinja2._compat import iteritems, implements_iterator, text_type, \ |
24 intern | 24 intern, PY2 |
25 | 25 |
26 | 26 |
27 # cache for the lexers. Exists in order to be able to have multiple | 27 # cache for the lexers. Exists in order to be able to have multiple |
28 # environments with the same lexer | 28 # environments with the same lexer |
29 _lexer_cache = LRUCache(50) | 29 _lexer_cache = LRUCache(50) |
30 | 30 |
31 # static regular expressions | 31 # static regular expressions |
32 whitespace_re = re.compile(r'\s+', re.U) | 32 whitespace_re = re.compile(r'\s+', re.U) |
33 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" | 33 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" |
34 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) | 34 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
129 ';': TOKEN_SEMICOLON | 129 ';': TOKEN_SEMICOLON |
130 } | 130 } |
131 | 131 |
132 reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) | 132 reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) |
133 assert len(operators) == len(reverse_operators), 'operators dropped' | 133 assert len(operators) == len(reverse_operators), 'operators dropped' |
134 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in | 134 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in |
135 sorted(operators, key=lambda x: -len(x)))) | 135 sorted(operators, key=lambda x: -len(x)))) |
136 | 136 |
137 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, | 137 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, |
138 TOKEN_COMMENT_END, TOKEN_WHITESPACE, | 138 TOKEN_COMMENT_END, TOKEN_WHITESPACE, |
139 TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN, | 139 TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END, |
140 TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT]) | 140 TOKEN_LINECOMMENT]) |
141 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, | 141 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, |
142 TOKEN_COMMENT, TOKEN_LINECOMMENT]) | 142 TOKEN_COMMENT, TOKEN_LINECOMMENT]) |
143 | 143 |
144 | 144 |
145 def _describe_token_type(token_type): | 145 def _describe_token_type(token_type): |
146 if token_type in reverse_operators: | 146 if token_type in reverse_operators: |
147 return reverse_operators[token_type] | 147 return reverse_operators[token_type] |
148 return { | 148 return { |
149 TOKEN_COMMENT_BEGIN: 'begin of comment', | 149 TOKEN_COMMENT_BEGIN: 'begin of comment', |
150 TOKEN_COMMENT_END: 'end of comment', | 150 TOKEN_COMMENT_END: 'end of comment', |
(...skipping 420 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
571 value = self._normalize_newlines(value[1:-1]) \ | 571 value = self._normalize_newlines(value[1:-1]) \ |
572 .encode('ascii', 'backslashreplace') \ | 572 .encode('ascii', 'backslashreplace') \ |
573 .decode('unicode-escape') | 573 .decode('unicode-escape') |
574 except Exception as e: | 574 except Exception as e: |
575 msg = str(e).split(':')[-1].strip() | 575 msg = str(e).split(':')[-1].strip() |
576 raise TemplateSyntaxError(msg, lineno, name, filename) | 576 raise TemplateSyntaxError(msg, lineno, name, filename) |
577 # if we can express it as bytestring (ascii only) | 577 # if we can express it as a bytestring (ASCII only) |
578 # we do that for support of semi broken APIs | 578 # we do that to support semi-broken APIs such |
579 # as datetime.datetime.strftime. On python 3 this | 579 # as datetime.datetime.strftime. This is only done |
580 # call becomes a noop thanks to 2to3 | 580 # on Python 2 (see the PY2 check below) |
581 try: | 581 if PY2: |
582 value = str(value) | 582 try: |
583 except UnicodeError: | 583 value = value.encode('ascii') |
584 pass | 584 except UnicodeError: |
| 585 pass |
585 elif token == 'integer': | 586 elif token == 'integer': |
586 value = int(value) | 587 value = int(value) |
587 elif token == 'float': | 588 elif token == 'float': |
588 value = float(value) | 589 value = float(value) |
589 elif token == 'operator': | 590 elif token == 'operator': |
590 token = operators[value] | 591 token = operators[value] |
591 yield Token(lineno, token, value) | 592 yield Token(lineno, token, value) |
592 | 593 |
593 def tokeniter(self, source, name, filename=None, state=None): | 594 def tokeniter(self, source, name, filename=None, state=None): |
594 """This method tokenizes the text and returns the tokens in a | 595 """This method tokenizes the text and returns the tokens in a |
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
724 # if the loop terminated without a break we haven't found a single match; | 725 # if the loop terminated without a break we haven't found a single match; |
725 # either we are at the end of the file or we have a problem | 726 # either we are at the end of the file or we have a problem |
726 else: | 727 else: |
727 # end of text | 728 # end of text |
728 if pos >= source_length: | 729 if pos >= source_length: |
729 return | 730 return |
730 # something went wrong | 731 # something went wrong |
731 raise TemplateSyntaxError('unexpected char %r at %d' % | 732 raise TemplateSyntaxError('unexpected char %r at %d' % |
732 (source[pos], pos), lineno, | 733 (source[pos], pos), lineno, |
733 name, filename) | 734 name, filename) |
OLD | NEW |