Index: third_party/jinja2/lexer.py
diff --git a/third_party/jinja2/lexer.py b/third_party/jinja2/lexer.py
index 0d3f69617159938700a52957ea339dde5378fb7a..a50128507bb98ac6bc57a76afe8a0776a2df2c49 100644
--- a/third_party/jinja2/lexer.py
+++ b/third_party/jinja2/lexer.py
@@ -15,10 +15,13 @@
:license: BSD, see LICENSE for more details.
"""
import re
+
from operator import itemgetter
from collections import deque
from jinja2.exceptions import TemplateSyntaxError
-from jinja2.utils import LRUCache, next
+from jinja2.utils import LRUCache
+from jinja2._compat import next, iteritems, implements_iterator, text_type, \
+ intern
# cache for the lexers. Exists in order to be able to have multiple
@@ -126,7 +129,7 @@ operators = {
';': TOKEN_SEMICOLON
}
-reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
+reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
sorted(operators, key=lambda x: -len(x))))
@@ -197,7 +200,7 @@ def compile_rules(environment):
if environment.line_statement_prefix is not None:
rules.append((len(environment.line_statement_prefix), 'linestatement',
- r'^\s*' + e(environment.line_statement_prefix)))
+ r'^[ \t\v]*' + e(environment.line_statement_prefix)))
if environment.line_comment_prefix is not None:
rules.append((len(environment.line_comment_prefix), 'linecomment',
r'(?:^|(?<=\S))[^\S\r\n]*' +
@@ -262,6 +265,7 @@ class Token(tuple):
)
+@implements_iterator
class TokenStreamIterator(object):
"""The iterator for tokenstreams. Iterate over the stream
until the eof token is reached.
@@ -273,7 +277,7 @@ class TokenStreamIterator(object):
def __iter__(self):
return self
- def next(self):
+ def __next__(self):
token = self.stream.current
if token.type is TOKEN_EOF:
self.stream.close()
@@ -282,6 +286,7 @@ class TokenStreamIterator(object):
return token
+@implements_iterator
class TokenStream(object):
"""A token stream is an iterable that yields :class:`Token`\s. The
parser however does not iterate over it but calls :meth:`next` to go
@@ -289,7 +294,7 @@ class TokenStream(object):
"""
def __init__(self, generator, name, filename):
- self._next = iter(generator).next
+ self._iter = iter(generator)
self._pushed = deque()
self.name = name
self.filename = filename
@@ -300,8 +305,9 @@ class TokenStream(object):
def __iter__(self):
return TokenStreamIterator(self)
- def __nonzero__(self):
+ def __bool__(self):
return bool(self._pushed) or self.current.type is not TOKEN_EOF
+ __nonzero__ = __bool__ # py2
eos = property(lambda x: not x, doc="Are we at the end of the stream?")
@@ -319,7 +325,7 @@ class TokenStream(object):
def skip(self, n=1):
"""Got n tokens ahead."""
- for x in xrange(n):
+ for x in range(n):
next(self)
def next_if(self, expr):
@@ -333,14 +339,14 @@ class TokenStream(object):
"""Like :meth:`next_if` but only returns `True` or `False`."""
return self.next_if(expr) is not None
- def next(self):
+ def __next__(self):
"""Go one token ahead and return the old one"""
rv = self.current
if self._pushed:
self.current = self._pushed.popleft()
elif self.current.type is not TOKEN_EOF:
try:
- self.current = self._next()
+ self.current = next(self._iter)
except StopIteration:
self.close()
return rv
@@ -348,7 +354,7 @@ class TokenStream(object):
def close(self):
"""Close the stream."""
self.current = Token(self.current.lineno, TOKEN_EOF, '')
- self._next = None
+ self._iter = None
self.closed = True
def expect(self, expr):
@@ -383,7 +389,9 @@ def get_lexer(environment):
environment.line_statement_prefix,
environment.line_comment_prefix,
environment.trim_blocks,
- environment.newline_sequence)
+ environment.lstrip_blocks,
+ environment.newline_sequence,
+ environment.keep_trailing_newline)
lexer = _lexer_cache.get(key)
if lexer is None:
lexer = Lexer(environment)
@@ -414,7 +422,7 @@ class Lexer(object):
(operator_re, TOKEN_OPERATOR, None)
]
- # assamble the root lexing rule. because "|" is ungreedy
+ # assemble the root lexing rule. because "|" is ungreedy
# we have to sort by length so that the lexer continues working
# as expected when we have parsing rules like <% for block and
# <%= for variables. (if someone wants asp like syntax)
@@ -425,7 +433,44 @@ class Lexer(object):
# block suffix if trimming is enabled
block_suffix_re = environment.trim_blocks and '\\n?' or ''
+ # strip leading spaces if lstrip_blocks is enabled
+ prefix_re = {}
+ if environment.lstrip_blocks:
+ # use '{%+' to manually disable lstrip_blocks behavior
+ no_lstrip_re = e('+')
+ # detect overlap between block and variable or comment strings
+ block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
+ # make sure we don't mistake a block for a variable or a comment
+ m = block_diff.match(environment.comment_start_string)
+ no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
+ m = block_diff.match(environment.variable_start_string)
+ no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
+
+ # detect overlap between comment and variable strings
+ comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
+ m = comment_diff.match(environment.variable_start_string)
+ no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''
+
+ lstrip_re = r'^[ \t]*'
+ block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
+ lstrip_re,
+ e(environment.block_start_string),
+ no_lstrip_re,
+ e(environment.block_start_string),
+ )
+ comment_prefix_re = r'%s%s%s|%s\+?' % (
+ lstrip_re,
+ e(environment.comment_start_string),
+ no_variable_re,
+ e(environment.comment_start_string),
+ )
+ prefix_re['block'] = block_prefix_re
+ prefix_re['comment'] = comment_prefix_re
+ else:
+ block_prefix_re = '%s' % e(environment.block_start_string)
+
self.newline_sequence = environment.newline_sequence
+ self.keep_trailing_newline = environment.keep_trailing_newline
# global lexing rules
self.rules = {
@@ -434,11 +479,11 @@ class Lexer(object):
(c('(.*?)(?:%s)' % '|'.join(
[r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
e(environment.block_start_string),
- e(environment.block_start_string),
+ block_prefix_re,
e(environment.block_end_string),
e(environment.block_end_string)
)] + [
- r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
+ r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r))
for n, r in root_tag_rules
])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
# data
@@ -472,7 +517,7 @@ class Lexer(object):
TOKEN_RAW_BEGIN: [
(c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
e(environment.block_start_string),
- e(environment.block_start_string),
+ block_prefix_re,
e(environment.block_end_string),
e(environment.block_end_string),
block_suffix_re
@@ -491,7 +536,7 @@ class Lexer(object):
}
def _normalize_newlines(self, value):
- """Called for strings and template data to normlize it to unicode."""
+ """Called for strings and template data to normalize it to unicode."""
return newline_re.sub(self.newline_sequence, value)
def tokenize(self, source, name=None, filename=None, state=None):
@@ -526,7 +571,7 @@ class Lexer(object):
value = self._normalize_newlines(value[1:-1]) \
.encode('ascii', 'backslashreplace') \
.decode('unicode-escape')
- except Exception, e:
+ except Exception as e:
msg = str(e).split(':')[-1].strip()
raise TemplateSyntaxError(msg, lineno, name, filename)
# if we can express it as bytestring (ascii only)
@@ -549,7 +594,14 @@ class Lexer(object):
"""This method tokenizes the text and returns the tokens in a
generator. Use this method if you just want to tokenize a template.
"""
- source = '\n'.join(unicode(source).splitlines())
+ source = text_type(source)
+ lines = source.splitlines()
+ if self.keep_trailing_newline and source:
+ for newline in ('\r\n', '\r', '\n'):
+ if source.endswith(newline):
+ lines.append('')
+ break
+ source = '\n'.join(lines)
pos = 0
lineno = 1
stack = ['root']
@@ -571,7 +623,7 @@ class Lexer(object):
if m is None:
continue
- # we only match blocks and variables if brances / parentheses
+ # we only match blocks and variables if braces / parentheses
# are balanced. continue parsing with the lower rule which
# is the operator rule. do this only if the end tags look
# like operators
@@ -590,7 +642,7 @@ class Lexer(object):
# yield for the current token the first named
# group that matched
elif token == '#bygroup':
- for key, value in m.groupdict().iteritems():
+ for key, value in iteritems(m.groupdict()):
if value is not None:
yield lineno, key, value
lineno += value.count('\n')
@@ -647,7 +699,7 @@ class Lexer(object):
stack.pop()
# resolve the new state by group checking
elif new_state == '#bygroup':
- for key, value in m.groupdict().iteritems():
+ for key, value in iteritems(m.groupdict()):
if value is not None:
stack.append(key)
break
@@ -669,7 +721,7 @@ class Lexer(object):
# publish new function and start again
pos = pos2
break
- # if loop terminated without break we havn't found a single match
+ # if loop terminated without break we haven't found a single match
# either we are at the end of the file or we have a problem
else:
# end of text
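
Usage note: besides the Python 2/3 compatibility shims pulled in from jinja2._compat, this patch threads the Jinja2 2.7 options lstrip_blocks and keep_trailing_newline through the lexer and its cache key. A minimal sketch of their observable effect, assuming a Jinja2 build that already exposes these Environment options; the expected output is noted in comments rather than captured from this tree.

    from jinja2 import Environment

    tmpl = "  {% if True %}\nhello\n  {% endif %}\n"

    # Default behaviour: the indentation in front of a block tag and the
    # newline after it are kept as template data.
    print(repr(Environment().from_string(tmpl).render()))
    # expected: '  \nhello\n  '

    # lstrip_blocks strips whitespace from the start of a line up to a block
    # tag; trim_blocks drops the newline after a block tag. A '{%+' tag opts
    # out of the lstrip behaviour for that one tag.
    env = Environment(trim_blocks=True, lstrip_blocks=True)
    print(repr(env.from_string(tmpl).render()))
    # expected: 'hello\n'

    # keep_trailing_newline preserves the final newline of the source, which
    # tokeniter otherwise drops when it splits and rejoins the lines.
    env = Environment(keep_trailing_newline=True)
    print(repr(env.from_string("x\n").render()))
    # expected: 'x\n' (just 'x' without the option)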