Chromium Code Reviews
Side by Side Diff: third_party/jinja2/lexer.py

Issue 23506004: Update Jinja2 (Python template library) to 2.7.1 (Closed)
Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebased (created 7 years, 3 months ago)
OLD | NEW
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 """ 2 """
3 jinja2.lexer 3 jinja2.lexer
4 ~~~~~~~~~~~~ 4 ~~~~~~~~~~~~
5 5
6 This module implements a Jinja / Python combination lexer. The 6 This module implements a Jinja / Python combination lexer. The
7 `Lexer` class provided by this module is used to do some preprocessing 7 `Lexer` class provided by this module is used to do some preprocessing
8 for Jinja. 8 for Jinja.
9 9
10 On the one hand it filters out invalid operators like the bitshift 10 On the one hand it filters out invalid operators like the bitshift
11 operators we don't allow in templates. On the other hand it separates 11 operators we don't allow in templates. On the other hand it separates
12 template code and python code in expressions. 12 template code and python code in expressions.
13 13
14 :copyright: (c) 2010 by the Jinja Team. 14 :copyright: (c) 2010 by the Jinja Team.
15 :license: BSD, see LICENSE for more details. 15 :license: BSD, see LICENSE for more details.
16 """ 16 """
17 import re 17 import re
18
18 from operator import itemgetter 19 from operator import itemgetter
19 from collections import deque 20 from collections import deque
20 from jinja2.exceptions import TemplateSyntaxError 21 from jinja2.exceptions import TemplateSyntaxError
21 from jinja2.utils import LRUCache, next 22 from jinja2.utils import LRUCache
23 from jinja2._compat import next, iteritems, implements_iterator, text_type, \
24 intern
22 25
23 26
24 # cache for the lexers. Exists in order to be able to have multiple 27 # cache for the lexers. Exists in order to be able to have multiple
25 # environments with the same lexer 28 # environments with the same lexer
26 _lexer_cache = LRUCache(50) 29 _lexer_cache = LRUCache(50)
27 30
28 # static regular expressions 31 # static regular expressions
29 whitespace_re = re.compile(r'\s+', re.U) 32 whitespace_re = re.compile(r'\s+', re.U)
30 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" 33 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
31 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) 34 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
(...skipping 87 matching lines...)
119 '<': TOKEN_LT, 122 '<': TOKEN_LT,
120 '<=': TOKEN_LTEQ, 123 '<=': TOKEN_LTEQ,
121 '=': TOKEN_ASSIGN, 124 '=': TOKEN_ASSIGN,
122 '.': TOKEN_DOT, 125 '.': TOKEN_DOT,
123 ':': TOKEN_COLON, 126 ':': TOKEN_COLON,
124 '|': TOKEN_PIPE, 127 '|': TOKEN_PIPE,
125 ',': TOKEN_COMMA, 128 ',': TOKEN_COMMA,
126 ';': TOKEN_SEMICOLON 129 ';': TOKEN_SEMICOLON
127 } 130 }
128 131
129 reverse_operators = dict([(v, k) for k, v in operators.iteritems()]) 132 reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
130 assert len(operators) == len(reverse_operators), 'operators dropped' 133 assert len(operators) == len(reverse_operators), 'operators dropped'
131 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in 134 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
132 sorted(operators, key=lambda x: -len(x)))) 135 sorted(operators, key=lambda x: -len(x))))
133 136
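The longest-first sort above matters because regex alternation in Python takes the first alternative that matches, not the longest one, so '<=' has to be tried before '<'. A standalone illustration:

    import re

    unsorted_re = re.compile('|'.join(map(re.escape, ['<', '<='])))
    sorted_re   = re.compile('|'.join(map(re.escape, ['<=', '<'])))

    print(unsorted_re.match('<=').group())   # '<'  -- token cut short
    print(sorted_re.match('<=').group())     # '<=' -- what the lexer needs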
134 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, 137 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
135 TOKEN_COMMENT_END, TOKEN_WHITESPACE, 138 TOKEN_COMMENT_END, TOKEN_WHITESPACE,
136 TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN, 139 TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN,
137 TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT]) 140 TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT])
138 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, 141 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
139 TOKEN_COMMENT, TOKEN_LINECOMMENT]) 142 TOKEN_COMMENT, TOKEN_LINECOMMENT])
(...skipping 50 matching lines...)
190 (len(environment.comment_start_string), 'comment', 193 (len(environment.comment_start_string), 'comment',
191 e(environment.comment_start_string)), 194 e(environment.comment_start_string)),
192 (len(environment.block_start_string), 'block', 195 (len(environment.block_start_string), 'block',
193 e(environment.block_start_string)), 196 e(environment.block_start_string)),
194 (len(environment.variable_start_string), 'variable', 197 (len(environment.variable_start_string), 'variable',
195 e(environment.variable_start_string)) 198 e(environment.variable_start_string))
196 ] 199 ]
197 200
198 if environment.line_statement_prefix is not None: 201 if environment.line_statement_prefix is not None:
199 rules.append((len(environment.line_statement_prefix), 'linestatement', 202 rules.append((len(environment.line_statement_prefix), 'linestatement',
200 r'^\s*' + e(environment.line_statement_prefix))) 203 r'^[ \t\v]*' + e(environment.line_statement_prefix)))
201 if environment.line_comment_prefix is not None: 204 if environment.line_comment_prefix is not None:
202 rules.append((len(environment.line_comment_prefix), 'linecomment', 205 rules.append((len(environment.line_comment_prefix), 'linecomment',
203 r'(?:^|(?<=\S))[^\S\r\n]*' + 206 r'(?:^|(?<=\S))[^\S\r\n]*' +
204 e(environment.line_comment_prefix))) 207 e(environment.line_comment_prefix)))
205 208
206 return [x[1:] for x in sorted(rules, reverse=True)] 209 return [x[1:] for x in sorted(rules, reverse=True)]
207 210
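The switch from r'^\s*' to r'^[ \t\v]*' for the line-statement prefix presumably keeps the prefix match from swallowing preceding newlines, since \s also matches '\n' while the new character class only matches horizontal whitespace. A standalone comparison with a hypothetical '#' prefix:

    import re

    old = re.compile(r'^\s*#', re.M)
    new = re.compile(r'^[ \t\v]*#', re.M)
    src = 'text\n\n  # for item in seq'

    print(repr(old.search(src).group()))   # '\n  #' -- a newline is consumed
    print(repr(new.search(src).group()))   # '  #'   -- only the indentation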
208 211
209 class Failure(object): 212 class Failure(object):
210 """Class that raises a `TemplateSyntaxError` if called. 213 """Class that raises a `TemplateSyntaxError` if called.
(...skipping 44 matching lines...)
255 return False 258 return False
256 259
257 def __repr__(self): 260 def __repr__(self):
258 return 'Token(%r, %r, %r)' % ( 261 return 'Token(%r, %r, %r)' % (
259 self.lineno, 262 self.lineno,
260 self.type, 263 self.type,
261 self.value 264 self.value
262 ) 265 )
263 266
264 267
268 @implements_iterator
265 class TokenStreamIterator(object): 269 class TokenStreamIterator(object):
266 """The iterator for tokenstreams. Iterate over the stream 270 """The iterator for tokenstreams. Iterate over the stream
267 until the eof token is reached. 271 until the eof token is reached.
268 """ 272 """
269 273
270 def __init__(self, stream): 274 def __init__(self, stream):
271 self.stream = stream 275 self.stream = stream
272 276
273 def __iter__(self): 277 def __iter__(self):
274 return self 278 return self
275 279
276 def next(self): 280 def __next__(self):
277 token = self.stream.current 281 token = self.stream.current
278 if token.type is TOKEN_EOF: 282 if token.type is TOKEN_EOF:
279 self.stream.close() 283 self.stream.close()
280 raise StopIteration() 284 raise StopIteration()
281 next(self.stream) 285 next(self.stream)
282 return token 286 return token
283 287
284 288
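The @implements_iterator decorator comes from the new jinja2._compat module; roughly (the real helper may differ in detail), it lets classes define only __next__ and still work under Python 2's iterator protocol:

    import sys

    def implements_iterator(cls):
        # on Python 2, expose __next__ under the old next() name
        if sys.version_info[0] == 2:
            cls.next = cls.__next__
            del cls.__next__
        return cls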
289 @implements_iterator
285 class TokenStream(object): 290 class TokenStream(object):
286 """A token stream is an iterable that yields :class:`Token`\s. The 291 """A token stream is an iterable that yields :class:`Token`\s. The
287 parser however does not iterate over it but calls :meth:`next` to go 292 parser however does not iterate over it but calls :meth:`next` to go
288 one token ahead. The current active token is stored as :attr:`current`. 293 one token ahead. The current active token is stored as :attr:`current`.
289 """ 294 """
290 295
291 def __init__(self, generator, name, filename): 296 def __init__(self, generator, name, filename):
292 self._next = iter(generator).next 297 self._iter = iter(generator)
293 self._pushed = deque() 298 self._pushed = deque()
294 self.name = name 299 self.name = name
295 self.filename = filename 300 self.filename = filename
296 self.closed = False 301 self.closed = False
297 self.current = Token(1, TOKEN_INITIAL, '') 302 self.current = Token(1, TOKEN_INITIAL, '')
298 next(self) 303 next(self)
299 304
300 def __iter__(self): 305 def __iter__(self):
301 return TokenStreamIterator(self) 306 return TokenStreamIterator(self)
302 307
303 def __nonzero__(self): 308 def __bool__(self):
304 return bool(self._pushed) or self.current.type is not TOKEN_EOF 309 return bool(self._pushed) or self.current.type is not TOKEN_EOF
310 __nonzero__ = __bool__ # py2
305 311
306 eos = property(lambda x: not x, doc="Are we at the end of the stream?") 312 eos = property(lambda x: not x, doc="Are we at the end of the stream?")
307 313
308 def push(self, token): 314 def push(self, token):
309 """Push a token back to the stream.""" 315 """Push a token back to the stream."""
310 self._pushed.append(token) 316 self._pushed.append(token)
311 317
312 def look(self): 318 def look(self):
313 """Look at the next token.""" 319 """Look at the next token."""
314 old_token = next(self) 320 old_token = next(self)
315 result = self.current 321 result = self.current
316 self.push(result) 322 self.push(result)
317 self.current = old_token 323 self.current = old_token
318 return result 324 return result
319 325
320 def skip(self, n=1): 326 def skip(self, n=1):
321 """Got n tokens ahead.""" 327 """Got n tokens ahead."""
322 for x in xrange(n): 328 for x in range(n):
323 next(self) 329 next(self)
324 330
325 def next_if(self, expr): 331 def next_if(self, expr):
326 """Perform the token test and return the token if it matched. 332 """Perform the token test and return the token if it matched.
327 Otherwise the return value is `None`. 333 Otherwise the return value is `None`.
328 """ 334 """
329 if self.current.test(expr): 335 if self.current.test(expr):
330 return next(self) 336 return next(self)
331 337
332 def skip_if(self, expr): 338 def skip_if(self, expr):
333 """Like :meth:`next_if` but only returns `True` or `False`.""" 339 """Like :meth:`next_if` but only returns `True` or `False`."""
334 return self.next_if(expr) is not None 340 return self.next_if(expr) is not None
335 341
336 def next(self): 342 def __next__(self):
337 """Go one token ahead and return the old one""" 343 """Go one token ahead and return the old one"""
338 rv = self.current 344 rv = self.current
339 if self._pushed: 345 if self._pushed:
340 self.current = self._pushed.popleft() 346 self.current = self._pushed.popleft()
341 elif self.current.type is not TOKEN_EOF: 347 elif self.current.type is not TOKEN_EOF:
342 try: 348 try:
343 self.current = self._next() 349 self.current = next(self._iter)
344 except StopIteration: 350 except StopIteration:
345 self.close() 351 self.close()
346 return rv 352 return rv
347 353
348 def close(self): 354 def close(self):
349 """Close the stream.""" 355 """Close the stream."""
350 self.current = Token(self.current.lineno, TOKEN_EOF, '') 356 self.current = Token(self.current.lineno, TOKEN_EOF, '')
351 self._next = None 357 self._iter = None
352 self.closed = True 358 self.closed = True
353 359
354 def expect(self, expr): 360 def expect(self, expr):
355 """Expect a given token type and return it. This accepts the same 361 """Expect a given token type and return it. This accepts the same
356 argument as :meth:`jinja2.lexer.Token.test`. 362 argument as :meth:`jinja2.lexer.Token.test`.
357 """ 363 """
358 if not self.current.test(expr): 364 if not self.current.test(expr):
359 expr = describe_token_expr(expr) 365 expr = describe_token_expr(expr)
360 if self.current.type is TOKEN_EOF: 366 if self.current.type is TOKEN_EOF:
361 raise TemplateSyntaxError('unexpected end of template, ' 367 raise TemplateSyntaxError('unexpected end of template, '
(...skipping 14 matching lines...)
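A small usage sketch of the stream's one-token lookahead, assuming an Environment is at hand; env.lexer.tokenize() is the same entry point the template parser uses:

    from jinja2 import Environment

    env = Environment()
    stream = env.lexer.tokenize('{{ user.name }}')
    print(stream.current)   # Token(1, 'variable_begin', '{{')
    print(stream.look())    # peek at the following token without consuming it
    next(stream)            # advance; returns the old current token
    stream.expect('name')   # returns the token, or raises TemplateSyntaxError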
376 """Return a lexer which is probably cached.""" 382 """Return a lexer which is probably cached."""
377 key = (environment.block_start_string, 383 key = (environment.block_start_string,
378 environment.block_end_string, 384 environment.block_end_string,
379 environment.variable_start_string, 385 environment.variable_start_string,
380 environment.variable_end_string, 386 environment.variable_end_string,
381 environment.comment_start_string, 387 environment.comment_start_string,
382 environment.comment_end_string, 388 environment.comment_end_string,
383 environment.line_statement_prefix, 389 environment.line_statement_prefix,
384 environment.line_comment_prefix, 390 environment.line_comment_prefix,
385 environment.trim_blocks, 391 environment.trim_blocks,
386 environment.newline_sequence) 392 environment.lstrip_blocks,
393 environment.newline_sequence,
394 environment.keep_trailing_newline)
387 lexer = _lexer_cache.get(key) 395 lexer = _lexer_cache.get(key)
388 if lexer is None: 396 if lexer is None:
389 lexer = Lexer(environment) 397 lexer = Lexer(environment)
390 _lexer_cache[key] = lexer 398 _lexer_cache[key] = lexer
391 return lexer 399 return lexer
392 400
393 401
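Because the cache key is just this tuple of syntax-affecting settings, identically configured environments share one Lexer instance; a quick illustration:

    from jinja2 import Environment

    a = Environment(block_start_string='<%', block_end_string='%>')
    b = Environment(block_start_string='<%', block_end_string='%>')
    c = Environment()             # default delimiters
    print(a.lexer is b.lexer)     # True  -- same key, cached Lexer reused
    print(a.lexer is c.lexer)     # False -- different key, separate Lexer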
394 class Lexer(object): 402 class Lexer(object):
395 """Class that implements a lexer for a given environment. Automatically 403 """Class that implements a lexer for a given environment. Automatically
396 created by the environment class, usually you don't have to do that. 404 created by the environment class, usually you don't have to do that.
(...skipping 10 matching lines...)
407 # lexing rules for tags 415 # lexing rules for tags
408 tag_rules = [ 416 tag_rules = [
409 (whitespace_re, TOKEN_WHITESPACE, None), 417 (whitespace_re, TOKEN_WHITESPACE, None),
410 (float_re, TOKEN_FLOAT, None), 418 (float_re, TOKEN_FLOAT, None),
411 (integer_re, TOKEN_INTEGER, None), 419 (integer_re, TOKEN_INTEGER, None),
412 (name_re, TOKEN_NAME, None), 420 (name_re, TOKEN_NAME, None),
413 (string_re, TOKEN_STRING, None), 421 (string_re, TOKEN_STRING, None),
414 (operator_re, TOKEN_OPERATOR, None) 422 (operator_re, TOKEN_OPERATOR, None)
415 ] 423 ]
416 424
417 # assamble the root lexing rule. because "|" is ungreedy 425 # assemble the root lexing rule. because "|" is ungreedy
418 # we have to sort by length so that the lexer continues working 426 # we have to sort by length so that the lexer continues working
419 # as expected when we have parsing rules like <% for block and 427 # as expected when we have parsing rules like <% for block and
420 # <%= for variables. (if someone wants asp like syntax) 428 # <%= for variables. (if someone wants asp like syntax)
421 # variables are just part of the rules if variable processing 429 # variables are just part of the rules if variable processing
422 # is required. 430 # is required.
423 root_tag_rules = compile_rules(environment) 431 root_tag_rules = compile_rules(environment)
424 432
425 # block suffix if trimming is enabled 433 # block suffix if trimming is enabled
426 block_suffix_re = environment.trim_blocks and '\\n?' or '' 434 block_suffix_re = environment.trim_blocks and '\\n?' or ''
427 435
436 # strip leading spaces if lstrip_blocks is enabled
437 prefix_re = {}
438 if environment.lstrip_blocks:
439 # use '{%+' to manually disable lstrip_blocks behavior
440 no_lstrip_re = e('+')
441 # detect overlap between block and variable or comment strings
442 block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
443 # make sure we don't mistake a block for a variable or a comment
444 m = block_diff.match(environment.comment_start_string)
445 no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
446 m = block_diff.match(environment.variable_start_string)
447 no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
448
449 # detect overlap between comment and variable strings
450 comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
451 m = comment_diff.match(environment.variable_start_string)
452 no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''
453
454 lstrip_re = r'^[ \t]*'
455 block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
456 lstrip_re,
457 e(environment.block_start_string),
458 no_lstrip_re,
459 e(environment.block_start_string),
460 )
461 comment_prefix_re = r'%s%s%s|%s\+?' % (
462 lstrip_re,
463 e(environment.comment_start_string),
464 no_variable_re,
465 e(environment.comment_start_string),
466 )
467 prefix_re['block'] = block_prefix_re
468 prefix_re['comment'] = comment_prefix_re
469 else:
470 block_prefix_re = '%s' % e(environment.block_start_string)
471
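For reference, the observable effect of the new lstrip_blocks option as documented for Jinja2 2.7, with '{%+' opting a single tag back out:

    from jinja2 import Environment

    tmpl = '    {% if True %}x{% endif %}'
    print(repr(Environment().from_string(tmpl).render()))                    # '    x'
    print(repr(Environment(lstrip_blocks=True).from_string(tmpl).render()))  # 'x'

    plus = '    {%+ if True %}x{% endif %}'
    print(repr(Environment(lstrip_blocks=True).from_string(plus).render()))  # '    x'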
428 self.newline_sequence = environment.newline_sequence 472 self.newline_sequence = environment.newline_sequence
473 self.keep_trailing_newline = environment.keep_trailing_newline
429 474
430 # global lexing rules 475 # global lexing rules
431 self.rules = { 476 self.rules = {
432 'root': [ 477 'root': [
433 # directives 478 # directives
434 (c('(.*?)(?:%s)' % '|'.join( 479 (c('(.*?)(?:%s)' % '|'.join(
435 [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % ( 480 [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
436 e(environment.block_start_string), 481 e(environment.block_start_string),
437 e(environment.block_start_string), 482 block_prefix_re,
438 e(environment.block_end_string), 483 e(environment.block_end_string),
439 e(environment.block_end_string) 484 e(environment.block_end_string)
440 )] + [ 485 )] + [
441 r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, r) 486 r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r))
442 for n, r in root_tag_rules 487 for n, r in root_tag_rules
443 ])), (TOKEN_DATA, '#bygroup'), '#bygroup'), 488 ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
444 # data 489 # data
445 (c('.+'), TOKEN_DATA, None) 490 (c('.+'), TOKEN_DATA, None)
446 ], 491 ],
447 # comments 492 # comments
448 TOKEN_COMMENT_BEGIN: [ 493 TOKEN_COMMENT_BEGIN: [
449 (c(r'(.*?)((?:\-%s\s*|%s)%s)' % ( 494 (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
450 e(environment.comment_end_string), 495 e(environment.comment_end_string),
451 e(environment.comment_end_string), 496 e(environment.comment_end_string),
(...skipping 13 matching lines...)
465 TOKEN_VARIABLE_BEGIN: [ 510 TOKEN_VARIABLE_BEGIN: [
466 (c('\-%s\s*|%s' % ( 511 (c('\-%s\s*|%s' % (
467 e(environment.variable_end_string), 512 e(environment.variable_end_string),
468 e(environment.variable_end_string) 513 e(environment.variable_end_string)
469 )), TOKEN_VARIABLE_END, '#pop') 514 )), TOKEN_VARIABLE_END, '#pop')
470 ] + tag_rules, 515 ] + tag_rules,
471 # raw block 516 # raw block
472 TOKEN_RAW_BEGIN: [ 517 TOKEN_RAW_BEGIN: [
473 (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % ( 518 (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
474 e(environment.block_start_string), 519 e(environment.block_start_string),
475 e(environment.block_start_string), 520 block_prefix_re,
476 e(environment.block_end_string), 521 e(environment.block_end_string),
477 e(environment.block_end_string), 522 e(environment.block_end_string),
478 block_suffix_re 523 block_suffix_re
479 )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'), 524 )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
480 (c('(.)'), (Failure('Missing end of raw directive'),), None) 525 (c('(.)'), (Failure('Missing end of raw directive'),), None)
481 ], 526 ],
482 # line statements 527 # line statements
483 TOKEN_LINESTATEMENT_BEGIN: [ 528 TOKEN_LINESTATEMENT_BEGIN: [
484 (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop') 529 (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
485 ] + tag_rules, 530 ] + tag_rules,
486 # line comments 531 # line comments
487 TOKEN_LINECOMMENT_BEGIN: [ 532 TOKEN_LINECOMMENT_BEGIN: [
488 (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT, 533 (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
489 TOKEN_LINECOMMENT_END), '#pop') 534 TOKEN_LINECOMMENT_END), '#pop')
490 ] 535 ]
491 } 536 }
492 537
493 def _normalize_newlines(self, value): 538 def _normalize_newlines(self, value):
494 """Called for strings and template data to normlize it to unicode.""" 539 """Called for strings and template data to normalize it to unicode."""
495 return newline_re.sub(self.newline_sequence, value) 540 return newline_re.sub(self.newline_sequence, value)
496 541
497 def tokenize(self, source, name=None, filename=None, state=None): 542 def tokenize(self, source, name=None, filename=None, state=None):
498 """Calls tokeniter + tokenize and wraps it in a token stream. 543 """Calls tokeniter + tokenize and wraps it in a token stream.
499 """ 544 """
500 stream = self.tokeniter(source, name, filename, state) 545 stream = self.tokeniter(source, name, filename, state)
501 return TokenStream(self.wrap(stream, name, filename), name, filename) 546 return TokenStream(self.wrap(stream, name, filename), name, filename)
502 547
503 def wrap(self, stream, name=None, filename=None): 548 def wrap(self, stream, name=None, filename=None):
504 """This is called with the stream as returned by `tokenize` and wraps 549 """This is called with the stream as returned by `tokenize` and wraps
(...skipping 14 matching lines...)
519 elif token == 'keyword': 564 elif token == 'keyword':
520 token = value 565 token = value
521 elif token == 'name': 566 elif token == 'name':
522 value = str(value) 567 value = str(value)
523 elif token == 'string': 568 elif token == 'string':
524 # try to unescape string 569 # try to unescape string
525 try: 570 try:
526 value = self._normalize_newlines(value[1:-1]) \ 571 value = self._normalize_newlines(value[1:-1]) \
527 .encode('ascii', 'backslashreplace') \ 572 .encode('ascii', 'backslashreplace') \
528 .decode('unicode-escape') 573 .decode('unicode-escape')
529 except Exception, e: 574 except Exception as e:
530 msg = str(e).split(':')[-1].strip() 575 msg = str(e).split(':')[-1].strip()
531 raise TemplateSyntaxError(msg, lineno, name, filename) 576 raise TemplateSyntaxError(msg, lineno, name, filename)
532 # if we can express it as bytestring (ascii only) 577 # if we can express it as bytestring (ascii only)
533 # we do that for support of semi broken APIs 578 # we do that for support of semi broken APIs
534 # as datetime.datetime.strftime. On python 3 this 579 # as datetime.datetime.strftime. On python 3 this
535 # call becomes a noop thanks to 2to3 580 # call becomes a noop thanks to 2to3
536 try: 581 try:
537 value = str(value) 582 value = str(value)
538 except UnicodeError: 583 except UnicodeError:
539 pass 584 pass
540 elif token == 'integer': 585 elif token == 'integer':
541 value = int(value) 586 value = int(value)
542 elif token == 'float': 587 elif token == 'float':
543 value = float(value) 588 value = float(value)
544 elif token == 'operator': 589 elif token == 'operator':
545 token = operators[value] 590 token = operators[value]
546 yield Token(lineno, token, value) 591 yield Token(lineno, token, value)
547 592
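The string unescaping above first re-encodes non-ASCII characters as backslash escapes so that a single unicode-escape decode can resolve both them and literal escapes such as \n; a small standalone trace of that idea:

    body = u'caf\xe9 \\n'                                    # a lexed string body, quotes stripped
    ascii_bytes = body.encode('ascii', 'backslashreplace')   # b'caf\\xe9 \\n'
    print(ascii_bytes.decode('unicode-escape'))              # cafe with an accent, then a real newline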
548 def tokeniter(self, source, name, filename=None, state=None): 593 def tokeniter(self, source, name, filename=None, state=None):
549 """This method tokenizes the text and returns the tokens in a 594 """This method tokenizes the text and returns the tokens in a
550 generator. Use this method if you just want to tokenize a template. 595 generator. Use this method if you just want to tokenize a template.
551 """ 596 """
552 source = '\n'.join(unicode(source).splitlines()) 597 source = text_type(source)
598 lines = source.splitlines()
599 if self.keep_trailing_newline and source:
600 for newline in ('\r\n', '\r', '\n'):
601 if source.endswith(newline):
602 lines.append('')
603 break
604 source = '\n'.join(lines)
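The trailing-newline handling just above is the implementation of keep_trailing_newline, new in 2.7; the observable difference:

    from jinja2 import Environment

    src = 'hello\n'
    print(repr(Environment().from_string(src).render()))                            # 'hello'
    print(repr(Environment(keep_trailing_newline=True).from_string(src).render()))  # 'hello\n'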
553 pos = 0 605 pos = 0
554 lineno = 1 606 lineno = 1
555 stack = ['root'] 607 stack = ['root']
556 if state is not None and state != 'root': 608 if state is not None and state != 'root':
557 assert state in ('variable', 'block'), 'invalid state' 609 assert state in ('variable', 'block'), 'invalid state'
558 stack.append(state + '_begin') 610 stack.append(state + '_begin')
559 else: 611 else:
560 state = 'root' 612 state = 'root'
561 statetokens = self.rules[stack[-1]] 613 statetokens = self.rules[stack[-1]]
562 source_length = len(source) 614 source_length = len(source)
563 615
564 balancing_stack = [] 616 balancing_stack = []
565 617
566 while 1: 618 while 1:
567 # tokenizer loop 619 # tokenizer loop
568 for regex, tokens, new_state in statetokens: 620 for regex, tokens, new_state in statetokens:
569 m = regex.match(source, pos) 621 m = regex.match(source, pos)
570 # if no match we try again with the next rule 622 # if no match we try again with the next rule
571 if m is None: 623 if m is None:
572 continue 624 continue
573 625
574 # we only match blocks and variables if brances / parentheses 626 # we only match blocks and variables if braces / parentheses
575 # are balanced. continue parsing with the lower rule which 627 # are balanced. continue parsing with the lower rule which
576 # is the operator rule. do this only if the end tags look 628 # is the operator rule. do this only if the end tags look
577 # like operators 629 # like operators
578 if balancing_stack and \ 630 if balancing_stack and \
579 tokens in ('variable_end', 'block_end', 631 tokens in ('variable_end', 'block_end',
580 'linestatement_end'): 632 'linestatement_end'):
581 continue 633 continue
582 634
583 # tuples support more options 635 # tuples support more options
584 if isinstance(tokens, tuple): 636 if isinstance(tokens, tuple):
585 for idx, token in enumerate(tokens): 637 for idx, token in enumerate(tokens):
586 # failure group 638 # failure group
587 if token.__class__ is Failure: 639 if token.__class__ is Failure:
588 raise token(lineno, filename) 640 raise token(lineno, filename)
589 # bygroup is a bit more complex, in that case we 641 # bygroup is a bit more complex, in that case we
590 # yield for the current token the first named 642 # yield for the current token the first named
591 # group that matched 643 # group that matched
592 elif token == '#bygroup': 644 elif token == '#bygroup':
593 for key, value in m.groupdict().iteritems(): 645 for key, value in iteritems(m.groupdict()):
594 if value is not None: 646 if value is not None:
595 yield lineno, key, value 647 yield lineno, key, value
596 lineno += value.count('\n') 648 lineno += value.count('\n')
597 break 649 break
598 else: 650 else:
599 raise RuntimeError('%r wanted to resolve ' 651 raise RuntimeError('%r wanted to resolve '
600 'the token dynamically' 652 'the token dynamically'
601 ' but no group matched' 653 ' but no group matched'
602 % regex) 654 % regex)
603 # normal group 655 # normal group
(...skipping 36 matching lines...)
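The '#bygroup' resolution above relies on the root regex tagging every delimiter with a named group; whichever group is non-None in groupdict() names the token to emit. A reduced, hypothetical version of that pattern:

    import re

    root = re.compile(r'(?P<variable_begin>\{\{)|(?P<block_begin>\{%)')
    m = root.match('{% for x in seq %}')
    token = next(k for k, v in m.groupdict().items() if v is not None)
    print(token)   # 'block_begin'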
640 # in an infinite loop 692 # in an infinite loop
641 pos2 = m.end() 693 pos2 = m.end()
642 694
643 # handle state changes 695 # handle state changes
644 if new_state is not None: 696 if new_state is not None:
645 # remove the uppermost state 697 # remove the uppermost state
646 if new_state == '#pop': 698 if new_state == '#pop':
647 stack.pop() 699 stack.pop()
648 # resolve the new state by group checking 700 # resolve the new state by group checking
649 elif new_state == '#bygroup': 701 elif new_state == '#bygroup':
650 for key, value in m.groupdict().iteritems(): 702 for key, value in iteritems(m.groupdict()):
651 if value is not None: 703 if value is not None:
652 stack.append(key) 704 stack.append(key)
653 break 705 break
654 else: 706 else:
655 raise RuntimeError('%r wanted to resolve the ' 707 raise RuntimeError('%r wanted to resolve the '
656 'new state dynamically but' 708 'new state dynamically but'
657 ' no group matched' % 709 ' no group matched' %
658 regex) 710 regex)
659 # direct state name given 711 # direct state name given
660 else: 712 else:
661 stack.append(new_state) 713 stack.append(new_state)
662 statetokens = self.rules[stack[-1]] 714 statetokens = self.rules[stack[-1]]
663 # we are still at the same position and no stack change. 715 # we are still at the same position and no stack change.
664 # this means a loop without break condition, avoid that and 716 # this means a loop without break condition, avoid that and
665 # raise error 717 # raise error
666 elif pos2 == pos: 718 elif pos2 == pos:
667 raise RuntimeError('%r yielded empty string without ' 719 raise RuntimeError('%r yielded empty string without '
668 'stack change' % regex) 720 'stack change' % regex)
669 # publish new function and start again 721 # publish new function and start again
670 pos = pos2 722 pos = pos2
671 break 723 break
672 # if loop terminated without break we havn't found a single match 724 # if loop terminated without break we haven't found a single match
673 # either we are at the end of the file or we have a problem 725 # either we are at the end of the file or we have a problem
674 else: 726 else:
675 # end of text 727 # end of text
676 if pos >= source_length: 728 if pos >= source_length:
677 return 729 return
678 # something went wrong 730 # something went wrong
679 raise TemplateSyntaxError('unexpected char %r at %d' % 731 raise TemplateSyntaxError('unexpected char %r at %d' %
680 (source[pos], pos), lineno, 732 (source[pos], pos), lineno,
681 name, filename) 733 name, filename)