Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1328)

Side by Side Diff: tools/lexer_generator/regex_lexer.py

Issue 66313005: Experimental parser: better escaping (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/generator.py ('k') | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 import ply.lex as lex 28 import ply.lex as lex
29 29
30 def build_escape_map(chars):
31 def add_escape(d, char):
32 d['\\' + char] = char
33 return d
34 return reduce(add_escape, chars,
35 {'\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'})
36
30 class RegexLexer: 37 class RegexLexer:
31 38
32 tokens = ( 39 tokens = (
33 40
34 'GROUP_BEGIN', 41 'GROUP_BEGIN',
35 'GROUP_END', 42 'GROUP_END',
36 43
37 'CLASS_BEGIN', 44 'CLASS_BEGIN',
38 'CLASS_END', 45 'CLASS_END',
39 46
(...skipping 15 matching lines...) Expand all
55 'CLASS_LITERAL', 62 'CLASS_LITERAL',
56 'CLASS_LITERAL_AS_OCTAL', 63 'CLASS_LITERAL_AS_OCTAL',
57 'CHARACTER_CLASS', 64 'CHARACTER_CLASS',
58 ) 65 )
59 66
60 states = ( 67 states = (
61 ('class','exclusive'), 68 ('class','exclusive'),
62 ('repeat','exclusive'), 69 ('repeat','exclusive'),
63 ) 70 )
64 71
72 __escaped_literals = build_escape_map("(){}[]?+.*|\\")
73
65 def t_ESCAPED_LITERAL(self, t): 74 def t_ESCAPED_LITERAL(self, t):
66 r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\|\\\{|\\\}' 75 r'\\.'
67 t.type = 'LITERAL' 76 t.type = 'LITERAL'
68 t.value = t.value[1:] 77 t.value = RegexLexer.__escaped_literals[t.value]
69 return t 78 return t
70 79
71 t_GROUP_BEGIN = r'\(' 80 t_GROUP_BEGIN = r'\('
72 t_GROUP_END = r'\)' 81 t_GROUP_END = r'\)'
73 82
74 t_OR = r'\|' 83 t_OR = r'\|'
75 t_ONE_OR_MORE = r'\+' 84 t_ONE_OR_MORE = r'\+'
76 t_ZERO_OR_MORE = r'\*' 85 t_ZERO_OR_MORE = r'\*'
77 t_ZERO_OR_ONE = r'\?' 86 t_ZERO_OR_ONE = r'\?'
78 87
(...skipping 12 matching lines...) Expand all
91 return t 100 return t
92 101
93 t_class_RANGE = '-' 102 t_class_RANGE = '-'
94 t_class_NOT = '\^' 103 t_class_NOT = '\^'
95 t_class_CHARACTER_CLASS = r':\w+:' 104 t_class_CHARACTER_CLASS = r':\w+:'
96 105
97 def t_class_CLASS_LITERAL_AS_OCTAL(self, t): 106 def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
98 r'\\\d+' 107 r'\\\d+'
99 return t 108 return t
100 109
101 escaped_class_literals = { 110 __escaped_class_literals = build_escape_map("^[]-:")
102 '\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f',
103 '\\^' : '^', '\\[' : '[', '\\]' : ']', '\\-' : '-', '\\:' : ':',
104 }
105 111
106 def t_class_ESCAPED_CLASS_LITERAL(self, t): 112 def t_class_ESCAPED_CLASS_LITERAL(self, t):
107 r'\\\^|\\-|\\\[|\\\]|\\\:|\\\w' 113 r'\\.'
108 t.type = 'CLASS_LITERAL' 114 t.type = 'CLASS_LITERAL'
109 t.value = RegexLexer.escaped_class_literals[t.value] 115 t.value = RegexLexer.__escaped_class_literals[t.value]
110 return t 116 return t
111 117
112 t_class_CLASS_LITERAL = r'[\w $_+]' 118 t_class_CLASS_LITERAL = r'[\w $_+]'
113 119
114 def t_REPEAT_BEGIN(self, t): 120 def t_REPEAT_BEGIN(self, t):
115 r'\{' 121 r'\{'
116 self.lexer.push_state('repeat') 122 self.lexer.push_state('repeat')
117 return t 123 return t
118 124
119 def t_repeat_REPEAT_END(self, t): 125 def t_repeat_REPEAT_END(self, t):
120 r'\}' 126 r'\}'
121 self.lexer.pop_state() 127 self.lexer.pop_state()
122 return t 128 return t
123 129
124 t_repeat_NUMBER = r'[0-9]+' 130 t_repeat_NUMBER = r'[0-9]+'
125 t_repeat_COMMA = r',' 131 t_repeat_COMMA = r','
126 132
127 t_ANY_ignore = '\n' 133 t_ANY_ignore = '\n'
128 134
129 def t_ANY_error(self, t): 135 def t_ANY_error(self, t):
130 raise Exception("Illegal character '%s'" % t.value[0]) 136 raise Exception("Illegal character '%s'" % t.value[0])
131 137
132 def build(self, **kwargs): 138 def build(self, **kwargs):
133 self.lexer = lex.lex(module=self, **kwargs) 139 self.lexer = lex.lex(module=self, **kwargs)
OLD | NEW
« no previous file with comments | « tools/lexer_generator/generator.py ('k') | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698