Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: tools/lexer_generator/regex_parser.py

Issue 157013003: Experimental parser: cleanup RegexParser (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/nfa_builder.py ('k') | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 import ply.lex as lex 28 import ply.lex as lex
29 import ply.yacc as yacc 29 import ply.yacc as yacc
30 from types import ListType, TupleType
31 from regex_lexer import RegexLexer
32 from action import Term 30 from action import Term
31 from nfa_builder import NfaBuilder
33 32
34 class ParserBuilder: 33 class ParserBuilder:
35 34
class Logger(object):
  """Logger handed to ply.

  Debug/info chatter is swallowed; warnings and errors are escalated to
  exceptions so grammar/lexer problems never pass silently during
  parser-table construction.
  """

  @staticmethod
  def _fail(level, msg, args):
    # Single formatting point for every escalated ply message.
    raise Exception(level + ": " + (msg % args) + "\n")

  def debug(self, msg, *args, **kwargs):
    pass

  def info(self, msg, *args, **kwargs):
    pass

  def warning(self, msg, *args, **kwargs):
    self._fail("warning", msg, args)

  def error(self, msg, *args, **kwargs):
    self._fail("error", msg, args)
49 47
50 __static_instances = {} 48 __static_instances = {}
51 @staticmethod 49 @staticmethod
52 def parse( 50 def parse(
53 string, name, new_lexer, new_parser, preparse = None, postparse = None): 51 string, name, new_lexer, new_parser, preparse = None, postparse = None):
54 if not name in ParserBuilder.__static_instances: 52 if not name in ParserBuilder.__static_instances:
55 logger = ParserBuilder.Logger() 53 logger = ParserBuilder.Logger()
56 lexer_instance = new_lexer() 54 lexer_instance = new_lexer()
57 lexer_instance.lex = lex.lex(module=lexer_instance) 55 lexer_instance.lex = lex.lex(module=lexer_instance)
58 instance = new_parser() 56 instance = new_parser()
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
98 'REPEAT_BEGIN', 96 'REPEAT_BEGIN',
99 'REPEAT_END', 97 'REPEAT_END',
100 98
101 'NUMBER', 99 'NUMBER',
102 'COMMA', 100 'COMMA',
103 'LITERAL', 101 'LITERAL',
104 102
105 'RANGE', 103 'RANGE',
106 'NOT', 104 'NOT',
107 'CLASS_LITERAL', 105 'CLASS_LITERAL',
108 'CLASS_LITERAL_AS_OCTAL',
109 'CHARACTER_CLASS', 106 'CHARACTER_CLASS',
110 ) 107 )
111 108
# ply lexer states: character classes ([...]) and repetition counts ({...})
# are tokenized in dedicated exclusive states.
states = (
    ('class', 'exclusive'),
    ('repeat', 'exclusive'),
)
116 113
117 __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\") 114 __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")
118 115
(...skipping 24 matching lines...) Expand all
143 r'\]' 140 r'\]'
144 self.lex.pop_state() 141 self.lex.pop_state()
145 return t 142 return t
146 143
# Simple token patterns used inside a [...] class (ply treats these
# strings as regexes). Raw strings avoid the invalid escape sequence
# '\^', which triggers a DeprecationWarning/SyntaxWarning on modern
# Python; the pattern values are unchanged.
t_class_RANGE = '-'
t_class_NOT = r'\^'
t_class_CHARACTER_CLASS = r':\w+:'
150 147
def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
  r'\\\d+'
  # Decode an octal escape such as \101 into its character and reclassify
  # the token as a plain CLASS_LITERAL, so the parser only ever sees one
  # literal token type inside a class.
  # NOTE(review): digits 8/9 would make int(..., 8) raise ValueError —
  # presumably such escapes never occur in the grammar files; confirm.
  octal_digits = t.value[1:]
  t.value = chr(int(octal_digits, 8))
  t.type = 'CLASS_LITERAL'
  return t
154 153
155 __escaped_class_literals = build_escape_map("^[]-:\\") 154 __escaped_class_literals = build_escape_map("^[]-:\\")
156 155
def t_class_ESCAPED_CLASS_LITERAL(self, t):
  r'\\.'
  # Map a backslash escape (e.g. \] or \-) to the character it denotes and
  # present it to the parser as an ordinary CLASS_LITERAL. The lookup table
  # is the class-private __escaped_class_literals map (name-mangled access
  # through the RegexLexer class), built from "^[]-:\\".
  # An unknown escape raises KeyError here rather than passing through.
  t.value = RegexLexer.__escaped_class_literals[t.value]
  t.type = 'CLASS_LITERAL'
  return t
162 161
(...skipping 18 matching lines...) Expand all
181 raise Exception("Illegal character '%s'" % t.value[0]) 180 raise Exception("Illegal character '%s'" % t.value[0])
182 181
class RegexParser:
  """ply grammar that parses a regular-expression string into a Term tree.

  Productions build Term nodes (LITERAL, CLASS, NOT_CLASS, RANGE, REPEAT,
  modifier terms, ...) and delegate concatenation/alternation to NfaBuilder
  so adjacent terms are combined consistently with the NFA construction.
  Use the static parse() entry point; the p_* methods are ply callbacks.
  """

  # ply requires the parser to expose the lexer's token names.
  tokens = RegexLexer.tokens

  # Maps single-character modifier/any tokens to Term tags.
  token_map = {
    '+': 'ONE_OR_MORE',
    '?': 'ZERO_OR_ONE',
    '*': 'ZERO_OR_MORE',
    '.': 'ANY',
  }

  def p_start(self, p):
    '''start : fragments OR fragments
             | fragments'''
    if len(p) == 2:
      p[0] = p[1]
    else:
      p[0] = NfaBuilder.or_terms([p[1], p[3]])

  def p_fragments(self, p):
    '''fragments : fragment
                 | fragment fragments'''
    if len(p) == 2:
      p[0] = p[1]
    else:
      p[0] = self.__cat(p[1], p[2])

  def p_fragment(self, p):
    '''fragment : literal maybe_modifier
                | class maybe_modifier
                | group maybe_modifier
                | any maybe_modifier
    '''
    # p[2] is None (no modifier), a 1-tuple like ('ONE_OR_MORE',), or a
    # 3-tuple ('REPEAT', min, max) -- see p_maybe_modifier.
    if p[2]:
      if p[2][0] == 'REPEAT':
        p[0] = Term(p[2][0], p[2][1], p[2][2], p[1])
      else:
        p[0] = Term(p[2][0], p[1])
    else:
      p[0] = p[1]

  def p_maybe_modifier(self, p):
    '''maybe_modifier : ONE_OR_MORE
                      | ZERO_OR_ONE
                      | ZERO_OR_MORE
                      | REPEAT_BEGIN NUMBER REPEAT_END
                      | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END
                      | empty'''
    # len(p) includes the left-hand side, so {n} matches with len(p) == 4
    # and {n,m} with len(p) == 6. (A check for len(p) == 5 would be dead
    # code and would send {n,m} into the token_map lookup below, raising
    # KeyError.)
    if len(p) == 4:
      p[0] = ("REPEAT", p[2], p[2])
    elif len(p) == 6:
      p[0] = ("REPEAT", p[2], p[4])
    elif p[1]:
      p[0] = (self.token_map[p[1]],)
    else:
      # 'empty' production: no modifier present.
      p[0] = None

  def p_literal(self, p):
    '''literal : LITERAL'''
    p[0] = Term('LITERAL', p[1])

  def p_any(self, p):
    '''any : ANY'''
    p[0] = Term(self.token_map[p[1]])

  def p_class(self, p):
    '''class : CLASS_BEGIN class_content CLASS_END
             | CLASS_BEGIN NOT class_content CLASS_END'''
    if len(p) == 4:
      p[0] = Term("CLASS", p[2])
    else:
      p[0] = Term("NOT_CLASS", p[3])

  def p_group(self, p):
    '''group : GROUP_BEGIN start GROUP_END'''
    # Parentheses only group; they introduce no node of their own.
    p[0] = p[2]

  def p_class_content(self, p):
    '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content
                     | CLASS_LITERAL maybe_class_content
                     | CHARACTER_CLASS maybe_class_content
    '''
    if len(p) == 5:
      left = Term("RANGE", p[1], p[3])
    else:
      if len(p[1]) == 1:
        left = Term('LITERAL', p[1])
      else:
        # CHARACTER_CLASS tokens look like ':name:'; strip the colons.
        left = Term('CHARACTER_CLASS', p[1][1:-1])
    # The trailing maybe_class_content is always the last symbol.
    p[0] = self.__cat(left, p[len(p)-1])

  def p_maybe_class_content(self, p):
    '''maybe_class_content : class_content
                           | empty'''
    p[0] = p[1]

  def p_empty(self, p):
    'empty :'

  def p_error(self, p):
    raise Exception("Syntax error in input '%s'" % str(p))

  @staticmethod
  def __cat(left, right):
    # Concatenate two terms; right is None at the end of a sequence.
    assert left
    return NfaBuilder.cat_terms([left] if not right else [left, right])

  @staticmethod
  def parse(string):
    """Parses |string| and returns the resulting Term tree.

    Parser/lexer instances are cached per name by ParserBuilder.
    """
    new_lexer = lambda: RegexLexer()
    new_parser = lambda: RegexParser()
    return ParserBuilder.parse(string, "RegexParser", new_lexer, new_parser)
OLDNEW
« no previous file with comments | « tools/lexer_generator/nfa_builder.py ('k') | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698