Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(738)

Side by Side Diff: tools/lexer_generator/rule_parser.py

Issue 149113010: Experimental parser: unify parser construction (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/regex_parser.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 10 matching lines...) Expand all
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 import ply.lex as lex 28 import ply.lex as lex
29 import ply.yacc as yacc 29 import ply.yacc as yacc
30 from action import Term, Action 30 from action import Term, Action
31 from regex_parser import RegexParser 31 from regex_parser import RegexParser, ParserBuilder
32 from nfa_builder import NfaBuilder 32 from nfa_builder import NfaBuilder
33 from dfa import Dfa 33 from dfa import Dfa
34 from dfa_optimizer import DfaOptimizer 34 from dfa_optimizer import DfaOptimizer
35 from transition_keys import TransitionKey, KeyEncoding 35 from transition_keys import TransitionKey, KeyEncoding
36 36
37 class RuleLexer: 37 class RuleLexer:
38 38
39 tokens = ( 39 tokens = (
40 'DEFAULT_ACTION', 40 'DEFAULT_ACTION',
41 'EPSILON', 41 'EPSILON',
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
89 t_EQUALS = '=' 89 t_EQUALS = '='
90 t_LEFT_PARENTHESIS = r'\(' 90 t_LEFT_PARENTHESIS = r'\('
91 t_RIGHT_PARENTHESIS = r'\)' 91 t_RIGHT_PARENTHESIS = r'\)'
92 t_GRAPH_OPEN = '<<' 92 t_GRAPH_OPEN = '<<'
93 t_GRAPH_CLOSE = '>>' 93 t_GRAPH_CLOSE = '>>'
94 t_SEMICOLON = ';' 94 t_SEMICOLON = ';'
95 t_ACTION_OPEN = '<' 95 t_ACTION_OPEN = '<'
96 t_ACTION_CLOSE = '>' 96 t_ACTION_CLOSE = '>'
97 t_COMMA = ',' 97 t_COMMA = ','
98 98
99 def t_LEFT_BRACKET(self, t):
100 r'{'
101 self.lexer.push_state('code')
102 self.nesting = 1
103 return t
104
105 def t_ANY_error(self, t): 99 def t_ANY_error(self, t):
106 raise Exception("Illegal character '%s'" % t.value[0]) 100 raise Exception("Illegal character '%s'" % t.value[0])
107 101
108 def build(self, **kwargs):
109 self.lexer = lex.lex(module=self, **kwargs)
110
111 class RuleParserState: 102 class RuleParserState:
112 103
113 def __init__(self, encoding): 104 def __init__(self, encoding):
114 self.aliases = {} 105 self.aliases = {}
115 self.character_classes = {} 106 self.character_classes = {}
116 self.current_state = None 107 self.current_state = None
117 self.rules = {} 108 self.rules = {}
118 self.transitions = set() 109 self.transitions = set()
119 self.encoding = encoding 110 self.encoding = encoding
120 111
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after
290 p[0] = NfaBuilder.apply_modifier(modifier, term) 281 p[0] = NfaBuilder.apply_modifier(modifier, term)
291 else: 282 else:
292 p[0] = term 283 p[0] = term
293 284
294 def p_regex_string_literal(self, p): 285 def p_regex_string_literal(self, p):
295 'regex_string_literal : STRING' 286 'regex_string_literal : STRING'
296 string = p[1][1:-1] 287 string = p[1][1:-1]
297 escape_char = lambda string, char: string.replace(char, "\\" + char) 288 escape_char = lambda string, char: string.replace(char, "\\" + char)
298 string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"") 289 string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"")
299 p[0] = RegexParser.parse(string) 290 p[0] = RegexParser.parse(string)
291
300 def p_regex(self, p): 292 def p_regex(self, p):
301 'regex : REGEX' 293 'regex : REGEX'
302 string = p[1][1:-1].replace("\\/", "/") 294 string = p[1][1:-1].replace("\\/", "/")
303 p[0] = RegexParser.parse(string) 295 p[0] = RegexParser.parse(string)
304 296
305 def p_regex_class(self, p): 297 def p_regex_class(self, p):
306 'regex_class : CHARACTER_CLASS_REGEX' 298 'regex_class : CHARACTER_CLASS_REGEX'
307 p[0] = RegexParser.parse(p[1]) 299 p[0] = RegexParser.parse(p[1])
308 300
309 def p_regex_alias(self, p): 301 def p_regex_alias(self, p):
310 'regex_alias : IDENTIFIER' 302 'regex_alias : IDENTIFIER'
311 p[0] = self.__state.aliases[p[1]] 303 p[0] = self.__state.aliases[p[1]]
312 304
313 def p_modifier(self, p): 305 def p_modifier(self, p):
314 '''modifier : PLUS 306 '''modifier : PLUS
315 | QUESTION_MARK 307 | QUESTION_MARK
316 | STAR 308 | STAR
317 | empty''' 309 | empty'''
318 p[0] = p[1] 310 p[0] = p[1]
319 311
320 def p_empty(self, p): 312 def p_empty(self, p):
321 'empty :' 313 'empty :'
322 314
323 def p_error(self, p): 315 def p_error(self, p):
324 raise Exception("Syntax error in input '%s'" % str(p)) 316 raise Exception("Syntax error in input '%s'" % str(p))
325 317
326 def build(self, **kwargs):
327 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)
328 self.lexer = RuleLexer()
329 self.lexer.build(**kwargs)
330
331 __static_instance = None
332 @staticmethod 318 @staticmethod
333 def parse(data, parser_state): 319 def parse(string, parser_state):
334 parser = RuleParser.__static_instance 320 new_lexer = lambda: RuleLexer()
335 if not parser: 321 new_parser = lambda: RuleParser()
336 parser = RuleParser() 322 def preparse(parser):
337 parser.build() 323 parser.__state = parser_state
338 RuleParser.__static_instance = parser 324 def postparse(parser):
339 parser.__state = parser_state 325 parser.__state = None
340 try: 326 return ParserBuilder.parse(
341 parser.parser.parse(data, lexer=parser.lexer.lexer) 327 string, "RuleParser", new_lexer, new_parser, preparse, postparse)
342 except Exception:
343 RuleParser.__static_instance = None
344 raise
345 parser.__state = None
346 328
347 class RuleProcessor(object): 329 class RuleProcessor(object):
348 330
349 def __init__(self, string, encoding_name): 331 def __init__(self, string, encoding_name):
350 self.__automata = {} 332 self.__automata = {}
351 self.__default_action = None 333 self.__default_action = None
352 self.__parser_state = RuleParserState(KeyEncoding.get(encoding_name)) 334 self.__parser_state = RuleParserState(KeyEncoding.get(encoding_name))
353 RuleParser.parse(string, self.__parser_state) 335 RuleParser.parse(string, self.__parser_state)
354 self.__process_parser_state() 336 self.__process_parser_state()
355 337
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
414 for tree_name, v in parser_state.rules.items(): 396 for tree_name, v in parser_state.rules.items():
415 assert v['trees'], "lexer state %s is empty" % tree_name 397 assert v['trees'], "lexer state %s is empty" % tree_name
416 rule_map[tree_name] = NfaBuilder.or_terms(v['trees']) 398 rule_map[tree_name] = NfaBuilder.or_terms(v['trees'])
417 # build the automata 399 # build the automata
418 for name, tree in rule_map.items(): 400 for name, tree in rule_map.items():
419 self.__automata[name] = RuleProcessor.Automata( 401 self.__automata[name] = RuleProcessor.Automata(
420 parser_state.encoding, parser_state.character_classes, rule_map, name) 402 parser_state.encoding, parser_state.character_classes, rule_map, name)
421 # process default_action 403 # process default_action
422 default_action = parser_state.rules['default']['default_action'] 404 default_action = parser_state.rules['default']['default_action']
423 self.__default_action = Action(Term.empty_term(), default_action) 405 self.__default_action = Action(Term.empty_term(), default_action)
OLD | NEW
« no previous file with comments | « tools/lexer_generator/regex_parser.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698