Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(259)

Side by Side Diff: tools/lexer_generator/rule_parser.py

Issue 62103017: Experimental parser: rule grammar refactor (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | tools/lexer_generator/rule_parser_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
44 self.rules = {} 44 self.rules = {}
45 self.transitions = set() 45 self.transitions = set()
46 46
47 def parse(self, string): 47 def parse(self, string):
48 return RuleParser.parse(string, self) 48 return RuleParser.parse(string, self)
49 49
50 class RuleParser: 50 class RuleParser:
51 51
52 tokens = RuleLexer.tokens 52 tokens = RuleLexer.tokens
53 __rule_precedence_counter = 0 53 __rule_precedence_counter = 0
54 __keyword_transitions = set([ 54 __keyword_transitions = set(['continue'])
55 'continue', 'break', 'terminate', 'terminate_illegal', 'skip'])
56 55
57 def __init__(self): 56 def __init__(self):
58 self.__state = None 57 self.__state = None
59 58
60 def p_statements(self, p): 59 def p_statements(self, p):
61 'statements : aliases rules' 60 'statements : aliases rules'
62 61
63 def p_aliases(self, p): 62 def p_aliases(self, p):
64 '''aliases : alias_rule aliases 63 '''aliases : alias_rule aliases
65 | empty''' 64 | empty'''
66 65
67 def p_alias_rule(self, p): 66 def p_alias_rule(self, p):
68 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON' 67 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
69 state = self.__state 68 state = self.__state
70 assert not p[1] in state.aliases 69 assert not p[1] in state.aliases
71 graph = p[3] 70 graph = p[3]
72 state.aliases[p[1]] = graph 71 state.aliases[p[1]] = graph
73 if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS': 72 if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':
74 classes = state.character_classes 73 classes = state.character_classes
75 assert not p[1] in classes 74 assert not p[1] in classes
76 classes[p[1]] = TransitionKey.character_class(graph, classes) 75 classes[p[1]] = TransitionKey.character_class(graph, classes)
77 76
78 def p_rules(self, p): 77 def p_rules(self, p):
79 '''rules : state_change transition_rules rules 78 '''rules : state_change transition_rules rules
80 | empty''' 79 | empty'''
81 80
82 def p_state_change(self, p): 81 def p_state_change(self, p):
83 '''state_change : LESS_THAN IDENTIFIER GREATER_THAN 82 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE'
84 | LESS_THAN DEFAULT GREATER_THAN'''
85 state = self.__state 83 state = self.__state
86 state.current_state = p[2] 84 state.current_state = p[2]
87 assert state.current_state 85 assert state.current_state
88 if not state.current_state in state.rules: 86 if not state.current_state in state.rules:
89 state.rules[state.current_state] = { 87 state.rules[state.current_state] = {
90 'default_action': None, 88 'default_action': None,
91 'catch_all' : None, 89 'catch_all' : None,
92 'regex' : [] 90 'regex' : []
93 } 91 }
94 p[0] = state.current_state 92 p[0] = state.current_state
95 93
96 def p_transition_rules(self, p): 94 def p_transition_rules(self, p):
97 '''transition_rules : transition_rule transition_rules 95 '''transition_rules : transition_rule transition_rules
98 | empty''' 96 | empty'''
99 97
100 def p_transition_rule(self, p): 98 def p_transition_rule(self, p):
101 '''transition_rule : composite_regex code_or_token action 99 '''transition_rule : composite_regex action
102 | composite_regex empty action 100 | DEFAULT_ACTION default_action
103 | composite_regex code_or_token empty 101 | CATCH_ALL action'''
104 | DEFAULT_ACTION code_or_token empty 102 precedence = RuleParser.__rule_precedence_counter
105 | CATCH_ALL empty action''' 103 RuleParser.__rule_precedence_counter += 1
106 transition = p[3] 104 action = p[2]
105 (entry_action, match_action, transition) = action
107 if transition and not transition in self.__keyword_transitions: 106 if transition and not transition in self.__keyword_transitions:
108 assert not transition == 'default' 107 assert not transition == 'default', "can't append default graph"
109 self.__state.transitions.add(transition) 108 self.__state.transitions.add(transition)
110 RuleParser.__rule_precedence_counter += 1
111 rules = self.__state.rules[self.__state.current_state] 109 rules = self.__state.rules[self.__state.current_state]
112 code = p[2]
113 if p[1] == 'default_action': 110 if p[1] == 'default_action':
114 assert self.__state.current_state == 'default' 111 assert self.__state.current_state == 'default'
115 assert not rules['default_action'] 112 assert not rules['default_action']
116 rules['default_action'] = code 113 rules['default_action'] = action
117 elif p[1] == 'catch_all': 114 elif p[1] == 'catch_all':
118 assert not rules['catch_all'] 115 assert not rules['catch_all']
119 rules['catch_all'] = transition 116 rules['catch_all'] = (precedence, action)
120 else: 117 else:
121 rule = (p[1], RuleParser.__rule_precedence_counter, code, transition) 118 regex = p[1]
122 rules['regex'].append(rule) 119 rules['regex'].append((regex, precedence, action))
123 120
124 def p_code_or_token(self, p): 121 def p_action(self, p):
125 '''code_or_token : code 122 '''action : ACTION_OPEN maybe_action_part OR maybe_action_part OR maybe_tran sition ACTION_CLOSE'''
126 | push_token''' 123 p[0] = (p[2], p[4], p[6])
124
125 def p_default_action(self, p):
126 'default_action : ACTION_OPEN action_part ACTION_CLOSE'
127 p[0] = (None, p[2], None)
128
129 def p_maybe_action_part(self, p):
130 '''maybe_action_part : action_part
131 | empty'''
127 p[0] = p[1] 132 p[0] = p[1]
128 133
129 def p_push_token(self, p): 134 def p_action_part(self, p):
130 'push_token : PUSH_TOKEN LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHESIS' 135 '''action_part : code
131 p[0] = (p[1], p[3]) 136 | identifier_action'''
137 p[0] = p[1]
132 138
133 def p_action(self, p): 139 def p_maybe_transition(self, p):
134 'action : ACTION_OPEN IDENTIFIER ACTION_CLOSE' 140 '''maybe_transition : IDENTIFIER
135 p[0] = p[2] 141 | empty'''
142 p[0] = p[1]
143
144 def p_identifier_action(self, p):
145 '''identifier_action : IDENTIFIER
146 | IDENTIFIER LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHE SIS'''
147 assert p[1] != 'code'
148 if len(p) == 2:
149 p[0] = (p[1], None)
150 elif len(p) == 5:
151 p[0] = (p[1], p[2])
152 else:
153 raise Exception()
136 154
137 def p_composite_regex(self, p): 155 def p_composite_regex(self, p):
138 '''composite_regex : regex_parts OR regex_parts 156 '''composite_regex : regex_parts OR regex_parts
139 | regex_parts''' 157 | regex_parts'''
140 if len(p) == 2: 158 if len(p) == 2:
141 p[0] = p[1] 159 p[0] = p[1]
142 else: 160 else:
143 p[0] = NfaBuilder.or_graphs([p[1], p[3]]) 161 p[0] = NfaBuilder.or_graphs([p[1], p[3]])
144 162
145 def p_regex_parts(self, p): 163 def p_regex_parts(self, p):
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after
267 def minimal_dfa(self): 285 def minimal_dfa(self):
268 if not self.__minimial_dfa: 286 if not self.__minimial_dfa:
269 self.__minimial_dfa = self.dfa().minimize() 287 self.__minimial_dfa = self.dfa().minimize()
270 return self.__minimial_dfa 288 return self.__minimial_dfa
271 289
272 def __process_parser_state(self, parser_state): 290 def __process_parser_state(self, parser_state):
273 rule_map = {} 291 rule_map = {}
274 builder = NfaBuilder() 292 builder = NfaBuilder()
275 builder.set_character_classes(parser_state.character_classes) 293 builder.set_character_classes(parser_state.character_classes)
276 assert 'default' in parser_state.rules 294 assert 'default' in parser_state.rules
277 def process(k, v): 295 def process(subgraph, v):
278 graphs = [] 296 graphs = []
279 continues = 0 297 continues = 0
280 for (graph, precedence, code, transition) in v['regex']: 298 for graph, precedence, action in v['regex']:
281 default_code = v['default_action'] 299 (entry_action, match_action, transition) = action
282 if code or default_code: 300 if entry_action or match_action:
283 (code_type, code_value) = code if code else default_code 301 action = Action(entry_action, match_action, precedence)
284 action = Action(code_type, code_value, precedence)
285 graph = NfaBuilder.add_action(graph, action) 302 graph = NfaBuilder.add_action(graph, action)
286 if not transition or transition == 'break': 303 if not transition:
287 pass 304 pass
288 elif transition == 'continue': 305 elif transition == 'continue':
289 assert not k == 'default' 306 assert not subgraph == 'default'
290 continues += 1 307 continues += 1
291 graph = NfaBuilder.add_continue(graph) 308 graph = NfaBuilder.add_continue(graph)
292 elif (transition == 'terminate' or
293 transition == 'terminate_illegal' or
294 transition == 'skip'):
295 assert not code
296 graph = NfaBuilder.add_action(graph, Action(transition, None, -1))
297 else: 309 else:
298 assert k == 'default' 310 assert subgraph == 'default'
299 subgraph_modifier = '*' if code else None 311 subgraph_modifier = None
300 graph = NfaBuilder.join_subgraph( 312 graph = NfaBuilder.join_subgraph(
301 graph, transition, rule_map[transition], subgraph_modifier) 313 graph, transition, rule_map[transition], subgraph_modifier)
302 graphs.append(graph) 314 graphs.append(graph)
303 if continues == len(graphs): 315 if continues == len(graphs):
304 graphs.append(NfaBuilder.epsilon()) 316 graphs.append(NfaBuilder.epsilon())
305 if v['catch_all']: 317 if v['catch_all']:
306 assert v['catch_all'] == 'continue' 318 (precedence, catch_all) = v['catch_all']
319 assert catch_all == (None, None, 'continue'), "unimplemented"
307 graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all())) 320 graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all()))
308 graph = NfaBuilder.or_graphs(graphs) 321 graph = NfaBuilder.or_graphs(graphs)
309 rule_map[k] = graph 322 rule_map[k] = graph
310 # process first the subgraphs, then the default graph 323 # process first the subgraphs, then the default graph
311 for k, v in parser_state.rules.items(): 324 for k, v in parser_state.rules.items():
312 if k == 'default': continue 325 if k == 'default': continue
313 process(k, v) 326 process(k, v)
314 process('default', parser_state.rules['default']) 327 process('default', parser_state.rules['default'])
315 # build the automata 328 # build the automata
316 for rule_name, graph in rule_map.items(): 329 for rule_name, graph in rule_map.items():
317 self.__automata[rule_name] = RuleProcessor.Automata(builder, graph) 330 self.__automata[rule_name] = RuleProcessor.Automata(builder, graph)
318 331 # process default_action
319 default_action = parser_state.rules['default']['default_action'] 332 default_action = parser_state.rules['default']['default_action']
320 self.default_action = Action(default_action[0], default_action[1]) if defaul t_action else None 333 self.default_action = Action(None, default_action[1]) if default_action else None
OLDNEW
« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | tools/lexer_generator/rule_parser_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698