Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(860)

Side by Side Diff: tools/lexer_generator/rule_parser.py

Issue 137883006: Experimental parser: use Terms instead of tuples (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/regex_parser.py ('k') | tools/lexer_generator/term_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 import ply.yacc as yacc 28 import ply.yacc as yacc
29 from automaton import Term, Action 29 from action import Term, Action
30 from rule_lexer import RuleLexer 30 from rule_lexer import RuleLexer
31 from regex_parser import RegexParser 31 from regex_parser import RegexParser
32 from nfa_builder import NfaBuilder 32 from nfa_builder import NfaBuilder
33 from dfa import Dfa 33 from dfa import Dfa
34 from dfa_optimizer import DfaOptimizer 34 from dfa_optimizer import DfaOptimizer
35 from transition_keys import TransitionKey, KeyEncoding 35 from transition_keys import TransitionKey, KeyEncoding
36 36
37 class RuleParserState: 37 class RuleParserState:
38 38
39 def __init__(self, encoding): 39 def __init__(self, encoding):
(...skipping 20 matching lines...) Expand all
60 'statements : aliases rules' 60 'statements : aliases rules'
61 61
62 def p_aliases(self, p): 62 def p_aliases(self, p):
63 '''aliases : alias_rule aliases 63 '''aliases : alias_rule aliases
64 | empty''' 64 | empty'''
65 65
66 def p_alias_rule(self, p): 66 def p_alias_rule(self, p):
67 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON' 67 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
68 state = self.__state 68 state = self.__state
69 assert not p[1] in state.aliases 69 assert not p[1] in state.aliases
70 graph = p[3] 70 term = p[3]
71 state.aliases[p[1]] = graph 71 state.aliases[p[1]] = term
72 if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS': 72 if term.name() == 'CLASS' or term.name() == 'NOT_CLASS':
73 classes = state.character_classes 73 classes = state.character_classes
74 assert not p[1] in classes 74 assert not p[1] in classes, "cannot reassign alias"
75 encoding = state.encoding 75 encoding = state.encoding
76 classes[p[1]] = TransitionKey.character_class(encoding, graph, classes) 76 classes[p[1]] = TransitionKey.character_class(encoding, term, classes)
77 77
78 def p_rules(self, p): 78 def p_rules(self, p):
79 '''rules : state_change transition_rules rules 79 '''rules : state_change transition_rules rules
80 | empty''' 80 | empty'''
81 81
82 def p_state_change(self, p): 82 def p_state_change(self, p):
83 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE' 83 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE'
84 state = self.__state 84 state = self.__state
85 state.current_state = p[2] 85 state.current_state = p[2]
86 assert state.current_state 86 assert state.current_state
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
164 p[0] = tuple(([p[1]] + list(p[3]))) 164 p[0] = tuple(([p[1]] + list(p[3])))
165 else: 165 else:
166 raise Exception() 166 raise Exception()
167 167
168 def p_composite_regex(self, p): 168 def p_composite_regex(self, p):
169 '''composite_regex : regex_parts OR regex_parts 169 '''composite_regex : regex_parts OR regex_parts
170 | regex_parts''' 170 | regex_parts'''
171 if len(p) == 2: 171 if len(p) == 2:
172 p[0] = p[1] 172 p[0] = p[1]
173 else: 173 else:
174 p[0] = NfaBuilder.or_graphs([p[1], p[3]]) 174 p[0] = NfaBuilder.or_terms([p[1], p[3]])
175 175
176 def p_regex_parts(self, p): 176 def p_regex_parts(self, p):
177 '''regex_parts : regex_part 177 '''regex_parts : regex_part
178 | regex_part regex_parts''' 178 | regex_part regex_parts'''
179 p[0] = NfaBuilder.cat_graphs(p[1:]) 179 p[0] = NfaBuilder.cat_terms(p[1:])
180 180
181 def p_regex_part(self, p): 181 def p_regex_part(self, p):
182 '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier 182 '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier
183 | regex_string_literal modifier 183 | regex_string_literal modifier
184 | regex_class modifier 184 | regex_class modifier
185 | regex modifier 185 | regex modifier
186 | regex_alias modifier''' 186 | regex_alias modifier'''
187 modifier = p[len(p)-1] 187 modifier = p[len(p)-1]
188 graph = p[2] if len(p) == 5 else p[1] 188 term = p[2] if len(p) == 5 else p[1]
189 if modifier: 189 if modifier:
190 p[0] = NfaBuilder.apply_modifier(modifier, graph) 190 p[0] = NfaBuilder.apply_modifier(modifier, term)
191 else: 191 else:
192 p[0] = graph 192 p[0] = term
193 193
194 def p_regex_string_literal(self, p): 194 def p_regex_string_literal(self, p):
195 'regex_string_literal : STRING' 195 'regex_string_literal : STRING'
196 string = p[1][1:-1] 196 string = p[1][1:-1]
197 escape_char = lambda string, char: string.replace(char, "\\" + char) 197 escape_char = lambda string, char: string.replace(char, "\\" + char)
198 string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"") 198 string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"")
199 p[0] = RegexParser.parse(string) 199 p[0] = RegexParser.parse(string)
200
201 def p_regex(self, p): 200 def p_regex(self, p):
202 'regex : REGEX' 201 'regex : REGEX'
203 string = p[1][1:-1].replace("\\/", "/") 202 string = p[1][1:-1].replace("\\/", "/")
204 p[0] = RegexParser.parse(string) 203 p[0] = RegexParser.parse(string)
205 204
206 def p_regex_class(self, p): 205 def p_regex_class(self, p):
207 'regex_class : CHARACTER_CLASS_REGEX' 206 'regex_class : CHARACTER_CLASS_REGEX'
208 p[0] = RegexParser.parse(p[1]) 207 p[0] = RegexParser.parse(p[1])
209 208
210 def p_regex_alias(self, p): 209 def p_regex_alias(self, p):
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
244 RuleParser.__static_instance = None 243 RuleParser.__static_instance = None
245 raise 244 raise
246 parser.__state = None 245 parser.__state = None
247 assert parser_state.transitions <= set(parser_state.rules.keys()) 246 assert parser_state.transitions <= set(parser_state.rules.keys())
248 247
249 class RuleProcessor(object): 248 class RuleProcessor(object):
250 249
251 def __init__(self, parser_state): 250 def __init__(self, parser_state):
252 self.__automata = {} 251 self.__automata = {}
253 self.__rule_trees = {} 252 self.__rule_trees = {}
253 self.__default_action = None
254 self.__process_parser_state(parser_state) 254 self.__process_parser_state(parser_state)
255 255
256 @staticmethod 256 @staticmethod
257 def parse(string, encoding_name): 257 def parse(string, encoding_name):
258 parser_state = RuleParserState(KeyEncoding.get(encoding_name)) 258 parser_state = RuleParserState(KeyEncoding.get(encoding_name))
259 RuleParser.parse(string, parser_state) 259 RuleParser.parse(string, parser_state)
260 return RuleProcessor(parser_state) 260 return RuleProcessor(parser_state)
261 261
262 def automata_iter(self): 262 def automata_iter(self):
263 return iter(self.__automata.items()) 263 return iter(self.__automata.items())
264 264
265 def default_automata(self): 265 def default_automata(self):
266 return self.__automata['default'] 266 return self.__automata['default']
267 267
268 def rule_tree_dots(self): 268 def default_action(self):
269 result = {} 269 return self.__default_action
270 for rule in self.__rule_trees:
271 result[rule] = NfaBuilder.rule_tree_dot(self.__rule_trees[rule])
272 return result
273 270
274 class Automata(object): 271 class Automata(object):
275 272
276 def __init__(self, builder, graph): 273 def __init__(self, encoding, character_classes, rule_term):
277 self.__builder = builder 274 self.__encoding = encoding
278 self.__graph = graph 275 self.__character_classes = character_classes
276 self.__rule_term = rule_term
279 self.__nfa = None 277 self.__nfa = None
280 self.__dfa = None 278 self.__dfa = None
281 self.__minimial_dfa = None 279 self.__minimial_dfa = None
282 280
281 def rule_term(self):
282 return self.__rule_term
283
283 def nfa(self): 284 def nfa(self):
284 if not self.__nfa: 285 if not self.__nfa:
285 self.__nfa = self.__builder.nfa(self.__graph) 286 self.__nfa = NfaBuilder.nfa(
287 self.__encoding, self.__character_classes, self.__rule_term)
286 return self.__nfa 288 return self.__nfa
287 289
288 def dfa(self): 290 def dfa(self):
289 if not self.__dfa: 291 if not self.__dfa:
290 (start, dfa_nodes) = self.nfa().compute_dfa() 292 (start, dfa_nodes) = self.nfa().compute_dfa()
291 self.__dfa = Dfa(self.nfa().encoding(), start, dfa_nodes) 293 self.__dfa = Dfa(self.nfa().encoding(), start, dfa_nodes)
292 return self.__dfa 294 return self.__dfa
293 295
294 def optimize_dfa(self, log = False): 296 def optimize_dfa(self, log = False):
295 assert not self.__dfa 297 assert not self.__dfa
296 self.__dfa = DfaOptimizer.optimize(self.dfa(), log) 298 self.__dfa = DfaOptimizer.optimize(self.dfa(), log)
297 299
298 def minimal_dfa(self): 300 def minimal_dfa(self):
299 if not self.__minimial_dfa: 301 if not self.__minimial_dfa:
300 self.__minimial_dfa = self.dfa().minimize() 302 self.__minimial_dfa = self.dfa().minimize()
301 return self.__minimial_dfa 303 return self.__minimial_dfa
302 304
303 def __process_parser_state(self, parser_state): 305 def __process_parser_state(self, parser_state):
304 rule_map = {} 306 rule_map = {}
305 builder = NfaBuilder(parser_state.encoding, parser_state.character_classes)
306 assert 'default' in parser_state.rules 307 assert 'default' in parser_state.rules
307 def process(subgraph, v): 308 def process(subgraph, v):
308 graphs = [] 309 graphs = []
309 for graph, precedence, action in v['regex']: 310 for graph, precedence, action in v['regex']:
310 (entry_action, match_action, transition) = action 311 (entry_action, match_action, transition) = action
311 if entry_action or match_action: 312 if entry_action or match_action:
312 action = Action(entry_action, match_action, precedence) 313 graph = NfaBuilder.add_action(
313 graph = NfaBuilder.add_action(graph, action) 314 graph, Action(entry_action, match_action, precedence))
314 if not transition: 315 if not transition:
315 pass 316 pass
316 elif transition == 'continue': 317 elif transition == 'continue':
317 assert not subgraph == 'default', 'unimplemented' 318 assert not subgraph == 'default', 'unimplemented'
318 graph = NfaBuilder.add_continue(graph) 319 graph = NfaBuilder.add_continue(graph)
319 else: 320 else:
320 assert subgraph == 'default', 'unimplemented' 321 assert subgraph == 'default', 'unimplemented'
321 graph = NfaBuilder.join_subgraph( 322 graph = NfaBuilder.join_subgraph(
322 graph, transition, rule_map[transition]) 323 graph, transition, rule_map[transition])
323 graphs.append(graph) 324 graphs.append(graph)
324 graph = NfaBuilder.or_graphs(graphs) 325 graph = NfaBuilder.or_terms(graphs)
325 rule_map[subgraph] = graph 326 rule_map[subgraph] = graph
326 # process first the subgraphs, then the default graph 327 # process first the subgraphs, then the default graph
327 for k, v in parser_state.rules.items(): 328 for k, v in parser_state.rules.items():
328 if k == 'default': continue 329 if k == 'default': continue
329 process(k, v) 330 process(k, v)
330 process('default', parser_state.rules['default']) 331 process('default', parser_state.rules['default'])
331 # build the automata 332 # build the automata
332 for rule_name, graph in rule_map.items(): 333 for rule_name, graph in rule_map.items():
333 self.__automata[rule_name] = RuleProcessor.Automata(builder, graph) 334 self.__automata[rule_name] = RuleProcessor.Automata(
335 parser_state.encoding, parser_state.character_classes, graph)
334 self.__rule_trees[rule_name] = graph 336 self.__rule_trees[rule_name] = graph
335 # process default_action 337 # process default_action
336 default_action = parser_state.rules['default']['default_action'] 338 default_action = parser_state.rules['default']['default_action']
337 self.default_action = Action(Term.empty_term(), default_action) 339 self.__default_action = Action(Term.empty_term(), default_action)
OLDNEW
« no previous file with comments | « tools/lexer_generator/regex_parser.py ('k') | tools/lexer_generator/term_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698