Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: tools/lexer_generator/nfa_builder.py

Issue 61893023: Experimental parser: split out NfaBuilder (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/nfa.py ('k') | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 from types import TupleType 28 from types import TupleType
29 from transition_keys import TransitionKey
30 from automaton import *
31 from inspect import getmembers 29 from inspect import getmembers
30 from nfa import *
32 31
33 class NfaState(AutomatonState): 32 class NfaBuilder(object):
34
35 def __init__(self, node_number):
36 super(NfaState, self).__init__(node_number)
37 self.__transitions = {}
38 self.__unclosed = set()
39 self.__epsilon_closure = None
40 self.__action = None
41
42 def epsilon_closure(self):
43 return self.__epsilon_closure
44
45 def set_epsilon_closure(self, closure):
46 assert self.is_closed()
47 assert self.__epsilon_closure == None
48 self.__epsilon_closure = frozenset(closure)
49
50 def action(self):
51 assert self.is_closed()
52 return self.__action
53
54 def set_action(self, action):
55 assert not self.is_closed()
56 assert not self.__action
57 self.__action = action
58
59 def transitions(self):
60 assert self.is_closed()
61 return self.__transitions
62
63 def next_states(self, key_filter):
64 assert self.is_closed()
65 f = lambda acc, (k, v) : acc if not key_filter(k) else acc | set(v)
66 return reduce(f, self.__transitions.items(), set())
67
68 def __add_transition(self, key, next_state):
69 if next_state == None:
70 assert not self.is_closed(), "already closed"
71 self.__unclosed.add(key)
72 return
73 if not key in self.__transitions:
74 self.__transitions[key] = set()
75 self.__transitions[key].add(next_state)
76
77 def add_unclosed_transition(self, key):
78 assert key != TransitionKey.epsilon()
79 self.__add_transition(key, None)
80
81 def add_epsilon_transition(self, state):
82 assert state != None
83 self.__add_transition(TransitionKey.epsilon(), state)
84
85 def is_closed(self):
86 return self.__unclosed == None
87
88 def close(self, end):
89 assert not self.is_closed()
90 unclosed, self.__unclosed = self.__unclosed, None
91 if end == None:
92 assert not unclosed
93 return
94 for key in unclosed:
95 self.__add_transition(key, end)
96 if not unclosed:
97 self.__add_transition(TransitionKey.epsilon(), end)
98
99 def __matches(self, match_func, value):
100 f = lambda acc, (k, vs): acc | vs if match_func(k, value) else acc
101 return reduce(f, self.__transitions.items(), set())
102
103 def char_matches(self, value):
104 return self.__matches(lambda k, v : k.matches_char(v), value)
105
106 def key_matches(self, value):
107 return self.__matches(lambda k, v : k.matches_key(v), value)
108
109 def replace_catch_all(self):
110 catch_all = TransitionKey.unique('catch_all')
111 if not catch_all in self.__transitions:
112 return
113 f = lambda acc, state: acc | state.__epsilon_closure
114 reachable_states = reduce(f, self.__transitions[catch_all], set())
115 f = lambda acc, state: acc | set(state.__transitions.keys())
116 keys = reduce(f, reachable_states, set())
117 keys.discard(TransitionKey.epsilon())
118 keys.discard(catch_all)
119 inverse_key = TransitionKey.inverse_key(keys)
120 if inverse_key:
121 self.__transitions[inverse_key] = self.__transitions[catch_all]
122 del self.__transitions[catch_all]
123
124 @staticmethod
125 def gather_transition_keys(state_set):
126 f = lambda acc, state: acc | set(state.__transitions.keys())
127 keys = reduce(f, state_set, set())
128 keys.discard(TransitionKey.epsilon())
129 return TransitionKey.disjoint_keys(keys)
130
131 class NfaBuilder:
132 33
133 def __init__(self): 34 def __init__(self):
134 self.__node_number = 0 35 self.__node_number = 0
135 self.__operation_map = {} 36 self.__operation_map = {}
136 self.__members = getmembers(self) 37 self.__members = getmembers(self)
137 self.__character_classes = {} 38 self.__character_classes = {}
138 self.__states = [] 39 self.__states = []
139 40
140 def set_character_classes(self, classes): 41 def set_character_classes(self, classes):
141 self.__character_classes = classes 42 self.__character_classes = classes
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
341 242
342 __modifer_map = { 243 __modifer_map = {
343 '+': 'ONE_OR_MORE', 244 '+': 'ONE_OR_MORE',
344 '?': 'ZERO_OR_ONE', 245 '?': 'ZERO_OR_ONE',
345 '*': 'ZERO_OR_MORE', 246 '*': 'ZERO_OR_MORE',
346 } 247 }
347 248
348 @staticmethod 249 @staticmethod
349 def apply_modifier(modifier, graph): 250 def apply_modifier(modifier, graph):
350 return (NfaBuilder.__modifer_map[modifier], graph) 251 return (NfaBuilder.__modifer_map[modifier], graph)
351
352 class Nfa(Automaton):
353
354 def __init__(self, start, end, nodes_created):
355 super(Nfa, self).__init__()
356 self.__start = start
357 self.__end = end
358 self.__epsilon_closure_computed = False
359 self.__verify(nodes_created)
360
361 def __visit_all_edges(self, visitor, state):
362 edge = set([self.__start])
363 next_edge = lambda node: node.next_states(lambda x : True)
364 return self.visit_edges(edge, next_edge, visitor, state)
365
366 def __verify(self, nodes_created):
367 def f(node, node_list):
368 assert node.is_closed()
369 node_list.append(node)
370 return node_list
371 node_list = self.__visit_all_edges(f, [])
372 assert len(node_list) == nodes_created
373
374 def __compute_epsilon_closures(self):
375 if self.__epsilon_closure_computed:
376 return
377 self.__epsilon_closure_computed = True
378 def outer(node, state):
379 def inner(node, closure):
380 closure.add(node)
381 return closure
382 is_epsilon = lambda k: k == TransitionKey.epsilon()
383 next_edge = lambda node : node.next_states(is_epsilon)
384 edge = next_edge(node)
385 closure = self.visit_edges(edge, next_edge, inner, set())
386 node.set_epsilon_closure(closure)
387 self.__visit_all_edges(outer, None)
388
389 @staticmethod
390 def __close(states):
391 f = lambda acc, node: acc | node.epsilon_closure()
392 return reduce(f, states, set(states))
393
394 def matches(self, string):
395 self.__compute_epsilon_closures()
396 valid_states = Nfa.__close(set([self.__start]))
397 for c in string:
398 f = lambda acc, state: acc | state.char_matches(c)
399 transitions = reduce(f, valid_states, set())
400 if not transitions:
401 return False
402 valid_states = Nfa.__close(transitions)
403 return self.__end in valid_states
404
405 @staticmethod
406 def __to_dfa(nfa_state_set, dfa_nodes, end_node):
407 nfa_state_set = Nfa.__close(nfa_state_set)
408 assert nfa_state_set
409 name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set))
410 if name in dfa_nodes:
411 return name
412 def gather_actions(states):
413 actions = set()
414 for state in states:
415 if state.action():
416 actions.add(state.action())
417 actions = list(actions)
418 actions.sort()
419 return actions
420 dfa_nodes[name] = {
421 'transitions': {},
422 'terminal': end_node in nfa_state_set,
423 'actions' : gather_actions(nfa_state_set)}
424 for key in NfaState.gather_transition_keys(nfa_state_set):
425 match_states = set()
426 f = lambda acc, state: acc | state.key_matches(key)
427 for state in reduce(f, nfa_state_set, set()):
428 match_states.add(state)
429 transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)
430 dfa_nodes[name]['transitions'][key] = transition_state
431 return name
432
433 def compute_dfa(self):
434 self.__compute_epsilon_closures()
435 self.__visit_all_edges(lambda node, state: node.replace_catch_all(), None)
436 dfa_nodes = {}
437 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)
438 return (start_name, dfa_nodes)
439
440 def to_dot(self):
441 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state)
442 state_iterator = lambda x : x
443 return self.generate_dot(self.__start, set([self.__end]), iterator, state_it erator)
OLDNEW
« no previous file with comments | « tools/lexer_generator/nfa.py ('k') | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698