Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(12)

Side by Side Diff: tools/lexer_generator/nfa.py

Issue 51043003: Experimental Parser: move key functions to TransitionKey class (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 from types import TupleType 28 from types import TupleType
29 from transition_keys import TransitionKey
29 from inspect import getmembers 30 from inspect import getmembers
30 31
31
32 class NfaState: 32 class NfaState:
33 33
34 __epsilon_key = "epsilon"
35 __any_key = "any"
36
37 @staticmethod
38 def epsilon_key():
39 return NfaState.__epsilon_key
40
41 @staticmethod
42 def any_key():
43 return NfaState.__any_key
44
45 def __init__(self, node_number): 34 def __init__(self, node_number):
46 self.__transitions = {} 35 self.__transitions = {}
47 self.__unclosed = set() 36 self.__unclosed = set()
48 self.__node_number = node_number 37 self.__node_number = node_number
49 self.__epsilon_closure = None 38 self.__epsilon_closure = None
50 39
51 def node_number(self): 40 def node_number(self):
52 return self.__node_number 41 return self.__node_number
53 42
54 def epsilon_closure(self): 43 def epsilon_closure(self):
55 return self.__epsilon_closure 44 return self.__epsilon_closure
56 45
57 def set_epsilon_closure(self, closure): 46 def set_epsilon_closure(self, closure):
58 assert self.is_closed() 47 assert self.is_closed()
59 assert self.__epsilon_closure == None 48 assert self.__epsilon_closure == None
60 self.__epsilon_closure = frozenset(closure) 49 self.__epsilon_closure = frozenset(closure)
61 50
62 def transitions(self): 51 def transitions(self):
63 assert self.is_closed() 52 assert self.is_closed()
64 return self.__transitions 53 return self.__transitions
65 54
66 def get_transition_set(self, key): 55 def get_epsilon_transitions(self):
56 key = TransitionKey.epsilon();
67 if not self.__transitions.has_key(key): return frozenset() 57 if not self.__transitions.has_key(key): return frozenset()
68 return frozenset(self.__transitions[key]) 58 return frozenset(self.__transitions[key])
69 59
70 def __add_transition(self, key, value): 60 def __add_transition(self, key, value):
71 if value == None: 61 if value == None:
72 assert not self.is_closed(), "already closed" 62 assert not self.is_closed(), "already closed"
73 self.__unclosed.add(key) 63 self.__unclosed.add(key)
74 return 64 return
75 if not self.__transitions.has_key(key): 65 if not self.__transitions.has_key(key):
76 self.__transitions[key] = set() 66 self.__transitions[key] = set()
77 self.__transitions[key].add(value) 67 self.__transitions[key].add(value)
78 68
79 def add_character_transition(self, char): 69 def add_unclosed_transition(self, key):
80 self.__add_transition(char, None) 70 assert key != TransitionKey.epsilon()
81 71 self.__add_transition(key, None)
82 def add_any_transition(self):
83 self.__add_transition(self.__any_key, None)
84
85 def add_class_transition(self, class_data, inverted):
86 self.__add_transition('class_data', None)
87 72
88 def add_epsilon_transition(self, state): 73 def add_epsilon_transition(self, state):
89 assert state != None 74 assert state != None
90 self.__add_transition(self.__epsilon_key, state) 75 self.__add_transition(TransitionKey.epsilon(), state)
91 76
92 def is_closed(self): 77 def is_closed(self):
93 return self.__unclosed == None 78 return self.__unclosed == None
94 79
95 def close(self, end): 80 def close(self, end):
96 assert not self.is_closed() 81 assert not self.is_closed()
97 if end == None: 82 if end == None:
98 assert not self.__unclosed 83 assert not self.__unclosed
99 else: 84 else:
100 for key in self.__unclosed: 85 for key in self.__unclosed:
101 self.__add_transition(key, end) 86 self.__add_transition(key, end)
102 if not self.__unclosed: 87 if not self.__unclosed:
103 self.add_epsilon_transition(end) 88 self.add_epsilon_transition(end)
104 self.__unclosed = None 89 self.__unclosed = None
105 90
106 def matches(self, char): 91 def __matches(self, match_func, value):
107 matches = self.get_transition_set(char) 92 matches = set()
108 matches = matches.union(self.get_transition_set(self.__any_key)) 93 for key, values in self.__transitions.items():
109 # TODO class matches 94 if match_func(key, value):
95 matches |= values
110 return matches 96 return matches
111 97
98 def char_matches(self, value):
99 return self.__matches((lambda k, v : k.matches_char(v)), value)
100
101 def key_matches(self, value):
102 return self.__matches((lambda k, v : k.matches_key(v)), value)
103
112 @staticmethod 104 @staticmethod
113 def gather_transition_keys(state_set): 105 def gather_transition_keys(state_set):
114 keys = set() 106 keys = set()
115 for state in state_set: 107 for state in state_set:
116 for key, values in state.transitions().items(): 108 keys |= set(state.__transitions.keys())
117 if key == NfaState.__epsilon_key: continue 109 return TransitionKey.merge_key_set(keys)
118 keys.add(key)
119 return keys
120 110
121 class NfaBuilder: 111 class NfaBuilder:
122 112
123 def __init__(self): 113 def __init__(self):
124 self.__operation_map = {} 114 self.__operation_map = {}
125 self.__members = getmembers(self) 115 self.__members = getmembers(self)
126 116
127 def __new_state(self): 117 def __new_state(self):
128 node_number = self.node_number 118 node_number = self.node_number
129 self.node_number = self.node_number + 1 119 self.node_number = self.node_number + 1
(...skipping 26 matching lines...) Expand all
156 (node, ends) = self.__process(graph[1]) 146 (node, ends) = self.__process(graph[1])
157 start = self.__new_state() 147 start = self.__new_state()
158 start.add_epsilon_transition(node) 148 start.add_epsilon_transition(node)
159 return (start, ends + [start]) 149 return (start, ends + [start])
160 150
161 def __cat(self, graph): 151 def __cat(self, graph):
162 (left, right) = (self.__process(graph[1]), self.__process(graph[2])) 152 (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
163 self.__patch_ends(left[1], right[0]) 153 self.__patch_ends(left[1], right[0])
164 return (left[0], right[1]) 154 return (left[0], right[1])
165 155
166 def __literal(self, graph): 156 def __key_state(self, key):
167 contents = graph[1]
168 state = self.__new_state() 157 state = self.__new_state()
169 state.add_character_transition(contents) 158 state.add_unclosed_transition(key)
170 return (state, [state]) 159 return (state, [state])
171 160
161 def __literal(self, graph):
162 return self.__key_state(TransitionKey.single_char(graph[1]))
163
172 def __class(self, graph): 164 def __class(self, graph):
173 state = self.__new_state() 165 return self.__key_state(TransitionKey.character_class(False, graph[1]))
174 state.add_class_transition(graph[1], False)
175 return (state, [state])
176 166
177 def __not_class(self, graph): 167 def __not_class(self, graph):
178 state = self.__new_state() 168 return self.__key_state(TransitionKey.character_class(True, graph[1]))
179 state.add_class_transition(graph[1], True)
180 return (state, [state])
181 169
182 def __any(self, graph): 170 def __any(self, graph):
183 state = self.__new_state() 171 return self.__key_state(TransitionKey.any())
184 state.add_any_transition()
185 return (state, [state])
186 172
187 def __process(self, graph): 173 def __process(self, graph):
188 assert type(graph) == TupleType 174 assert type(graph) == TupleType
189 method = "_NfaBuilder__" + graph[0].lower() 175 method = "_NfaBuilder__" + graph[0].lower()
190 if (not self.__operation_map.has_key(method)): 176 if (not self.__operation_map.has_key(method)):
191 matches = [func for (name, func) in self.__members if name == method] 177 matches = [func for (name, func) in self.__members if name == method]
192 assert len(matches) == 1 178 assert len(matches) == 1
193 self.__operation_map[method] = matches[0] 179 self.__operation_map[method] = matches[0]
194 return self.__operation_map[method](graph) 180 return self.__operation_map[method](graph)
195 181
196 def __patch_ends(self, ends, new_end): 182 def __patch_ends(self, ends, new_end):
197 for end in ends: 183 for end in ends:
198 end.close(new_end) 184 end.close(new_end)
199 185
200 def nfa(self, graph): 186 def nfa(self, graph):
201 self.node_number = 0 187 self.node_number = 0
202 (start, ends) = self.__process(graph) 188 (start, ends) = self.__process(graph)
203 end = self.__new_state() 189 end = self.__new_state()
204 self.__patch_ends(ends, end) 190 self.__patch_ends(ends, end)
205 end.close(None) 191 end.close(None)
206 return Nfa(start, end, self.node_number) 192 return Nfa(start, end, self.node_number)
207 193
208
209 class Nfa: 194 class Nfa:
210 195
211 def __init__(self, start, end, nodes_created): 196 def __init__(self, start, end, nodes_created):
212 self.__start = start 197 self.__start = start
213 self.__end = end 198 self.__end = end
214 self.__epsilon_closure_computed = False 199 self.__epsilon_closure_computed = False
215 self.__verify(nodes_created) 200 self.__verify(nodes_created)
216 201
217 @staticmethod 202 @staticmethod
218 def __visit_edges(start, include_start, just_epsilon, function, state): 203 def __visit_edges(edge, compute_next_edge, visitor, state):
219
220 def compute_next_edge(node, next_edge):
221 if just_epsilon:
222 return next_edge.union(node.get_transition_set(node.epsilon_key()))
223 else:
224 for key, values in node.transitions().items():
225 next_edge = next_edge.union(values)
226 return next_edge
227
228 edge = set()
229 if include_start:
230 edge.add(start)
231 else:
232 edge = compute_next_edge(start, edge)
233
234 visited = set() 204 visited = set()
235 while edge: 205 while edge:
236 next_edge = set() 206 next_edge = set()
237 for node in edge: 207 for node in edge:
238 next_edge = compute_next_edge(node, next_edge) 208 next_edge |= compute_next_edge(node)
239 state = function(node, state) 209 state = visitor(node, state)
240 visited = visited.union(edge) 210 visited |= edge
241 edge = next_edge.difference(visited) 211 edge = next_edge - visited
242 return state 212 return state
243 213
244 def __visit_all_edges(self, function, state): 214 def __visit_all_edges(self, function, state):
245 return Nfa.__visit_edges(self.__start, True, False, function, state) 215 edge = set([self.__start])
216 def next_edge(node):
217 next = set()
218 for values in node.transitions().values():
219 next |= values
220 return next
221 return Nfa.__visit_edges(edge, next_edge, function, state)
246 222
247 def __verify(self, nodes_created): 223 def __verify(self, nodes_created):
248 def f(node, node_list): 224 def f(node, node_list):
249 assert node.is_closed() 225 assert node.is_closed()
250 node_list.append(node) 226 node_list.append(node)
251 return node_list 227 return node_list
252 node_list = self.__visit_all_edges(f, []) 228 node_list = self.__visit_all_edges(f, [])
253 assert len(node_list) == nodes_created 229 assert len(node_list) == nodes_created
254 230
255 def __compute_epsilon_closures(self): 231 def __compute_epsilon_closures(self):
256 if self.__epsilon_closure_computed: 232 if self.__epsilon_closure_computed:
257 return 233 return
258 self.__epsilon_closure_computed = True 234 self.__epsilon_closure_computed = True
259 def outer(node, state): 235 def outer(node, state):
260 def inner(node, closure): 236 def inner(node, closure):
261 closure.add(node) 237 closure.add(node)
262 return closure 238 return closure
263 closure = Nfa.__visit_edges(node, False, True, inner, set()) 239 next_edge = lambda node : node.get_epsilon_transitions()
240 edge = next_edge(node)
241 closure = Nfa.__visit_edges(edge, next_edge, inner, set())
264 node.set_epsilon_closure(closure) 242 node.set_epsilon_closure(closure)
265 self.__visit_all_edges(outer, None) 243 self.__visit_all_edges(outer, None)
266 244
267 @staticmethod 245 @staticmethod
268 def __close(states): 246 def __close(states):
247 closure = set(states)
269 for node in states: 248 for node in states:
270 states = states.union(node.epsilon_closure()) 249 closure |= node.epsilon_closure()
271 return states 250 return closure
272 251
273 def matches(self, string): 252 def matches(self, string):
274 self.__compute_epsilon_closures() 253 self.__compute_epsilon_closures()
275 valid_states = Nfa.__close(set([self.__start])) 254 valid_states = Nfa.__close(set([self.__start]))
276 for c in string: 255 for c in string:
277 next_valid_states = set() 256 next_valid_states = set()
278 for valid_state in valid_states: 257 for valid_state in valid_states:
279 next_valid_states = next_valid_states.union(valid_state.matches(c)) 258 next_valid_states |= valid_state.char_matches(c)
280 valid_states = Nfa.__close(next_valid_states) 259 valid_states = Nfa.__close(next_valid_states)
281 if not valid_states: 260 if not valid_states:
282 return False 261 return False
283 return self.__end in valid_states 262 return self.__end in valid_states
284 263
285 @staticmethod 264 @staticmethod
286 def __to_dfa(nfa_state_set, dfa_builder): 265 def __to_dfa(nfa_state_set, dfa_builder):
287 nfa_state_set = Nfa.__close(nfa_state_set) 266 nfa_state_set = Nfa.__close(nfa_state_set)
288 name = str([x.node_number() for x in nfa_state_set]) 267 name = str([x.node_number() for x in nfa_state_set])
289 (dfa_nodes, end_nodes, end_node) = dfa_builder 268 (dfa_nodes, end_nodes, end_node) = dfa_builder
290 if dfa_nodes.has_key(name): 269 if dfa_nodes.has_key(name):
291 return name 270 return name
292 dfa_node = {} 271 dfa_node = {}
293 dfa_nodes[name] = dfa_node 272 dfa_nodes[name] = dfa_node
294 for key in NfaState.gather_transition_keys(nfa_state_set): 273 for key in NfaState.gather_transition_keys(nfa_state_set):
295 reachable_set = set() 274 reachable_set = set()
296 for nfa_state in nfa_state_set: 275 for nfa_state in nfa_state_set:
297 reachable_set = reachable_set | nfa_state.matches(key) 276 reachable_set |= nfa_state.key_matches(key)
298 dfa_node[key] = Nfa.__to_dfa(reachable_set, dfa_builder) 277 dfa_node[key] = Nfa.__to_dfa(reachable_set, dfa_builder)
299 if end_node in nfa_state_set: 278 if end_node in nfa_state_set:
300 end_nodes.add(name) 279 end_nodes.add(name)
301 return name 280 return name
302 281
303 def compute_dfa(self): 282 def compute_dfa(self):
304 self.__compute_epsilon_closures() 283 self.__compute_epsilon_closures()
305 dfa_nodes = {} 284 dfa_nodes = {}
306 end_nodes = set() 285 end_nodes = set()
307 dfa_builder = (dfa_nodes, end_nodes, self.__end) 286 dfa_builder = (dfa_nodes, end_nodes, self.__end)
308 start_name = self.__to_dfa(set([self.__start]), dfa_builder) 287 start_name = self.__to_dfa(set([self.__start]), dfa_builder)
309 return (start_name, dfa_nodes, end_nodes) 288 return (start_name, dfa_nodes, end_nodes)
310 289
311 def to_dot(self): 290 def to_dot(self):
312 291
313 def f(node, node_content): 292 def f(node, node_content):
314 for key, values in node.transitions().items(): 293 for key, values in node.transitions().items():
315 if key == node.epsilon_key(): key = "ε" 294 if key == TransitionKey.epsilon():
316 if key != node.epsilon_key() and len(key) == 1: 295 key = "ε"
317 key = "'%s'" % key
318 for value in values: 296 for value in values:
319 node_content.append( 297 node_content.append(
320 " S_%d -> S_%d [ label = \"%s\" ];" % 298 " S_%d -> S_%d [ label = \"%s\" ];" %
321 (node.node_number(), value.node_number(), key)) 299 (node.node_number(), value.node_number(), key))
322 return node_content 300 return node_content
323 301
324 node_content = self.__visit_all_edges(f, []) 302 node_content = self.__visit_all_edges(f, [])
325 303
326 return ''' 304 return '''
327 digraph finite_state_machine { 305 digraph finite_state_machine {
328 rankdir=LR; 306 rankdir=LR;
329 node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s 307 node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s
330 node [shape = doublecircle, style=unfilled]; S_%s 308 node [shape = doublecircle, style=unfilled]; S_%s
331 node [shape = circle]; 309 node [shape = circle];
332 %s 310 %s
333 } 311 }
334 ''' % (self.__start.node_number(), self.__end.node_number(), "\n".join(node_ content)) 312 ''' % (self.__start.node_number(),
335 313 self.__end.node_number(),
314 "\n".join(node_content))
OLDNEW
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698