Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(166)

Side by Side Diff: tools/lexer_generator/nfa.py

Issue 50873003: Experimental Parser: add lexer generator (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/regex_lexer.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are
4 # met:
5 #
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission.
15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 from types import TupleType
29 from inspect import getmembers
30
31
32 class NfaState:
33
34 __epsilon_key = "epsilon"
35 __any_key = "any"
36
37 @staticmethod
38 def epsilon_key():
39 return NfaState.__epsilon_key
40
41 @staticmethod
42 def any_key():
43 return NfaState.__any_key
44
45 def __init__(self, node_number):
46 self.__transitions = {}
47 self.__unclosed = set()
48 self.__node_number = node_number
49 self.__epsilon_closure = None
50
51 def node_number(self):
52 return self.__node_number
53
54 def epsilon_closure(self):
55 return self.__epsilon_closure
56
57 def set_epsilon_closure(self, closure):
58 assert self.is_closed()
59 assert self.__epsilon_closure == None
60 self.__epsilon_closure = frozenset(closure)
61
62 def transitions(self):
63 assert self.is_closed()
64 return self.__transitions
65
66 def get_transition_set(self, key):
67 if not self.__transitions.has_key(key): return frozenset()
68 return frozenset(self.__transitions[key])
69
70 def __add_transition(self, key, value):
71 if value == None:
72 assert not self.is_closed(), "already closed"
73 self.__unclosed.add(key)
74 return
75 if not self.__transitions.has_key(key):
76 self.__transitions[key] = set()
77 self.__transitions[key].add(value)
78
79 def add_character_transition(self, char):
80 self.__add_transition(char, None)
81
82 def add_any_transition(self):
83 self.__add_transition(self.__any_key, None)
84
85 def add_class_transition(self, class_data, inverted):
86 self.__add_transition('class_data', None)
87
88 def add_epsilon_transition(self, state):
89 assert state != None
90 self.__add_transition(self.__epsilon_key, state)
91
92 def is_closed(self):
93 return self.__unclosed == None
94
95 def close(self, end):
96 assert not self.is_closed()
97 if end == None:
98 assert not self.__unclosed
99 else:
100 for key in self.__unclosed:
101 self.__add_transition(key, end)
102 if not self.__unclosed:
103 self.add_epsilon_transition(end)
104 self.__unclosed = None
105
106 def matches(self, char):
107 matches = self.get_transition_set(char)
108 matches = matches.union(self.get_transition_set(self.__any_key))
109 # TODO class matches
110 return matches
111
112 @staticmethod
113 def gather_transition_keys(state_set):
114 keys = set()
115 for state in state_set:
116 for key, values in state.transitions().items():
117 if key == NfaState.__epsilon_key: continue
118 keys.add(key)
119 return keys
120
121 class NfaBuilder:
122
123 def __init__(self):
124 self.__operation_map = {}
125 self.__members = getmembers(self)
126
127 def __new_state(self):
128 node_number = self.node_number
129 self.node_number = self.node_number + 1
130 return NfaState(node_number)
131
132 def __or(self, graph):
133 start = self.__new_state()
134 ends = []
135 for x in [self.__process(graph[1]), self.__process(graph[2])]:
136 start.add_epsilon_transition(x[0])
137 ends += x[1]
138 start.close(None)
139 return (start, ends)
140
141 def __one_or_more(self, graph):
142 (start, ends) = self.__process(graph[1])
143 end = self.__new_state()
144 end.add_epsilon_transition(start)
145 self.__patch_ends(ends, end)
146 return (start, [end])
147
148 def __zero_or_more(self, graph):
149 (node, ends) = self.__process(graph[1])
150 start = self.__new_state()
151 start.add_epsilon_transition(node)
152 self.__patch_ends(ends, start)
153 return (start, [start])
154
155 def __zero_or_one(self, graph):
156 (node, ends) = self.__process(graph[1])
157 start = self.__new_state()
158 start.add_epsilon_transition(node)
159 return (start, ends + [start])
160
161 def __cat(self, graph):
162 (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
163 self.__patch_ends(left[1], right[0])
164 return (left[0], right[1])
165
166 def __literal(self, graph):
167 contents = graph[1]
168 state = self.__new_state()
169 state.add_character_transition(contents)
170 return (state, [state])
171
172 def __class(self, graph):
173 state = self.__new_state()
174 state.add_class_transition(graph[1], False)
175 return (state, [state])
176
177 def __not_class(self, graph):
178 state = self.__new_state()
179 state.add_class_transition(graph[1], True)
180 return (state, [state])
181
182 def __any(self, graph):
183 state = self.__new_state()
184 state.add_any_transition()
185 return (state, [state])
186
187 def __process(self, graph):
188 assert type(graph) == TupleType
189 method = "_NfaBuilder__" + graph[0].lower()
190 if (not self.__operation_map.has_key(method)):
191 matches = [func for (name, func) in self.__members if name == method]
192 assert len(matches) == 1
193 self.__operation_map[method] = matches[0]
194 return self.__operation_map[method](graph)
195
196 def __patch_ends(self, ends, new_end):
197 for end in ends:
198 end.close(new_end)
199
200 def nfa(self, graph):
201 self.node_number = 0
202 (start, ends) = self.__process(graph)
203 end = self.__new_state()
204 self.__patch_ends(ends, end)
205 end.close(None)
206 return Nfa(start, end, self.node_number)
207
208
209 class Nfa:
210
211 def __init__(self, start, end, nodes_created):
212 self.__start = start
213 self.__end = end
214 self.__epsilon_closure_computed = False
215 self.__verify(nodes_created)
216
217 @staticmethod
218 def __visit_edges(start, include_start, just_epsilon, function, state):
219
220 def compute_next_edge(node, next_edge):
221 if just_epsilon:
222 return next_edge.union(node.get_transition_set(node.epsilon_key()))
223 else:
224 for key, values in node.transitions().items():
225 next_edge = next_edge.union(values)
226 return next_edge
227
228 edge = set()
229 if include_start:
230 edge.add(start)
231 else:
232 edge = compute_next_edge(start, edge)
233
234 visited = set()
235 while edge:
236 next_edge = set()
237 for node in edge:
238 next_edge = compute_next_edge(node, next_edge)
239 state = function(node, state)
240 visited = visited.union(edge)
241 edge = next_edge.difference(visited)
242 return state
243
244 def __visit_all_edges(self, function, state):
245 return Nfa.__visit_edges(self.__start, True, False, function, state)
246
247 def __verify(self, nodes_created):
248 def f(node, node_list):
249 assert node.is_closed()
250 node_list.append(node)
251 return node_list
252 node_list = self.__visit_all_edges(f, [])
253 assert len(node_list) == nodes_created
254
255 def __compute_epsilon_closures(self):
256 if self.__epsilon_closure_computed:
257 return
258 self.__epsilon_closure_computed = True
259 def outer(node, state):
260 def inner(node, closure):
261 closure.add(node)
262 return closure
263 closure = Nfa.__visit_edges(node, False, True, inner, set())
264 node.set_epsilon_closure(closure)
265 self.__visit_all_edges(outer, None)
266
267 @staticmethod
268 def __close(states):
269 for node in states:
270 states = states.union(node.epsilon_closure())
271 return states
272
273 def matches(self, string):
274 self.__compute_epsilon_closures()
275 valid_states = Nfa.__close(set([self.__start]))
276 for c in string:
277 next_valid_states = set()
278 for valid_state in valid_states:
279 next_valid_states = next_valid_states.union(valid_state.matches(c))
280 valid_states = Nfa.__close(next_valid_states)
281 if not valid_states:
282 return False
283 return self.__end in valid_states
284
285 @staticmethod
286 def __to_dfa(nfa_state_set, dfa_builder):
287 nfa_state_set = Nfa.__close(nfa_state_set)
288 name = str([x.node_number() for x in nfa_state_set])
289 (dfa_nodes, end_nodes, end_node) = dfa_builder
290 if dfa_nodes.has_key(name):
291 return name
292 dfa_node = {}
293 dfa_nodes[name] = dfa_node
294 for key in NfaState.gather_transition_keys(nfa_state_set):
295 reachable_set = set()
296 for nfa_state in nfa_state_set:
297 reachable_set = reachable_set | nfa_state.matches(key)
298 dfa_node[key] = Nfa.__to_dfa(reachable_set, dfa_builder)
299 if end_node in nfa_state_set:
300 end_nodes.add(name)
301 return name
302
303 def compute_dfa(self):
304 self.__compute_epsilon_closures()
305 dfa_nodes = {}
306 end_nodes = set()
307 dfa_builder = (dfa_nodes, end_nodes, self.__end)
308 start_name = self.__to_dfa(set([self.__start]), dfa_builder)
309 return (start_name, dfa_nodes, end_nodes)
310
311 def to_dot(self):
312
313 def f(node, node_content):
314 for key, values in node.transitions().items():
315 if key == node.epsilon_key(): key = "ε"
316 if key != node.epsilon_key() and len(key) == 1:
317 key = "'%s'" % key
318 for value in values:
319 node_content.append(
320 " S_%d -> S_%d [ label = \"%s\" ];" %
321 (node.node_number(), value.node_number(), key))
322 return node_content
323
324 node_content = self.__visit_all_edges(f, [])
325
326 return '''
327 digraph finite_state_machine {
328 rankdir=LR;
329 node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s
330 node [shape = doublecircle, style=unfilled]; S_%s
331 node [shape = circle];
332 %s
333 }
334 ''' % (self.__start.node_number(), self.__end.node_number(), "\n".join(node_ content))
335
OLDNEW
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/regex_lexer.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698