Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(196)

Side by Side Diff: tools/lexer_generator/nfa.py

Issue 61893023: Experimental parser: split out NfaBuilder (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/generator.py ('k') | tools/lexer_generator/nfa_builder.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 from types import TupleType
29 from transition_keys import TransitionKey 28 from transition_keys import TransitionKey
30 from automaton import * 29 from automaton import *
31 from inspect import getmembers
32 30
33 class NfaState(AutomatonState): 31 class NfaState(AutomatonState):
34 32
35 def __init__(self, node_number): 33 def __init__(self, node_number):
36 super(NfaState, self).__init__(node_number) 34 super(NfaState, self).__init__(node_number)
37 self.__transitions = {} 35 self.__transitions = {}
38 self.__unclosed = set() 36 self.__unclosed = set()
39 self.__epsilon_closure = None 37 self.__epsilon_closure = None
40 self.__action = None 38 self.__action = None
41 39
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
121 self.__transitions[inverse_key] = self.__transitions[catch_all] 119 self.__transitions[inverse_key] = self.__transitions[catch_all]
122 del self.__transitions[catch_all] 120 del self.__transitions[catch_all]
123 121
124 @staticmethod 122 @staticmethod
125 def gather_transition_keys(state_set): 123 def gather_transition_keys(state_set):
126 f = lambda acc, state: acc | set(state.__transitions.keys()) 124 f = lambda acc, state: acc | set(state.__transitions.keys())
127 keys = reduce(f, state_set, set()) 125 keys = reduce(f, state_set, set())
128 keys.discard(TransitionKey.epsilon()) 126 keys.discard(TransitionKey.epsilon())
129 return TransitionKey.disjoint_keys(keys) 127 return TransitionKey.disjoint_keys(keys)
130 128
131 class NfaBuilder:
132
133 def __init__(self):
134 self.__node_number = 0
135 self.__operation_map = {}
136 self.__members = getmembers(self)
137 self.__character_classes = {}
138 self.__states = []
139
140 def set_character_classes(self, classes):
141 self.__character_classes = classes
142
143 def __new_state(self):
144 self.__node_number += 1
145 return NfaState(self.__node_number - 1)
146
147 def __or(self, graph):
148 start = self.__new_state()
149 ends = []
150 for x in [self.__process(graph[1]), self.__process(graph[2])]:
151 start.add_epsilon_transition(x[0])
152 ends += x[1]
153 start.close(None)
154 return (start, ends)
155
156 def __one_or_more(self, graph):
157 (start, ends) = self.__process(graph[1])
158 end = self.__new_state()
159 end.add_epsilon_transition(start)
160 self.__patch_ends(ends, end)
161 return (start, [end])
162
163 def __zero_or_more(self, graph):
164 (node, ends) = self.__process(graph[1])
165 start = self.__new_state()
166 start.add_epsilon_transition(node)
167 self.__patch_ends(ends, start)
168 return (start, [start])
169
170 def __zero_or_one(self, graph):
171 (node, ends) = self.__process(graph[1])
172 start = self.__new_state()
173 start.add_epsilon_transition(node)
174 return (start, ends + [start])
175
176 def __repeat(self, graph):
177 param_min = int(graph[1])
178 param_max = int(graph[2])
179 subgraph = graph[3]
180 (start, ends) = self.__process(subgraph)
181 for i in xrange(1, param_min):
182 (start2, ends2) = self.__process(subgraph)
183 self.__patch_ends(ends, start2)
184 ends = ends2
185 if param_min == param_max:
186 return (start, ends)
187
188 midpoints = []
189 for i in xrange(param_min, param_max):
190 midpoint = self.__new_state()
191 self.__patch_ends(ends, midpoint)
192 (start2, ends) = self.__process(subgraph)
193 midpoint.add_epsilon_transition(start2)
194 midpoints.append(midpoint)
195
196 return (start, ends + midpoints)
197
198 def __cat(self, graph):
199 (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
200 self.__patch_ends(left[1], right[0])
201 return (left[0], right[1])
202
203 def __key_state(self, key):
204 state = self.__new_state()
205 state.add_unclosed_transition(key)
206 return (state, [state])
207
208 def __literal(self, graph):
209 return self.__key_state(TransitionKey.single_char(graph[1]))
210
211 def __class(self, graph):
212 return self.__key_state(
213 TransitionKey.character_class(graph, self.__character_classes))
214
215 def __not_class(self, graph):
216 return self.__key_state(
217 TransitionKey.character_class(graph, self.__character_classes))
218
219 def __any(self, graph):
220 return self.__key_state(TransitionKey.any())
221
222 def __epsilon(self, graph):
223 start = self.__new_state()
224 end = self.__new_state()
225 start.close(end)
226 return (start, [end])
227
228 def __action(self, graph):
229 (start, ends) = self.__process(graph[1])
230 action = graph[2]
231 end = self.__new_state()
232 self.__patch_ends(ends, end)
233 end.set_action(action)
234 return (start, [end])
235
236 def __continue(self, graph):
237 (start, ends) = self.__process(graph[1])
238 state = self.__peek_state()
239 if not state['start_node']:
240 state['start_node'] = self.__new_state()
241 self.__patch_ends(ends, state['start_node'])
242 return (start, [])
243
244 def __catch_all(self, graph):
245 return self.__key_state(TransitionKey.unique('catch_all'))
246
247 def __join(self, graph):
248 (graph, name, subgraph, modifier) = graph[1:]
249 subgraphs = self.__peek_state()['subgraphs']
250 if not name in subgraphs:
251 subgraphs[name] = self.__nfa(subgraph)
252 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name]
253 (start, ends) = self.__process(graph)
254 if modifier:
255 assert modifier == 'ZERO_OR_MORE'
256 for end in ends:
257 end.add_epsilon_transition(subgraph_end)
258 self.__patch_ends(ends, subgraph_start)
259 end = self.__new_state()
260 subgraph_end.add_epsilon_transition(end)
261 return (start, [end])
262
263 def __process(self, graph):
264 assert type(graph) == TupleType
265 method = "_NfaBuilder__" + graph[0].lower()
266 if not method in self.__operation_map:
267 matches = filter(lambda (name, func): name == method, self.__members)
268 assert len(matches) == 1
269 self.__operation_map[method] = matches[0][1]
270 return self.__operation_map[method](graph)
271
272 def __patch_ends(self, ends, new_end):
273 for end in ends:
274 end.close(new_end)
275
276 def __push_state(self):
277 self.__states.append({
278 'start_node' : None,
279 'subgraphs' : {},
280 'unpatched_ends' : [],
281 })
282
283 def __pop_state(self):
284 return self.__states.pop()
285
286 def __peek_state(self):
287 return self.__states[len(self.__states) - 1]
288
289 def __nfa(self, graph):
290 start_node_number = self.__node_number
291 self.__push_state()
292 (start, ends) = self.__process(graph)
293 state = self.__pop_state()
294 if state['start_node']:
295 state['start_node'].close(start)
296 start = state['start_node']
297 for k, subgraph in state['subgraphs'].items():
298 subgraph[1].close(None)
299 end = self.__new_state()
300 if self.__states:
301 self.__peek_state()['unpatched_ends'] += state['unpatched_ends']
302 else:
303 self.__patch_ends(state['unpatched_ends'], end)
304 self.__patch_ends(ends, end)
305 return (start, end, self.__node_number - start_node_number)
306
307 def nfa(self, graph):
308 (start, end, nodes_created) = self.__nfa(graph)
309 end.close(None)
310 return Nfa(start, end, nodes_created)
311
312 @staticmethod
313 def add_action(graph, action):
314 return ('ACTION', graph, action)
315
316 @staticmethod
317 def add_continue(graph):
318 return ('CONTINUE', graph)
319
320 @staticmethod
321 def catch_all():
322 return ('CATCH_ALL',)
323
324 @staticmethod
325 def epsilon():
326 return ('EPSILON',)
327
328 @staticmethod
329 def join_subgraph(graph, name, subgraph, modifier):
330 if modifier:
331 modifier = NfaBuilder.__modifer_map[modifier]
332 return ('JOIN', graph, name, subgraph, modifier)
333
334 @staticmethod
335 def or_graphs(graphs):
336 return reduce(lambda acc, g: ('OR', acc, g), graphs)
337
338 @staticmethod
339 def cat_graphs(graphs):
340 return reduce(lambda acc, g: ('CAT', acc, g), graphs)
341
342 __modifer_map = {
343 '+': 'ONE_OR_MORE',
344 '?': 'ZERO_OR_ONE',
345 '*': 'ZERO_OR_MORE',
346 }
347
348 @staticmethod
349 def apply_modifier(modifier, graph):
350 return (NfaBuilder.__modifer_map[modifier], graph)
351
352 class Nfa(Automaton): 129 class Nfa(Automaton):
353 130
354 def __init__(self, start, end, nodes_created): 131 def __init__(self, start, end, nodes_created):
355 super(Nfa, self).__init__() 132 super(Nfa, self).__init__()
356 self.__start = start 133 self.__start = start
357 self.__end = end 134 self.__end = end
358 self.__epsilon_closure_computed = False 135 self.__epsilon_closure_computed = False
359 self.__verify(nodes_created) 136 self.__verify(nodes_created)
360 137
361 def __visit_all_edges(self, visitor, state): 138 def __visit_all_edges(self, visitor, state):
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
434 self.__compute_epsilon_closures() 211 self.__compute_epsilon_closures()
435 self.__visit_all_edges(lambda node, state: node.replace_catch_all(), None) 212 self.__visit_all_edges(lambda node, state: node.replace_catch_all(), None)
436 dfa_nodes = {} 213 dfa_nodes = {}
437 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end) 214 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)
438 return (start_name, dfa_nodes) 215 return (start_name, dfa_nodes)
439 216
440 def to_dot(self): 217 def to_dot(self):
441 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state) 218 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state)
442 state_iterator = lambda x : x 219 state_iterator = lambda x : x
443 return self.generate_dot(self.__start, set([self.__end]), iterator, state_iterator) 220 return self.generate_dot(self.__start, set([self.__end]), iterator, state_iterator)
OLDNEW
« no previous file with comments | « tools/lexer_generator/generator.py ('k') | tools/lexer_generator/nfa_builder.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698