Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(225)

Side by Side Diff: tools/lexer_generator/nfa_builder.py

Issue 137883006: Experimental parser: use Terms instead of tuples (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/lexer_test.py ('k') | tools/lexer_generator/regex_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 from types import TupleType 28 from types import TupleType
29 from inspect import getmembers 29 from inspect import getmembers
30 from action import *
30 from nfa import * 31 from nfa import *
31 32
32 # Nfa is built in two stages: 33 # Nfa is built in two stages:
33 # 1. Build a tree of operations on rules. 34 # 1. Build a tree of operations on rules.
34 # Each node in the tree is a tuple (operation, subtree1, ... subtreeN). 35 # Each node in the tree is a tuple (operation, subtree1, ... subtreeN).
35 # Rule parser builds this tree by invoking static methods of NfaBuilder. 36 # Rule parser builds this tree by invoking static methods of NfaBuilder.
36 # 2. For each node, perform the operation of the node to produce an Nfa. 37 # 2. For each node, perform the operation of the node to produce an Nfa.
37 # If an operation is called X, then it is performed by the method 38 # If an operation is called X, then it is performed by the method
38 # of NfaBuilder called __x(). See __process() for mapping from 39 # of NfaBuilder called __x(). See __process() for mapping from
39 # operation to processing methods. 40 # operation to processing methods.
40 41
41 class NfaBuilder(object): 42 class NfaBuilder(object):
42 43
43 def __init__(self, encoding, character_classes = {}): 44 def __init__(self, encoding, character_classes):
44 self.__node_number = 0 45 self.__node_number = 0
45 self.__operation_map = {} 46 self.__operation_map = {}
46 self.__members = getmembers(self) 47 self.__members = getmembers(self)
47 self.__encoding = encoding 48 self.__encoding = encoding
48 self.__character_classes = character_classes 49 self.__character_classes = character_classes
49 self.__states = [] 50 self.__states = []
50 self.__global_end_node = None 51 self.__global_end_node = None
51 52
52 def __new_state(self): 53 def __new_state(self):
53 self.__node_number += 1 54 self.__node_number += 1
54 return NfaState() 55 return NfaState()
55 56
56 def __global_end(self): 57 def __global_end(self):
57 if not self.__global_end_node: 58 if not self.__global_end_node:
58 self.__global_end_node = self.__new_state() 59 self.__global_end_node = self.__new_state()
59 return self.__global_end_node 60 return self.__global_end_node
60 61
61 def __or(self, graph): 62 def __or(self, args):
62 start = self.__new_state() 63 start = self.__new_state()
63 ends = [] 64 ends = []
64 for x in [self.__process(graph[1]), self.__process(graph[2])]: 65 for x in [self.__process(args[0]), self.__process(args[1])]:
65 start.add_epsilon_transition(x[0]) 66 start.add_epsilon_transition(x[0])
66 ends += x[1] 67 ends += x[1]
67 start.close(None) 68 start.close(None)
68 return (start, ends) 69 return (start, ends)
69 70
70 def __one_or_more(self, graph): 71 def __one_or_more(self, args):
71 (start, ends) = self.__process(graph[1]) 72 (start, ends) = self.__process(args[0])
72 end = self.__new_state() 73 end = self.__new_state()
73 end.add_epsilon_transition(start) 74 end.add_epsilon_transition(start)
74 self.__patch_ends(ends, end) 75 self.__patch_ends(ends, end)
75 return (start, [end]) 76 return (start, [end])
76 77
77 def __zero_or_more(self, graph): 78 def __zero_or_more(self, args):
78 (node, ends) = self.__process(graph[1]) 79 (node, ends) = self.__process(args[0])
79 start = self.__new_state() 80 start = self.__new_state()
80 start.add_epsilon_transition(node) 81 start.add_epsilon_transition(node)
81 self.__patch_ends(ends, start) 82 self.__patch_ends(ends, start)
82 return (start, [start]) 83 return (start, [start])
83 84
84 def __zero_or_one(self, graph): 85 def __zero_or_one(self, args):
85 (node, ends) = self.__process(graph[1]) 86 (node, ends) = self.__process(args[0])
86 start = self.__new_state() 87 start = self.__new_state()
87 start.add_epsilon_transition(node) 88 start.add_epsilon_transition(node)
88 return (start, ends + [start]) 89 return (start, ends + [start])
89 90
90 def __repeat(self, graph): 91 def __repeat(self, args):
91 param_min = int(graph[1]) 92 param_min = int(args[0])
92 param_max = int(graph[2]) 93 param_max = int(args[1])
93 subgraph = graph[3] 94 subgraph = args[2]
94 (start, ends) = self.__process(subgraph) 95 (start, ends) = self.__process(subgraph)
95 for i in xrange(1, param_min): 96 for i in xrange(1, param_min):
96 (start2, ends2) = self.__process(subgraph) 97 (start2, ends2) = self.__process(subgraph)
97 self.__patch_ends(ends, start2) 98 self.__patch_ends(ends, start2)
98 ends = ends2 99 ends = ends2
99 if param_min == param_max: 100 if param_min == param_max:
100 return (start, ends) 101 return (start, ends)
101 102
102 midpoints = [] 103 midpoints = []
103 for i in xrange(param_min, param_max): 104 for i in xrange(param_min, param_max):
104 midpoint = self.__new_state() 105 midpoint = self.__new_state()
105 self.__patch_ends(ends, midpoint) 106 self.__patch_ends(ends, midpoint)
106 (start2, ends) = self.__process(subgraph) 107 (start2, ends) = self.__process(subgraph)
107 midpoint.add_epsilon_transition(start2) 108 midpoint.add_epsilon_transition(start2)
108 midpoints.append(midpoint) 109 midpoints.append(midpoint)
109 110
110 return (start, ends + midpoints) 111 return (start, ends + midpoints)
111 112
112 def __cat(self, graph): 113 def __cat(self, args):
113 (left, right) = (self.__process(graph[1]), self.__process(graph[2])) 114 (left, right) = (self.__process(args[0]), self.__process(args[1]))
114 self.__patch_ends(left[1], right[0]) 115 self.__patch_ends(left[1], right[0])
115 return (left[0], right[1]) 116 return (left[0], right[1])
116 117
117 def __key_state(self, key): 118 def __key_state(self, key):
118 state = self.__new_state() 119 state = self.__new_state()
119 state.add_unclosed_transition(key) 120 state.add_unclosed_transition(key)
120 return (state, [state]) 121 return (state, [state])
121 122
122 def __literal(self, graph): 123 def __literal(self, args):
124 assert len(args) == 1
123 return self.__key_state( 125 return self.__key_state(
124 TransitionKey.single_char(self.__encoding, graph[1])) 126 TransitionKey.single_char(self.__encoding, args[0]))
125 127
126 def __class(self, graph): 128 def __class(self, args):
129 assert len(args) == 1
127 return self.__key_state(TransitionKey.character_class( 130 return self.__key_state(TransitionKey.character_class(
128 self.__encoding, graph, self.__character_classes)) 131 self.__encoding, Term('CLASS', args[0]), self.__character_classes))
129 132
130 def __not_class(self, graph): 133 def __not_class(self, args):
134 assert len(args) == 1
131 return self.__key_state(TransitionKey.character_class( 135 return self.__key_state(TransitionKey.character_class(
132 self.__encoding, graph, self.__character_classes)) 136 self.__encoding, Term('NOT_CLASS', args[0]), self.__character_classes))
133 137
134 def __any(self, graph): 138 def __any(self, args):
135 return self.__key_state(TransitionKey.any(self.__encoding)) 139 return self.__key_state(TransitionKey.any(self.__encoding))
136 140
137 def __epsilon(self, graph): 141 def __action(self, args):
138 start = self.__new_state() 142 (start, ends) = self.__process(args[0])
139 end = self.__new_state() 143 action = Action.from_term(args[1])
140 start.close(end)
141 return (start, [end])
142
143 def __action(self, graph):
144 (start, ends) = self.__process(graph[1])
145 action = graph[2]
146 end = self.__new_state() 144 end = self.__new_state()
147 self.__patch_ends(ends, end) 145 self.__patch_ends(ends, end)
148 end.set_action(action) 146 end.set_action(action)
149 if action.match_action(): 147 if action.match_action():
150 global_end = self.__global_end() 148 global_end = self.__global_end()
151 end.add_epsilon_transition(global_end) 149 end.add_epsilon_transition(global_end)
152 return (start, [end]) 150 return (start, [end])
153 151
154 def __continue(self, graph): 152 def __continue(self, args):
155 (start, ends) = self.__process(graph[1]) 153 (start, ends) = self.__process(args[0])
156 state = self.__peek_state() 154 state = self.__peek_state()
157 if not state['start_node']: 155 if not state['start_node']:
158 state['start_node'] = self.__new_state() 156 state['start_node'] = self.__new_state()
159 self.__patch_ends(ends, state['start_node']) 157 self.__patch_ends(ends, state['start_node'])
160 return (start, []) 158 return (start, [])
161 159
162 def __unique_key(self, graph): 160 def __unique_key(self, args):
163 return self.__key_state(TransitionKey.unique(graph[1])) 161 return self.__key_state(TransitionKey.unique(args[0]))
164 162
165 def __join(self, graph): 163 def __join(self, (graph, name, subgraph)):
166 (graph, name, subgraph) = graph[1:]
167 subgraphs = self.__peek_state()['subgraphs'] 164 subgraphs = self.__peek_state()['subgraphs']
168 if not name in subgraphs: 165 if not name in subgraphs:
169 subgraphs[name] = self.__nfa(subgraph) 166 subgraphs[name] = self.__nfa(subgraph)
170 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name] 167 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name]
171 (start, ends) = self.__process(graph) 168 (start, ends) = self.__process(graph)
172 self.__patch_ends(ends, subgraph_start) 169 self.__patch_ends(ends, subgraph_start)
173 if subgraph_end: 170 if subgraph_end:
174 end = self.__new_state() 171 end = self.__new_state()
175 subgraph_end.add_epsilon_transition(end) 172 subgraph_end.add_epsilon_transition(end)
176 return (start, [end]) 173 return (start, [end])
177 else: 174 else:
178 return (start, []) 175 return (start, [])
179 176
180 def __process(self, graph): 177 def __process(self, term):
181 assert type(graph) == TupleType 178 assert isinstance(term, Term)
182 method = "_NfaBuilder__" + graph[0].lower() 179 method = "_NfaBuilder__" + term.name().lower()
183 if not method in self.__operation_map: 180 if not method in self.__operation_map:
184 matches = filter(lambda (name, func): name == method, self.__members) 181 matches = filter(lambda (name, func): name == method, self.__members)
185 assert len(matches) == 1 182 assert len(matches) == 1
186 self.__operation_map[method] = matches[0][1] 183 self.__operation_map[method] = matches[0][1]
187 return self.__operation_map[method](graph) 184 return self.__operation_map[method](term.args())
188 185
189 def __patch_ends(self, ends, new_end): 186 def __patch_ends(self, ends, new_end):
190 for end in ends: 187 for end in ends:
191 end.close(new_end) 188 end.close(new_end)
192 189
193 def __push_state(self): 190 def __push_state(self):
194 self.__states.append({ 191 self.__states.append({
195 'start_node' : None, 192 'start_node' : None,
196 'subgraphs' : {}, 193 'subgraphs' : {},
197 'unpatched_ends' : [], 194 'unpatched_ends' : [],
198 }) 195 })
199 196
200 def __pop_state(self): 197 def __pop_state(self):
201 return self.__states.pop() 198 return self.__states.pop()
202 199
203 def __peek_state(self): 200 def __peek_state(self):
204 return self.__states[-1] 201 return self.__states[-1]
205 202
206 def __nfa(self, graph): 203 def __nfa(self, term):
207 start_node_number = self.__node_number 204 start_node_number = self.__node_number
208 self.__push_state() 205 self.__push_state()
209 (start, ends) = self.__process(graph) 206 (start, ends) = self.__process(term)
210 state = self.__pop_state() 207 state = self.__pop_state()
211 if state['start_node']: 208 if state['start_node']:
212 state['start_node'].close(start) 209 state['start_node'].close(start)
213 start = state['start_node'] 210 start = state['start_node']
214 for k, subgraph in state['subgraphs'].items(): 211 for k, subgraph in state['subgraphs'].items():
215 if subgraph[1]: 212 if subgraph[1]:
216 subgraph[1].close(None) 213 subgraph[1].close(None)
217 214
218 # Don't create an end node for the subgraph if it would be unused (ends can 215 # Don't create an end node for the subgraph if it would be unused (ends can
219 # be an empty list e.g., in the case when everything inside the subgraph is 216 # be an empty list e.g., in the case when everything inside the subgraph is
(...skipping 30 matching lines...) Expand all
250 f = lambda acc, state: acc | set(state.transitions().keys()) 247 f = lambda acc, state: acc | set(state.transitions().keys())
251 keys = reduce(f, reachable_states, set()) 248 keys = reduce(f, reachable_states, set())
252 keys.discard(TransitionKey.epsilon()) 249 keys.discard(TransitionKey.epsilon())
253 keys.discard(catch_all) 250 keys.discard(catch_all)
254 keys.discard(TransitionKey.unique('eos')) 251 keys.discard(TransitionKey.unique('eos'))
255 inverse_key = TransitionKey.inverse_key(encoding, keys) 252 inverse_key = TransitionKey.inverse_key(encoding, keys)
256 if inverse_key: 253 if inverse_key:
257 transitions[inverse_key] = transitions[catch_all] 254 transitions[inverse_key] = transitions[catch_all]
258 del transitions[catch_all] 255 del transitions[catch_all]
259 256
260 def nfa(self, graph): 257 @staticmethod
261 (start, end, nodes_created) = self.__nfa(graph) 258 def nfa(encoding, character_classes, rule_term):
259
260 self = NfaBuilder(encoding, character_classes)
261 (start, end, nodes_created) = self.__nfa(rule_term)
262 262
263 global_end_to_return = self.__global_end_node or end 263 global_end_to_return = self.__global_end_node or end
264 assert global_end_to_return 264 assert global_end_to_return
265 if end and self.__global_end_node: 265 if end and self.__global_end_node:
266 end.close(self.__global_end_node) 266 end.close(self.__global_end_node)
267 267
268 global_end_to_return.close(None) 268 global_end_to_return.close(None)
269 269
270 # FIXME: the same NfaBuilder should not be called twice, so this cleanup
271 # should be unnecessary.
272 self.__global_end_node = None
273
274 self.__compute_epsilon_closures(start) 270 self.__compute_epsilon_closures(start)
275 f = lambda node, state: self.__replace_catch_all(self.__encoding, node) 271 f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
276 Automaton.visit_states(set([start]), f) 272 Automaton.visit_states(set([start]), f)
277 273
278 return Nfa(self.__encoding, start, global_end_to_return, nodes_created) 274 return Nfa(self.__encoding, start, global_end_to_return, nodes_created)
279 275
280 @staticmethod 276 @staticmethod
281 def rule_tree_dot(graph): 277 def add_action(term, action):
282 node_ix = [0] 278 return Term('ACTION', term, action.to_term())
283 node_template = 'node [label="%s"]; N_%d;'
284 edge_template = 'N_%d -> N_%d'
285 nodes = []
286 edges = []
287
288 def escape(v):
289 v = str(v)
290 v = v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
291 v = v.replace('\\', '\\\\').replace('\"', '\\\"')
292 return v
293
294 def process_thing(thing):
295 if isinstance(thing, str):
296 node_ix[0] += 1
297 nodes.append(node_template % (escape(thing), node_ix[0]))
298 return node_ix[0]
299 if isinstance(thing, tuple):
300 child_ixs = map(process_thing, list(thing)[1:])
301 node_ix[0] += 1
302 nodes.append(node_template % (escape(thing[0]), node_ix[0]))
303 for child_ix in child_ixs:
304 edges.append(edge_template % (node_ix[0], child_ix))
305 return node_ix[0]
306 if isinstance(thing, Action):
307 node_ix[0] += 1
308 nodes.append(node_template % (str(thing), node_ix[0]))
309 return node_ix[0]
310 raise Exception
311
312 process_thing(graph)
313 return 'digraph { %s %s }' % ('\n'.join(nodes), '\n'.join(edges))
314 279
315 @staticmethod 280 @staticmethod
316 def add_action(graph, action): 281 def add_continue(term):
317 return ('ACTION', graph, action) 282 return Term('CONTINUE', term)
318
319 @staticmethod
320 def add_continue(graph):
321 return ('CONTINUE', graph)
322 283
323 @staticmethod 284 @staticmethod
324 def unique_key(name): 285 def unique_key(name):
325 return ('UNIQUE_KEY', name) 286 return Term('UNIQUE_KEY', name)
326 287
327 @staticmethod 288 @staticmethod
328 def join_subgraph(graph, name, subgraph): 289 def join_subgraph(term, name, subgraph_term):
329 return ('JOIN', graph, name, subgraph) 290 return Term('JOIN', term, name, subgraph_term)
330 291
331 @staticmethod 292 @staticmethod
332 def or_graphs(graphs): 293 def or_terms(terms):
333 return reduce(lambda acc, g: ('OR', acc, g), graphs) 294 return reduce(lambda acc, g: Term('OR', acc, g), terms)
334 295
335 @staticmethod 296 @staticmethod
336 def cat_graphs(graphs): 297 def cat_terms(terms):
337 return reduce(lambda acc, g: ('CAT', acc, g), graphs) 298 return reduce(lambda acc, g: Term('CAT', acc, g), terms)
338 299
339 __modifer_map = { 300 __modifer_map = {
340 '+': 'ONE_OR_MORE', 301 '+': 'ONE_OR_MORE',
341 '?': 'ZERO_OR_ONE', 302 '?': 'ZERO_OR_ONE',
342 '*': 'ZERO_OR_MORE', 303 '*': 'ZERO_OR_MORE',
343 } 304 }
344 305
345 @staticmethod 306 @staticmethod
346 def apply_modifier(modifier, graph): 307 def apply_modifier(modifier, term):
347 return (NfaBuilder.__modifer_map[modifier], graph) 308 return Term(NfaBuilder.__modifer_map[modifier], term)
OLDNEW
« no previous file with comments | « tools/lexer_generator/lexer_test.py ('k') | tools/lexer_generator/regex_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698