Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(140)

Side by Side Diff: tools/lexer_generator/nfa.py

Issue 57923002: Experimental parser: add embedded actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 18 matching lines...) Expand all
29 from transition_keys import TransitionKey 29 from transition_keys import TransitionKey
30 from inspect import getmembers 30 from inspect import getmembers
31 31
32 class NfaState: 32 class NfaState:
33 33
34 def __init__(self, node_number): 34 def __init__(self, node_number):
35 self.__transitions = {} 35 self.__transitions = {}
36 self.__unclosed = set() 36 self.__unclosed = set()
37 self.__node_number = node_number 37 self.__node_number = node_number
38 self.__epsilon_closure = None 38 self.__epsilon_closure = None
39 self.__transition_action = None
39 40
40 def node_number(self): 41 def node_number(self):
41 return self.__node_number 42 return self.__node_number
42 43
43 def epsilon_closure(self): 44 def epsilon_closure(self):
44 return self.__epsilon_closure 45 return self.__epsilon_closure
45 46
46 def set_epsilon_closure(self, closure): 47 def set_epsilon_closure(self, closure):
47 assert self.is_closed() 48 assert self.is_closed()
48 assert self.__epsilon_closure == None 49 assert self.__epsilon_closure == None
49 self.__epsilon_closure = frozenset(closure) 50 self.__epsilon_closure = frozenset(closure)
50 51
52 def set_transition_action(self, action):
53 assert not self.is_closed()
54 assert self.__transition_action == None
55 self.__transition_action = action
56
51 def transitions(self): 57 def transitions(self):
52 assert self.is_closed() 58 assert self.is_closed()
53 return self.__transitions 59 return self.__transitions
54 60
55 def get_epsilon_transitions(self): 61 def next_states(self, key_filter):
56 key = TransitionKey.epsilon(); 62 assert self.is_closed()
57 if not key in self.__transitions: return frozenset() 63 first = lambda v: [x[0] for x in v]
58 return frozenset(self.__transitions[key]) 64 f = lambda acc, (k, v) : acc if not key_filter(k) else acc | set(first(v))
65 return reduce(f, self.__transitions.items(), set())
59 66
60 def __add_transition(self, key, value): 67 def __add_transition(self, key, action, next_state):
61 if value == None: 68 if next_state == None:
69 assert not action
62 assert not self.is_closed(), "already closed" 70 assert not self.is_closed(), "already closed"
63 self.__unclosed.add(key) 71 self.__unclosed.add(key)
64 return 72 return
65 if not key in self.__transitions: 73 if not key in self.__transitions:
66 self.__transitions[key] = set() 74 self.__transitions[key] = set()
67 self.__transitions[key].add(value) 75 self.__transitions[key].add((next_state, action))
68 76
69 def add_unclosed_transition(self, key): 77 def add_unclosed_transition(self, key):
70 assert key != TransitionKey.epsilon() 78 assert key != TransitionKey.epsilon()
71 self.__add_transition(key, None) 79 self.__add_transition(key, None, None)
72 80
73 def add_epsilon_transition(self, state): 81 def add_epsilon_transition(self, state):
74 assert state != None 82 assert state != None
75 self.__add_transition(TransitionKey.epsilon(), state) 83 self.__add_transition(TransitionKey.epsilon(), None, state)
76 84
77 def is_closed(self): 85 def is_closed(self):
78 return self.__unclosed == None 86 return self.__unclosed == None
79 87
80 def close(self, end): 88 def close(self, end):
81 assert not self.is_closed() 89 assert not self.is_closed()
90 unclosed, self.__unclosed = self.__unclosed, None
91 action, self.__transition_action = self.__transition_action, None
82 if end == None: 92 if end == None:
83 assert not self.__unclosed 93 assert not unclosed
84 else: 94 assert not action
85 for key in self.__unclosed: 95 return
86 self.__add_transition(key, end) 96 for key in unclosed:
87 if not self.__unclosed: 97 self.__add_transition(key, action, end)
88 self.add_epsilon_transition(end) 98 if not unclosed:
89 self.__unclosed = None 99 self.__add_transition(TransitionKey.epsilon(), action, end)
90 100
91 def __matches(self, match_func, value): 101 def __matches(self, match_func, value):
92 f = lambda acc, (k, vs): acc | vs if match_func(k, value) else acc 102 f = lambda acc, (k, vs): acc | vs if match_func(k, value) else acc
93 return reduce(f, self.__transitions.items(), set()) 103 return reduce(f, self.__transitions.items(), set())
94 104
95 def char_matches(self, value): 105 def char_matches(self, value):
96 return self.__matches(lambda k, v : k.matches_char(v), value) 106 return self.__matches(lambda k, v : k.matches_char(v), value)
97 107
98 def key_matches(self, value): 108 def key_matches(self, value):
99 return self.__matches(lambda k, v : k.matches_key(v), value) 109 return self.__matches(lambda k, v : k.matches_key(v), value)
100 110
101 @staticmethod 111 @staticmethod
102 def gather_transition_keys(state_set): 112 def gather_transition_keys(state_set):
103 f = lambda acc, state: acc | set(state.__transitions.keys()) 113 f = lambda acc, state: acc | set(state.__transitions.keys())
104 return TransitionKey.disjoint_keys(reduce(f, state_set, set())) 114 return TransitionKey.disjoint_keys(reduce(f, state_set, set()))
105 115
106 class NfaBuilder: 116 class NfaBuilder:
107 117
108 def __init__(self): 118 def __init__(self):
119 self.__node_number = 0
109 self.__operation_map = {} 120 self.__operation_map = {}
110 self.__members = getmembers(self) 121 self.__members = getmembers(self)
111 122
112 def __new_state(self): 123 def __new_state(self):
113 node_number = self.node_number 124 self.__node_number += 1
114 self.node_number = self.node_number + 1 125 return NfaState(self.__node_number - 1)
115 return NfaState(node_number)
116 126
117 def __or(self, graph): 127 def __or(self, graph):
118 start = self.__new_state() 128 start = self.__new_state()
119 ends = [] 129 ends = []
120 for x in [self.__process(graph[1]), self.__process(graph[2])]: 130 for x in [self.__process(graph[1]), self.__process(graph[2])]:
121 start.add_epsilon_transition(x[0]) 131 start.add_epsilon_transition(x[0])
122 ends += x[1] 132 ends += x[1]
123 start.close(None) 133 start.close(None)
124 return (start, ends) 134 return (start, ends)
125 135
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
158 168
159 def __class(self, graph): 169 def __class(self, graph):
160 return self.__key_state(TransitionKey.character_class(False, graph[1])) 170 return self.__key_state(TransitionKey.character_class(False, graph[1]))
161 171
162 def __not_class(self, graph): 172 def __not_class(self, graph):
163 return self.__key_state(TransitionKey.character_class(True, graph[1])) 173 return self.__key_state(TransitionKey.character_class(True, graph[1]))
164 174
165 def __any(self, graph): 175 def __any(self, graph):
166 return self.__key_state(TransitionKey.any()) 176 return self.__key_state(TransitionKey.any())
167 177
178 def __action(self, graph):
179 result = self.__process(graph[1])
180 for end in result[1]:
181 end.set_transition_action(graph[2])
182 return result
183
168 def __process(self, graph): 184 def __process(self, graph):
169 assert type(graph) == TupleType 185 assert type(graph) == TupleType
170 method = "_NfaBuilder__" + graph[0].lower() 186 method = "_NfaBuilder__" + graph[0].lower()
171 if not method in self.__operation_map: 187 if not method in self.__operation_map:
172 matches = filter(lambda (name, func): name == method, self.__members) 188 matches = filter(lambda (name, func): name == method, self.__members)
173 assert len(matches) == 1 189 assert len(matches) == 1
174 self.__operation_map[method] = matches[0][1] 190 self.__operation_map[method] = matches[0][1]
175 return self.__operation_map[method](graph) 191 return self.__operation_map[method](graph)
176 192
177 def __patch_ends(self, ends, new_end): 193 def __patch_ends(self, ends, new_end):
178 for end in ends: 194 for end in ends:
179 end.close(new_end) 195 end.close(new_end)
180 196
181 def nfa(self, graph): 197 def nfa(self, graph):
182 self.node_number = 0 198 start_node_number = self.__node_number
183 (start, ends) = self.__process(graph) 199 (start, ends) = self.__process(graph)
184 end = self.__new_state() 200 end = self.__new_state()
185 self.__patch_ends(ends, end) 201 self.__patch_ends(ends, end)
186 end.close(None) 202 end.close(None)
187 return Nfa(start, end, self.node_number) 203 return Nfa(start, end, self.__node_number - start_node_number)
204
205 @staticmethod
206 def add_action(graph, action):
207 return ('ACTION', graph, action)
208
209 @staticmethod
210 def or_graphs(graphs):
211 return reduce(lambda acc, g: ('OR', acc, g), graphs)
212
213 @staticmethod
214 def cat_graphs(graphs):
215 return reduce(lambda acc, g: ('CAT', acc, g), graphs)
188 216
189 class Nfa: 217 class Nfa:
190 218
191 def __init__(self, start, end, nodes_created): 219 def __init__(self, start, end, nodes_created):
192 self.__start = start 220 self.__start = start
193 self.__end = end 221 self.__end = end
194 self.__epsilon_closure_computed = False 222 self.__epsilon_closure_computed = False
195 self.__verify(nodes_created) 223 self.__verify(nodes_created)
196 224
197 @staticmethod 225 @staticmethod
198 def __visit_edges(edge, compute_next_edge, visitor, state): 226 def __visit_edges(edge, compute_next_edge, visitor, state):
199 visited = set() 227 visited = set()
200 while edge: 228 while edge:
201 f = lambda (next_edge, state), node: ( 229 f = lambda (next_edge, state), node: (
202 next_edge | compute_next_edge(node), 230 next_edge | compute_next_edge(node),
203 visitor(node, state)) 231 visitor(node, state))
204 (next_edge, state) = reduce(f, edge, (set(), state)) 232 (next_edge, state) = reduce(f, edge, (set(), state))
205 visited |= edge 233 visited |= edge
206 edge = next_edge - visited 234 edge = next_edge - visited
207 return state 235 return state
208 236
209 def __visit_all_edges(self, visitor, state): 237 def __visit_all_edges(self, visitor, state):
210 edge = set([self.__start]) 238 edge = set([self.__start])
211 def next_edge(node): 239 next_edge = lambda node: node.next_states(lambda x : True)
212 f = lambda acc, values: acc | values
213 return reduce(f, node.transitions().values(), set())
214 return Nfa.__visit_edges(edge, next_edge, visitor, state) 240 return Nfa.__visit_edges(edge, next_edge, visitor, state)
215 241
216 def __verify(self, nodes_created): 242 def __verify(self, nodes_created):
217 def f(node, node_list): 243 def f(node, node_list):
218 assert node.is_closed() 244 assert node.is_closed()
219 node_list.append(node) 245 node_list.append(node)
220 return node_list 246 return node_list
221 node_list = self.__visit_all_edges(f, []) 247 node_list = self.__visit_all_edges(f, [])
222 assert len(node_list) == nodes_created 248 assert len(node_list) == nodes_created
223 249
224 def __compute_epsilon_closures(self): 250 def __compute_epsilon_closures(self):
225 if self.__epsilon_closure_computed: 251 if self.__epsilon_closure_computed:
226 return 252 return
227 self.__epsilon_closure_computed = True 253 self.__epsilon_closure_computed = True
228 def outer(node, state): 254 def outer(node, state):
229 def inner(node, closure): 255 def inner(node, closure):
230 closure.add(node) 256 closure.add(node)
231 return closure 257 return closure
232 next_edge = lambda node : node.get_epsilon_transitions() 258 is_epsilon = lambda k: k == TransitionKey.epsilon()
259 next_edge = lambda node : node.next_states(is_epsilon)
233 edge = next_edge(node) 260 edge = next_edge(node)
234 closure = Nfa.__visit_edges(edge, next_edge, inner, set()) 261 closure = Nfa.__visit_edges(edge, next_edge, inner, set())
235 node.set_epsilon_closure(closure) 262 node.set_epsilon_closure(closure)
236 self.__visit_all_edges(outer, None) 263 self.__visit_all_edges(outer, None)
237 264
238 @staticmethod 265 @staticmethod
239 def __close(states): 266 def __close(states):
240 f = lambda acc, node: acc | node.epsilon_closure() 267 f = lambda acc, node: acc | node.epsilon_closure()
241 return reduce(f, states, set(states)) 268 return reduce(f, states, set(states))
242 269
243 def matches(self, string): 270 def matches(self, string):
244 self.__compute_epsilon_closures() 271 self.__compute_epsilon_closures()
245 valid_states = Nfa.__close(set([self.__start])) 272 valid_states = Nfa.__close(set([self.__start]))
246 for c in string: 273 for c in string:
247 f = lambda acc, state: acc | state.char_matches(c) 274 f = lambda acc, state: acc | state.char_matches(c)
248 valid_states = Nfa.__close(reduce(f, valid_states, set())) 275 transitions = reduce(f, valid_states, set())
249 if not valid_states: 276 if not transitions:
250 return False 277 return False
278 valid_states = Nfa.__close(set([x[0] for x in transitions]))
251 return self.__end in valid_states 279 return self.__end in valid_states
252 280
253 @staticmethod 281 @staticmethod
254 def __to_dfa(nfa_state_set, builder): 282 def __to_dfa(nfa_state_set, dfa_nodes, end_node):
255 nfa_state_set = Nfa.__close(nfa_state_set) 283 nfa_state_set = Nfa.__close(nfa_state_set)
256 assert nfa_state_set 284 assert nfa_state_set
257 name = str([x.node_number() for x in nfa_state_set]) 285 name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set))
258 (dfa_nodes, end_nodes, end_node) = builder
259 if name in dfa_nodes: 286 if name in dfa_nodes:
260 return name 287 return name
261 dfa_node = {} 288 dfa_nodes[name] = {
262 dfa_nodes[name] = dfa_node 289 'transitions': {},
290 'terminal': end_node in nfa_state_set}
263 for key in NfaState.gather_transition_keys(nfa_state_set): 291 for key in NfaState.gather_transition_keys(nfa_state_set):
264 f = lambda acc, state: acc | state.key_matches(key) 292 f = lambda acc, state: acc | state.key_matches(key)
265 dfa_node[key] = Nfa.__to_dfa(reduce(f, nfa_state_set, set()), builder) 293 transitions = reduce(f, nfa_state_set, set())
266 if end_node in nfa_state_set: 294 match_states = set()
267 end_nodes.add(name) 295 actions = set()
296 for (state, action) in transitions:
297 match_states.add(state)
298 if action:
299 actions.add(action)
300 assert len(match_states) == len(transitions)
301 assert not actions or len(actions) == 1
302 action = iter(actions).next() if actions else None
303 transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)
304 dfa_nodes[name]['transitions'][key] = (transition_state, action)
268 return name 305 return name
269 306
270 def compute_dfa(self): 307 def compute_dfa(self):
271 self.__compute_epsilon_closures() 308 self.__compute_epsilon_closures()
272 dfa_nodes = {} 309 dfa_nodes = {}
273 end_nodes = set() 310 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)
274 dfa_builder = (dfa_nodes, end_nodes, self.__end) 311 return (start_name, dfa_nodes)
275 start_name = self.__to_dfa(set([self.__start]), dfa_builder)
276 return (start_name, dfa_nodes, end_nodes)
277 312
278 def to_dot(self): 313 def to_dot(self):
279 314
280 def f(node, node_content): 315 def f(node, node_content):
281 for key, values in node.transitions().items(): 316 for key, values in node.transitions().items():
282 if key == TransitionKey.epsilon(): 317 if key == TransitionKey.epsilon():
283 key = "ε" 318 key = "ε"
284 for value in values: 319 for value in values:
285 node_content.append( 320 node_content.append(
286 " S_%d -> S_%d [ label = \"%s\" ];" % 321 " S_%d -> S_%d [ label = \"%s\" ];" %
287 (node.node_number(), value.node_number(), key)) 322 (node.node_number(), value[0].node_number(), key))
288 return node_content 323 return node_content
289 324
290 node_content = self.__visit_all_edges(f, []) 325 node_content = self.__visit_all_edges(f, [])
291 326
292 return ''' 327 return '''
293 digraph finite_state_machine { 328 digraph finite_state_machine {
294 rankdir=LR; 329 rankdir=LR;
295 node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s 330 node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s
296 node [shape = doublecircle, style=unfilled]; S_%s 331 node [shape = doublecircle, style=unfilled]; S_%s
297 node [shape = circle]; 332 node [shape = circle];
298 %s 333 %s
299 } 334 }
300 ''' % (self.__start.node_number(), 335 ''' % (self.__start.node_number(),
301 self.__end.node_number(), 336 self.__end.node_number(),
302 "\n".join(node_content)) 337 "\n".join(node_content))
OLDNEW
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698