Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Side by Side Diff: tools/lexer_generator/nfa.py

Issue 68343004: Experimental parser: better actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/lexer_test.py ('k') | tools/lexer_generator/rule_lexer.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 19 matching lines...) Expand all
30 from automaton import * 30 from automaton import *
31 from inspect import getmembers 31 from inspect import getmembers
32 32
33 class NfaState(AutomatonState): 33 class NfaState(AutomatonState):
34 34
35 def __init__(self, node_number): 35 def __init__(self, node_number):
36 super(NfaState, self).__init__(node_number) 36 super(NfaState, self).__init__(node_number)
37 self.__transitions = {} 37 self.__transitions = {}
38 self.__unclosed = set() 38 self.__unclosed = set()
39 self.__epsilon_closure = None 39 self.__epsilon_closure = None
40 self.__transition_action = None 40 self.__action = None
41 41
42 def epsilon_closure(self): 42 def epsilon_closure(self):
43 return self.__epsilon_closure 43 return self.__epsilon_closure
44 44
45 def set_epsilon_closure(self, closure): 45 def set_epsilon_closure(self, closure):
46 assert self.is_closed() 46 assert self.is_closed()
47 assert self.__epsilon_closure == None 47 assert self.__epsilon_closure == None
48 self.__epsilon_closure = frozenset(closure) 48 self.__epsilon_closure = frozenset(closure)
49 49
50 def set_transition_action(self, action): 50 def action(self):
51 assert self.is_closed()
52 return self.__action
53
54 def set_action(self, action):
51 assert not self.is_closed() 55 assert not self.is_closed()
52 assert self.__transition_action == None 56 assert not self.__action
53 self.__transition_action = action 57 self.__action = action
54 58
55 def transitions(self): 59 def transitions(self):
56 assert self.is_closed() 60 assert self.is_closed()
57 return self.__transitions 61 return self.__transitions
58 62
59 def next_states(self, key_filter): 63 def next_states(self, key_filter):
60 assert self.is_closed() 64 assert self.is_closed()
61 first = lambda v: [x[0] for x in v] 65 f = lambda acc, (k, v) : acc if not key_filter(k) else acc | set(v)
62 f = lambda acc, (k, v) : acc if not key_filter(k) else acc | set(first(v))
63 return reduce(f, self.__transitions.items(), set()) 66 return reduce(f, self.__transitions.items(), set())
64 67
65 def __add_transition(self, key, action, next_state): 68 def __add_transition(self, key, next_state):
66 if next_state == None: 69 if next_state == None:
67 assert not action
68 assert not self.is_closed(), "already closed" 70 assert not self.is_closed(), "already closed"
69 self.__unclosed.add(key) 71 self.__unclosed.add(key)
70 return 72 return
71 if not key in self.__transitions: 73 if not key in self.__transitions:
72 self.__transitions[key] = set() 74 self.__transitions[key] = set()
73 self.__transitions[key].add((next_state, action)) 75 self.__transitions[key].add(next_state)
74 76
75 def add_unclosed_transition(self, key): 77 def add_unclosed_transition(self, key):
76 assert key != TransitionKey.epsilon() 78 assert key != TransitionKey.epsilon()
77 self.__add_transition(key, None, None) 79 self.__add_transition(key, None)
78 80
79 def add_epsilon_transition(self, state): 81 def add_epsilon_transition(self, state):
80 assert state != None 82 assert state != None
81 self.__add_transition(TransitionKey.epsilon(), None, state) 83 self.__add_transition(TransitionKey.epsilon(), state)
82 84
83 def is_closed(self): 85 def is_closed(self):
84 return self.__unclosed == None 86 return self.__unclosed == None
85 87
86 def close(self, end): 88 def close(self, end):
87 assert not self.is_closed() 89 assert not self.is_closed()
88 unclosed, self.__unclosed = self.__unclosed, None 90 unclosed, self.__unclosed = self.__unclosed, None
89 action, self.__transition_action = self.__transition_action, None
90 if end == None: 91 if end == None:
91 assert not unclosed and not action 92 assert not unclosed
92 return 93 return
93 for key in unclosed: 94 for key in unclosed:
94 self.__add_transition(key, action, end) 95 self.__add_transition(key, end)
95 if not unclosed: 96 if not unclosed:
96 self.__add_transition(TransitionKey.epsilon(), action, end) 97 self.__add_transition(TransitionKey.epsilon(), end)
97 98
98 def __matches(self, match_func, value): 99 def __matches(self, match_func, value):
99 f = lambda acc, (k, vs): acc | vs if match_func(k, value) else acc 100 f = lambda acc, (k, vs): acc | vs if match_func(k, value) else acc
100 return reduce(f, self.__transitions.items(), set()) 101 return reduce(f, self.__transitions.items(), set())
101 102
102 def char_matches(self, value): 103 def char_matches(self, value):
103 return self.__matches(lambda k, v : k.matches_char(v), value) 104 return self.__matches(lambda k, v : k.matches_char(v), value)
104 105
105 def key_matches(self, value): 106 def key_matches(self, value):
106 return self.__matches(lambda k, v : k.matches_key(v), value) 107 return self.__matches(lambda k, v : k.matches_key(v), value)
107 108
108 def __str__(self):
109 return "NfaState(" + str(self.node_number()) + ")"
110
111 @staticmethod 109 @staticmethod
112 def gather_transition_keys(state_set): 110 def gather_transition_keys(state_set):
113 f = lambda acc, state: acc | set(state.__transitions.keys()) 111 f = lambda acc, state: acc | set(state.__transitions.keys())
114 return TransitionKey.disjoint_keys(reduce(f, state_set, set())) 112 return TransitionKey.disjoint_keys(reduce(f, state_set, set()))
115 113
116 class NfaBuilder: 114 class NfaBuilder:
117 115
118 def __init__(self): 116 def __init__(self):
119 self.__node_number = 0 117 self.__node_number = 0
120 self.__operation_map = {} 118 self.__operation_map = {}
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
198 TransitionKey.character_class(graph, self.__character_classes)) 196 TransitionKey.character_class(graph, self.__character_classes))
199 197
200 def __not_class(self, graph): 198 def __not_class(self, graph):
201 return self.__key_state( 199 return self.__key_state(
202 TransitionKey.character_class(graph, self.__character_classes)) 200 TransitionKey.character_class(graph, self.__character_classes))
203 201
204 def __any(self, graph): 202 def __any(self, graph):
205 return self.__key_state(TransitionKey.any()) 203 return self.__key_state(TransitionKey.any())
206 204
207 def __action(self, graph): 205 def __action(self, graph):
208 incoming = graph[3]
209 (start, ends) = self.__process(graph[1]) 206 (start, ends) = self.__process(graph[1])
210 action = graph[2] 207 action = graph[2]
211 if incoming: 208 end = self.__new_state()
212 new_start = self.__new_state() 209 self.__patch_ends(ends, end)
213 new_start.set_transition_action(action) 210 end.set_action(action)
214 new_start.close(start) 211 return (start, [end])
215 start = new_start
216 else:
217 new_end = self.__new_state()
218 for end in ends:
219 end.set_transition_action(action)
220 self.__patch_ends(ends, new_end)
221 ends = [new_end]
222 return (start, ends)
223 212
224 def __continue(self, graph): 213 def __continue(self, graph):
225 (start, ends) = self.__process(graph[1]) 214 (start, ends) = self.__process(graph[1])
226 state = self.__peek_state() 215 state = self.__peek_state()
227 if not state['start_node']: 216 if not state['start_node']:
228 state['start_node'] = self.__new_state() 217 state['start_node'] = self.__new_state()
229 end = self.__new_state() 218 end = self.__new_state()
230 self.__patch_ends(ends, end) 219 self.__patch_ends(ends, end)
231 ends = [end] 220 ends = [end]
232 end.add_epsilon_transition(state['start_node']) 221 end.add_epsilon_transition(state['start_node'])
233 return (start, ends) 222 return (start, ends)
234 223
235 def __join(self, graph): 224 def __join(self, graph):
236 (graph, name, subgraph) = graph[1:] 225 (graph, name, subgraph, modifier) = graph[1:]
237 subgraphs = self.__peek_state()['subgraphs'] 226 subgraphs = self.__peek_state()['subgraphs']
238 if not name in subgraphs: 227 if not name in subgraphs:
239 subgraphs[name] = self.__nfa(subgraph) 228 subgraphs[name] = self.__nfa(subgraph)
240 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name] 229 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name]
241 (start, ends) = self.__process(graph) 230 (start, ends) = self.__process(graph)
231 if modifier:
232 assert modifier == 'ZERO_OR_MORE'
233 for end in ends:
234 end.add_epsilon_transition(subgraph_end)
242 self.__patch_ends(ends, subgraph_start) 235 self.__patch_ends(ends, subgraph_start)
243 end = self.__new_state() 236 end = self.__new_state()
244 subgraph_end.add_epsilon_transition(end) 237 subgraph_end.add_epsilon_transition(end)
245 return (start, [end]) 238 return (start, [end])
246 239
247 def __process(self, graph): 240 def __process(self, graph):
248 assert type(graph) == TupleType 241 assert type(graph) == TupleType
249 method = "_NfaBuilder__" + graph[0].lower() 242 method = "_NfaBuilder__" + graph[0].lower()
250 if not method in self.__operation_map: 243 if not method in self.__operation_map:
251 matches = filter(lambda (name, func): name == method, self.__members) 244 matches = filter(lambda (name, func): name == method, self.__members)
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
283 self.__patch_ends(ends, end) 276 self.__patch_ends(ends, end)
284 return (start, end, self.__node_number - start_node_number) 277 return (start, end, self.__node_number - start_node_number)
285 278
286 def nfa(self, graph): 279 def nfa(self, graph):
287 (start, end, nodes_created) = self.__nfa(graph) 280 (start, end, nodes_created) = self.__nfa(graph)
288 end.close(None) 281 end.close(None)
289 return Nfa(start, end, nodes_created) 282 return Nfa(start, end, nodes_created)
290 283
291 @staticmethod 284 @staticmethod
292 def add_action(graph, action): 285 def add_action(graph, action):
293 return ('ACTION', graph, action, False) 286 return ('ACTION', graph, action)
294
295 @staticmethod
296 def add_incoming_action(graph, action):
297 return ('ACTION', graph, action, True)
298 287
299 @staticmethod 288 @staticmethod
300 def add_continue(graph): 289 def add_continue(graph):
301 return ('CONTINUE', graph) 290 return ('CONTINUE', graph)
302 291
303 @staticmethod 292 @staticmethod
304 def join_subgraph(graph, name, subgraph): 293 def join_subgraph(graph, name, subgraph, modifier):
305 return ('JOIN', graph, name, subgraph) 294 if modifier:
295 modifier = NfaBuilder.__modifer_map[modifier]
296 return ('JOIN', graph, name, subgraph, modifier)
306 297
307 @staticmethod 298 @staticmethod
308 def or_graphs(graphs): 299 def or_graphs(graphs):
309 return reduce(lambda acc, g: ('OR', acc, g), graphs) 300 return reduce(lambda acc, g: ('OR', acc, g), graphs)
310 301
311 @staticmethod 302 @staticmethod
312 def cat_graphs(graphs): 303 def cat_graphs(graphs):
313 return reduce(lambda acc, g: ('CAT', acc, g), graphs) 304 return reduce(lambda acc, g: ('CAT', acc, g), graphs)
314 305
315 __modifer_map = { 306 __modifer_map = {
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
365 return reduce(f, states, set(states)) 356 return reduce(f, states, set(states))
366 357
367 def matches(self, string): 358 def matches(self, string):
368 self.__compute_epsilon_closures() 359 self.__compute_epsilon_closures()
369 valid_states = Nfa.__close(set([self.__start])) 360 valid_states = Nfa.__close(set([self.__start]))
370 for c in string: 361 for c in string:
371 f = lambda acc, state: acc | state.char_matches(c) 362 f = lambda acc, state: acc | state.char_matches(c)
372 transitions = reduce(f, valid_states, set()) 363 transitions = reduce(f, valid_states, set())
373 if not transitions: 364 if not transitions:
374 return False 365 return False
375 valid_states = Nfa.__close(set([x[0] for x in transitions])) 366 valid_states = Nfa.__close(transitions)
376 return self.__end in valid_states 367 return self.__end in valid_states
377 368
378 @staticmethod 369 @staticmethod
379 def __to_dfa(nfa_state_set, dfa_nodes, end_node): 370 def __to_dfa(nfa_state_set, dfa_nodes, end_node):
380 nfa_state_set = Nfa.__close(nfa_state_set) 371 nfa_state_set = Nfa.__close(nfa_state_set)
381 assert nfa_state_set 372 assert nfa_state_set
382 name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set)) 373 name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set))
383 if name in dfa_nodes: 374 if name in dfa_nodes:
384 return name 375 return name
376 def gather_actions(states):
377 actions = set()
378 for state in states:
379 if state.action():
380 actions.add(state.action())
381 actions = list(actions)
382 actions.sort()
383 return actions
385 dfa_nodes[name] = { 384 dfa_nodes[name] = {
386 'transitions': {}, 385 'transitions': {},
387 'terminal': end_node in nfa_state_set} 386 'terminal': end_node in nfa_state_set,
387 'actions' : gather_actions(nfa_state_set)}
388 for key in NfaState.gather_transition_keys(nfa_state_set): 388 for key in NfaState.gather_transition_keys(nfa_state_set):
389 match_states = set()
389 f = lambda acc, state: acc | state.key_matches(key) 390 f = lambda acc, state: acc | state.key_matches(key)
390 transitions = reduce(f, nfa_state_set, set()) 391 for state in reduce(f, nfa_state_set, set()):
391 match_states = set()
392 actions = []
393 for (state, action) in transitions:
394 match_states.add(state) 392 match_states.add(state)
395 if action:
396 actions.append(action)
397
398 # Pull in actions which can be taken with an epsilon transition from the
399 # match state.
400 e = TransitionKey.epsilon()
401 if e in state.transitions():
402 for e_trans in state.transitions()[e]:
403 if e_trans[1]:
404 actions.append(e_trans[1])
405 for s in state.epsilon_closure():
406 if e in s.transitions():
407 for e_trans in s.transitions()[e]:
408 if e_trans[1]:
409 actions.append(e_trans[1])
410
411 assert len(match_states) == len(transitions)
412
413 actions.sort()
414 action = actions[0] if actions else None
415 transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node) 393 transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)
416 dfa_nodes[name]['transitions'][key] = (transition_state, action) 394 dfa_nodes[name]['transitions'][key] = transition_state
417 return name 395 return name
418 396
419 def compute_dfa(self): 397 def compute_dfa(self):
420 self.__compute_epsilon_closures() 398 self.__compute_epsilon_closures()
421 dfa_nodes = {} 399 dfa_nodes = {}
422 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end) 400 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)
423 return (start_name, dfa_nodes) 401 return (start_name, dfa_nodes)
424 402
425 def to_dot(self): 403 def to_dot(self):
426 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state) 404 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state)
427 return self.generate_dot(self.__start, set([self.__end]), iterator) 405 state_iterator = lambda x : x
406 return self.generate_dot(self.__start, set([self.__end]), iterator, state_it erator)
OLDNEW
« no previous file with comments | « tools/lexer_generator/lexer_test.py ('k') | tools/lexer_generator/rule_lexer.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698