Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(777)

Side by Side Diff: tools/lexer_generator/nfa_builder.py

Issue 160443002: Experimental parser: remove match actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/nfa.py ('k') | tools/lexer_generator/regex_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 28 matching lines...) Expand all
39 # of NfaBuilder called __x(). See __process() for mapping from 39 # of NfaBuilder called __x(). See __process() for mapping from
40 # operation to processing methods. 40 # operation to processing methods.
41 41
42 class NfaBuilder(object): 42 class NfaBuilder(object):
43 43
44 def __init__(self, encoding, character_classes, subtree_map): 44 def __init__(self, encoding, character_classes, subtree_map):
45 self.__node_number = 0 45 self.__node_number = 0
46 self.__encoding = encoding 46 self.__encoding = encoding
47 self.__character_classes = character_classes 47 self.__character_classes = character_classes
48 self.__states = [] 48 self.__states = []
49 self.__global_end_node = None 49 self.__global_end = self.__new_state()
50 self.__operation_map = None 50 self.__operation_map = None
51 self.__subtree_map = subtree_map 51 self.__subtree_map = subtree_map
52 52
53 def __new_state(self): 53 def __new_state(self):
54 self.__node_number += 1 54 self.__node_number += 1
55 return NfaState() 55 return NfaState()
56 56
57 def __global_end(self):
58 if not self.__global_end_node:
59 self.__global_end_node = self.__new_state()
60 return self.__global_end_node
61
62 def __or(self, *trees): 57 def __or(self, *trees):
63 start = self.__new_state() 58 start = self.__new_state()
64 ends = [] 59 ends = []
65 for tree in trees: 60 for tree in trees:
66 (sub_start, sub_end) = self.__process(tree) 61 (sub_start, sub_end) = self.__process(tree)
67 start.add_epsilon_transition(sub_start) 62 start.add_epsilon_transition(sub_start)
68 ends += sub_end 63 ends += sub_end
69 start.close(None) 64 start.close(None)
70 return (start, ends) 65 return (start, ends)
71 66
(...skipping 10 matching lines...) Expand all
82 start.add_epsilon_transition(node) 77 start.add_epsilon_transition(node)
83 self.__patch_ends(ends, start) 78 self.__patch_ends(ends, start)
84 return (start, [start]) 79 return (start, [start])
85 80
86 def __zero_or_one(self, subtree): 81 def __zero_or_one(self, subtree):
87 (node, ends) = self.__process(subtree) 82 (node, ends) = self.__process(subtree)
88 start = self.__new_state() 83 start = self.__new_state()
89 start.add_epsilon_transition(node) 84 start.add_epsilon_transition(node)
90 return (start, ends + [start]) 85 return (start, ends + [start])
91 86
92 def __repeat(self, param_min, param_max, subtree): 87 def __repeat(self, subtree, param_min, param_max):
93 'process regex of form subtree{param_min, param_max}' 88 'process regex of form subtree{param_min, param_max}'
94 (param_min, param_max) = (int(param_min), int(param_max)) 89 (param_min, param_max) = (int(param_min), int(param_max))
95 assert param_min > 1 and param_min <= param_max 90 assert param_min > 1 and param_min <= param_max
96 (start, ends) = self.__process(subtree) 91 (start, ends) = self.__process(subtree)
97 # create a chain of param_min subtrees 92 # create a chain of param_min subtrees
98 for i in xrange(1, param_min): 93 for i in xrange(1, param_min):
99 (start2, ends2) = self.__process(subtree) 94 (start2, ends2) = self.__process(subtree)
100 self.__patch_ends(ends, start2) 95 self.__patch_ends(ends, start2)
101 ends = ends2 96 ends = ends2
102 if param_min == param_max: 97 if param_min == param_max:
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
140 return self.__key_state(TransitionKey.character_class( 135 return self.__key_state(TransitionKey.character_class(
141 self.__encoding, Term('CLASS', subtree), self.__character_classes)) 136 self.__encoding, Term('CLASS', subtree), self.__character_classes))
142 137
143 def __not_class(self, subtree): 138 def __not_class(self, subtree):
144 return self.__key_state(TransitionKey.character_class( 139 return self.__key_state(TransitionKey.character_class(
145 self.__encoding, Term('NOT_CLASS', subtree), self.__character_classes)) 140 self.__encoding, Term('NOT_CLASS', subtree), self.__character_classes))
146 141
147 def __any(self): 142 def __any(self):
148 return self.__key_state(TransitionKey.any(self.__encoding)) 143 return self.__key_state(TransitionKey.any(self.__encoding))
149 144
150 def __action(self, subtree, action_term): 145 def __unique_key(self, name):
146 return self.__key_state(TransitionKey.unique(name))
147
148 def __entry_action(self, action, precedence, subtree):
151 (start, ends) = self.__process(subtree) 149 (start, ends) = self.__process(subtree)
152 action = Action.from_term(action_term) 150 node = self.__new_state()
153 end = self.__new_state() 151 node.set_action(Action(action, precedence))
154 self.__patch_ends(ends, end) 152 self.__patch_ends(ends, node)
155 end.set_action(action) 153 return (start, [node])
154
155 def __match_action(self, action, precedence, subtree):
156 (start, ends) = self.__process(subtree)
157 node = self.__new_state()
158 node.set_action(Action(action, precedence))
156 # Force all match actions to be terminal. 159 # Force all match actions to be terminal.
157 if action.match_action(): 160 node.close(self.__global_end)
158 global_end = self.__global_end() 161 # patch via a match edge into existing graph
159 end.add_epsilon_transition(global_end) 162 match_node = self.__new_state()
160 return (start, [end]) 163 match_node.add_transition(TransitionKey.omega(), node)
164 self.__patch_ends(ends, match_node)
165 return (start, [match_node])
161 166
162 def __continue(self, subtree, depth): 167 def __continue(self, subtree, depth):
163 'add an epsilon transitions to the start node of the current subtree' 168 'add an epsilon transitions to the start node of the current subtree'
164 (start, ends) = self.__process(subtree) 169 (start, ends) = self.__process(subtree)
165 index = -1 - min(int(depth), len(self.__states) - 1) 170 index = -1 - min(int(depth), len(self.__states) - 1)
166 state = self.__states[index] 171 state = self.__states[index]
167 if not state['start_node']: 172 if not state['start_node']:
168 state['start_node'] = self.__new_state() 173 state['start_node'] = self.__new_state()
169 self.__patch_ends(ends, state['start_node']) 174 self.__patch_ends(ends, state['start_node'])
170 return (start, []) 175 return (start, [])
171 176
172 def __unique_key(self, name):
173 return self.__key_state(TransitionKey.unique(name))
174
175 def __join(self, tree, name): 177 def __join(self, tree, name):
176 (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(name) 178 (start, ends) = self.__subgraph(name)
177 if tree: 179 (new_start, new_ends) = self.__process(tree)
178 (start, ends) = self.__process(tree) 180 self.__patch_ends(new_ends, start)
179 self.__patch_ends(ends, subtree_start) 181 return (new_start, ends)
180 else:
181 start = subtree_start
182 if subtree_end:
183 return (start, [subtree_end])
184 else:
185 return (start, [])
186 182
187 def __get_method(self, name): 183 def __get_method(self, name):
188 'lazily build map of all private bound methods and return the one requested' 184 'lazily build map of all private bound methods and return the one requested'
189 if not self.__operation_map: 185 if not self.__operation_map:
190 prefix = "_NfaBuilder__" 186 prefix = "_NfaBuilder__"
191 self.__operation_map = {name[len(prefix):] : f for (name, f) in 187 self.__operation_map = {name[len(prefix):] : f for (name, f) in
192 filter(lambda (name, f): name.startswith(prefix), getmembers(self))} 188 filter(lambda (name, f): name.startswith(prefix), getmembers(self))}
193 return self.__operation_map[name] 189 return self.__operation_map[name]
194 190
195 def __process(self, term): 191 def __process(self, term):
196 assert isinstance(term, Term) 192 assert isinstance(term, Term)
197 method = self.__get_method(term.name().lower()) 193 method = self.__get_method(term.name().lower())
198 return method(*term.args()) 194 return method(*term.args())
199 195
200 def __patch_ends(self, ends, new_end): 196 def __patch_ends(self, ends, new_end):
201 for end in ends: 197 for end in ends:
202 end.close(new_end) 198 end.close(new_end)
203 199
204 def __push_state(self, name): 200 def __push_state(self, name):
205 for state in self.__states: 201 for state in self.__states:
206 assert state['name'] != name, 'recursive subgraph' 202 assert state['name'] != name, 'recursive subgraph'
207 self.__states.append({ 203 self.__states.append({
208 'start_node' : None, 204 'start_node' : None,
209 'name' : name 205 'name' : name
210 }) 206 })
211 207
212 def __pop_state(self): 208 def __subgraph(self, name):
213 return self.__states.pop()
214
215 def __nfa(self, name):
216 tree = self.__subtree_map[name] 209 tree = self.__subtree_map[name]
217 start_node_number = self.__node_number
218 self.__push_state(name) 210 self.__push_state(name)
219 (start, ends) = self.__process(tree) 211 (start, ends) = self.__process(tree)
220 state = self.__pop_state() 212 state = self.__states.pop()
221 # move saved start node into start 213 # move saved start node into start
222 if state['start_node']: 214 if state['start_node']:
223 state['start_node'].close(start) 215 state['start_node'].close(start)
224 start = state['start_node'] 216 start = state['start_node']
225 # Don't create an end node for the subgraph if it would be unused (ends can 217 return (start, ends)
226 # be an empty list e.g., in the case when everything inside the subgraph is
227 # "continue").
228 end = None
229 if ends:
230 end = self.__new_state()
231 self.__patch_ends(ends, end)
232 return (start, end, self.__node_number - start_node_number)
233 218
234 @staticmethod 219 @staticmethod
235 def __compute_epsilon_closures(start_state): 220 def add_action(tree, entry_action, match_action, precedence):
236 def outer(node, state): 221 if entry_action:
237 def inner(node, closure): 222 tree = Term('ENTRY_ACTION', entry_action, precedence, tree)
238 closure.add(node) 223 if match_action:
239 return closure 224 tree = Term('MATCH_ACTION', match_action, precedence, tree)
240 is_epsilon = lambda k: k == TransitionKey.epsilon() 225 return tree
241 state_iter = lambda node : node.state_iter(key_filter = is_epsilon)
242 edge = set(state_iter(node))
243 closure = Automaton.visit_states(
244 edge, inner, state_iter=state_iter, visit_state=set())
245 node.set_epsilon_closure(closure)
246 Automaton.visit_states(set([start_state]), outer)
247
248 @staticmethod
249 def __replace_catch_all(encoding, state):
250 catch_all = TransitionKey.unique('catch_all')
251 states = list(state.state_iter_for_key(catch_all))
252 if not states:
253 return
254 f = lambda acc, state: acc | set(state.epsilon_closure_iter())
255 reachable_states = reduce(f, states, set())
256 f = lambda acc, state: acc | set(state.key_iter())
257 keys = reduce(f, reachable_states, set())
258 keys.discard(TransitionKey.epsilon())
259 keys.discard(catch_all)
260 keys.discard(TransitionKey.unique('eos'))
261 inverse_key = TransitionKey.inverse_key(encoding, keys)
262 if not inverse_key:
263 inverse_key = TransitionKey.unique('no_match')
264 state.swap_key(catch_all, inverse_key)
265
266 @staticmethod
267 def nfa(encoding, character_classes, subtree_map, name):
268 self = NfaBuilder(encoding, character_classes, subtree_map)
269 (start, end, nodes_created) = self.__nfa(name)
270 # close all ends
271 global_end_to_return = self.__global_end_node or end
272 assert global_end_to_return, "no terminal nodes, default tree continues"
273 if end and self.__global_end_node:
274 end.close(self.__global_end_node)
275 global_end_to_return.close(None)
276 # Prepare the nfa
277 self.__compute_epsilon_closures(start)
278 f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
279 Automaton.visit_states(set([start]), f)
280 return Nfa(self.__encoding, start, global_end_to_return, nodes_created)
281
282 @staticmethod
283 def add_action(term, action):
284 return Term('ACTION', term, action.to_term())
285 226
286 @staticmethod 227 @staticmethod
287 def add_continue(tree, depth): 228 def add_continue(tree, depth):
288 return Term('CONTINUE', tree, depth) 229 return Term('CONTINUE', tree, depth)
289 230
290 @staticmethod 231 @staticmethod
291 def unique_key(name): 232 def unique_key(name):
292 return Term('UNIQUE_KEY', name) 233 return Term('UNIQUE_KEY', name)
293 234
294 @staticmethod 235 @staticmethod
295 def join_subtree(tree, subtree_name): 236 def join_subtree(tree, subtree_name):
237 if not tree:
238 return Term('SUBGRAPH', subtree_name)
296 return Term('JOIN', tree, subtree_name) 239 return Term('JOIN', tree, subtree_name)
297 240
298 __modifer_map = { 241 __modifer_map = {
299 '+': 'ONE_OR_MORE', 242 '+': 'ONE_OR_MORE',
300 '?': 'ZERO_OR_ONE', 243 '?': 'ZERO_OR_ONE',
301 '*': 'ZERO_OR_MORE', 244 '*': 'ZERO_OR_MORE',
302 } 245 }
303 246
304 @staticmethod 247 @staticmethod
305 def apply_modifier(modifier, term): 248 def apply_modifier(modifier, term):
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
338 terms = list(NfaBuilder.__flatten_terms(terms, 'OR')) 281 terms = list(NfaBuilder.__flatten_terms(terms, 'OR'))
339 assert terms 282 assert terms
340 return terms[0] if len(terms) == 1 else Term('OR', *terms) 283 return terms[0] if len(terms) == 1 else Term('OR', *terms)
341 284
342 @staticmethod 285 @staticmethod
343 def cat_terms(terms): 286 def cat_terms(terms):
344 terms = NfaBuilder.__flatten_terms(terms, 'CAT') 287 terms = NfaBuilder.__flatten_terms(terms, 'CAT')
345 terms = list(NfaBuilder.__flatten_literals(terms)) 288 terms = list(NfaBuilder.__flatten_literals(terms))
346 assert terms 289 assert terms
347 return terms[0] if len(terms) == 1 else Term('CAT', *terms) 290 return terms[0] if len(terms) == 1 else Term('CAT', *terms)
291
292 @staticmethod
293 def nfa(encoding, character_classes, subtree_map, name):
294 self = NfaBuilder(encoding, character_classes, subtree_map)
295 (start, ends) = self.__subgraph(name)
296 # close all ends
297 end = self.__global_end
298 end.close(None)
299 # TODO(dcarney): patch in default action
300 self.__patch_ends(ends, end)
301 # Prepare the nfa
302 self.__compute_epsilon_closures(start)
303 f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
304 Automaton.visit_states(set([start]), f)
305 return Nfa(self.__encoding, start, end, self.__node_number)
306
307 @staticmethod
308 def __compute_epsilon_closures(start_state):
309 def outer(node, state):
310 def inner(node, closure):
311 closure.add(node)
312 return closure
313 is_epsilon = lambda k: k == TransitionKey.epsilon()
314 state_iter = lambda node : node.state_iter(key_filter = is_epsilon)
315 edge = set(state_iter(node))
316 closure = Automaton.visit_states(
317 edge, inner, state_iter=state_iter, visit_state=set())
318 node.set_epsilon_closure(closure)
319 Automaton.visit_states(set([start_state]), outer)
320
321 @staticmethod
322 def __replace_catch_all(encoding, state):
323 catch_all = TransitionKey.unique('catch_all')
324 states = list(state.state_iter_for_key(catch_all))
325 if not states:
326 return
327 f = lambda acc, state: acc | set(state.epsilon_closure_iter())
328 reachable_states = reduce(f, states, set())
329 f = lambda acc, state: acc | set(state.key_iter())
330 keys = reduce(f, reachable_states, set())
331 keys.discard(TransitionKey.epsilon())
332 keys.discard(catch_all)
333 keys.discard(TransitionKey.unique('eos'))
334 inverse_key = TransitionKey.inverse_key(encoding, keys)
335 if not inverse_key:
336 inverse_key = TransitionKey.unique('no_match')
337 state.swap_key(catch_all, inverse_key)
338
339 @staticmethod
340 def __install_omega_transitions(start_state, default_action):
341 '''install a match transition, a backtrack transition,
342 or a default transition into all nodes'''
343 pass
OLDNEW
« no previous file with comments | « tools/lexer_generator/nfa.py ('k') | tools/lexer_generator/regex_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698