Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: tools/lexer_generator/nfa_builder.py

Issue 144643008: Experimental parser: cleanup NfaBuilder a bit (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 23 matching lines...) Expand all
34 # 1. Build a tree of operations on rules. 34 # 1. Build a tree of operations on rules.
35 # Each node in the tree is a tuple (operation, subtree1, ... subtreeN). 35 # Each node in the tree is a tuple (operation, subtree1, ... subtreeN).
36 # Rule parser builds this tree by invoking static methods of NfaBuilder. 36 # Rule parser builds this tree by invoking static methods of NfaBuilder.
37 # 2. For each node, perform the operation of the node to produce an Nfa. 37 # 2. For each node, perform the operation of the node to produce an Nfa.
38 # If an operation is called X, then it is performed by the method 38 # If an operation is called X, then it is performed by the method
39 # of NfaBuilder called __x(). See __process() for mapping from 39 # of NfaBuilder called __x(). See __process() for mapping from
40 # operation to processing methods. 40 # operation to processing methods.
41 41
42 class NfaBuilder(object): 42 class NfaBuilder(object):
43 43
44 def __init__(self, encoding, character_classes): 44 def __init__(self, encoding, character_classes, subtree_map):
45 self.__node_number = 0 45 self.__node_number = 0
46 self.__operation_map = {}
47 self.__members = getmembers(self)
48 self.__encoding = encoding 46 self.__encoding = encoding
49 self.__character_classes = character_classes 47 self.__character_classes = character_classes
50 self.__states = [] 48 self.__states = []
51 self.__global_end_node = None 49 self.__global_end_node = None
50 self.__operation_map = None
51 self.__subtree_map = subtree_map
52 52
53 def __new_state(self): 53 def __new_state(self):
54 self.__node_number += 1 54 self.__node_number += 1
55 return NfaState() 55 return NfaState()
56 56
57 def __global_end(self): 57 def __global_end(self):
58 if not self.__global_end_node: 58 if not self.__global_end_node:
59 self.__global_end_node = self.__new_state() 59 self.__global_end_node = self.__new_state()
60 return self.__global_end_node 60 return self.__global_end_node
61 61
(...skipping 20 matching lines...) Expand all
82 self.__patch_ends(ends, start) 82 self.__patch_ends(ends, start)
83 return (start, [start]) 83 return (start, [start])
84 84
85 def __zero_or_one(self, subtree): 85 def __zero_or_one(self, subtree):
86 (node, ends) = self.__process(subtree) 86 (node, ends) = self.__process(subtree)
87 start = self.__new_state() 87 start = self.__new_state()
88 start.add_epsilon_transition(node) 88 start.add_epsilon_transition(node)
89 return (start, ends + [start]) 89 return (start, ends + [start])
90 90
91 def __repeat(self, param_min, param_max, subtree): 91 def __repeat(self, param_min, param_max, subtree):
92 param_min = int(param_min) 92 'process regex of form subtree{param_min, param_max}'
93 param_max = int(param_max) 93 (param_min, param_max) = (int(param_min), int(param_max))
94 assert param_min > 1 and param_min <= param_max
94 (start, ends) = self.__process(subtree) 95 (start, ends) = self.__process(subtree)
96 # create a chain of param_min subtrees
95 for i in xrange(1, param_min): 97 for i in xrange(1, param_min):
96 (start2, ends2) = self.__process(subtree) 98 (start2, ends2) = self.__process(subtree)
97 self.__patch_ends(ends, start2) 99 self.__patch_ends(ends, start2)
98 ends = ends2 100 ends = ends2
99 if param_min == param_max: 101 if param_min == param_max:
100 return (start, ends) 102 return (start, ends)
101 103 # join in (param_max - param_min) optional subtrees
102 midpoints = [] 104 midpoints = []
103 for i in xrange(param_min, param_max): 105 for i in xrange(param_min, param_max):
104 midpoint = self.__new_state() 106 midpoint = self.__new_state()
105 self.__patch_ends(ends, midpoint) 107 self.__patch_ends(ends, midpoint)
106 (start2, ends) = self.__process(subtree) 108 (start2, ends) = self.__process(subtree)
107 midpoint.add_epsilon_transition(start2) 109 midpoint.add_epsilon_transition(start2)
108 midpoints.append(midpoint) 110 midpoints.append(midpoint)
109
110 return (start, ends + midpoints) 111 return (start, ends + midpoints)
111 112
112 def __cat(self, left_tree, right_tree): 113 def __cat(self, left_tree, right_tree):
113 (left, right) = (self.__process(left_tree), self.__process(right_tree)) 114 (left, right) = (self.__process(left_tree), self.__process(right_tree))
114 self.__patch_ends(left[1], right[0]) 115 self.__patch_ends(left[1], right[0])
115 return (left[0], right[1]) 116 return (left[0], right[1])
116 117
117 def __key_state(self, key): 118 def __key_state(self, key):
118 state = self.__new_state() 119 state = self.__new_state()
119 state.add_unclosed_transition(key) 120 state.add_unclosed_transition(key)
(...skipping 13 matching lines...) Expand all
133 134
134 def __any(self): 135 def __any(self):
135 return self.__key_state(TransitionKey.any(self.__encoding)) 136 return self.__key_state(TransitionKey.any(self.__encoding))
136 137
137 def __action(self, subtree, action_term): 138 def __action(self, subtree, action_term):
138 (start, ends) = self.__process(subtree) 139 (start, ends) = self.__process(subtree)
139 action = Action.from_term(action_term) 140 action = Action.from_term(action_term)
140 end = self.__new_state() 141 end = self.__new_state()
141 self.__patch_ends(ends, end) 142 self.__patch_ends(ends, end)
142 end.set_action(action) 143 end.set_action(action)
144 # Force all match actions to be terminal.
143 if action.match_action(): 145 if action.match_action():
144 global_end = self.__global_end() 146 global_end = self.__global_end()
145 end.add_epsilon_transition(global_end) 147 end.add_epsilon_transition(global_end)
146 return (start, [end]) 148 return (start, [end])
147 149
148 def __continue(self, subtree): 150 def __continue(self, subtree):
151 'add an epsilon transitions to the start node of the current subtree'
149 (start, ends) = self.__process(subtree) 152 (start, ends) = self.__process(subtree)
150 state = self.__peek_state() 153 state = self.__peek_state()
151 if not state['start_node']: 154 if not state['start_node']:
152 state['start_node'] = self.__new_state() 155 state['start_node'] = self.__new_state()
153 self.__patch_ends(ends, state['start_node']) 156 self.__patch_ends(ends, state['start_node'])
154 return (start, []) 157 return (start, [])
155 158
156 def __unique_key(self, name): 159 def __unique_key(self, name):
157 return self.__key_state(TransitionKey.unique(name)) 160 return self.__key_state(TransitionKey.unique(name))
158 161
159 def __join(self, tree, subtree): 162 def __join(self, tree, name):
160 (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(subtree) 163 (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(name)
161 (start, ends) = self.__process(tree) 164 (start, ends) = self.__process(tree)
162 self.__patch_ends(ends, subtree_start) 165 self.__patch_ends(ends, subtree_start)
163 if subtree_end: 166 if subtree_end:
164 return (start, [subtree_end]) 167 return (start, [subtree_end])
165 else: 168 else:
166 return (start, []) 169 return (start, [])
167 170
171 def __get_method(self, name):
172 'lazily build map of all private bound methods and return the one requested'
173 if not self.__operation_map:
174 prefix = "_NfaBuilder__"
175 self.__operation_map = {name[len(prefix):] : f for (name, f) in
176 filter(lambda (name, f): name.startswith(prefix), getmembers(self))}
177 return self.__operation_map[name]
178
168 def __process(self, term): 179 def __process(self, term):
169 assert isinstance(term, Term) 180 assert isinstance(term, Term)
170 method = "_NfaBuilder__" + term.name().lower() 181 method = self.__get_method(term.name().lower())
171 if not method in self.__operation_map: 182 return method(*term.args())
172 matches = filter(lambda (name, func): name == method, self.__members)
173 assert len(matches) == 1
174 self.__operation_map[method] = matches[0][1]
175 return self.__operation_map[method](*term.args())
176 183
177 def __patch_ends(self, ends, new_end): 184 def __patch_ends(self, ends, new_end):
178 for end in ends: 185 for end in ends:
179 end.close(new_end) 186 end.close(new_end)
180 187
181 def __push_state(self): 188 def __push_state(self, name):
189 for state in self.__states:
190 assert state['name'] != name, 'recursive subgraph'
182 self.__states.append({ 191 self.__states.append({
183 'start_node' : None, 192 'start_node' : None,
193 'name' : name
184 }) 194 })
185 195
186 def __pop_state(self): 196 def __pop_state(self):
187 return self.__states.pop() 197 return self.__states.pop()
188 198
189 def __peek_state(self): 199 def __peek_state(self):
190 return self.__states[-1] 200 return self.__states[-1]
191 201
192 def __nfa(self, term): 202 def __nfa(self, name):
203 tree = self.__subtree_map[name]
193 start_node_number = self.__node_number 204 start_node_number = self.__node_number
194 self.__push_state() 205 self.__push_state(name)
195 (start, ends) = self.__process(term) 206 (start, ends) = self.__process(tree)
196 state = self.__pop_state() 207 state = self.__pop_state()
208 # move saved start node into start
197 if state['start_node']: 209 if state['start_node']:
198 state['start_node'].close(start) 210 state['start_node'].close(start)
199 start = state['start_node'] 211 start = state['start_node']
200 # Don't create an end node for the subgraph if it would be unused (ends can 212 # Don't create an end node for the subgraph if it would be unused (ends can
201 # be an empty list e.g., in the case when everything inside the subgraph is 213 # be an empty list e.g., in the case when everything inside the subgraph is
202 # "continue"). 214 # "continue").
203 end = None 215 end = None
204 if ends: 216 if ends:
205 end = self.__new_state() 217 end = self.__new_state()
206 self.__patch_ends(ends, end) 218 self.__patch_ends(ends, end)
(...skipping 25 matching lines...) Expand all
232 keys = reduce(f, reachable_states, set()) 244 keys = reduce(f, reachable_states, set())
233 keys.discard(TransitionKey.epsilon()) 245 keys.discard(TransitionKey.epsilon())
234 keys.discard(catch_all) 246 keys.discard(catch_all)
235 keys.discard(TransitionKey.unique('eos')) 247 keys.discard(TransitionKey.unique('eos'))
236 inverse_key = TransitionKey.inverse_key(encoding, keys) 248 inverse_key = TransitionKey.inverse_key(encoding, keys)
237 if inverse_key: 249 if inverse_key:
238 transitions[inverse_key] = transitions[catch_all] 250 transitions[inverse_key] = transitions[catch_all]
239 del transitions[catch_all] 251 del transitions[catch_all]
240 252
241 @staticmethod 253 @staticmethod
242 def nfa(encoding, character_classes, rule_term): 254 def nfa(encoding, character_classes, subtree_map, name):
243 255 self = NfaBuilder(encoding, character_classes, subtree_map)
244 self = NfaBuilder(encoding, character_classes) 256 (start, end, nodes_created) = self.__nfa(name)
245 (start, end, nodes_created) = self.__nfa(rule_term) 257 # close all ends
246
247 global_end_to_return = self.__global_end_node or end 258 global_end_to_return = self.__global_end_node or end
248 assert global_end_to_return 259 assert global_end_to_return, "no terminal nodes, default tree continues"
249 if end and self.__global_end_node: 260 if end and self.__global_end_node:
250 end.close(self.__global_end_node) 261 end.close(self.__global_end_node)
251
252 global_end_to_return.close(None) 262 global_end_to_return.close(None)
253 263 # Prepare the nfa
254 self.__compute_epsilon_closures(start) 264 self.__compute_epsilon_closures(start)
255 f = lambda node, state: self.__replace_catch_all(self.__encoding, node) 265 f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
256 Automaton.visit_states(set([start]), f) 266 Automaton.visit_states(set([start]), f)
257
258 return Nfa(self.__encoding, start, global_end_to_return, nodes_created) 267 return Nfa(self.__encoding, start, global_end_to_return, nodes_created)
259 268
260 @staticmethod 269 @staticmethod
261 def add_action(term, action): 270 def add_action(term, action):
262 return Term('ACTION', term, action.to_term()) 271 return Term('ACTION', term, action.to_term())
263 272
264 @staticmethod 273 @staticmethod
265 def add_continue(term): 274 def add_continue(term):
266 return Term('CONTINUE', term) 275 return Term('CONTINUE', term)
267 276
268 @staticmethod 277 @staticmethod
269 def unique_key(name): 278 def unique_key(name):
270 return Term('UNIQUE_KEY', name) 279 return Term('UNIQUE_KEY', name)
271 280
272 @staticmethod 281 @staticmethod
273 def join_subgraph(tree, subtree): 282 def join_subtree(tree, subtree_name):
274 return Term('JOIN', tree, subtree) 283 return Term('JOIN', tree, subtree_name)
275 284
276 @staticmethod 285 @staticmethod
277 def or_terms(terms): 286 def or_terms(terms):
278 return reduce(lambda acc, g: Term('OR', acc, g), terms) 287 return reduce(lambda acc, g: Term('OR', acc, g), terms)
279 288
280 @staticmethod 289 @staticmethod
281 def cat_terms(terms): 290 def cat_terms(terms):
282 return reduce(lambda acc, g: Term('CAT', acc, g), terms) 291 return reduce(lambda acc, g: Term('CAT', acc, g), terms)
283 292
284 __modifer_map = { 293 __modifer_map = {
285 '+': 'ONE_OR_MORE', 294 '+': 'ONE_OR_MORE',
286 '?': 'ZERO_OR_ONE', 295 '?': 'ZERO_OR_ONE',
287 '*': 'ZERO_OR_MORE', 296 '*': 'ZERO_OR_MORE',
288 } 297 }
289 298
290 @staticmethod 299 @staticmethod
291 def apply_modifier(modifier, term): 300 def apply_modifier(modifier, term):
292 return Term(NfaBuilder.__modifer_map[modifier], term) 301 return Term(NfaBuilder.__modifer_map[modifier], term)
OLDNEW
« no previous file with comments | « no previous file | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698