Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1346)

Side by Side Diff: tools/lexer_generator/dfa_optimizer.py

Issue 160443002: Experimental parser: remove match actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/dot_utilities.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 from transition_keys import TransitionKey 28 from transition_keys import TransitionKey
29 from automaton import Term, Action 29 from automaton import Term, Action, Automaton
30 from dfa import Dfa 30 from dfa import Dfa
31 31
32 # --- Optimization: Replacing tokens with gotos --- 32 # --- Optimization: Replacing tokens with gotos ---
33 # 33 #
34 # This optimization reduces the code size for grammars which have constructs 34 # This optimization reduces the code size for grammars which have constructs
35 # similar to keywords and identifiers. Consider the following grammar: 35 # similar to keywords and identifiers. Consider the following grammar:
36 # 36 #
37 # "baz" <|token(BAZ)|> 37 # "baz" <|token(BAZ)|>
38 # "bazz" <|token(BAZZ)|> 38 # "bazz" <|token(BAZZ)|>
39 # 39 #
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
136 # both states transition to s3. Note that [a-z] is a superkey of [b-z], and the 136 # both states transition to s3. Note that [a-z] is a superkey of [b-z], and the
137 # transition from s2 is defined for the superkey, but that is ok. 137 # transition from s2 is defined for the superkey, but that is ok.
138 138
139 class DfaOptimizer(object): 139 class DfaOptimizer(object):
140 140
141 def __init__(self, dfa, log): 141 def __init__(self, dfa, log):
142 self.__dfa = dfa 142 self.__dfa = dfa
143 self.__log = log 143 self.__log = log
144 144
145 @staticmethod 145 @staticmethod
146 def transistions_match(encoding, incoming_key, incoming_state, state): 146 def __transistions_match(encoding, incoming_key, incoming_state, state):
147 keys = set(state.key_iter()) 147 keys = set(state.key_iter())
148 keys.add(incoming_key) 148 keys.add(incoming_key)
149 disjoint_keys = TransitionKey.disjoint_keys(encoding, keys) 149 disjoint_keys = TransitionKey.disjoint_keys(encoding, keys)
150 for key in disjoint_keys: 150 for key in disjoint_keys:
151 if not incoming_key.is_superset_of_key(key): 151 if not incoming_key.is_superset_of_key(key):
152 continue 152 continue
153 transition_state = state.transition_state_for_key(key) 153 transition_state = state.transition_state_for_key(key)
154 if incoming_state.transition_state_for_key(key) != transition_state: 154 if incoming_state.transition_state_for_key(key) != transition_state:
155 return False 155 return False
156 return True 156 return True
157 157
158 def replace_tokens_with_gotos(self): 158 @staticmethod
159 def __build_incoming_transitions(dfa):
160 incoming_transitions = {}
161 def f(state, visitor_state):
162 for key, transition_state in state.key_state_iter():
163 if not transition_state in incoming_transitions:
164 incoming_transitions[transition_state] = []
165 incoming_transitions[transition_state].append((key, state))
166 dfa.visit_all_states(f)
167 return incoming_transitions
168
169 @staticmethod
170 def __match_action(state):
171 state = state.omega_transition()
172 state = state if state and state.transition_count() == 0 else None
173 return Action.empty_action() if not state else state.action()
174
175 @staticmethod
176 def __build_replacement_map(dfa):
177 replacements = {}
178 encoding = dfa.encoding()
179 incoming_transitions = DfaOptimizer.__build_incoming_transitions(dfa)
180 replacement_targets = set([])
181 # TODO(dcarney): this should be an ordered walk
182 for state, incoming in incoming_transitions.items():
183 if len(incoming) < 10:
184 continue
185 # the states action will be replaced, so we just don't optimize if there
186 # is one
187 if state.action(): # TODO(dcarney): allow this via action chaining
188 continue
189 # We only perform this optimization on 'token' actions
190 match_action = DfaOptimizer.__match_action(state)
191 if not match_action.name() == 'token':
192 continue
193 assert state.omega_transition() in dfa.terminal_set()
194 # we can drop incoming edges for states with a match action
195 # of either 'token' or 'harmony_token'
196 def is_replacement_candidate(state):
197 action = DfaOptimizer.__match_action(state)
198 return action.name() == 'token' or action.name() == 'harmony_token'
199 for (incoming_key, incoming_state) in incoming:
200 # check to see if incoming edges are matched by an outgoing edge
201 if (incoming_state == state or # drop self edges
202 incoming_key == TransitionKey.omega() or
203 not is_replacement_candidate(incoming_state) or
204 not DfaOptimizer.__transistions_match(
205 encoding, incoming_key, incoming_state, state)):
206 continue
207 assert (not incoming_state in replacements or
208 replacements[incoming_state][0] == state)
209 # set this transition as to be removed
210 if not incoming_state in replacements:
211 replacements[incoming_state] = (state, set())
212 replacements[incoming_state][1].add(incoming_key)
213 replacement_targets.add(state)
214 # now see if we can gather other transistions into the goto
215 for key in incoming_state.key_iter():
216 if (key != TransitionKey.omega() and
217 key not in replacements[incoming_state][1] and
218 DfaOptimizer.__transistions_match(
219 encoding, key, incoming_state, state)):
220 # this transition can be removed
221 replacements[incoming_state][1].add(key)
222 return (replacement_targets, replacements)
223
224 @staticmethod
225 def __new_state(is_terminal, action):
226 return {
227 'transitions' : {},
228 'terminal' : is_terminal,
229 'action' : action,
230 }
231
232 @staticmethod
233 def __new_state_name(state):
234 return str(state.node_number())
235
236 @staticmethod
237 def __split_target_state(state):
238 old_name = DfaOptimizer.__new_state_name(state)
239 old_match_action = DfaOptimizer.__match_action(state)
240 assert old_match_action.name() == 'token', 'unimplemented'
241 precedence = old_match_action.precedence()
242 match_action = Action(Term('do_stored_token'), precedence)
243 head_action = Action(
244 Term('store_token', old_match_action.term().args()[0]), precedence)
245 tail_action = Action(Term('no_op'), precedence)
246 head = DfaOptimizer.__new_state(False, head_action)
247 tail = DfaOptimizer.__new_state(False, tail_action)
248 match = DfaOptimizer.__new_state(True, match_action)
249 head['transitions'][TransitionKey.omega()] = old_name + '_tail'
250 tail['transitions'][TransitionKey.omega()] = old_name + '_match'
251 return (head, tail, match)
252
253 # generate a store action to replace an existing action
254 @staticmethod
255 def __replacement_action(state, transition_state):
256 old_action = DfaOptimizer.__match_action(state)
257 assert old_action
258 transition_action = DfaOptimizer.__match_action(transition_state).term()
259 if old_action.term() == transition_action:
260 # no need to store token
261 return Action.empty_action()
262 new_name = 'store_' + old_action.name()
263 return Action(
264 Term(new_name, *old_action.term().args()), old_action.precedence())
265
266 @staticmethod
267 def __apply_jump(counters, state, new_state, target_state):
268 # generate a new action for the new omega transition
269 new_action = DfaOptimizer.__replacement_action(state, target_state)
270 # determine new jump target
271 jump_target = DfaOptimizer.__new_state_name(target_state)
272 if not new_action:
273 # just jump to entry of target_state, which will be split
274 new_state['transitions'][TransitionKey.omega()] = jump_target
275 counters['goto_start'] += 1
276 return None
277 counters[new_action.name()] += 1
278 # install new match state
279 match_state_id = DfaOptimizer.__new_state_name(state) + '_match'
280 new_state['transitions'][TransitionKey.omega()] = match_state_id
281 match_state = DfaOptimizer.__new_state(False, new_action)
282 match_state['transitions'][TransitionKey.omega()] = jump_target + '_tail'
283 return match_state
284
285 @staticmethod
286 def __remove_orphaned_states(states, orphanable, start_name):
287 seen = set([])
288 def visit(state_id, acc):
289 seen.add(state_id)
290 def state_iter(state_id):
291 return states[state_id]['transitions'].itervalues()
292 Automaton.visit_states(set([start_name]), visit, state_iter=state_iter)
293 def f(name, state):
294 assert name in seen or name in orphanable
295 return name in seen
296 return {k: v for k, v in states.iteritems() if f(k, v)}
297
298 def __replace_tokens_with_gotos(self):
159 '''replace states with no entry action and match action of type 'token(X)' 299 '''replace states with no entry action and match action of type 'token(X)'
160 with new states with entry action store_token(X) and match action 300 with new states with entry action store_token(X) and match action
161 goto(state_id) which has (far) fewer transitions''' 301 goto(state_id) which has (far) fewer transitions'''
162 302
163 dfa = self.__dfa 303 dfa = self.__dfa
164 encoding = dfa.encoding() 304 (replacement_targets, replacements) = self.__build_replacement_map(dfa)
165 305 if not replacement_targets:
166 incoming_transitions = {} 306 return dfa
167 def build_incoming_transitions(state, visitor_state):
168 for key, transition_state in state.key_state_iter():
169 if not transition_state in incoming_transitions:
170 incoming_transitions[transition_state] = []
171 incoming_transitions[transition_state].append((key,state))
172 dfa.visit_all_states(build_incoming_transitions)
173
174 def is_replacement_candidate(state):
175 return (state.action().match_action().name() == 'token' or
176 state.action().match_action().name() == 'harmony_token')
177
178 replacements = {}
179 for state, incoming in incoming_transitions.items():
180 if len(incoming) < 10:
181 continue
182 if not is_replacement_candidate(state):
183 continue
184 # check to see if incoming edges are matched by an outgoing edge
185 def match_filter((incoming_key, incoming_state)):
186 return (incoming_state != state and # drop self transitions
187 is_replacement_candidate(incoming_state) and
188 self.transistions_match(
189 encoding, incoming_key, incoming_state, state))
190 for incoming_key_state in incoming:
191 if not match_filter(incoming_key_state):
192 continue
193 (incoming_key, incoming_state) = incoming_key_state
194 # set this transition as to be replaced
195 if not incoming_state in replacements:
196 replacements[incoming_state] = {}
197 replacements[incoming_state][incoming_key] = state
198 # now see if we can gather other transistions into the goto
199 for key in incoming_state.key_iter():
200 if key == incoming_key:
201 continue
202 if not self.transistions_match(
203 encoding, key, incoming_state, state):
204 continue
205 # this transition can be removed
206 replacements[incoming_state][key] = None
207 if not replacements:
208 return
209 # perform replacement 307 # perform replacement
210 states = {} 308 states = {}
211 name = lambda state : str(state.node_number()) 309 name = DfaOptimizer.__new_state_name
212 counters = { 310 counters = {
213 'removals' : 0, 311 'removals' : 0,
214 'goto_start' : 0, 312 'goto_start' : 0,
215 'store_token_and_goto' : 0, 313 'store_token' : 0,
216 'store_harmony_token_and_goto' : 0, 314 'store_harmony_token' : 0,
315 'split_state' : 0
217 } 316 }
218 store_states = set([])
219 # generate a store action to replace an existing action
220 def replacement_action(old_action, transition_state):
221 assert old_action.match_action()
222 state_id = name(transition_state)
223 if old_action.match_action().name() == 'token':
224 old_token = old_action.match_action().args()[0]
225 if (transition_state.action().match_action().name() == 'token' and
226 transition_state.action().match_action().args()[0] == old_token):
227 # no need to store token
228 match_action = Term('goto_start', state_id)
229 counters['goto_start'] += 1
230 else:
231 counters['store_token_and_goto'] += 1
232 match_action = Term('store_token_and_goto', old_token, state_id)
233 elif old_action.match_action().name() == 'harmony_token':
234 new_args = list(old_action.match_action().args()) + [state_id]
235 match_action = Term('store_harmony_token_and_goto', *new_args)
236 counters['store_harmony_token_and_goto'] += 1
237 else:
238 raise Exception(old_action.match_action())
239 return Action(old_action.entry_action(),
240 match_action,
241 old_action.precedence())
242 # map the old state to the new state, with fewer transitions and 317 # map the old state to the new state, with fewer transitions and
243 # goto actions 318 # goto actions
319 orphanable = set([])
244 def replace_state(state, acc): 320 def replace_state(state, acc):
245 new_state = { 321 if state in replacements:
246 'transitions' : {}, 322 target_state = replacements[state][0]
247 'terminal' : state in self.__dfa.terminal_set(), 323 deletions = replacements[state][1]
248 'action' : state.action(), 324 else:
249 } 325 deletions = {}
250 states[name(state)] = new_state 326 # this is a replacement target, so we split the state
251 state_replacements = replacements[state] if state in replacements else {} 327 if state in replacement_targets:
328 assert not deletions
329 (head, tail, match) = DfaOptimizer.__split_target_state(state)
330 new_state = tail
331 states[name(state)] = head
332 states[name(state) + '_tail'] = tail
333 states[name(state) + '_match'] = match
334 counters['split_state'] += 1
335 else:
336 new_state = self.__new_state(state in self.__dfa.terminal_set(),
337 state.action())
338 states[name(state)] = new_state
339 assert not TransitionKey.omega() in deletions
252 for (transition_key, transition_state) in state.key_state_iter(): 340 for (transition_key, transition_state) in state.key_state_iter():
253 if not transition_key in state_replacements: 341 # just copy these transitions
342 if transition_key != TransitionKey.omega():
343 if not transition_key in deletions:
344 new_state['transitions'][transition_key] = name(transition_state)
345 continue
346 else:
347 # drop these transitions
348 deletions.remove(transition_key)
349 counters['removals'] += 1
350 continue
351 if transition_key in new_state['transitions']:
352 # this is a split state, omega has already been assigned
353 # mark old match state as orphanable
354 orphanable.add(name(transition_state))
355 elif not state in replacements:
356 # no replacement
254 new_state['transitions'][transition_key] = name(transition_state) 357 new_state['transitions'][transition_key] = name(transition_state)
255 continue 358 else:
256 replacement = state_replacements[transition_key] 359 # mark old omega state as orphanable
257 del state_replacements[transition_key] 360 orphanable.add(name(transition_state))
258 # just drop the transition 361 match_state = DfaOptimizer.__apply_jump(
259 if replacement == None: 362 counters, state, new_state, target_state)
260 counters['removals'] += 1 363 if match_state:
261 continue 364 states[name(state) + '_match'] = match_state
262 assert replacement == transition_state 365 assert not deletions
263 # do goto replacement
264 new_state['action'] = replacement_action(state.action(), replacement)
265 # will need to patch up transition_state
266 store_states.add(name(transition_state))
267 assert not state_replacements
268 self.__dfa.visit_all_states(replace_state) 366 self.__dfa.visit_all_states(replace_state)
269 # now patch up all states with stores
270 start_name = name(self.__dfa.start_state()) 367 start_name = name(self.__dfa.start_state())
271 for state_id in store_states: 368 states = self.__remove_orphaned_states(states, orphanable, start_name)
272 old_action = states[state_id]['action'] 369 # dump stats
273 assert not old_action.entry_action()
274 assert old_action.match_action().name() == 'token', 'unimplemented'
275 entry_action = Term('store_token', old_action.match_action().args()[0])
276 match_action = Term('do_stored_token', state_id)
277 precedence = old_action.precedence()
278 states[state_id]['action'] = Action(
279 entry_action, match_action, precedence)
280 # The state might be only reachable via gotos; make sure it's connected in
281 # the state graph by adding a bogus transition from the start state. This
282 # transition doens't match any character.
283 states[start_name]['transitions'][
284 TransitionKey.unique('no_match')] = state_id
285
286 if self.__log: 370 if self.__log:
287 print 'goto_start inserted %s' % counters['goto_start'] 371 print 'goto_start inserted %s' % counters['goto_start']
288 print 'store_token_and_goto inserted %s' % ( 372 print 'store_token inserted %s' % (
289 counters['store_token_and_goto']) 373 counters['store_token'])
290 print 'store_harmony_token_and_goto %s' % ( 374 print 'store_harmony_token %s' % (
291 counters['store_harmony_token_and_goto']) 375 counters['store_harmony_token'])
292 print 'transitions removed %s' % counters['removals'] 376 print 'transitions removed %s' % counters['removals']
293 print 'do_stored_token actions added %s' % len(store_states) 377 print 'states split %s' % counters['split_state']
294 self.__dfa = Dfa(self.__dfa.encoding(), start_name, states) 378 return Dfa(self.__dfa.encoding(), start_name, states)
295 379
296 @staticmethod 380 @staticmethod
297 def optimize(dfa, log): 381 def optimize(dfa, log):
298 optimizer = DfaOptimizer(dfa, log) 382 optimizer = DfaOptimizer(dfa, log)
299 optimizer.replace_tokens_with_gotos() 383 return optimizer.__replace_tokens_with_gotos()
300 return optimizer.__dfa
OLDNEW
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/dot_utilities.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698