Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(43)

Unified Diff: tools/lexer_generator/dfa_optimizer.py

Issue 85413006: Experimental parser: dfa optimization to remove transitions from keyword lexing (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tools/lexer_generator/generator.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/dfa_optimizer.py
diff --git a/tools/lexer_generator/dfa_optimizer.py b/tools/lexer_generator/dfa_optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b2c1e5136afe0e068d212f870449f11bb4afe80
--- /dev/null
+++ b/tools/lexer_generator/dfa_optimizer.py
@@ -0,0 +1,167 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from transition_keys import TransitionKey
+from automaton import Action
+from dfa import Dfa
+
+class DfaOptimizer(object):
+
+ def __init__(self, dfa, log):
+ self.__dfa = dfa
+ self.__log = log
+
+ @staticmethod
+ def transistions_match(encoding, incoming_key, incoming_state, state):
+ keys = set(state.key_iter())
+ keys.add(incoming_key)
+ disjoint_keys = TransitionKey.disjoint_keys(encoding, keys)
+ for key in disjoint_keys:
+ if not incoming_key.is_superset_of_key(key):
+ continue
+ transition_state = state.transition_state_for_key(key)
+ if incoming_state.transition_state_for_key(key) != transition_state:
+ return False
+ return True
+
+ def replace_tokens_with_gotos(self):
+ '''replace states with no entry action and match action of type 'token(X)'
+ with new states with entry action store_token(X) and match action
+ goto(state_id) which has (far) fewer transitions'''
+
+ dfa = self.__dfa
+ encoding = dfa.encoding()
+
+ incoming_transitions = {}
+ def build_incoming_transitions(state, visitor_state):
+ for key, transition_state in state.key_state_iter():
+ if not transition_state in incoming_transitions:
+ incoming_transitions[transition_state] = []
+ incoming_transitions[transition_state].append((key,state))
+ dfa.visit_all_states(build_incoming_transitions)
+
+ def is_replacement_candidate(state):
+ action = state.action()
+ if not action or action.entry_action() or not action.match_action():
+ return False
+ if (action.match_action()[0] == 'token' or
+ action.match_action()[0] == 'harmony_token'):
+ return True
+ return False
+
+ replacements = {}
+ for state, incoming in incoming_transitions.items():
+ if len(incoming) < 10:
+ continue
+ if not is_replacement_candidate(state):
+ continue
+ # check to see if incoming edges are matched by an outgoing edge
+ def match_filter((incoming_key, incoming_state)):
+ return (incoming_state != state and # drop self transitions
+ is_replacement_candidate(incoming_state) and
+ self.transistions_match(
+ encoding, incoming_key, incoming_state, state))
+ for incoming_key_state in incoming:
+ if not match_filter(incoming_key_state):
+ continue
+ (incoming_key, incoming_state) = incoming_key_state
+ # set this transition as to be replaced
+ if not incoming_state in replacements:
+ replacements[incoming_state] = {}
+ replacements[incoming_state][incoming_key] = state
+ # now see if we can gather other transistions into the goto
+ for key in incoming_state.key_iter():
+ if key == incoming_key:
+ continue
+ if not self.transistions_match(
+ encoding, key, incoming_state, state):
+ continue
+ # this transition can be removed
+ replacements[incoming_state][key] = None
+ if not replacements:
+ return
+ # perform replacement
+ states = {}
+ name = lambda state : str(state.node_number())
+ counters = {'removals' : 0, 'gotos' : 0}
+ store_states = set([])
+ # generate a store action to replace an existing action
+ def replacement_action(old_action, match_action):
+ assert not old_action.entry_action()
+ assert old_action.match_action()
+ if old_action.match_action()[0] == 'token':
+ entry_action = ('store_token', old_action.match_action()[1])
+ elif old_action.match_action()[0] == 'harmony_token':
+ entry_action = ('store_harmony_token', old_action.match_action()[1])
+ else:
+ raise Exception(old_action.match_action())
+ return Action(entry_action, match_action, old_action.precedence())
+ # map the old state to the new state, with fewer transitions and
+ # goto actions
+ def replace_state(state, acc):
+ new_state = {
+ 'transitions' : {},
+ 'terminal' : state in self.__dfa.terminal_set(),
+ 'action' : state.action(),
+ }
+ states[name(state)] = new_state
+ state_replacements = replacements[state] if state in replacements else {}
+ for transition_key, transition_state in state.transitions().items():
+ if not transition_key in state_replacements:
+ new_state['transitions'][transition_key] = name(transition_state)
+ continue
+ replacement = state_replacements[transition_key]
+ del state_replacements[transition_key]
+ # just drop the transition
+ if replacement == None:
+ counters['removals'] += 1
+ continue
+ # do goto replacement
+ counters['gotos'] += 1
+ match_action = ('goto', name(transition_state))
+ new_state['action'] = replacement_action(state.action(), match_action)
+ # will need to patch up transition_state
+ store_states.add(name(transition_state))
+ assert not state_replacements
+ self.__dfa.visit_all_states(replace_state)
+ # now patch up all states with stores
+ for state in store_states:
+ old_action = states[state]['action']
+ match_action = ('do_stored_token', None)
+ states[state]['action'] = replacement_action(old_action, match_action)
+ start_name = name(self.__dfa.start_state())
+ if self.__log:
+ print 'gotos inserted %s' % counters['gotos']
+ print 'transitions removed %s' % counters['removals']
+ print 'do_stored_token actions added %s' % len(store_states)
+ self.__dfa = Dfa(self.__dfa.encoding(), start_name, states)
+
+ @staticmethod
+ def optimize(dfa, log):
+ optimizer = DfaOptimizer(dfa, log)
+ optimizer.replace_tokens_with_gotos()
+ return optimizer.__dfa
« no previous file with comments | « no previous file | tools/lexer_generator/generator.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698