tools/lexer_generator/dfa_optimizer.py - Issue 85413006: Experimental parser: dfa optimization to remove transitions from keyword lexing

Unified Diff: tools/lexer_generator/dfa_optimizer.py

Issue 85413006: Experimental parser: dfa optimization to remove transitions from keyword lexing (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/lexer_generator/dfa_optimizer.py

diff --git a/tools/lexer_generator/dfa_optimizer.py b/tools/lexer_generator/dfa_optimizer.py

new file mode 100644

index 0000000000000000000000000000000000000000..0b2c1e5136afe0e068d212f870449f11bb4afe80

--- /dev/null

+++ b/tools/lexer_generator/dfa_optimizer.py

@@ -0,0 +1,167 @@

+# Redistribution and use in source and binary forms, with or without

+# modification, are permitted provided that the following conditions are

+# met:

+# * Redistributions of source code must retain the above copyright

+# notice, this list of conditions and the following disclaimer.

+# * Redistributions in binary form must reproduce the above

+# copyright notice, this list of conditions and the following

+# disclaimer in the documentation and/or other materials provided

+# with the distribution.

+# * Neither the name of Google Inc. nor the names of its

+# contributors may be used to endorse or promote products derived

+# from this software without specific prior written permission.

+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+from transition_keys import TransitionKey

+from automaton import Action

+from dfa import Dfa

+class DfaOptimizer(object):

+ def __init__(self, dfa, log):

+ self.__dfa = dfa

+ self.__log = log

+ @staticmethod

+ def transistions_match(encoding, incoming_key, incoming_state, state):

+ keys = set(state.key_iter())

+ keys.add(incoming_key)

+ disjoint_keys = TransitionKey.disjoint_keys(encoding, keys)

+ for key in disjoint_keys:

+ if not incoming_key.is_superset_of_key(key):

+ continue

+ transition_state = state.transition_state_for_key(key)

+ if incoming_state.transition_state_for_key(key) != transition_state:

+ return False

+ return True

+ def replace_tokens_with_gotos(self):

+ '''replace states with no entry action and match action of type 'token(X)'

+ with new states with entry action store_token(X) and match action

+ goto(state_id) which has (far) fewer transitions'''

+ dfa = self.__dfa

+ encoding = dfa.encoding()

+ incoming_transitions = {}

+ def build_incoming_transitions(state, visitor_state):

+ for key, transition_state in state.key_state_iter():

+ if not transition_state in incoming_transitions:

+ incoming_transitions[transition_state] = []

+ incoming_transitions[transition_state].append((key,state))

+ dfa.visit_all_states(build_incoming_transitions)

+ def is_replacement_candidate(state):

+ action = state.action()

+ if not action or action.entry_action() or not action.match_action():

+ return False

+ if (action.match_action()[0] == 'token' or

+ action.match_action()[0] == 'harmony_token'):

+ return True

+ return False

+ replacements = {}

+ for state, incoming in incoming_transitions.items():

+ if len(incoming) < 10:

+ continue

+ if not is_replacement_candidate(state):

+ continue

+ # check to see if incoming edges are matched by an outgoing edge

+ def match_filter((incoming_key, incoming_state)):

+ return (incoming_state != state and # drop self transitions

+ is_replacement_candidate(incoming_state) and

+ self.transistions_match(

+ encoding, incoming_key, incoming_state, state))

+ for incoming_key_state in incoming:

+ if not match_filter(incoming_key_state):

+ continue

+ (incoming_key, incoming_state) = incoming_key_state

+ # set this transition as to be replaced

+ if not incoming_state in replacements:

+ replacements[incoming_state] = {}

+ replacements[incoming_state][incoming_key] = state

+ # now see if we can gather other transistions into the goto

+ for key in incoming_state.key_iter():

+ if key == incoming_key:

+ continue

+ if not self.transistions_match(

+ encoding, key, incoming_state, state):

+ continue

+ # this transition can be removed

+ replacements[incoming_state][key] = None

+ if not replacements:

+ return

+ # perform replacement

+ states = {}

+ name = lambda state : str(state.node_number())

+ counters = {'removals' : 0, 'gotos' : 0}

+ store_states = set([])

+ # generate a store action to replace an existing action

+ def replacement_action(old_action, match_action):

+ assert not old_action.entry_action()

+ assert old_action.match_action()

+ if old_action.match_action()[0] == 'token':

+ entry_action = ('store_token', old_action.match_action()[1])

+ elif old_action.match_action()[0] == 'harmony_token':

+ entry_action = ('store_harmony_token', old_action.match_action()[1])

+ else:

+ raise Exception(old_action.match_action())

+ return Action(entry_action, match_action, old_action.precedence())

+ # map the old state to the new state, with fewer transitions and

+ # goto actions

+ def replace_state(state, acc):

+ new_state = {

+ 'transitions' : {},

+ 'terminal' : state in self.__dfa.terminal_set(),

+ 'action' : state.action(),

+ }

+ states[name(state)] = new_state

+ state_replacements = replacements[state] if state in replacements else {}

+ for transition_key, transition_state in state.transitions().items():

+ if not transition_key in state_replacements:

+ new_state['transitions'][transition_key] = name(transition_state)

+ continue

+ replacement = state_replacements[transition_key]

+ del state_replacements[transition_key]

+ # just drop the transition

+ if replacement == None:

+ counters['removals'] += 1

+ continue

+ # do goto replacement

+ counters['gotos'] += 1

+ match_action = ('goto', name(transition_state))

+ new_state['action'] = replacement_action(state.action(), match_action)

+ # will need to patch up transition_state

+ store_states.add(name(transition_state))

+ assert not state_replacements

+ self.__dfa.visit_all_states(replace_state)

+ # now patch up all states with stores

+ for state in store_states:

+ old_action = states[state]['action']

+ match_action = ('do_stored_token', None)

+ states[state]['action'] = replacement_action(old_action, match_action)

+ start_name = name(self.__dfa.start_state())

+ if self.__log:

+ print 'gotos inserted %s' % counters['gotos']

+ print 'transitions removed %s' % counters['removals']

+ print 'do_stored_token actions added %s' % len(store_states)

+ self.__dfa = Dfa(self.__dfa.encoding(), start_name, states)

+ @staticmethod

+ def optimize(dfa, log):

+ optimizer = DfaOptimizer(dfa, log)

+ optimizer.replace_tokens_with_gotos()

+ return optimizer.__dfa

« no previous file with comments | « no previous file | tools/lexer_generator/generator.py » ('j') | no next file with comments »