Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Unified Diff: tools/lexer_generator/dfa_optimizer.py

Issue 160443002: Experimental parser: remove match actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/dot_utilities.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/dfa_optimizer.py
diff --git a/tools/lexer_generator/dfa_optimizer.py b/tools/lexer_generator/dfa_optimizer.py
index c13f4a9c789092c58b7ca41845d0129f13ec02cf..6aa4a4f645171bea1eab3f906b5c0e5911407f6b 100644
--- a/tools/lexer_generator/dfa_optimizer.py
+++ b/tools/lexer_generator/dfa_optimizer.py
@@ -26,7 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from transition_keys import TransitionKey
-from automaton import Term, Action
+from automaton import Term, Action, Automaton
from dfa import Dfa
# --- Optimization: Replacing tokens with gotos ---
@@ -143,7 +143,7 @@ class DfaOptimizer(object):
self.__log = log
@staticmethod
- def transistions_match(encoding, incoming_key, incoming_state, state):
+ def __transistions_match(encoding, incoming_key, incoming_state, state):
keys = set(state.key_iter())
keys.add(incoming_key)
disjoint_keys = TransitionKey.disjoint_keys(encoding, keys)
@@ -155,146 +155,229 @@ class DfaOptimizer(object):
return False
return True
- def replace_tokens_with_gotos(self):
- '''replace states with no entry action and match action of type 'token(X)'
- with new states with entry action store_token(X) and match action
- goto(state_id) which has (far) fewer transitions'''
-
- dfa = self.__dfa
- encoding = dfa.encoding()
-
+ @staticmethod
+ def __build_incoming_transitions(dfa):
incoming_transitions = {}
- def build_incoming_transitions(state, visitor_state):
+ def f(state, visitor_state):
for key, transition_state in state.key_state_iter():
if not transition_state in incoming_transitions:
incoming_transitions[transition_state] = []
- incoming_transitions[transition_state].append((key,state))
- dfa.visit_all_states(build_incoming_transitions)
+ incoming_transitions[transition_state].append((key, state))
+ dfa.visit_all_states(f)
+ return incoming_transitions
- def is_replacement_candidate(state):
- return (state.action().match_action().name() == 'token' or
- state.action().match_action().name() == 'harmony_token')
+ @staticmethod
+ def __match_action(state):
+ state = state.omega_transition()
+ state = state if state and state.transition_count() == 0 else None
+ return Action.empty_action() if not state else state.action()
+ @staticmethod
+ def __build_replacement_map(dfa):
replacements = {}
+ encoding = dfa.encoding()
+ incoming_transitions = DfaOptimizer.__build_incoming_transitions(dfa)
+ replacement_targets = set([])
+ # TODO(dcarney): this should be an ordered walk
for state, incoming in incoming_transitions.items():
if len(incoming) < 10:
continue
- if not is_replacement_candidate(state):
+ # the states action will be replaced, so we just don't optimize if there
+ # is one
+ if state.action(): # TODO(dcarney): allow this via action chaining
+ continue
+ # We only perform this optimization on 'token' actions
+ match_action = DfaOptimizer.__match_action(state)
+ if not match_action.name() == 'token':
continue
- # check to see if incoming edges are matched by an outgoing edge
- def match_filter((incoming_key, incoming_state)):
- return (incoming_state != state and # drop self transitions
- is_replacement_candidate(incoming_state) and
- self.transistions_match(
- encoding, incoming_key, incoming_state, state))
- for incoming_key_state in incoming:
- if not match_filter(incoming_key_state):
+ assert state.omega_transition() in dfa.terminal_set()
+ # we can drop incoming edges for states with a match action
+ # of either 'token' or 'harmony_token'
+ def is_replacement_candidate(state):
+ action = DfaOptimizer.__match_action(state)
+ return action.name() == 'token' or action.name() == 'harmony_token'
+ for (incoming_key, incoming_state) in incoming:
+ # check to see if incoming edges are matched by an outgoing edge
+ if (incoming_state == state or # drop self edges
+ incoming_key == TransitionKey.omega() or
+ not is_replacement_candidate(incoming_state) or
+ not DfaOptimizer.__transistions_match(
+ encoding, incoming_key, incoming_state, state)):
continue
- (incoming_key, incoming_state) = incoming_key_state
- # set this transition as to be replaced
+ assert (not incoming_state in replacements or
+ replacements[incoming_state][0] == state)
+ # set this transition as to be removed
if not incoming_state in replacements:
- replacements[incoming_state] = {}
- replacements[incoming_state][incoming_key] = state
+ replacements[incoming_state] = (state, set())
+ replacements[incoming_state][1].add(incoming_key)
+ replacement_targets.add(state)
# now see if we can gather other transistions into the goto
for key in incoming_state.key_iter():
- if key == incoming_key:
- continue
- if not self.transistions_match(
- encoding, key, incoming_state, state):
- continue
- # this transition can be removed
- replacements[incoming_state][key] = None
- if not replacements:
- return
+ if (key != TransitionKey.omega() and
+ key not in replacements[incoming_state][1] and
+ DfaOptimizer.__transistions_match(
+ encoding, key, incoming_state, state)):
+ # this transition can be removed
+ replacements[incoming_state][1].add(key)
+ return (replacement_targets, replacements)
+
+ @staticmethod
+ def __new_state(is_terminal, action):
+ return {
+ 'transitions' : {},
+ 'terminal' : is_terminal,
+ 'action' : action,
+ }
+
+ @staticmethod
+ def __new_state_name(state):
+ return str(state.node_number())
+
+ @staticmethod
+ def __split_target_state(state):
+ old_name = DfaOptimizer.__new_state_name(state)
+ old_match_action = DfaOptimizer.__match_action(state)
+ assert old_match_action.name() == 'token', 'unimplemented'
+ precedence = old_match_action.precedence()
+ match_action = Action(Term('do_stored_token'), precedence)
+ head_action = Action(
+ Term('store_token', old_match_action.term().args()[0]), precedence)
+ tail_action = Action(Term('no_op'), precedence)
+ head = DfaOptimizer.__new_state(False, head_action)
+ tail = DfaOptimizer.__new_state(False, tail_action)
+ match = DfaOptimizer.__new_state(True, match_action)
+ head['transitions'][TransitionKey.omega()] = old_name + '_tail'
+ tail['transitions'][TransitionKey.omega()] = old_name + '_match'
+ return (head, tail, match)
+
+ # generate a store action to replace an existing action
+ @staticmethod
+ def __replacement_action(state, transition_state):
+ old_action = DfaOptimizer.__match_action(state)
+ assert old_action
+ transition_action = DfaOptimizer.__match_action(transition_state).term()
+ if old_action.term() == transition_action:
+ # no need to store token
+ return Action.empty_action()
+ new_name = 'store_' + old_action.name()
+ return Action(
+ Term(new_name, *old_action.term().args()), old_action.precedence())
+
+ @staticmethod
+ def __apply_jump(counters, state, new_state, target_state):
+ # generate a new action for the new omega transition
+ new_action = DfaOptimizer.__replacement_action(state, target_state)
+ # determine new jump target
+ jump_target = DfaOptimizer.__new_state_name(target_state)
+ if not new_action:
+ # just jump to entry of target_state, which will be split
+ new_state['transitions'][TransitionKey.omega()] = jump_target
+ counters['goto_start'] += 1
+ return None
+ counters[new_action.name()] += 1
+ # install new match state
+ match_state_id = DfaOptimizer.__new_state_name(state) + '_match'
+ new_state['transitions'][TransitionKey.omega()] = match_state_id
+ match_state = DfaOptimizer.__new_state(False, new_action)
+ match_state['transitions'][TransitionKey.omega()] = jump_target + '_tail'
+ return match_state
+
+ @staticmethod
+ def __remove_orphaned_states(states, orphanable, start_name):
+ seen = set([])
+ def visit(state_id, acc):
+ seen.add(state_id)
+ def state_iter(state_id):
+ return states[state_id]['transitions'].itervalues()
+ Automaton.visit_states(set([start_name]), visit, state_iter=state_iter)
+ def f(name, state):
+ assert name in seen or name in orphanable
+ return name in seen
+ return {k: v for k, v in states.iteritems() if f(k, v)}
+
+ def __replace_tokens_with_gotos(self):
+ '''replace states with no entry action and match action of type 'token(X)'
+ with new states with entry action store_token(X) and match action
+ goto(state_id) which has (far) fewer transitions'''
+
+ dfa = self.__dfa
+ (replacement_targets, replacements) = self.__build_replacement_map(dfa)
+ if not replacement_targets:
+ return dfa
# perform replacement
states = {}
- name = lambda state : str(state.node_number())
+ name = DfaOptimizer.__new_state_name
counters = {
'removals' : 0,
'goto_start' : 0,
- 'store_token_and_goto' : 0,
- 'store_harmony_token_and_goto' : 0,
+ 'store_token' : 0,
+ 'store_harmony_token' : 0,
+ 'split_state' : 0
}
- store_states = set([])
- # generate a store action to replace an existing action
- def replacement_action(old_action, transition_state):
- assert old_action.match_action()
- state_id = name(transition_state)
- if old_action.match_action().name() == 'token':
- old_token = old_action.match_action().args()[0]
- if (transition_state.action().match_action().name() == 'token' and
- transition_state.action().match_action().args()[0] == old_token):
- # no need to store token
- match_action = Term('goto_start', state_id)
- counters['goto_start'] += 1
- else:
- counters['store_token_and_goto'] += 1
- match_action = Term('store_token_and_goto', old_token, state_id)
- elif old_action.match_action().name() == 'harmony_token':
- new_args = list(old_action.match_action().args()) + [state_id]
- match_action = Term('store_harmony_token_and_goto', *new_args)
- counters['store_harmony_token_and_goto'] += 1
- else:
- raise Exception(old_action.match_action())
- return Action(old_action.entry_action(),
- match_action,
- old_action.precedence())
# map the old state to the new state, with fewer transitions and
# goto actions
+ orphanable = set([])
def replace_state(state, acc):
- new_state = {
- 'transitions' : {},
- 'terminal' : state in self.__dfa.terminal_set(),
- 'action' : state.action(),
- }
- states[name(state)] = new_state
- state_replacements = replacements[state] if state in replacements else {}
+ if state in replacements:
+ target_state = replacements[state][0]
+ deletions = replacements[state][1]
+ else:
+ deletions = {}
+ # this is a replacement target, so we split the state
+ if state in replacement_targets:
+ assert not deletions
+ (head, tail, match) = DfaOptimizer.__split_target_state(state)
+ new_state = tail
+ states[name(state)] = head
+ states[name(state) + '_tail'] = tail
+ states[name(state) + '_match'] = match
+ counters['split_state'] += 1
+ else:
+ new_state = self.__new_state(state in self.__dfa.terminal_set(),
+ state.action())
+ states[name(state)] = new_state
+ assert not TransitionKey.omega() in deletions
for (transition_key, transition_state) in state.key_state_iter():
- if not transition_key in state_replacements:
+ # just copy these transitions
+ if transition_key != TransitionKey.omega():
+ if not transition_key in deletions:
+ new_state['transitions'][transition_key] = name(transition_state)
+ continue
+ else:
+ # drop these transitions
+ deletions.remove(transition_key)
+ counters['removals'] += 1
+ continue
+ if transition_key in new_state['transitions']:
+ # this is a split state, omega has already been assigned
+ # mark old match state as orphanable
+ orphanable.add(name(transition_state))
+ elif not state in replacements:
+ # no replacement
new_state['transitions'][transition_key] = name(transition_state)
- continue
- replacement = state_replacements[transition_key]
- del state_replacements[transition_key]
- # just drop the transition
- if replacement == None:
- counters['removals'] += 1
- continue
- assert replacement == transition_state
- # do goto replacement
- new_state['action'] = replacement_action(state.action(), replacement)
- # will need to patch up transition_state
- store_states.add(name(transition_state))
- assert not state_replacements
+ else:
+ # mark old omega state as orphanable
+ orphanable.add(name(transition_state))
+ match_state = DfaOptimizer.__apply_jump(
+ counters, state, new_state, target_state)
+ if match_state:
+ states[name(state) + '_match'] = match_state
+ assert not deletions
self.__dfa.visit_all_states(replace_state)
- # now patch up all states with stores
start_name = name(self.__dfa.start_state())
- for state_id in store_states:
- old_action = states[state_id]['action']
- assert not old_action.entry_action()
- assert old_action.match_action().name() == 'token', 'unimplemented'
- entry_action = Term('store_token', old_action.match_action().args()[0])
- match_action = Term('do_stored_token', state_id)
- precedence = old_action.precedence()
- states[state_id]['action'] = Action(
- entry_action, match_action, precedence)
- # The state might be only reachable via gotos; make sure it's connected in
- # the state graph by adding a bogus transition from the start state. This
- # transition doens't match any character.
- states[start_name]['transitions'][
- TransitionKey.unique('no_match')] = state_id
-
+ states = self.__remove_orphaned_states(states, orphanable, start_name)
+ # dump stats
if self.__log:
print 'goto_start inserted %s' % counters['goto_start']
- print 'store_token_and_goto inserted %s' % (
- counters['store_token_and_goto'])
- print 'store_harmony_token_and_goto %s' % (
- counters['store_harmony_token_and_goto'])
+ print 'store_token inserted %s' % (
+ counters['store_token'])
+ print 'store_harmony_token %s' % (
+ counters['store_harmony_token'])
print 'transitions removed %s' % counters['removals']
- print 'do_stored_token actions added %s' % len(store_states)
- self.__dfa = Dfa(self.__dfa.encoding(), start_name, states)
+ print 'states split %s' % counters['split_state']
+ return Dfa(self.__dfa.encoding(), start_name, states)
@staticmethod
def optimize(dfa, log):
optimizer = DfaOptimizer(dfa, log)
- optimizer.replace_tokens_with_gotos()
- return optimizer.__dfa
+ return optimizer.__replace_tokens_with_gotos()
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/dot_utilities.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698