Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(22)

Unified Diff: tools/lexer_generator/dfa.py

Issue 169523003: Experimental parser: split and rename some files (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/lexer_generator/code_generator.py ('k') | tools/lexer_generator/dfa_minimizer.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/dfa.py
diff --git a/tools/lexer_generator/dfa.py b/tools/lexer_generator/dfa.py
index 5a3dd5e4b364588ef2e1eb893056661d41af8cb0..be59ac0c27020bf4647e282e6608372d88a2a9a1 100644
--- a/tools/lexer_generator/dfa.py
+++ b/tools/lexer_generator/dfa.py
@@ -27,7 +27,7 @@
from itertools import chain
from automaton import *
-from transition_keys import TransitionKey
+from transition_key import TransitionKey
class DfaState(AutomatonState):
@@ -116,278 +116,3 @@ class Dfa(Automaton):
def terminal_set(self):
return set(self.__terminal_set)
-
- def minimize(self):
- return DfaMinimizer(self).minimize()
-
-class StatePartition(object):
-
- def __init__(self, node_numbers):
- self.__node_numbers = node_numbers
- assert self.__node_numbers
- self.__hash = None
-
- def set(self):
- return self.__node_numbers
-
- def __hash__(self):
- if not self.__hash:
- self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0)
- return self.__hash
-
- def __eq__(self, other):
- return (isinstance(other, self.__class__) and
- self.__node_numbers == other.__node_numbers)
-
- def __len__(self):
- return len(self.__node_numbers)
-
- def __iter__(self):
- return self.__node_numbers.__iter__()
-
- def __str__(self):
- return str([x for x in self.__node_numbers])
-
-class DfaMinimizer:
-
- __verify = True
-
- @staticmethod
- def set_verify(verify):
- DfaMinimizer.__verify = verify
-
- def __init__(self, dfa):
- self.__dfa = dfa
- self.__id_to_state = None
- self.__state_to_id = None
- self.__alphabet = None
-
- def __create_initial_partitions(self):
- assert not self.__id_to_state
- assert not self.__state_to_id
- assert not self.__alphabet
-
- # For the minimization, we associate each state with an ID. A set of states
- # is represented as a set of state IDs.
- id_to_state = {}
-
- # First we partition the states into the following groups:
- # - terminal states without action
- # - nonterminal states without action
- # - one group per action: terminal states which have the action
- # - one group per action: nonterminal states which have the action
- # These are the keys of initial_partitions. The values are lists of state
- # IDs.
- initial_partitions = {}
- terminal_set = self.__dfa.terminal_set()
- all_keys = [] # Will contain all TransitionKeys in the dfa.
-
- # f fills in initial_partitions, id_to_state and all_keys.
- def f(state, visitor_state):
- state_id = len(id_to_state)
- id_to_state[state_id] = state
- action = state.action()
- all_keys.append(state.key_iter())
- if state in terminal_set:
- key = ("terminal set", action)
- else:
- # Match actions are valid only if we have matched the whole token, not
- # just some sub-part of it.
- # TODO(dcarney): restore
- # assert not action.is_match_action()
- key = ("nonterminal set", action)
- if not key in initial_partitions:
- initial_partitions[key] = set()
- initial_partitions[key].add(state_id)
- self.__dfa.visit_all_states(f)
- partitions = set()
- working_set = set()
-
- # Create StatePartition objects:
- for k, p in initial_partitions.items():
- p = StatePartition(p)
- partitions.add(p)
- working_set.add(p)
- state_to_id = {v : k for k, v in id_to_state.items()}
- assert len(id_to_state) == len(state_to_id)
- self.__state_to_id = state_to_id
- self.__id_to_state = id_to_state
-
- # The alphabet defines the TransitionKeys we need to check when dedicing if
- # two states of the dfa can be in the same partition. See the doc string of
- # TransitionKey.disjoint_keys.
-
- # For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet is
- # {[a-b], [c-d], [e-h]}. If state S1 has a transition to S2 with
- # TransitionKey [a-d], and state S3 has a transition to S4 with
- # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This will
- # become clear when we check the transition for TransitionKey [c-d] (S1 has
- # a transition to S2, S3 has a transition to S4).
- encoding = self.__dfa.encoding()
- self.__alphabet = list(
- TransitionKey.disjoint_keys(encoding, chain(*all_keys)))
-
- # For each state and each TransitionKey in the alphabet, find out which
- # transition we take from the state with the TransitionKey.
- transitions = {}
- for state_id, state in id_to_state.items():
- def f((key_id, key)):
- transition_state = state.transition_state_for_key(key)
- if transition_state:
- return state_to_id[transition_state]
- return None
- transitions[state_id] = map(f, enumerate(self.__alphabet))
- self.__transitions = transitions
- # verify created structures
- if self.__verify:
- for partition in partitions:
- for state_id in partition:
- transition_state_array = transitions[state_id]
- state = id_to_state[state_id]
- for key_id, key in enumerate(self.__alphabet):
- transition_state_id = transition_state_array[key_id]
- transition_state = state.transition_state_for_key(key)
- if not transition_state:
- assert transition_state_id == None
- else:
- assert transition_state_id != None
- assert transition_state_id == state_to_id[transition_state]
- return (working_set, partitions)
-
- @staticmethod
- def __partition_count(partitions):
- return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))
-
- def __create_states_from_partitions(self, partitions):
- '''Creates a new set of states where each state corresponds to a
- partition.'''
- id_to_state = self.__id_to_state
- state_to_id = self.__state_to_id
- verify = self.__verify
- partition_to_name = {}
- state_id_to_partition = {}
-
- # Fill in partition_to_name and state_id_to_partition.
- for partition in partitions:
- partition_to_name[partition] = str(partition)
- for state_id in partition:
- state_id_to_partition[state_id] = partition
- transitions = self.__transitions
-
- # Create nodes for partitions.
- partition_name_to_node = {}
- for partition in partitions:
- state_ids = list(partition)
- # state is a representative state for the partition, and state_id is it's
- # ID. To check the transitions between partitions, it's enough to check
- # transitions from the representative state. All other states will have
- # equivalent transitions, that is, transitions which transition into the
- # same partition.
- state_id = state_ids[0]
- state = id_to_state[state_id]
- node = {
- 'transitions' : {},
- 'terminal' : state in self.__dfa.terminal_set(),
- 'action' : state.action(),
- }
- partition_name_to_node[str(partition)] = node
- transition_key_to_state_id = transitions[state_id]
- for key_id, key in enumerate(self.__alphabet):
- transition_state_id = transition_key_to_state_id[key_id]
- if transition_state_id == None:
- if verify:
- # There is no transition for that key from state; check that no
- # other states in the partition have a transition with that key
- # either.
- assert not state.transition_state_for_key(key)
- for s_id in state_ids:
- assert not id_to_state[s_id].transition_state_for_key(key)
- continue
- transition_partition = state_id_to_partition[transition_state_id]
- assert transition_partition
- if verify:
- # There is a transition for that key from state; check that all other
- # states in the partition have an equivalent transition (transition
- # into the same partition).
- for s_id in state_ids:
- transition = id_to_state[s_id].transition_state_for_key(key)
- assert transition
- test_partition = state_id_to_partition[state_to_id[transition]]
- assert test_partition == transition_partition
- # Record the transition between partitions.
- node['transitions'][key] = partition_to_name[transition_partition]
- start_id = state_to_id[self.__dfa.start_state()]
- start_name = partition_to_name[state_id_to_partition[start_id]]
- return (start_name, partition_name_to_node)
-
- def minimize(self):
- '''Minimize the dfa. For the general idea of minimizing automata, see
- http://en.wikipedia.org/wiki/DFA_minimization. In addition, we need to take
- account the actions associated to stages, i.e., we cannot merge two states
- which have different actions.'''
- (working_set, partitions) = self.__create_initial_partitions()
- node_count = self.__dfa.node_count()
- id_to_state = self.__id_to_state
- state_to_id = self.__state_to_id
- # transitions is a 2-dimensional array indexed by state_id and index of the
- # TransitionKey in the alphabet.
- # transitions[state_id][transition_key_ix] = transition_state_id
- transitions = self.__transitions
- key_range = range(0, len(self.__alphabet))
- while working_set:
- assert working_set <= partitions
- assert self.__partition_count(partitions) == node_count
- # We split other partitions according to test_partition: If a partition
- # contains two states S1 and S2, such that S1 transitions to a state in
- # test_partition with a TransitionKey K, and S2 transitions to a state
- # *not* in test_partition with the same K, then S1 and S2 cannot be in the
- # same partition.
- test_partition = iter(working_set).next()
- working_set.remove(test_partition)
- state_in_test_partition = [False] * len(id_to_state)
- for state_id in test_partition:
- state_in_test_partition[state_id] = True
- for key_index in key_range:
- # states_transitioning_to_test_partition will contain the state_ids of
- # the states (across all partitions) which transition into
- # test_partition (any state within test_partition) with that key.
- states_transitioning_to_test_partition = set()
- for state_id, transition_key_to_state_id in transitions.items():
- transition_state_id = transition_key_to_state_id[key_index]
- if (transition_state_id and
- state_in_test_partition[transition_state_id]):
- states_transitioning_to_test_partition.add(state_id)
- if not states_transitioning_to_test_partition:
- continue
- # For each partition, we need to split it in two: {states which
- # transition into test_partition, states which don't}.
- new_partitions = set()
- old_partitions = set()
- for p in partitions:
- intersection = p.set().intersection(
- states_transitioning_to_test_partition)
- if not intersection:
- continue
- difference = p.set().difference(
- states_transitioning_to_test_partition)
- if not difference:
- continue
- intersection = StatePartition(intersection)
- difference = StatePartition(difference)
- old_partitions.add(p)
- new_partitions.add(intersection)
- new_partitions.add(difference)
- if p in working_set:
- working_set.remove(p)
- working_set.add(intersection)
- working_set.add(difference)
- elif len(intersection) <= len(difference):
- working_set.add(intersection)
- else:
- working_set.add(difference)
- if old_partitions:
- partitions -= old_partitions
- if new_partitions:
- partitions |= new_partitions
- (start_name, mapping) = self.__create_states_from_partitions(partitions)
- return Dfa(self.__dfa.encoding(), start_name, mapping)
« no previous file with comments | « tools/lexer_generator/code_generator.py ('k') | tools/lexer_generator/dfa_minimizer.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698