Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(576)

Unified Diff: tools/lexer_generator/dfa.py

Issue 69953022: Experimental parser: faster dfa minimization (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tools/lexer_generator/generator.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/dfa.py
diff --git a/tools/lexer_generator/dfa.py b/tools/lexer_generator/dfa.py
index 7fba14929f7255be9b8229c76f54f2b302ed7a89..3f0c50aacb6094017f4c5c41a46a719431cb347a 100644
--- a/tools/lexer_generator/dfa.py
+++ b/tools/lexer_generator/dfa.py
@@ -159,12 +159,14 @@ class StatePartition(object):
def __init__(self, node_numbers):
self.__node_numbers = node_numbers
assert self.__node_numbers
- self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0)
+ self.__hash = None
def set(self):
return self.__node_numbers
def __hash__(self):
+ if not self.__hash:
+ self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0)
return self.__hash
def __eq__(self, other):
@@ -182,17 +184,31 @@ class StatePartition(object):
class DfaMinimizer:
+ __verify = True
+
+ @staticmethod
+ def set_verify(verify):
+ DfaMinimizer.__verify = verify
+
def __init__(self, dfa):
self.__dfa = dfa
+ self.__id_map = None
+ self.__reverse_id_map = None
+ self.__alphabet = None
def __partition(self):
+ assert not self.__id_map
+ assert not self.__reverse_id_map
+ assert not self.__alphabet
action_map = {}
id_map = {}
terminal_set = self.__dfa.terminal_set()
+ all_keys = []
def f(state, visitor_state):
- node_number = len(id_map)
- id_map[node_number] = state
+ state_id = len(id_map)
+ id_map[state_id] = state
action = state.action()
+ all_keys.append(state.key_iter())
if action:
# TODO add this back
# assert state in self.__terminal_set
@@ -202,55 +218,61 @@ class DfaMinimizer:
action = "terminal_set"
if not action in action_map:
action_map[action] = set()
- action_map[action].add(node_number)
+ action_map[action].add(state_id)
self.__dfa.visit_all_states(f)
partitions = set()
for p in action_map.values():
- assert p
partitions.add(StatePartition(p))
- return (id_map, partitions)
-
- def __generate_alphabet(self):
- keys = []
- self.__dfa.visit_all_states(lambda s, acc: keys.append(s.key_iter()))
- return TransitionKey.disjoint_keys(chain(*keys))
-
- @staticmethod
- def __find_partition(partitions, id):
- for p in partitions:
- if id in p:
- return p
- assert False
-
- def __verify_partitions(self, id_map, partitions):
- assert len(partitions) <= len(id_map)
- alphabet = self.__generate_alphabet()
- for partition in partitions:
- for key in alphabet:
- first = True
- mapped_partition = None
+ reverse_id_map = {v : k for k, v in id_map.items()}
+ assert len(id_map) == len(reverse_id_map)
+ self.__reverse_id_map = reverse_id_map
+ self.__id_map = id_map
+ self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys)))
+ # map transitions wrt alphabet mapping
+ transitions = {}
+ for state_id, state in id_map.items():
+ def f((key_id, key)):
+ transition = state.key_matches(key)
+ if transition:
+ return reverse_id_map[transition]
+ return None
+ transitions[state_id] = map(f, enumerate(self.__alphabet))
+ self.__transitions = transitions
+ # verify created structures
+ if self.__verify:
+ for partition in partitions:
for state_id in partition:
- s = id_map[state_id].key_matches(key)
- if s:
- s = self.__find_partition(partitions, s.node_number())
- if first:
- first = False
- mapped_partition = s
- assert mapped_partition == s
+ transition_array = transitions[state_id]
+ state = id_map[state_id]
+ for key_id, key in enumerate(self.__alphabet):
+ transition_id = transition_array[key_id]
+ transition_state = state.key_matches(key)
+ if not transition_state:
+ assert transition_id == None
+ else:
+ assert transition_id != None
+ assert transition_id == reverse_id_map[transition_state]
+ return partitions
@staticmethod
def __partition_count(partitions):
return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))
- def __merge_partitions(self, id_map, partitions):
+ def __merge_partitions(self, partitions):
+ id_map = self.__id_map
+ reverse_id_map = self.__reverse_id_map
+ verify = self.__verify
mapping = {}
name_map = {}
+ reverse_partition_map = {}
for partition in partitions:
name_map[partition] = str(partition)
- alphabet = self.__generate_alphabet()
- reverse_id_map = {v:k for k, v in id_map.items()}
+ for state_id in partition:
+ reverse_partition_map[state_id] = partition
+ transitions = self.__transitions
for partition in partitions:
- state_id = iter(partition).next()
+ state_ids = list(partition)
+ state_id = state_ids[0]
state = id_map[state_id]
node = {
'transitions' : {},
@@ -258,45 +280,37 @@ class DfaMinimizer:
'action' : state.action(),
}
mapping[str(partition)] = node
- for key in alphabet:
- transition_state = state.key_matches(key)
- if not transition_state:
+ transition_array = transitions[state_id]
+ for key_id, key in enumerate(self.__alphabet):
+ transition_id = transition_array[key_id]
+ if transition_id == None:
+ if verify:
+ assert not state.key_matches(key)
+ for s_id in state_ids:
+ assert not id_map[s_id].key_matches(key)
continue
- transition_id = reverse_id_map[transition_state]
- transition_partition = self.__find_partition(partitions, transition_id)
+ transition_partition = reverse_partition_map[transition_id]
assert transition_partition
+ if verify:
+ for s_id in state_ids:
+ transition = id_map[s_id].key_matches(key)
+ assert transition
+ test_partition = reverse_partition_map[reverse_id_map[transition]]
+ assert test_partition == transition_partition
node['transitions'][key] = name_map[transition_partition]
start_id = reverse_id_map[self.__dfa.start_state()]
- start_name = name_map[self.__find_partition(partitions, start_id)]
+ start_name = name_map[reverse_partition_map[start_id]]
return (start_name, mapping)
def minimize(self):
- (id_map, partitions) = self.__partition()
+ partitions = self.__partition()
node_count = self.__dfa.node_count()
- assert self.__partition_count(partitions) == node_count
- if len(partitions) == 1:
- return self.__dfa
+ id_map = self.__id_map
+ reverse_id_map = self.__reverse_id_map
+ transitions = self.__transitions
+ key_range = range(0, len(self.__alphabet))
working_set = set(partitions)
- # map alphabet
- alphabet_mapping = {}
- for i, key in enumerate(self.__generate_alphabet()):
- alphabet_mapping[key] = i
- key_range = range(0, len(alphabet_mapping))
- # map transitions wrt alphabet mapping
- reverse_id_map = {v:k for k, v in id_map.items()}
- transitions = {}
- for id, state in id_map.items():
- def f((key, key_id)):
- transition = state.key_matches(key)
- if transition:
- return reverse_id_map[transition]
- return None
- transitions[id] = map(f, alphabet_mapping.items())
-
while working_set:
- # print "working_set %s partitions %s nodes %s" % (len(working_set),
- # len(partitions),
- # node_count)
assert working_set <= partitions
assert self.__partition_count(partitions) == node_count
test_partition = iter(working_set).next()
@@ -308,7 +322,7 @@ class DfaMinimizer:
map_into_partition = set()
for state_id, transition_array in transitions.items():
transition_id = transition_array[key_index]
- if transition_id and test_array[transition_id]:
+ if transition_id != None and test_array[transition_id]:
map_into_partition.add(state_id)
if not map_into_partition:
continue
@@ -338,8 +352,5 @@ class DfaMinimizer:
partitions -= old_partitions
if new_partitions:
partitions |= new_partitions
- # self.__verify_partitions(id_map, partitions)
- if len(partitions) == len(id_map):
- return self.__dfa
- (start_name, mapping) = self.__merge_partitions(id_map, partitions)
+ (start_name, mapping) = self.__merge_partitions(partitions)
return Dfa(start_name, mapping)
« no previous file with comments | « no previous file | tools/lexer_generator/generator.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698