Chromium Code Reviews

Side by Side Diff: tools/lexer_generator/dfa.py

Issue 63183007: Experimental parser: dfa minimization (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
# Copyright 2013 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from itertools import chain
from automaton import *
from transition_keys import TransitionKey

class DfaState(AutomatonState):

  def __init__(self, name, action):
    super(DfaState, self).__init__()
    self.__name = name
    self.__transitions = {}
    self.__action = action

  def transitions_to_multiple_states(self):
    return False

  def name(self):
    return self.__name

  def action(self):
    return self.__action

  def add_transition(self, key, state):
    assert not self.__transitions.has_key(key)
    self.__transitions[key] = state

  def transitions(self):
    return self.__transitions

  # TODO abstract state matching
  def __matches(self, match_func, value):
    # Fold over the transition table, collecting every target state whose
    # key matches the value; the assert below requires at most one match.
    f = lambda acc, (k, vs): acc | set([vs]) if match_func(k, value) else acc
    matches = reduce(f, self.__transitions.items(), set())
    if not matches:
      return None
    assert len(matches) == 1
    return iter(matches).next()

  def char_matches(self, value):
    return self.__matches(lambda k, v : k.matches_char(v), value)

  def key_matches(self, value):
    return self.__matches(lambda k, v : k.matches_key(v), value)

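The reduce/lambda form of __matches above is compact but dense. As an illustrative sketch only (not part of the patch), and assuming match_func is one of the key predicates used by char_matches/key_matches, it is equivalent to this explicit loop:

def matches_explicit(transitions, match_func, value):
  # Collect every target state whose transition key matches the value.
  targets = set()
  for key, target in transitions.items():
    if match_func(key, value):
      targets.add(target)
  if not targets:
    return None
  assert len(targets) == 1  # mirrors the assert above: at most one match
  return iter(targets).next()
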
class Dfa(Automaton):

  def __init__(self, start_name, mapping):
    super(Dfa, self).__init__()
    self.__terminal_set = set()
    name_map = {}
    for name, node_data in mapping.items():
      node = DfaState(name, node_data['action'])
      name_map[name] = node
      if node_data['terminal']:
(...skipping 10 matching lines...)
        node.add_transition(merged_key, name_map[state])
    self.__start = name_map[start_name]
    self.__node_count = len(mapping)
    self.__verify()

  def __verify(self):
    assert self.__terminal_set
    state_count = self.visit_all_states(lambda state, count: count + 1, 0)
    assert self.__node_count == state_count

  def node_count(self):
    return self.__node_count

  def start_state(self):
    return self.__start

  def start_set(self):
    return set([self.__start])

  def terminal_set(self):
    return set(self.__terminal_set)

  @staticmethod
(...skipping 31 matching lines...)
          yield (state.action(), last_position, pos)
          last_position = pos
          # lex next token
          next = Dfa.__match_char(self.__start, c)
          assert next
          state = next
    assert state.action()  # must invoke default action here
    yield (state.action(), last_position, len(string))

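Per the two yield statements above, lex produces (action, start, end) triples, where string[start:end] is the matched lexeme. A hypothetical consumer (the mapping format required to construct a Dfa is elided in this diff, so dfa is assumed to exist already):

def print_tokens(dfa, string):
  # Each token is (action, start, end) over a half-open slice of the input.
  for (action, start, end) in dfa.lex(string):
    print "%s: %r" % (action, string[start:end])
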
  def minimize(self):
    return DfaMinimizer(self).minimize()

class StatePartition(object):

  def __init__(self, node_numbers):
    self.__node_numbers = frozenset(iter(node_numbers))
    assert self.__node_numbers
    self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0)

  def set(self):
    return self.__node_numbers

  def __hash__(self):
    return self.__hash

  def __eq__(self, other):
    return (isinstance(other, self.__class__) and
            self.__node_numbers == other.__node_numbers)

  def __len__(self):
    return len(self.__node_numbers)

  def __iter__(self):
    return self.__node_numbers.__iter__()

  def __str__(self):
    return str([x for x in self.__node_numbers])

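StatePartition exists so that sets of state ids can themselves be stored in sets and dicts: frozenset gives immutability, and the XOR-folded hash is order-independent, so two partitions built from the same ids in any order compare and hash equal. A minimal sketch of the intended value semantics:

p1 = StatePartition([1, 2, 3])
p2 = StatePartition([3, 2, 1])
assert p1 == p2 and hash(p1) == hash(p2)
assert len(set([p1, p2])) == 1  # duplicates collapse inside a set
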
class DfaMinimizer:

  def __init__(self, dfa):
    self.__dfa = dfa

  def __partition(self):
    action_map = {}
    id_map = {}
    terminal_set = self.__dfa.terminal_set()
    def f(state, visitor_state):
      node_number = state.node_number()
      assert not node_number in id_map
      id_map[node_number] = state
      action = state.action()
      if action:
        # TODO add this back
        # assert state in self.__terminal_set
        if state not in terminal_set:
          action = "nonterminal action " + str(action)
      elif state in terminal_set:
        action = "terminal_set"
      if not action in action_map:
        action_map[action] = set()
      action_map[action].add(node_number)
    self.__dfa.visit_all_states(f)
    partitions = set()
    for p in action_map.values():
      assert p
      partitions.add(StatePartition(p))
    return (id_map, partitions)

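__partition builds the initial split for the refinement loop in minimize below: states are grouped by an action label (qualified when a state has an action but is not terminal, or is terminal with no action of its own). Refinement can only split these groups further, never merge across different labels. The core grouping step, reduced to a standalone sketch with hypothetical node numbers and labels:

from collections import defaultdict

def group_by_label(labels):
  # labels: node_number -> action label (None when the state has none)
  groups = defaultdict(set)
  for node, label in labels.items():
    groups[label].add(node)
  return set(StatePartition(g) for g in groups.values())

# e.g. {0: None, 1: None, 2: 'number'}
#   -> {StatePartition([0, 1]), StatePartition([2])}
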
  def __generate_alphabet(self):
    keys = []
    self.__dfa.visit_all_states(lambda s, acc: keys.append(s.key_iter()))
    return TransitionKey.disjoint_keys(chain(*keys))

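TransitionKey.disjoint_keys is defined in transition_keys.py, not in this diff; the assumption here is that it splits the union of all transition keys into non-overlapping pieces, so each piece acts as one alphabet symbol that selects at most one transition per state. A conceptual sketch of the same operation on plain inclusive integer ranges:

def disjoint_ranges(ranges):
  # Split possibly-overlapping [lo, hi] ranges at every boundary point,
  # then keep only the pieces covered by at least one input range.
  points = sorted(set(p for lo, hi in ranges for p in (lo, hi + 1)))
  pieces = [(points[i], points[i + 1] - 1) for i in range(len(points) - 1)]
  return [(lo, hi) for (lo, hi) in pieces
          if any(a <= lo and hi <= b for (a, b) in ranges)]

# disjoint_ranges([(0, 9), (5, 15)]) -> [(0, 4), (5, 9), (10, 15)]
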
  @staticmethod
  def __find_partition(partitions, id):
    for p in partitions:
      if id in p:
        return p
    assert False

  def __verify_partitions(self, id_map, partitions):
    assert len(partitions) <= len(id_map)
    alphabet = self.__generate_alphabet()
    for partition in partitions:
      for key in alphabet:
        first = True
        mapped_partition = None
        for state_id in partition:
          s = id_map[state_id].key_matches(key)
          if s:
            s = self.__find_partition(partitions, s.node_number())
          if first:
            first = False
            mapped_partition = s
          assert mapped_partition == s

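The invariant checked by __verify_partitions is the defining property of a valid refinement: for every alphabet key, all states within a partition must transition into the same partition (or all have no transition for that key). mapped_partition records the first state's target, and every other state in the partition must agree with it.
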
  @staticmethod
  def __partition_count(partitions):
    return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))

  def minimize(self):
    (id_map, partitions) = self.__partition()
    node_count = self.__dfa.node_count()
    assert self.__partition_count(partitions) == node_count
    if len(partitions) == 1:
      return self.__dfa
    working_set = set(partitions)
    alphabet = self.__generate_alphabet()
    all_state_ids = set(id_map.keys())
    while working_set:
      # print "working_set %s partitions %s nodes %s" % (len(working_set),
      #                                                  len(partitions),
      #                                                  node_count)
      assert working_set <= partitions
      assert self.__partition_count(partitions) == node_count
      test_partition = iter(working_set).next()
      working_set.remove(test_partition)
      to_split = None
      for key in alphabet:
        # print key
        transition_map = {}
        map_into_partition = set()
        for state_id in all_state_ids:
          maps_to = id_map[state_id].key_matches(key)
          if maps_to and maps_to.node_number() in test_partition:
            map_into_partition.add(state_id)
        if not map_into_partition:
          continue
        new_partitions = set()
        for p in partitions:
          intersection = p.set().intersection(map_into_partition)
          difference = p.set().difference(map_into_partition)
          if not intersection or not difference:
            new_partitions.add(p)
            continue
          intersection = StatePartition(intersection)
          difference = StatePartition(difference)
          new_partitions.add(intersection)
          new_partitions.add(difference)
          if p in working_set:
            working_set.remove(p)
            working_set.add(intersection)
            working_set.add(difference)
          elif len(intersection) <= len(difference):
            working_set.add(intersection)
          else:
            working_set.add(difference)
        partitions = new_partitions
    self.__verify_partitions(id_map, partitions)
    if len(partitions) == len(id_map):
      return self.__dfa
    # merge partitions
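The loop above is Hopcroft-style partition refinement: take a candidate splitter from the working set, compute for each alphabet key the set of states whose transition lands inside the splitter, and break apart every partition that straddles that preimage, queuing the smaller fragment when the parent was not already queued. The same idea on a toy table-driven DFA, as a self-contained sketch (this is not the patch's API; transitions[state][symbol] is a hypothetical total transition table):

def hopcroft(states, alphabet, transitions, accepting):
  # Returns the equivalence classes (frozensets of states) of the minimal DFA.
  partitions = set([frozenset(accepting), frozenset(states - accepting)])
  partitions.discard(frozenset())
  work = set(partitions)
  while work:
    splitter = work.pop()
    for c in alphabet:
      # States whose c-transition lands inside the splitter.
      preimage = set(s for s in states if transitions[s][c] in splitter)
      for p in list(partitions):
        inter, diff = p & preimage, p - preimage
        if not inter or not diff:
          continue
        partitions.remove(p)
        partitions.update([inter, diff])
        if p in work:
          work.remove(p)
          work.update([inter, diff])
        else:
          work.add(min(inter, diff, key=len))  # smaller half suffices
  return partitions

# Two indistinguishable accepting states collapse into one class:
assert hopcroft(set([0, 1]), ['a'], {0: {'a': 1}, 1: {'a': 0}},
                set([0, 1])) == set([frozenset([0, 1])])
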