Chromium Code Reviews

Side by Side Diff: tools/lexer_generator/dfa.py

Issue 63183007: Experimental parser: dfa minimization (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
# Copyright 2013 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from itertools import chain
from automaton import *
from transition_keys import TransitionKey

class DfaState(AutomatonState):

  def __init__(self, name, action):
    super(DfaState, self).__init__()
    self.__name = name
    self.__transitions = {}
    self.__action = action

  def transitions_to_multiple_states(self):
    return False

  def name(self):
    return self.__name

  def action(self):
    return self.__action

  def add_transition(self, key, state):
    assert not self.__transitions.has_key(key)
    self.__transitions[key] = state

  def transitions(self):
    return self.__transitions

  # TODO abstract state matching
  def __matches(self, match_func, value):
    # Fold over the transition table, collecting every target state whose
    # key matches the value; the assert below requires at most one match.
    f = lambda acc, (k, vs): acc | set([vs]) if match_func(k, value) else acc
    matches = reduce(f, self.__transitions.items(), set())
    if not matches:
      return None
    assert len(matches) == 1
    return iter(matches).next()

  def char_matches(self, value):
    return self.__matches(lambda k, v : k.matches_char(v), value)

  def key_matches(self, value):
    return self.__matches(lambda k, v : k.matches_key(v), value)

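The reduce/lambda form of __matches above is compact but dense. As an illustrative sketch only (not part of the patch), and assuming match_func is one of the key predicates used by char_matches/key_matches, it is equivalent to this explicit loop:

def matches_explicit(transitions, match_func, value):
  # Collect every target state whose transition key matches the value.
  targets = set()
  for key, target in transitions.items():
    if match_func(key, value):
      targets.add(target)
  if not targets:
    return None
  assert len(targets) == 1  # mirrors the assert above: at most one match
  return iter(targets).next()
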
class Dfa(Automaton):

  def __init__(self, start_name, mapping):
    super(Dfa, self).__init__()
    self.__terminal_set = set()
    name_map = {}
    for name, node_data in mapping.items():
      node = DfaState(name, node_data['action'])
      name_map[name] = node
      if node_data['terminal']:
(...skipping 10 matching lines...)
        node.add_transition(merged_key, name_map[state])
    self.__start = name_map[start_name]
    self.__node_count = len(mapping)
    self.__verify()

  def __verify(self):
    assert self.__terminal_set
    state_count = self.visit_all_states(lambda state, count: count + 1, 0)
    assert self.__node_count == state_count

  def node_count(self):
    return self.__node_count

  def start_state(self):
    return self.__start

  def start_set(self):
    return set([self.__start])

  def terminal_set(self):
    return set(self.__terminal_set)

  @staticmethod
(...skipping 31 matching lines...)
          yield (state.action(), last_position, pos)
          last_position = pos
          # lex next token
          next = Dfa.__match_char(self.__start, c)
          assert next
          state = next
    assert state.action()  # must invoke default action here
    yield (state.action(), last_position, len(string))

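Per the two yield statements above, lex produces (action, start, end) triples, where string[start:end] is the matched lexeme. A hypothetical consumer (the mapping format required to construct a Dfa is elided in this diff, so dfa is assumed to exist already):

def print_tokens(dfa, string):
  # Each token is (action, start, end) over a half-open slice of the input.
  for (action, start, end) in dfa.lex(string):
    print "%s: %r" % (action, string[start:end])
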
  def minimize(self):
    return DfaMinimizer(self).minimize()

class StatePartition(object):

  def __init__(self, node_numbers):
    self.__node_numbers = frozenset(iter(node_numbers))
    assert self.__node_numbers
    self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0)

  def set(self):
    return self.__node_numbers

  def __hash__(self):
    return self.__hash

  def __eq__(self, other):
    return (isinstance(other, self.__class__) and
            self.__node_numbers == other.__node_numbers)

  def __len__(self):
    return len(self.__node_numbers)

  def __iter__(self):
    return self.__node_numbers.__iter__()

  def __str__(self):
    return str([x for x in self.__node_numbers])

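StatePartition exists so that sets of state ids can themselves be stored in sets and dicts: frozenset gives immutability, and the XOR-folded hash is order-independent, so two partitions built from the same ids in any order compare and hash equal. A minimal sketch of the intended value semantics:

p1 = StatePartition([1, 2, 3])
p2 = StatePartition([3, 2, 1])
assert p1 == p2 and hash(p1) == hash(p2)
assert len(set([p1, p2])) == 1  # duplicates collapse inside a set
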
class DfaMinimizer:

  def __init__(self, dfa):
    self.__dfa = dfa

  def __partition(self):
    action_map = {}
    id_map = {}
    terminal_set = self.__dfa.terminal_set()
    def f(state, visitor_state):
      node_number = state.node_number()
      assert not node_number in id_map
      id_map[node_number] = state
      action = state.action()
      if action:
        # TODO add this back
        # assert state in self.__terminal_set
        if state not in terminal_set:
          action = "nonterminal action " + str(action)
      elif state in terminal_set:
        action = "terminal_set"
      if not action in action_map:
        action_map[action] = set()
      action_map[action].add(node_number)
    self.__dfa.visit_all_states(f)
    partitions = set()
    for p in action_map.values():
      assert p
      partitions.add(StatePartition(p))
    return (id_map, partitions)

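__partition builds the initial split for the refinement loop in minimize below: states are grouped by an action label (qualified when a state has an action but is not terminal, or is terminal with no action of its own). Refinement can only split these groups further, never merge across different labels. The core grouping step, reduced to a standalone sketch with hypothetical node numbers and labels:

from collections import defaultdict

def group_by_label(labels):
  # labels: node_number -> action label (None when the state has none)
  groups = defaultdict(set)
  for node, label in labels.items():
    groups[label].add(node)
  return set(StatePartition(g) for g in groups.values())

# e.g. {0: None, 1: None, 2: 'number'}
#   -> {StatePartition([0, 1]), StatePartition([2])}
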
  def __generate_alphabet(self):
    keys = []
    self.__dfa.visit_all_states(lambda s, acc: keys.append(s.key_iter()))
    return TransitionKey.disjoint_keys(chain(*keys))

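TransitionKey.disjoint_keys is defined in transition_keys.py, not in this diff; the assumption here is that it splits the union of all transition keys into non-overlapping pieces, so each piece acts as one alphabet symbol that selects at most one transition per state. A conceptual sketch of the same operation on plain inclusive integer ranges:

def disjoint_ranges(ranges):
  # Split possibly-overlapping [lo, hi] ranges at every boundary point,
  # then keep only the pieces covered by at least one input range.
  points = sorted(set(p for lo, hi in ranges for p in (lo, hi + 1)))
  pieces = [(points[i], points[i + 1] - 1) for i in range(len(points) - 1)]
  return [(lo, hi) for (lo, hi) in pieces
          if any(a <= lo and hi <= b for (a, b) in ranges)]

# disjoint_ranges([(0, 9), (5, 15)]) -> [(0, 4), (5, 9), (10, 15)]
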
  @staticmethod
  def __find_partition(partitions, id):
    for p in partitions:
      if id in p:
        return p
    assert False

  def __verify_partitions(self, id_map, partitions):
    assert len(partitions) <= len(id_map)
    alphabet = self.__generate_alphabet()
    for partition in partitions:
      for key in alphabet:
        first = True
        mapped_partition = None
        for state_id in partition:
          s = id_map[state_id].key_matches(key)
          if s:
            s = self.__find_partition(partitions, s.node_number())
          if first:
            first = False
            mapped_partition = s
          assert mapped_partition == s

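The invariant checked by __verify_partitions is the defining property of a valid refinement: for every alphabet key, all states within a partition must transition into the same partition (or all have no transition for that key). mapped_partition records the first state's target, and every other state in the partition must agree with it.
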
  @staticmethod
  def __partition_count(partitions):
    return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))

  def minimize(self):
    (id_map, partitions) = self.__partition()
    node_count = self.__dfa.node_count()
    assert self.__partition_count(partitions) == node_count
    if len(partitions) == 1:
      return self.__dfa
    working_set = set(partitions)
    alphabet = self.__generate_alphabet()
    all_state_ids = set(id_map.keys())
    while working_set:
      # print "working_set %s partitions %s nodes %s" % (len(working_set),
      #                                                  len(partitions),
      #                                                  node_count)
      assert working_set <= partitions
      assert self.__partition_count(partitions) == node_count
      test_partition = iter(working_set).next()
      working_set.remove(test_partition)
      to_split = None
      for key in alphabet:
        # print key
        transition_map = {}
        map_into_partition = set()
        for state_id in all_state_ids:
          maps_to = id_map[state_id].key_matches(key)
          if maps_to and maps_to.node_number() in test_partition:
            map_into_partition.add(state_id)
        if not map_into_partition:
          continue
        new_partitions = set()
        for p in partitions:
          intersection = p.set().intersection(map_into_partition)
          difference = p.set().difference(map_into_partition)
          if not intersection or not difference:
            new_partitions.add(p)
            continue
          intersection = StatePartition(intersection)
          difference = StatePartition(difference)
          new_partitions.add(intersection)
          new_partitions.add(difference)
          if p in working_set:
            working_set.remove(p)
            working_set.add(intersection)
            working_set.add(difference)
          elif len(intersection) <= len(difference):
            working_set.add(intersection)
          else:
            working_set.add(difference)
        partitions = new_partitions
    self.__verify_partitions(id_map, partitions)
    if len(partitions) == len(id_map):
      return self.__dfa
    # merge partitions
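The loop above is Hopcroft-style partition refinement: take a candidate splitter from the working set, compute for each alphabet key the set of states whose transition lands inside the splitter, and break apart every partition that straddles that preimage, queuing the smaller fragment when the parent was not already queued. The same idea on a toy table-driven DFA, as a self-contained sketch (this is not the patch's API; transitions[state][symbol] is a hypothetical total transition table):

def hopcroft(states, alphabet, transitions, accepting):
  # Returns the equivalence classes (frozensets of states) of the minimal DFA.
  partitions = set([frozenset(accepting), frozenset(states - accepting)])
  partitions.discard(frozenset())
  work = set(partitions)
  while work:
    splitter = work.pop()
    for c in alphabet:
      # States whose c-transition lands inside the splitter.
      preimage = set(s for s in states if transitions[s][c] in splitter)
      for p in list(partitions):
        inter, diff = p & preimage, p - preimage
        if not inter or not diff:
          continue
        partitions.remove(p)
        partitions.update([inter, diff])
        if p in work:
          work.remove(p)
          work.update([inter, diff])
        else:
          work.add(min(inter, diff, key=len))  # smaller half suffices
  return partitions

# Two indistinguishable accepting states collapse into one class:
assert hopcroft(set([0, 1]), ['a'], {0: {'a': 1}, 1: {'a': 0}},
                set([0, 1])) == set([frozenset([0, 1])])
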