Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(547)

Side by Side Diff: tools/lexer_generator/dfa.py

Issue 169523003: Experimental parser: split and rename some files (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/code_generator.py ('k') | tools/lexer_generator/dfa_minimizer.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 from itertools import chain 28 from itertools import chain
29 from automaton import * 29 from automaton import *
30 from transition_keys import TransitionKey 30 from transition_key import TransitionKey
31 31
32 class DfaState(AutomatonState): 32 class DfaState(AutomatonState):
33 33
34 def __init__(self, name, action, transitions): 34 def __init__(self, name, action, transitions):
35 super(DfaState, self).__init__() 35 super(DfaState, self).__init__()
36 self.__name = name 36 self.__name = name
37 self.__transitions = transitions 37 self.__transitions = transitions
38 self.__action = action 38 self.__action = action
39 assert isinstance(action, Action) 39 assert isinstance(action, Action)
40 40
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
109 assert self.__node_count == state_count 109 assert self.__node_count == state_count
110 110
111 def node_count(self): 111 def node_count(self):
112 return self.__node_count 112 return self.__node_count
113 113
114 def start_state(self): 114 def start_state(self):
115 return self.__start 115 return self.__start
116 116
117 def terminal_set(self): 117 def terminal_set(self):
118 return set(self.__terminal_set) 118 return set(self.__terminal_set)
119
120 def minimize(self):
121 return DfaMinimizer(self).minimize()
122
123 class StatePartition(object):
124
125 def __init__(self, node_numbers):
126 self.__node_numbers = node_numbers
127 assert self.__node_numbers
128 self.__hash = None
129
130 def set(self):
131 return self.__node_numbers
132
133 def __hash__(self):
134 if not self.__hash:
135 self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0)
136 return self.__hash
137
138 def __eq__(self, other):
139 return (isinstance(other, self.__class__) and
140 self.__node_numbers == other.__node_numbers)
141
142 def __len__(self):
143 return len(self.__node_numbers)
144
145 def __iter__(self):
146 return self.__node_numbers.__iter__()
147
148 def __str__(self):
149 return str([x for x in self.__node_numbers])
150
151 class DfaMinimizer:
152
153 __verify = True
154
155 @staticmethod
156 def set_verify(verify):
157 DfaMinimizer.__verify = verify
158
159 def __init__(self, dfa):
160 self.__dfa = dfa
161 self.__id_to_state = None
162 self.__state_to_id = None
163 self.__alphabet = None
164
165 def __create_initial_partitions(self):
166 assert not self.__id_to_state
167 assert not self.__state_to_id
168 assert not self.__alphabet
169
170 # For the minimization, we associate each state with an ID. A set of states
171 # is represented as a set of state IDs.
172 id_to_state = {}
173
174 # First we partition the states into the following groups:
175 # - terminal states without action
176 # - nonterminal states without action
177 # - one group per action: terminal states which have the action
178 # - one group per action: nonterminal states which have the action
179 # These are the keys of initial_partitions. The values are lists of state
180 # IDs.
181 initial_partitions = {}
182 terminal_set = self.__dfa.terminal_set()
183 all_keys = [] # Will contain all TransitionKeys in the dfa.
184
185 # f fills in initial_partitions, id_to_state and all_keys.
186 def f(state, visitor_state):
187 state_id = len(id_to_state)
188 id_to_state[state_id] = state
189 action = state.action()
190 all_keys.append(state.key_iter())
191 if state in terminal_set:
192 key = ("terminal set", action)
193 else:
194 # Match actions are valid only if we have matched the whole token, not
195 # just some sub-part of it.
196 # TODO(dcarney): restore
197 # assert not action.is_match_action()
198 key = ("nonterminal set", action)
199 if not key in initial_partitions:
200 initial_partitions[key] = set()
201 initial_partitions[key].add(state_id)
202 self.__dfa.visit_all_states(f)
203 partitions = set()
204 working_set = set()
205
206 # Create StatePartition objects:
207 for k, p in initial_partitions.items():
208 p = StatePartition(p)
209 partitions.add(p)
210 working_set.add(p)
211 state_to_id = {v : k for k, v in id_to_state.items()}
212 assert len(id_to_state) == len(state_to_id)
213 self.__state_to_id = state_to_id
214 self.__id_to_state = id_to_state
215
216 # The alphabet defines the TransitionKeys we need to check when dedicing if
217 # two states of the dfa can be in the same partition. See the doc string of
218 # TransitionKey.disjoint_keys.
219
220 # For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet is
221 # {[a-b], [c-d], [e-h]}. If state S1 has a transition to S2 with
222 # TransitionKey [a-d], and state S3 has a transition to S4 with
223 # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This will
224 # become clear when we check the transition for TransitionKey [c-d] (S1 has
225 # a transition to S2, S3 has a transition to S4).
226 encoding = self.__dfa.encoding()
227 self.__alphabet = list(
228 TransitionKey.disjoint_keys(encoding, chain(*all_keys)))
229
230 # For each state and each TransitionKey in the alphabet, find out which
231 # transition we take from the state with the TransitionKey.
232 transitions = {}
233 for state_id, state in id_to_state.items():
234 def f((key_id, key)):
235 transition_state = state.transition_state_for_key(key)
236 if transition_state:
237 return state_to_id[transition_state]
238 return None
239 transitions[state_id] = map(f, enumerate(self.__alphabet))
240 self.__transitions = transitions
241 # verify created structures
242 if self.__verify:
243 for partition in partitions:
244 for state_id in partition:
245 transition_state_array = transitions[state_id]
246 state = id_to_state[state_id]
247 for key_id, key in enumerate(self.__alphabet):
248 transition_state_id = transition_state_array[key_id]
249 transition_state = state.transition_state_for_key(key)
250 if not transition_state:
251 assert transition_state_id == None
252 else:
253 assert transition_state_id != None
254 assert transition_state_id == state_to_id[transition_state]
255 return (working_set, partitions)
256
257 @staticmethod
258 def __partition_count(partitions):
259 return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))
260
261 def __create_states_from_partitions(self, partitions):
262 '''Creates a new set of states where each state corresponds to a
263 partition.'''
264 id_to_state = self.__id_to_state
265 state_to_id = self.__state_to_id
266 verify = self.__verify
267 partition_to_name = {}
268 state_id_to_partition = {}
269
270 # Fill in partition_to_name and state_id_to_partition.
271 for partition in partitions:
272 partition_to_name[partition] = str(partition)
273 for state_id in partition:
274 state_id_to_partition[state_id] = partition
275 transitions = self.__transitions
276
277 # Create nodes for partitions.
278 partition_name_to_node = {}
279 for partition in partitions:
280 state_ids = list(partition)
281 # state is a representative state for the partition, and state_id is it's
282 # ID. To check the transitions between partitions, it's enough to check
283 # transitions from the representative state. All other states will have
284 # equivalent transitions, that is, transitions which transition into the
285 # same partition.
286 state_id = state_ids[0]
287 state = id_to_state[state_id]
288 node = {
289 'transitions' : {},
290 'terminal' : state in self.__dfa.terminal_set(),
291 'action' : state.action(),
292 }
293 partition_name_to_node[str(partition)] = node
294 transition_key_to_state_id = transitions[state_id]
295 for key_id, key in enumerate(self.__alphabet):
296 transition_state_id = transition_key_to_state_id[key_id]
297 if transition_state_id == None:
298 if verify:
299 # There is no transition for that key from state; check that no
300 # other states in the partition have a transition with that key
301 # either.
302 assert not state.transition_state_for_key(key)
303 for s_id in state_ids:
304 assert not id_to_state[s_id].transition_state_for_key(key)
305 continue
306 transition_partition = state_id_to_partition[transition_state_id]
307 assert transition_partition
308 if verify:
309 # There is a transition for that key from state; check that all other
310 # states in the partition have an equivalent transition (transition
311 # into the same partition).
312 for s_id in state_ids:
313 transition = id_to_state[s_id].transition_state_for_key(key)
314 assert transition
315 test_partition = state_id_to_partition[state_to_id[transition]]
316 assert test_partition == transition_partition
317 # Record the transition between partitions.
318 node['transitions'][key] = partition_to_name[transition_partition]
319 start_id = state_to_id[self.__dfa.start_state()]
320 start_name = partition_to_name[state_id_to_partition[start_id]]
321 return (start_name, partition_name_to_node)
322
323 def minimize(self):
324 '''Minimize the dfa. For the general idea of minimizing automata, see
325 http://en.wikipedia.org/wiki/DFA_minimization. In addition, we need to take
326 account the actions associated to stages, i.e., we cannot merge two states
327 which have different actions.'''
328 (working_set, partitions) = self.__create_initial_partitions()
329 node_count = self.__dfa.node_count()
330 id_to_state = self.__id_to_state
331 state_to_id = self.__state_to_id
332 # transitions is a 2-dimensional array indexed by state_id and index of the
333 # TransitionKey in the alphabet.
334 # transitions[state_id][transition_key_ix] = transition_state_id
335 transitions = self.__transitions
336 key_range = range(0, len(self.__alphabet))
337 while working_set:
338 assert working_set <= partitions
339 assert self.__partition_count(partitions) == node_count
340 # We split other partitions according to test_partition: If a partition
341 # contains two states S1 and S2, such that S1 transitions to a state in
342 # test_partition with a TransitionKey K, and S2 transitions to a state
343 # *not* in test_partition with the same K, then S1 and S2 cannot be in the
344 # same partition.
345 test_partition = iter(working_set).next()
346 working_set.remove(test_partition)
347 state_in_test_partition = [False] * len(id_to_state)
348 for state_id in test_partition:
349 state_in_test_partition[state_id] = True
350 for key_index in key_range:
351 # states_transitioning_to_test_partition will contain the state_ids of
352 # the states (across all partitions) which transition into
353 # test_partition (any state within test_partition) with that key.
354 states_transitioning_to_test_partition = set()
355 for state_id, transition_key_to_state_id in transitions.items():
356 transition_state_id = transition_key_to_state_id[key_index]
357 if (transition_state_id and
358 state_in_test_partition[transition_state_id]):
359 states_transitioning_to_test_partition.add(state_id)
360 if not states_transitioning_to_test_partition:
361 continue
362 # For each partition, we need to split it in two: {states which
363 # transition into test_partition, states which don't}.
364 new_partitions = set()
365 old_partitions = set()
366 for p in partitions:
367 intersection = p.set().intersection(
368 states_transitioning_to_test_partition)
369 if not intersection:
370 continue
371 difference = p.set().difference(
372 states_transitioning_to_test_partition)
373 if not difference:
374 continue
375 intersection = StatePartition(intersection)
376 difference = StatePartition(difference)
377 old_partitions.add(p)
378 new_partitions.add(intersection)
379 new_partitions.add(difference)
380 if p in working_set:
381 working_set.remove(p)
382 working_set.add(intersection)
383 working_set.add(difference)
384 elif len(intersection) <= len(difference):
385 working_set.add(intersection)
386 else:
387 working_set.add(difference)
388 if old_partitions:
389 partitions -= old_partitions
390 if new_partitions:
391 partitions |= new_partitions
392 (start_name, mapping) = self.__create_states_from_partitions(partitions)
393 return Dfa(self.__dfa.encoding(), start_name, mapping)
OLDNEW
« no previous file with comments | « tools/lexer_generator/code_generator.py ('k') | tools/lexer_generator/dfa_minimizer.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698