Chromium Code Reviews

Unified Diff: tools/lexer_generator/dfa.py

Issue 78053002: Experimental lexer generator: rename_things(Dfa minimization) += comments part 2 (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: new try Created 7 years, 1 month ago
 # Copyright 2013 the V8 project authors. All rights reserved.
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met:
 #
 #     * Redistributions of source code must retain the above copyright
 #       notice, this list of conditions and the following disclaimer.
 #     * Redistributions in binary form must reproduce the above
 #       copyright notice, this list of conditions and the following
 #       disclaimer in the documentation and/or other materials provided
(...skipping 140 matching lines...)
 class DfaMinimizer:
 
   __verify = True
 
   @staticmethod
   def set_verify(verify):
     DfaMinimizer.__verify = verify
 
   def __init__(self, dfa):
     self.__dfa = dfa
-    self.__id_map = None
-    self.__reverse_id_map = None
+    self.__id_to_state = None
+    self.__state_to_id = None
     self.__alphabet = None
 
   def __create_initial_partitions(self):
-    assert not self.__id_map
-    assert not self.__reverse_id_map
+    assert not self.__id_to_state
+    assert not self.__state_to_id
     assert not self.__alphabet
 
     # For the minimization, we associate each state with an ID. A set of states
     # is represented as a set of state IDs.
-    id_map = {}
+    id_to_state = {}
 
     # First we partition the states into the following groups:
     # - terminal states without action
     # - nonterminal states without action
     # - one group per action: terminal states which have the action
     # - one group per action: nonterminal states which have the action
     # These are the keys of initial_partitions. The values are lists of state
     # IDs.
     initial_partitions = {}
     terminal_set = self.__dfa.terminal_set()
     all_keys = [] # Will contain all TransitionKeys in the dfa.
 
-    # f fills in initial_partitions, id_map and all_keys.
+    # f fills in initial_partitions, id_to_state and all_keys.
     def f(state, visitor_state):
-      state_id = len(id_map)
-      id_map[state_id] = state
+      state_id = len(id_to_state)
+      id_to_state[state_id] = state
       action = state.action()
       all_keys.append(state.key_iter())
       if action:
         if state not in terminal_set:
           assert action.entry_action()
           key = ("nonterminal action", action)
         else:
           key = ("terminal action", action)
       elif state in terminal_set:
         key = ("terminal set",)
       else:
         key = ("nonterminal set",)
       if not key in initial_partitions:
         initial_partitions[key] = set()
       initial_partitions[key].add(state_id)
     self.__dfa.visit_all_states(f)
     partitions = set()
     working_set = set()
 
     # Create StatePartition objects:
     for k, p in initial_partitions.items():
       p = StatePartition(p)
       partitions.add(p)
       working_set.add(p)
-    reverse_id_map = {v : k for k, v in id_map.items()}
-    assert len(id_map) == len(reverse_id_map)
-    self.__reverse_id_map = reverse_id_map
-    self.__id_map = id_map
+    state_to_id = {v : k for k, v in id_to_state.items()}
+    assert len(id_to_state) == len(state_to_id)
+    self.__state_to_id = state_to_id
+    self.__id_to_state = id_to_state
 
     # The alphabet defines the TransitionKeys we need to check when deciding if
-    # to states of the dfa can be in the same partition. See the doc string of
+    # two states of the dfa can be in the same partition. See the doc string of
     # TransitionKey.disjoint_keys.
 
     # For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet is
     # {[a-b], [c-d], [e-h]}. If state S1 has a transition to S2 with
     # TransitionKey [a-d], and state S3 has a transition to S4 with
     # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This will
     # become clear when we check the transition for TransitionKey [c-d] (S1 has
     # a transition to S2, S3 has a transition to S4).
     self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys)))
 
     # For each state and each TransitionKey in the alphabet, find out which
     # transition we take from the state with the TransitionKey.
     transitions = {}
-    for state_id, state in id_map.items():
+    for state_id, state in id_to_state.items():
       def f((key_id, key)):
         transition_state = state.transition_state_for_key(key)
         if transition_state:
-          return reverse_id_map[transition_state]
+          return state_to_id[transition_state]
         return None
       transitions[state_id] = map(f, enumerate(self.__alphabet))
     self.__transitions = transitions
     # verify created structures
     if self.__verify:
       for partition in partitions:
         for state_id in partition:
           transition_state_array = transitions[state_id]
-          state = id_map[state_id]
+          state = id_to_state[state_id]
           for key_id, key in enumerate(self.__alphabet):
             transition_state_id = transition_state_array[key_id]
             transition_state = state.transition_state_for_key(key)
             if not transition_state:
               assert transition_state_id == None
             else:
               assert transition_state_id != None
-              assert transition_state_id == reverse_id_map[transition_state]
+              assert transition_state_id == state_to_id[transition_state]
     return (working_set, partitions)
 
   @staticmethod
   def __partition_count(partitions):
     return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))
 
-  def __merge_partitions(self, partitions):
-    id_map = self.__id_map
-    reverse_id_map = self.__reverse_id_map
+  def __create_states_from_partitions(self, partitions):
+    '''Creates a new set of states where each state corresponds to a
+    partition.'''
+    id_to_state = self.__id_to_state
+    state_to_id = self.__state_to_id
     verify = self.__verify
-    mapping = {}
-    name_map = {}
-    reverse_partition_map = {}
+    partition_to_name = {}
+    state_id_to_partition = {}
+
+    # Fill in partition_to_name and state_id_to_partition.
     for partition in partitions:
-      name_map[partition] = str(partition)
+      partition_to_name[partition] = str(partition)
       for state_id in partition:
-        reverse_partition_map[state_id] = partition
+        state_id_to_partition[state_id] = partition
     transitions = self.__transitions
+
+    # Create nodes for partitions.
+    partition_name_to_node = {}
     for partition in partitions:
       state_ids = list(partition)
+      # state is a representative state for the partition, and state_id is its
+      # ID. To check the transitions between partitions, it's enough to check
+      # transitions from the representative state. All other states will have
+      # equivalent transitions, that is, transitions which transition into the
+      # same partition.
       state_id = state_ids[0]
-      state = id_map[state_id]
+      state = id_to_state[state_id]
       node = {
         'transitions' : {},
         'terminal' : state in self.__dfa.terminal_set(),
         'action' : state.action(),
       }
-      mapping[str(partition)] = node
-      transition_array = transitions[state_id]
+      partition_name_to_node[str(partition)] = node
+      transition_key_to_state_id = transitions[state_id]
       for key_id, key in enumerate(self.__alphabet):
-        transition_id = transition_array[key_id]
-        if transition_id == None:
+        transition_state_id = transition_key_to_state_id[key_id]
+        if transition_state_id == None:
           if verify:
+            # There is no transition for that key from state; check that no
+            # other states in the partition have a transition with that key
+            # either.
             assert not state.transition_state_for_key(key)
             for s_id in state_ids:
-              assert not id_map[s_id].transition_state_for_key(key)
+              assert not id_to_state[s_id].transition_state_for_key(key)
           continue
-        transition_partition = reverse_partition_map[transition_id]
+        transition_partition = state_id_to_partition[transition_state_id]
         assert transition_partition
         if verify:
+          # There is a transition for that key from state; check that all other
+          # states in the partition have an equivalent transition (transition
+          # into the same partition).
           for s_id in state_ids:
-            transition = id_map[s_id].transition_state_for_key(key)
+            transition = id_to_state[s_id].transition_state_for_key(key)
             assert transition
-            test_partition = reverse_partition_map[reverse_id_map[transition]]
+            test_partition = state_id_to_partition[state_to_id[transition]]
             assert test_partition == transition_partition
-        node['transitions'][key] = name_map[transition_partition]
-    start_id = reverse_id_map[self.__dfa.start_state()]
-    start_name = name_map[reverse_partition_map[start_id]]
-    return (start_name, mapping)
+        # Record the transition between partitions.
+        node['transitions'][key] = partition_to_name[transition_partition]
+    start_id = state_to_id[self.__dfa.start_state()]
+    start_name = partition_to_name[state_id_to_partition[start_id]]
+    return (start_name, partition_name_to_node)
 
   def minimize(self):
     '''Minimize the dfa. For the general idea of minimizing automata, see
     http://en.wikipedia.org/wiki/DFA_minimization. In addition, we need to take
     into account the actions associated with states, i.e., we cannot merge two
     states which have different actions.'''
     (working_set, partitions) = self.__create_initial_partitions()
     node_count = self.__dfa.node_count()
-    id_map = self.__id_map
-    reverse_id_map = self.__reverse_id_map
+    id_to_state = self.__id_to_state
+    state_to_id = self.__state_to_id
+    # transitions is a 2-dimensional array indexed by state_id and index of the
+    # TransitionKey in the alphabet.
+    # transitions[state_id][transition_key_ix] = transition_state_id
     transitions = self.__transitions
     key_range = range(0, len(self.__alphabet))
     while working_set:
       assert working_set <= partitions
       assert self.__partition_count(partitions) == node_count
+      # We split other partitions according to test_partition: If a partition
+      # contains two states S1 and S2, such that S1 transitions to a state in
+      # test_partition with a TransitionKey K, and S2 transitions to a state
+      # *not* in test_partition with the same K, then S1 and S2 cannot be in the
+      # same partition.
       test_partition = iter(working_set).next()
       working_set.remove(test_partition)
-      test_array = [False for x in range(0, len(id_map))]
-      for x in test_partition:
-        test_array[x] = True
+      state_in_test_partition = [False] * len(id_to_state)
+      for state_id in test_partition:
+        state_in_test_partition[state_id] = True
       for key_index in key_range:
-        map_into_partition = set()
-        for state_id, transition_array in transitions.items():
-          transition_id = transition_array[key_index]
-          if transition_id != None and test_array[transition_id]:
-            map_into_partition.add(state_id)
-        if not map_into_partition:
+        # states_transitioning_to_test_partition will contain the state_ids of
+        # the states (across all partitions) which transition into
+        # test_partition (any state within test_partition) with that key.
+        states_transitioning_to_test_partition = set()
+        for state_id, transition_key_to_state_id in transitions.items():
+          transition_state_id = transition_key_to_state_id[key_index]
+          if (transition_state_id and
+              state_in_test_partition[transition_state_id]):
+            states_transitioning_to_test_partition.add(state_id)
+        if not states_transitioning_to_test_partition:
           continue
+        # For each partition, we need to split it in two: {states which
+        # transition into test_partition, states which don't}.
         new_partitions = set()
         old_partitions = set()
         for p in partitions:
-          intersection = p.set().intersection(map_into_partition)
+          intersection = p.set().intersection(
+              states_transitioning_to_test_partition)
           if not intersection:
             continue
-          difference = p.set().difference(map_into_partition)
+          difference = p.set().difference(
+              states_transitioning_to_test_partition)
           if not difference:
             continue
           intersection = StatePartition(intersection)
           difference = StatePartition(difference)
           old_partitions.add(p)
           new_partitions.add(intersection)
           new_partitions.add(difference)
           if p in working_set:
             working_set.remove(p)
             working_set.add(intersection)
             working_set.add(difference)
           elif len(intersection) <= len(difference):
             working_set.add(intersection)
           else:
             working_set.add(difference)
         if old_partitions:
           partitions -= old_partitions
         if new_partitions:
           partitions |= new_partitions
-    (start_name, mapping) = self.__merge_partitions(partitions)
+    (start_name, mapping) = self.__create_states_from_partitions(partitions)
     return Dfa(start_name, mapping)
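
The comment in __create_initial_partitions explains the alphabet of disjoint TransitionKeys with the {[a-d], [c-h]} example. A minimal sketch of that idea, using plain inclusive (lo, hi) integer ranges as stand-ins for TransitionKey (an illustration only, not the real TransitionKey.disjoint_keys API and not part of the patch):

def disjoint_ranges(ranges):
  '''Splits overlapping inclusive (lo, hi) ranges into disjoint pieces.'''
  # Collect the boundary points where some range starts or ends.
  points = set()
  for lo, hi in ranges:
    points.add(lo)
    points.add(hi + 1)
  points = sorted(points)
  # Keep each piece between consecutive boundaries that is covered by at
  # least one input range.
  pieces = []
  for lo, next_lo in zip(points, points[1:]):
    hi = next_lo - 1
    if any(r_lo <= lo and hi <= r_hi for r_lo, r_hi in ranges):
      pieces.append((lo, hi))
  return pieces

# [a-d] and [c-h] split into [a-b], [c-d] and [e-h], as in the comment above.
print(disjoint_ranges([(ord('a'), ord('d')), (ord('c'), ord('h'))]))
# -> [(97, 98), (99, 100), (101, 104)]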
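The refinement loop in minimize() follows the splitting rule described in its comments: partitions are repeatedly split against a test_partition until no split changes anything. A self-contained sketch of the same idea on a toy DFA, with plain dicts and frozensets standing in for the Dfa and StatePartition classes (an illustration under those assumptions, not the patch's code):

def minimize_partitions(transitions, accepting, alphabet):
  '''transitions[state][symbol] -> state; returns a set of frozenset blocks.'''
  states = set(transitions)
  # Initial split: accepting vs. non-accepting states.
  partitions = {frozenset(accepting), frozenset(states - accepting)}
  partitions.discard(frozenset())
  working_set = set(partitions)
  while working_set:
    test_partition = working_set.pop()
    for symbol in alphabet:
      # States (in any partition) that transition into test_partition on symbol.
      goes_in = {s for s in states
                 if transitions[s].get(symbol) in test_partition}
      new_partitions = set()
      for p in partitions:
        inter, diff = p & goes_in, p - goes_in
        if inter and diff:
          # p is split in two; the smaller piece goes back on the work list.
          new_partitions.update([inter, diff])
          if p in working_set:
            working_set.remove(p)
            working_set.update([inter, diff])
          else:
            working_set.add(min(inter, diff, key=len))
        else:
          new_partitions.add(p)
      partitions = new_partitions
  return partitions

# Toy DFA over {'0', '1'} with accepting state D: B and C are equivalent.
toy = {
    'A': {'0': 'B', '1': 'C'},
    'B': {'0': 'D', '1': 'D'},
    'C': {'0': 'D', '1': 'D'},
    'D': {'0': 'D', '1': 'D'},
}
print(minimize_partitions(toy, accepting={'D'}, alphabet=['0', '1']))
# -> three blocks: {'A'}, {'B', 'C'} and {'D'}; B and C can be merged.

In the patch itself the same refinement is driven through DfaMinimizer(dfa).minimize(), which additionally keys the initial partitions on per-state actions so that states with different actions are never merged.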