Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(763)

Side by Side Diff: tools/lexer_generator/dfa.py

Issue 77733005: Experimental lexer generator: rename_things(Dfa minimization) += comments (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: comments from dcarney Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
61 matches = reduce(f, self.__transitions.items(), set()) 61 matches = reduce(f, self.__transitions.items(), set())
62 # Since this is a dfa, we should have at most one such state. Return it. 62 # Since this is a dfa, we should have at most one such state. Return it.
63 if not matches: 63 if not matches:
64 return None 64 return None
65 assert len(matches) == 1 65 assert len(matches) == 1
66 return iter(matches).next() 66 return iter(matches).next()
67 67
def next_state_with_char(self, value):
  """Return the unique successor state whose TransitionKey matches the
  character 'value', or None when no transition accepts it."""
  def char_matches(transition_key, char):
    return transition_key.matches_char(char)
  return self.__matches(char_matches, value)
70 70
def transition_state_with_key(self, key):
  """Return the successor state reached via 'key', or None.

  'key' may be a subkey of one of this state's TransitionKeys, but it must
  not partially overlap any TransitionKey of this state."""
  def covers(transition_key, subkey):
    return transition_key.is_superset_of_key(subkey)
  return self.__matches(covers, key)
73 75
74 class Dfa(Automaton): 76 class Dfa(Automaton):
75 77
76 def __init__(self, start_name, mapping): 78 def __init__(self, start_name, mapping):
77 super(Dfa, self).__init__() 79 super(Dfa, self).__init__()
78 self.__terminal_set = set() 80 self.__terminal_set = set()
79 name_map = {} 81 name_map = {}
80 for name, node_data in mapping.items(): 82 for name, node_data in mapping.items():
81 node = DfaState(name, node_data['action']) 83 node = DfaState(name, node_data['action'])
82 name_map[name] = node 84 name_map[name] = node
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
@staticmethod
def set_verify(verify):
  # Enable or disable the (class-wide) consistency checks that the
  # minimizer runs while building and merging partitions.
  DfaMinimizer.__verify = verify
203 205
def __init__(self, dfa):
  """Remember the dfa to minimize; helper structures start out unset."""
  self.__dfa = dfa
  # Populated later, see __create_initial_partitions.
  self.__id_map = self.__reverse_id_map = self.__alphabet = None
209 211
def __create_initial_partitions(self):
  '''Build the initial state partitioning plus the helper structures used by
  minimize(). Returns a pair (working_set, partitions) of StatePartition
  sets; also fills in the id maps, the alphabet and the transition table.'''
  assert not self.__id_map
  assert not self.__reverse_id_map
  assert not self.__alphabet

  # During minimization every state is identified by a small integer ID; a
  # set of states is represented as a set of state IDs.
  id_map = {}

  # Partition the states into the following groups:
  # - terminal states without action
  # - nonterminal states without action
  # - one group per action: terminal states which have the action
  # - one group per action: nonterminal states which have the action
  # The group descriptions are the keys of initial_partitions; the values
  # are sets of state IDs.
  initial_partitions = {}
  terminal_set = self.__dfa.terminal_set()
  all_keys = []  # Will accumulate all TransitionKeys in the dfa.

  # Visitor which fills in id_map, all_keys and initial_partitions.
  def record_state(state, visitor_state):
    state_id = len(id_map)
    id_map[state_id] = state
    action = state.action()
    all_keys.append(state.key_iter())
    if action:
      if state in terminal_set:
        group = ("terminal action", action)
      else:
        assert action.entry_action()
        group = ("nonterminal action", action)
    elif state in terminal_set:
      group = ("terminal set",)
    else:
      group = ("nonterminal set",)
    initial_partitions.setdefault(group, set()).add(state_id)
  self.__dfa.visit_all_states(record_state)

  # Wrap each group in a StatePartition; every partition starts out in the
  # working set.
  partitions = set()
  working_set = set()
  for state_ids in initial_partitions.values():
    partition = StatePartition(state_ids)
    partitions.add(partition)
    working_set.add(partition)

  reverse_id_map = {state: state_id for state_id, state in id_map.items()}
  assert len(id_map) == len(reverse_id_map)
  self.__reverse_id_map = reverse_id_map
  self.__id_map = id_map

  # The alphabet defines the TransitionKeys we need to check when deciding
  # whether two states of the dfa can be in the same partition. See the doc
  # string of TransitionKey.disjoint_keys.
  #
  # For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet is
  # {[a-b], [c-d], [e-h]}. If state S1 has a transition to S2 with
  # TransitionKey [a-d], and state S3 has a transition to S4 with
  # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This
  # becomes visible when we check the transitions for alphabet key [c-d]
  # (S1 steps to S2, S3 steps to S4).
  self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys)))

  # For each state, record per alphabet position the ID of the state
  # reached with that TransitionKey (None when there is no transition).
  transitions = {}
  for state_id, state in id_map.items():
    def target_id(key):
      target = state.transition_state_with_key(key)
      return reverse_id_map[target] if target else None
    transitions[state_id] = [target_id(key) for key in self.__alphabet]
  self.__transitions = transitions

  # Verify the created structures.
  if self.__verify:
    for partition in partitions:
      for state_id in partition:
        transition_state_array = transitions[state_id]
        state = id_map[state_id]
        for key_id, key in enumerate(self.__alphabet):
          transition_state_id = transition_state_array[key_id]
          transition_state = state.transition_state_with_key(key)
          if not transition_state:
            assert transition_state_id == None
          else:
            assert transition_state_id != None
            assert transition_state_id == reverse_id_map[transition_state]
  return (working_set, partitions)
274 303
@staticmethod
def __partition_count(partitions):
  # Total number of distinct state IDs covered by 'partitions'.
  covered = set()
  for partition in partitions:
    covered |= partition.set()
  return len(covered)
278 307
279 def __merge_partitions(self, partitions): 308 def __merge_partitions(self, partitions):
280 id_map = self.__id_map 309 id_map = self.__id_map
281 reverse_id_map = self.__reverse_id_map 310 reverse_id_map = self.__reverse_id_map
282 verify = self.__verify 311 verify = self.__verify
(...skipping 13 matching lines...) Expand all
296 'transitions' : {}, 325 'transitions' : {},
297 'terminal' : state in self.__dfa.terminal_set(), 326 'terminal' : state in self.__dfa.terminal_set(),
298 'action' : state.action(), 327 'action' : state.action(),
299 } 328 }
300 mapping[str(partition)] = node 329 mapping[str(partition)] = node
301 transition_array = transitions[state_id] 330 transition_array = transitions[state_id]
302 for key_id, key in enumerate(self.__alphabet): 331 for key_id, key in enumerate(self.__alphabet):
303 transition_id = transition_array[key_id] 332 transition_id = transition_array[key_id]
304 if transition_id == None: 333 if transition_id == None:
305 if verify: 334 if verify:
306 assert not state.key_matches(key) 335 assert not state.transition_state_with_key(key)
307 for s_id in state_ids: 336 for s_id in state_ids:
308 assert not id_map[s_id].key_matches(key) 337 assert not id_map[s_id].transition_state_with_key(key)
309 continue 338 continue
310 transition_partition = reverse_partition_map[transition_id] 339 transition_partition = reverse_partition_map[transition_id]
311 assert transition_partition 340 assert transition_partition
312 if verify: 341 if verify:
313 for s_id in state_ids: 342 for s_id in state_ids:
314 transition = id_map[s_id].key_matches(key) 343 transition = id_map[s_id].transition_state_with_key(key)
315 assert transition 344 assert transition
316 test_partition = reverse_partition_map[reverse_id_map[transition]] 345 test_partition = reverse_partition_map[reverse_id_map[transition]]
317 assert test_partition == transition_partition 346 assert test_partition == transition_partition
318 node['transitions'][key] = name_map[transition_partition] 347 node['transitions'][key] = name_map[transition_partition]
319 start_id = reverse_id_map[self.__dfa.start_state()] 348 start_id = reverse_id_map[self.__dfa.start_state()]
320 start_name = name_map[reverse_partition_map[start_id]] 349 start_name = name_map[reverse_partition_map[start_id]]
321 return (start_name, mapping) 350 return (start_name, mapping)
322 351
323 def minimize(self): 352 def minimize(self):
324 (working_set, partitions) = self.__partition() 353 '''Minimize the dfa. For the general idea of minimizing automata, see
354 http://en.wikipedia.org/wiki/DFA_minimization. In addition, we need to take
355 into account the actions associated with states, i.e., we cannot merge two
356 states which have different actions.'''
357 (working_set, partitions) = self.__create_initial_partitions()
325 node_count = self.__dfa.node_count() 358 node_count = self.__dfa.node_count()
326 id_map = self.__id_map 359 id_map = self.__id_map
327 reverse_id_map = self.__reverse_id_map 360 reverse_id_map = self.__reverse_id_map
328 transitions = self.__transitions 361 transitions = self.__transitions
329 key_range = range(0, len(self.__alphabet)) 362 key_range = range(0, len(self.__alphabet))
330 while working_set: 363 while working_set:
331 assert working_set <= partitions 364 assert working_set <= partitions
332 assert self.__partition_count(partitions) == node_count 365 assert self.__partition_count(partitions) == node_count
333 test_partition = iter(working_set).next() 366 test_partition = iter(working_set).next()
334 working_set.remove(test_partition) 367 working_set.remove(test_partition)
(...skipping 29 matching lines...) Expand all
364 elif len(intersection) <= len(difference): 397 elif len(intersection) <= len(difference):
365 working_set.add(intersection) 398 working_set.add(intersection)
366 else: 399 else:
367 working_set.add(difference) 400 working_set.add(difference)
368 if old_partitions: 401 if old_partitions:
369 partitions -= old_partitions 402 partitions -= old_partitions
370 if new_partitions: 403 if new_partitions:
371 partitions |= new_partitions 404 partitions |= new_partitions
372 (start_name, mapping) = self.__merge_partitions(partitions) 405 (start_name, mapping) = self.__merge_partitions(partitions)
373 return Dfa(start_name, mapping) 406 return Dfa(start_name, mapping)
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698