| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 151 class DfaMinimizer: | 151 class DfaMinimizer: |
| 152 | 152 |
| 153 __verify = True | 153 __verify = True |
| 154 | 154 |
| 155 @staticmethod | 155 @staticmethod |
| 156 def set_verify(verify): | 156 def set_verify(verify): |
| 157 DfaMinimizer.__verify = verify | 157 DfaMinimizer.__verify = verify |
| 158 | 158 |
| 159 def __init__(self, dfa): | 159 def __init__(self, dfa): |
| 160 self.__dfa = dfa | 160 self.__dfa = dfa |
| 161 self.__id_map = None | 161 self.__id_to_state = None |
| 162 self.__reverse_id_map = None | 162 self.__state_to_id = None |
| 163 self.__alphabet = None | 163 self.__alphabet = None |
| 164 | 164 |
| 165 def __create_initial_partitions(self): | 165 def __create_initial_partitions(self): |
| 166 assert not self.__id_map | 166 assert not self.__id_to_state |
| 167 assert not self.__reverse_id_map | 167 assert not self.__state_to_id |
| 168 assert not self.__alphabet | 168 assert not self.__alphabet |
| 169 | 169 |
| 170 # For the minimization, we associate each state with an ID. A set of states | 170 # For the minimization, we associate each state with an ID. A set of states |
| 171 # is represented as a set of state IDs. | 171 # is represented as a set of state IDs. |
| 172 id_map = {} | 172 id_to_state = {} |
| 173 | 173 |
| 174 # First we partition the states into the following groups: | 174 # First we partition the states into the following groups: |
| 175 # - terminal states without action | 175 # - terminal states without action |
| 176 # - nonterminal states without action | 176 # - nonterminal states without action |
| 177 # - one group per action: terminal states which have the action | 177 # - one group per action: terminal states which have the action |
| 178 # - one group per action: nonterminal states which have the action | 178 # - one group per action: nonterminal states which have the action |
| 179 # These are the keys of initial_partitions. The values are lists of state | 179 # These are the keys of initial_partitions. The values are lists of state |
| 180 # IDs. | 180 # IDs. |
| 181 initial_partitions = {} | 181 initial_partitions = {} |
| 182 terminal_set = self.__dfa.terminal_set() | 182 terminal_set = self.__dfa.terminal_set() |
| 183 all_keys = [] # Will contain all TransitionKeys in the dfa. | 183 all_keys = [] # Will contain all TransitionKeys in the dfa. |
| 184 | 184 |
| 185 # f fills in initial_partitions, id_map and all_keys. | 185 # f fills in initial_partitions, id_to_state and all_keys. |
| 186 def f(state, visitor_state): | 186 def f(state, visitor_state): |
| 187 state_id = len(id_map) | 187 state_id = len(id_to_state) |
| 188 id_map[state_id] = state | 188 id_to_state[state_id] = state |
| 189 action = state.action() | 189 action = state.action() |
| 190 all_keys.append(state.key_iter()) | 190 all_keys.append(state.key_iter()) |
| 191 if action: | 191 if action: |
| 192 if state not in terminal_set: | 192 if state not in terminal_set: |
| 193 assert action.entry_action() | 193 assert action.entry_action() |
| 194 key = ("nonterminal action", action) | 194 key = ("nonterminal action", action) |
| 195 else: | 195 else: |
| 196 key = ("terminal action", action) | 196 key = ("terminal action", action) |
| 197 elif state in terminal_set: | 197 elif state in terminal_set: |
| 198 key = ("terminal set",) | 198 key = ("terminal set",) |
| 199 else: | 199 else: |
| 200 key = ("nonterminal set",) | 200 key = ("nonterminal set",) |
| 201 if not key in initial_partitions: | 201 if not key in initial_partitions: |
| 202 initial_partitions[key] = set() | 202 initial_partitions[key] = set() |
| 203 initial_partitions[key].add(state_id) | 203 initial_partitions[key].add(state_id) |
| 204 self.__dfa.visit_all_states(f) | 204 self.__dfa.visit_all_states(f) |
| 205 partitions = set() | 205 partitions = set() |
| 206 working_set = set() | 206 working_set = set() |
| 207 | 207 |
| 208 # Create StatePartition objects: | 208 # Create StatePartition objects: |
| 209 for k, p in initial_partitions.items(): | 209 for k, p in initial_partitions.items(): |
| 210 p = StatePartition(p) | 210 p = StatePartition(p) |
| 211 partitions.add(p) | 211 partitions.add(p) |
| 212 working_set.add(p) | 212 working_set.add(p) |
| 213 reverse_id_map = {v : k for k, v in id_map.items()} | 213 state_to_id = {v : k for k, v in id_to_state.items()} |
| 214 assert len(id_map) == len(reverse_id_map) | 214 assert len(id_to_state) == len(state_to_id) |
| 215 self.__reverse_id_map = reverse_id_map | 215 self.__state_to_id = state_to_id |
| 216 self.__id_map = id_map | 216 self.__id_to_state = id_to_state |
| 217 | 217 |
| 218 # The alphabet defines the TransitionKeys we need to check when dedicing if | 218 # The alphabet defines the TransitionKeys we need to check when dedicing if |
| 219 # to states of the dfa can be in the same partition. See the doc string of | 219 # two states of the dfa can be in the same partition. See the doc string of |
| 220 # TransitionKey.disjoint_keys. | 220 # TransitionKey.disjoint_keys. |
| 221 | 221 |
| 222 # For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet is | 222 # For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet is |
| 223 # {[a-b], [c-d], [e-h]}. If state S1 has a transition to S2 with | 223 # {[a-b], [c-d], [e-h]}. If state S1 has a transition to S2 with |
| 224 # TransitionKey [a-d], and state S3 has a transition to S4 with | 224 # TransitionKey [a-d], and state S3 has a transition to S4 with |
| 225 # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This will | 225 # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This will |
| 226 # become clear when we check the transition for TransitionKey [c-d] (S1 has | 226 # become clear when we check the transition for TransitionKey [c-d] (S1 has |
| 227 # a transition to S2, S3 has a transition to S4). | 227 # a transition to S2, S3 has a transition to S4). |
| 228 self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys))) | 228 self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys))) |
| 229 | 229 |
| 230 # For each state and each TransitionKey in the alphabet, find out which | 230 # For each state and each TransitionKey in the alphabet, find out which |
| 231 # transition we take from the state with the TransitionKey. | 231 # transition we take from the state with the TransitionKey. |
| 232 transitions = {} | 232 transitions = {} |
| 233 for state_id, state in id_map.items(): | 233 for state_id, state in id_to_state.items(): |
| 234 def f((key_id, key)): | 234 def f((key_id, key)): |
| 235 transition_state = state.transition_state_for_key(key) | 235 transition_state = state.transition_state_for_key(key) |
| 236 if transition_state: | 236 if transition_state: |
| 237 return reverse_id_map[transition_state] | 237 return state_to_id[transition_state] |
| 238 return None | 238 return None |
| 239 transitions[state_id] = map(f, enumerate(self.__alphabet)) | 239 transitions[state_id] = map(f, enumerate(self.__alphabet)) |
| 240 self.__transitions = transitions | 240 self.__transitions = transitions |
| 241 # verify created structures | 241 # verify created structures |
| 242 if self.__verify: | 242 if self.__verify: |
| 243 for partition in partitions: | 243 for partition in partitions: |
| 244 for state_id in partition: | 244 for state_id in partition: |
| 245 transition_state_array = transitions[state_id] | 245 transition_state_array = transitions[state_id] |
| 246 state = id_map[state_id] | 246 state = id_to_state[state_id] |
| 247 for key_id, key in enumerate(self.__alphabet): | 247 for key_id, key in enumerate(self.__alphabet): |
| 248 transition_state_id = transition_state_array[key_id] | 248 transition_state_id = transition_state_array[key_id] |
| 249 transition_state = state.transition_state_for_key(key) | 249 transition_state = state.transition_state_for_key(key) |
| 250 if not transition_state: | 250 if not transition_state: |
| 251 assert transition_state_id == None | 251 assert transition_state_id == None |
| 252 else: | 252 else: |
| 253 assert transition_state_id != None | 253 assert transition_state_id != None |
| 254 assert transition_state_id == reverse_id_map[transition_state] | 254 assert transition_state_id == state_to_id[transition_state] |
| 255 return (working_set, partitions) | 255 return (working_set, partitions) |
| 256 | 256 |
| 257 @staticmethod | 257 @staticmethod |
| 258 def __partition_count(partitions): | 258 def __partition_count(partitions): |
| 259 return len(reduce(lambda acc, p: acc | p.set(), partitions, set())) | 259 return len(reduce(lambda acc, p: acc | p.set(), partitions, set())) |
| 260 | 260 |
| 261 def __merge_partitions(self, partitions): | 261 def __create_states_from_partitions(self, partitions): |
| 262 id_map = self.__id_map | 262 '''Creates a new set of states where each state corresponds to a |
| 263 reverse_id_map = self.__reverse_id_map | 263 partition.''' |
| 264 id_to_state = self.__id_to_state |
| 265 state_to_id = self.__state_to_id |
| 264 verify = self.__verify | 266 verify = self.__verify |
| 265 mapping = {} | 267 partition_to_name = {} |
| 266 name_map = {} | 268 state_id_to_partition = {} |
| 267 reverse_partition_map = {} | 269 |
| 270 # Fill in partition_to_name and state_id_to_partition. |
| 268 for partition in partitions: | 271 for partition in partitions: |
| 269 name_map[partition] = str(partition) | 272 partition_to_name[partition] = str(partition) |
| 270 for state_id in partition: | 273 for state_id in partition: |
| 271 reverse_partition_map[state_id] = partition | 274 state_id_to_partition[state_id] = partition |
| 272 transitions = self.__transitions | 275 transitions = self.__transitions |
| 276 |
| 277 # Create nodes for partitions. |
| 278 partition_name_to_node = {} |
| 273 for partition in partitions: | 279 for partition in partitions: |
| 274 state_ids = list(partition) | 280 state_ids = list(partition) |
| 281 # state is a representative state for the partition, and state_id is it's |
| 282 # ID. To check the transitions between partitions, it's enough to check |
| 283 # transitions from the representative state. All other states will have |
| 284 # equivalent transitions, that is, transitions which transition into the |
| 285 # same partition. |
| 275 state_id = state_ids[0] | 286 state_id = state_ids[0] |
| 276 state = id_map[state_id] | 287 state = id_to_state[state_id] |
| 277 node = { | 288 node = { |
| 278 'transitions' : {}, | 289 'transitions' : {}, |
| 279 'terminal' : state in self.__dfa.terminal_set(), | 290 'terminal' : state in self.__dfa.terminal_set(), |
| 280 'action' : state.action(), | 291 'action' : state.action(), |
| 281 } | 292 } |
| 282 mapping[str(partition)] = node | 293 partition_name_to_node[str(partition)] = node |
| 283 transition_array = transitions[state_id] | 294 transition_key_to_state_id = transitions[state_id] |
| 284 for key_id, key in enumerate(self.__alphabet): | 295 for key_id, key in enumerate(self.__alphabet): |
| 285 transition_id = transition_array[key_id] | 296 transition_state_id = transition_key_to_state_id[key_id] |
| 286 if transition_id == None: | 297 if transition_state_id == None: |
| 287 if verify: | 298 if verify: |
| 299 # There is no transition for that key from state; check that no |
| 300 # other states in the partition have a transition with that key |
| 301 # either. |
| 288 assert not state.transition_state_for_key(key) | 302 assert not state.transition_state_for_key(key) |
| 289 for s_id in state_ids: | 303 for s_id in state_ids: |
| 290 assert not id_map[s_id].transition_state_for_key(key) | 304 assert not id_to_state[s_id].transition_state_for_key(key) |
| 291 continue | 305 continue |
| 292 transition_partition = reverse_partition_map[transition_id] | 306 transition_partition = state_id_to_partition[transition_state_id] |
| 293 assert transition_partition | 307 assert transition_partition |
| 294 if verify: | 308 if verify: |
| 309 # There is a transition for that key from state; check that all other |
| 310 # states in the partition have an equivalent transition (transition |
| 311 # into the same partition). |
| 295 for s_id in state_ids: | 312 for s_id in state_ids: |
| 296 transition = id_map[s_id].transition_state_for_key(key) | 313 transition = id_to_state[s_id].transition_state_for_key(key) |
| 297 assert transition | 314 assert transition |
| 298 test_partition = reverse_partition_map[reverse_id_map[transition]] | 315 test_partition = state_id_to_partition[state_to_id[transition]] |
| 299 assert test_partition == transition_partition | 316 assert test_partition == transition_partition |
| 300 node['transitions'][key] = name_map[transition_partition] | 317 # Record the transition between partitions. |
| 301 start_id = reverse_id_map[self.__dfa.start_state()] | 318 node['transitions'][key] = partition_to_name[transition_partition] |
| 302 start_name = name_map[reverse_partition_map[start_id]] | 319 start_id = state_to_id[self.__dfa.start_state()] |
| 303 return (start_name, mapping) | 320 start_name = partition_to_name[state_id_to_partition[start_id]] |
| 321 return (start_name, partition_name_to_node) |
| 304 | 322 |
| 305 def minimize(self): | 323 def minimize(self): |
| 306 '''Minimize the dfa. For the general idea of minimizing automata, see | 324 '''Minimize the dfa. For the general idea of minimizing automata, see |
| 307 http://en.wikipedia.org/wiki/DFA_minimization. In addition, we need to take | 325 http://en.wikipedia.org/wiki/DFA_minimization. In addition, we need to take |
| 308 account the actions associated to stages, i.e., we cannot merge two states | 326 account the actions associated to stages, i.e., we cannot merge two states |
| 309 which have different actions.''' | 327 which have different actions.''' |
| 310 (working_set, partitions) = self.__create_initial_partitions() | 328 (working_set, partitions) = self.__create_initial_partitions() |
| 311 node_count = self.__dfa.node_count() | 329 node_count = self.__dfa.node_count() |
| 312 id_map = self.__id_map | 330 id_to_state = self.__id_to_state |
| 313 reverse_id_map = self.__reverse_id_map | 331 state_to_id = self.__state_to_id |
| 332 # transitions is a 2-dimensional array indexed by state_id and index of the |
| 333 # TransitionKey in the alphabet. |
| 334 # transitions[state_id][transition_key_ix] = transition_state_id |
| 314 transitions = self.__transitions | 335 transitions = self.__transitions |
| 315 key_range = range(0, len(self.__alphabet)) | 336 key_range = range(0, len(self.__alphabet)) |
| 316 while working_set: | 337 while working_set: |
| 317 assert working_set <= partitions | 338 assert working_set <= partitions |
| 318 assert self.__partition_count(partitions) == node_count | 339 assert self.__partition_count(partitions) == node_count |
| 340 # We split other partitions according to test_partition: If a partition |
| 341 # contains two states S1 and S2, such that S1 transitions to a state in |
| 342 # test_partition with a TransitionKey K, and S2 transitions to a state |
| 343 # *not* in test_partition with the same K, then S1 and S2 cannot be in the |
| 344 # same partition. |
| 319 test_partition = iter(working_set).next() | 345 test_partition = iter(working_set).next() |
| 320 working_set.remove(test_partition) | 346 working_set.remove(test_partition) |
| 321 test_array = [False for x in range(0, len(id_map))] | 347 state_in_test_partition = [False] * len(id_to_state) |
| 322 for x in test_partition: | 348 for state_id in test_partition: |
| 323 test_array[x] = True | 349 state_in_test_partition[state_id] = True |
| 324 for key_index in key_range: | 350 for key_index in key_range: |
| 325 map_into_partition = set() | 351 # states_transitioning_to_test_partition will contain the state_ids of |
| 326 for state_id, transition_array in transitions.items(): | 352 # the states (across all partitions) which transition into |
| 327 transition_id = transition_array[key_index] | 353 # test_partition (any state within test_partition) with that key. |
| 328 if transition_id != None and test_array[transition_id]: | 354 states_transitioning_to_test_partition = set() |
| 329 map_into_partition.add(state_id) | 355 for state_id, transition_key_to_state_id in transitions.items(): |
| 330 if not map_into_partition: | 356 transition_state_id = transition_key_to_state_id[key_index] |
| 357 if (transition_state_id and |
| 358 state_in_test_partition[transition_state_id]): |
| 359 states_transitioning_to_test_partition.add(state_id) |
| 360 if not states_transitioning_to_test_partition: |
| 331 continue | 361 continue |
| 362 # For each partition, we need to split it in two: {states which |
| 363 # transition into test_partition, states which don't}. |
| 332 new_partitions = set() | 364 new_partitions = set() |
| 333 old_partitions = set() | 365 old_partitions = set() |
| 334 for p in partitions: | 366 for p in partitions: |
| 335 intersection = p.set().intersection(map_into_partition) | 367 intersection = p.set().intersection( |
| 368 states_transitioning_to_test_partition) |
| 336 if not intersection: | 369 if not intersection: |
| 337 continue | 370 continue |
| 338 difference = p.set().difference(map_into_partition) | 371 difference = p.set().difference( |
| 372 states_transitioning_to_test_partition) |
| 339 if not difference: | 373 if not difference: |
| 340 continue | 374 continue |
| 341 intersection = StatePartition(intersection) | 375 intersection = StatePartition(intersection) |
| 342 difference = StatePartition(difference) | 376 difference = StatePartition(difference) |
| 343 old_partitions.add(p) | 377 old_partitions.add(p) |
| 344 new_partitions.add(intersection) | 378 new_partitions.add(intersection) |
| 345 new_partitions.add(difference) | 379 new_partitions.add(difference) |
| 346 if p in working_set: | 380 if p in working_set: |
| 347 working_set.remove(p) | 381 working_set.remove(p) |
| 348 working_set.add(intersection) | 382 working_set.add(intersection) |
| 349 working_set.add(difference) | 383 working_set.add(difference) |
| 350 elif len(intersection) <= len(difference): | 384 elif len(intersection) <= len(difference): |
| 351 working_set.add(intersection) | 385 working_set.add(intersection) |
| 352 else: | 386 else: |
| 353 working_set.add(difference) | 387 working_set.add(difference) |
| 354 if old_partitions: | 388 if old_partitions: |
| 355 partitions -= old_partitions | 389 partitions -= old_partitions |
| 356 if new_partitions: | 390 if new_partitions: |
| 357 partitions |= new_partitions | 391 partitions |= new_partitions |
| 358 (start_name, mapping) = self.__merge_partitions(partitions) | 392 (start_name, mapping) = self.__create_states_from_partitions(partitions) |
| 359 return Dfa(start_name, mapping) | 393 return Dfa(start_name, mapping) |
| OLD | NEW |