| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 from itertools import chain | 28 from itertools import chain |
| 29 from automaton import * | 29 from automaton import * |
| 30 from transition_keys import TransitionKey | 30 from transition_key import TransitionKey |
| 31 | 31 |
| 32 class DfaState(AutomatonState): | 32 class DfaState(AutomatonState): |
| 33 | 33 |
| 34 def __init__(self, name, action, transitions): | 34 def __init__(self, name, action, transitions): |
| 35 super(DfaState, self).__init__() | 35 super(DfaState, self).__init__() |
| 36 self.__name = name | 36 self.__name = name |
| 37 self.__transitions = transitions | 37 self.__transitions = transitions |
| 38 self.__action = action | 38 self.__action = action |
| 39 assert isinstance(action, Action) | 39 assert isinstance(action, Action) |
| 40 | 40 |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 109 assert self.__node_count == state_count | 109 assert self.__node_count == state_count |
| 110 | 110 |
| 111 def node_count(self): | 111 def node_count(self): |
| 112 return self.__node_count | 112 return self.__node_count |
| 113 | 113 |
| 114 def start_state(self): | 114 def start_state(self): |
| 115 return self.__start | 115 return self.__start |
| 116 | 116 |
| 117 def terminal_set(self): | 117 def terminal_set(self): |
| 118 return set(self.__terminal_set) | 118 return set(self.__terminal_set) |
| 119 | |
| 120 def minimize(self): | |
| 121 return DfaMinimizer(self).minimize() | |
| 122 | |
| 123 class StatePartition(object): | |
| 124 | |
| 125 def __init__(self, node_numbers): | |
| 126 self.__node_numbers = node_numbers | |
| 127 assert self.__node_numbers | |
| 128 self.__hash = None | |
| 129 | |
| 130 def set(self): | |
| 131 return self.__node_numbers | |
| 132 | |
| 133 def __hash__(self): | |
| 134 if not self.__hash: | |
| 135 self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0) | |
| 136 return self.__hash | |
| 137 | |
| 138 def __eq__(self, other): | |
| 139 return (isinstance(other, self.__class__) and | |
| 140 self.__node_numbers == other.__node_numbers) | |
| 141 | |
| 142 def __len__(self): | |
| 143 return len(self.__node_numbers) | |
| 144 | |
| 145 def __iter__(self): | |
| 146 return self.__node_numbers.__iter__() | |
| 147 | |
| 148 def __str__(self): | |
| 149 return str([x for x in self.__node_numbers]) | |
| 150 | |
| 151 class DfaMinimizer: | |
| 152 | |
| 153 __verify = True | |
| 154 | |
| 155 @staticmethod | |
| 156 def set_verify(verify): | |
| 157 DfaMinimizer.__verify = verify | |
| 158 | |
| 159 def __init__(self, dfa): | |
| 160 self.__dfa = dfa | |
| 161 self.__id_to_state = None | |
| 162 self.__state_to_id = None | |
| 163 self.__alphabet = None | |
| 164 | |
| 165 def __create_initial_partitions(self): | |
| 166 assert not self.__id_to_state | |
| 167 assert not self.__state_to_id | |
| 168 assert not self.__alphabet | |
| 169 | |
| 170 # For the minimization, we associate each state with an ID. A set of states | |
| 171 # is represented as a set of state IDs. | |
| 172 id_to_state = {} | |
| 173 | |
| 174 # First we partition the states into the following groups: | |
| 175 # - terminal states without action | |
| 176 # - nonterminal states without action | |
| 177 # - one group per action: terminal states which have the action | |
| 178 # - one group per action: nonterminal states which have the action | |
| 179 # These are the keys of initial_partitions. The values are lists of state | |
| 180 # IDs. | |
| 181 initial_partitions = {} | |
| 182 terminal_set = self.__dfa.terminal_set() | |
| 183 all_keys = [] # Will contain all TransitionKeys in the dfa. | |
| 184 | |
| 185 # f fills in initial_partitions, id_to_state and all_keys. | |
| 186 def f(state, visitor_state): | |
| 187 state_id = len(id_to_state) | |
| 188 id_to_state[state_id] = state | |
| 189 action = state.action() | |
| 190 all_keys.append(state.key_iter()) | |
| 191 if state in terminal_set: | |
| 192 key = ("terminal set", action) | |
| 193 else: | |
| 194 # Match actions are valid only if we have matched the whole token, not | |
| 195 # just some sub-part of it. | |
| 196 # TODO(dcarney): restore | |
| 197 # assert not action.is_match_action() | |
| 198 key = ("nonterminal set", action) | |
| 199 if not key in initial_partitions: | |
| 200 initial_partitions[key] = set() | |
| 201 initial_partitions[key].add(state_id) | |
| 202 self.__dfa.visit_all_states(f) | |
| 203 partitions = set() | |
| 204 working_set = set() | |
| 205 | |
| 206 # Create StatePartition objects: | |
| 207 for k, p in initial_partitions.items(): | |
| 208 p = StatePartition(p) | |
| 209 partitions.add(p) | |
| 210 working_set.add(p) | |
| 211 state_to_id = {v : k for k, v in id_to_state.items()} | |
| 212 assert len(id_to_state) == len(state_to_id) | |
| 213 self.__state_to_id = state_to_id | |
| 214 self.__id_to_state = id_to_state | |
| 215 | |
| 216 # The alphabet defines the TransitionKeys we need to check when dedicing if | |
| 217 # two states of the dfa can be in the same partition. See the doc string of | |
| 218 # TransitionKey.disjoint_keys. | |
| 219 | |
| 220 # For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet is | |
| 221 # {[a-b], [c-d], [e-h]}. If state S1 has a transition to S2 with | |
| 222 # TransitionKey [a-d], and state S3 has a transition to S4 with | |
| 223 # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This will | |
| 224 # become clear when we check the transition for TransitionKey [c-d] (S1 has | |
| 225 # a transition to S2, S3 has a transition to S4). | |
| 226 encoding = self.__dfa.encoding() | |
| 227 self.__alphabet = list( | |
| 228 TransitionKey.disjoint_keys(encoding, chain(*all_keys))) | |
| 229 | |
| 230 # For each state and each TransitionKey in the alphabet, find out which | |
| 231 # transition we take from the state with the TransitionKey. | |
| 232 transitions = {} | |
| 233 for state_id, state in id_to_state.items(): | |
| 234 def f((key_id, key)): | |
| 235 transition_state = state.transition_state_for_key(key) | |
| 236 if transition_state: | |
| 237 return state_to_id[transition_state] | |
| 238 return None | |
| 239 transitions[state_id] = map(f, enumerate(self.__alphabet)) | |
| 240 self.__transitions = transitions | |
| 241 # verify created structures | |
| 242 if self.__verify: | |
| 243 for partition in partitions: | |
| 244 for state_id in partition: | |
| 245 transition_state_array = transitions[state_id] | |
| 246 state = id_to_state[state_id] | |
| 247 for key_id, key in enumerate(self.__alphabet): | |
| 248 transition_state_id = transition_state_array[key_id] | |
| 249 transition_state = state.transition_state_for_key(key) | |
| 250 if not transition_state: | |
| 251 assert transition_state_id == None | |
| 252 else: | |
| 253 assert transition_state_id != None | |
| 254 assert transition_state_id == state_to_id[transition_state] | |
| 255 return (working_set, partitions) | |
| 256 | |
| 257 @staticmethod | |
| 258 def __partition_count(partitions): | |
| 259 return len(reduce(lambda acc, p: acc | p.set(), partitions, set())) | |
| 260 | |
| 261 def __create_states_from_partitions(self, partitions): | |
| 262 '''Creates a new set of states where each state corresponds to a | |
| 263 partition.''' | |
| 264 id_to_state = self.__id_to_state | |
| 265 state_to_id = self.__state_to_id | |
| 266 verify = self.__verify | |
| 267 partition_to_name = {} | |
| 268 state_id_to_partition = {} | |
| 269 | |
| 270 # Fill in partition_to_name and state_id_to_partition. | |
| 271 for partition in partitions: | |
| 272 partition_to_name[partition] = str(partition) | |
| 273 for state_id in partition: | |
| 274 state_id_to_partition[state_id] = partition | |
| 275 transitions = self.__transitions | |
| 276 | |
| 277 # Create nodes for partitions. | |
| 278 partition_name_to_node = {} | |
| 279 for partition in partitions: | |
| 280 state_ids = list(partition) | |
| 281 # state is a representative state for the partition, and state_id is it's | |
| 282 # ID. To check the transitions between partitions, it's enough to check | |
| 283 # transitions from the representative state. All other states will have | |
| 284 # equivalent transitions, that is, transitions which transition into the | |
| 285 # same partition. | |
| 286 state_id = state_ids[0] | |
| 287 state = id_to_state[state_id] | |
| 288 node = { | |
| 289 'transitions' : {}, | |
| 290 'terminal' : state in self.__dfa.terminal_set(), | |
| 291 'action' : state.action(), | |
| 292 } | |
| 293 partition_name_to_node[str(partition)] = node | |
| 294 transition_key_to_state_id = transitions[state_id] | |
| 295 for key_id, key in enumerate(self.__alphabet): | |
| 296 transition_state_id = transition_key_to_state_id[key_id] | |
| 297 if transition_state_id == None: | |
| 298 if verify: | |
| 299 # There is no transition for that key from state; check that no | |
| 300 # other states in the partition have a transition with that key | |
| 301 # either. | |
| 302 assert not state.transition_state_for_key(key) | |
| 303 for s_id in state_ids: | |
| 304 assert not id_to_state[s_id].transition_state_for_key(key) | |
| 305 continue | |
| 306 transition_partition = state_id_to_partition[transition_state_id] | |
| 307 assert transition_partition | |
| 308 if verify: | |
| 309 # There is a transition for that key from state; check that all other | |
| 310 # states in the partition have an equivalent transition (transition | |
| 311 # into the same partition). | |
| 312 for s_id in state_ids: | |
| 313 transition = id_to_state[s_id].transition_state_for_key(key) | |
| 314 assert transition | |
| 315 test_partition = state_id_to_partition[state_to_id[transition]] | |
| 316 assert test_partition == transition_partition | |
| 317 # Record the transition between partitions. | |
| 318 node['transitions'][key] = partition_to_name[transition_partition] | |
| 319 start_id = state_to_id[self.__dfa.start_state()] | |
| 320 start_name = partition_to_name[state_id_to_partition[start_id]] | |
| 321 return (start_name, partition_name_to_node) | |
| 322 | |
| 323 def minimize(self): | |
| 324 '''Minimize the dfa. For the general idea of minimizing automata, see | |
| 325 http://en.wikipedia.org/wiki/DFA_minimization. In addition, we need to take | |
| 326 account the actions associated to stages, i.e., we cannot merge two states | |
| 327 which have different actions.''' | |
| 328 (working_set, partitions) = self.__create_initial_partitions() | |
| 329 node_count = self.__dfa.node_count() | |
| 330 id_to_state = self.__id_to_state | |
| 331 state_to_id = self.__state_to_id | |
| 332 # transitions is a 2-dimensional array indexed by state_id and index of the | |
| 333 # TransitionKey in the alphabet. | |
| 334 # transitions[state_id][transition_key_ix] = transition_state_id | |
| 335 transitions = self.__transitions | |
| 336 key_range = range(0, len(self.__alphabet)) | |
| 337 while working_set: | |
| 338 assert working_set <= partitions | |
| 339 assert self.__partition_count(partitions) == node_count | |
| 340 # We split other partitions according to test_partition: If a partition | |
| 341 # contains two states S1 and S2, such that S1 transitions to a state in | |
| 342 # test_partition with a TransitionKey K, and S2 transitions to a state | |
| 343 # *not* in test_partition with the same K, then S1 and S2 cannot be in the | |
| 344 # same partition. | |
| 345 test_partition = iter(working_set).next() | |
| 346 working_set.remove(test_partition) | |
| 347 state_in_test_partition = [False] * len(id_to_state) | |
| 348 for state_id in test_partition: | |
| 349 state_in_test_partition[state_id] = True | |
| 350 for key_index in key_range: | |
| 351 # states_transitioning_to_test_partition will contain the state_ids of | |
| 352 # the states (across all partitions) which transition into | |
| 353 # test_partition (any state within test_partition) with that key. | |
| 354 states_transitioning_to_test_partition = set() | |
| 355 for state_id, transition_key_to_state_id in transitions.items(): | |
| 356 transition_state_id = transition_key_to_state_id[key_index] | |
| 357 if (transition_state_id and | |
| 358 state_in_test_partition[transition_state_id]): | |
| 359 states_transitioning_to_test_partition.add(state_id) | |
| 360 if not states_transitioning_to_test_partition: | |
| 361 continue | |
| 362 # For each partition, we need to split it in two: {states which | |
| 363 # transition into test_partition, states which don't}. | |
| 364 new_partitions = set() | |
| 365 old_partitions = set() | |
| 366 for p in partitions: | |
| 367 intersection = p.set().intersection( | |
| 368 states_transitioning_to_test_partition) | |
| 369 if not intersection: | |
| 370 continue | |
| 371 difference = p.set().difference( | |
| 372 states_transitioning_to_test_partition) | |
| 373 if not difference: | |
| 374 continue | |
| 375 intersection = StatePartition(intersection) | |
| 376 difference = StatePartition(difference) | |
| 377 old_partitions.add(p) | |
| 378 new_partitions.add(intersection) | |
| 379 new_partitions.add(difference) | |
| 380 if p in working_set: | |
| 381 working_set.remove(p) | |
| 382 working_set.add(intersection) | |
| 383 working_set.add(difference) | |
| 384 elif len(intersection) <= len(difference): | |
| 385 working_set.add(intersection) | |
| 386 else: | |
| 387 working_set.add(difference) | |
| 388 if old_partitions: | |
| 389 partitions -= old_partitions | |
| 390 if new_partitions: | |
| 391 partitions |= new_partitions | |
| 392 (start_name, mapping) = self.__create_states_from_partitions(partitions) | |
| 393 return Dfa(self.__dfa.encoding(), start_name, mapping) | |
| OLD | NEW |