| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 152 yield (state.action(), last_position, len(string)) | 152 yield (state.action(), last_position, len(string)) |
| 153 | 153 |
| 154 def minimize(self): | 154 def minimize(self): |
| 155 return DfaMinimizer(self).minimize() | 155 return DfaMinimizer(self).minimize() |
| 156 | 156 |
| 157 class StatePartition(object): | 157 class StatePartition(object): |
| 158 | 158 |
| 159 def __init__(self, node_numbers): | 159 def __init__(self, node_numbers): |
| 160 self.__node_numbers = node_numbers | 160 self.__node_numbers = node_numbers |
| 161 assert self.__node_numbers | 161 assert self.__node_numbers |
| 162 self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0) | 162 self.__hash = None |
| 163 | 163 |
| 164 def set(self): | 164 def set(self): |
| 165 return self.__node_numbers | 165 return self.__node_numbers |
| 166 | 166 |
| 167 def __hash__(self): | 167 def __hash__(self): |
| 168 if not self.__hash: |
| 169 self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0) |
| 168 return self.__hash | 170 return self.__hash |
| 169 | 171 |
| 170 def __eq__(self, other): | 172 def __eq__(self, other): |
| 171 return (isinstance(other, self.__class__) and | 173 return (isinstance(other, self.__class__) and |
| 172 self.__node_numbers == other.__node_numbers) | 174 self.__node_numbers == other.__node_numbers) |
| 173 | 175 |
| 174 def __len__(self): | 176 def __len__(self): |
| 175 return len(self.__node_numbers) | 177 return len(self.__node_numbers) |
| 176 | 178 |
| 177 def __iter__(self): | 179 def __iter__(self): |
| 178 return self.__node_numbers.__iter__() | 180 return self.__node_numbers.__iter__() |
| 179 | 181 |
| 180 def __str__(self): | 182 def __str__(self): |
| 181 return str([x for x in self.__node_numbers]) | 183 return str([x for x in self.__node_numbers]) |
| 182 | 184 |
| 183 class DfaMinimizer: | 185 class DfaMinimizer: |
| 184 | 186 |
| 187 __verify = True |
| 188 |
| 189 @staticmethod |
| 190 def set_verify(verify): |
| 191 DfaMinimizer.__verify = verify |
| 192 |
| 185 def __init__(self, dfa): | 193 def __init__(self, dfa): |
| 186 self.__dfa = dfa | 194 self.__dfa = dfa |
| 195 self.__id_map = None |
| 196 self.__reverse_id_map = None |
| 197 self.__alphabet = None |
| 187 | 198 |
| 188 def __partition(self): | 199 def __partition(self): |
| 200 assert not self.__id_map |
| 201 assert not self.__reverse_id_map |
| 202 assert not self.__alphabet |
| 189 action_map = {} | 203 action_map = {} |
| 190 id_map = {} | 204 id_map = {} |
| 191 terminal_set = self.__dfa.terminal_set() | 205 terminal_set = self.__dfa.terminal_set() |
| 206 all_keys = [] |
| 192 def f(state, visitor_state): | 207 def f(state, visitor_state): |
| 193 node_number = len(id_map) | 208 state_id = len(id_map) |
| 194 id_map[node_number] = state | 209 id_map[state_id] = state |
| 195 action = state.action() | 210 action = state.action() |
| 211 all_keys.append(state.key_iter()) |
| 196 if action: | 212 if action: |
| 197 # TODO add this back | 213 # TODO add this back |
| 198 # assert state in self.__terminal_set | 214 # assert state in self.__terminal_set |
| 199 if state not in terminal_set: | 215 if state not in terminal_set: |
| 200 action = "nonterminal action " + str(action) | 216 action = "nonterminal action " + str(action) |
| 201 elif state in terminal_set: | 217 elif state in terminal_set: |
| 202 action = "terminal_set" | 218 action = "terminal_set" |
| 203 if not action in action_map: | 219 if not action in action_map: |
| 204 action_map[action] = set() | 220 action_map[action] = set() |
| 205 action_map[action].add(node_number) | 221 action_map[action].add(state_id) |
| 206 self.__dfa.visit_all_states(f) | 222 self.__dfa.visit_all_states(f) |
| 207 partitions = set() | 223 partitions = set() |
| 208 for p in action_map.values(): | 224 for p in action_map.values(): |
| 209 assert p | |
| 210 partitions.add(StatePartition(p)) | 225 partitions.add(StatePartition(p)) |
| 211 return (id_map, partitions) | 226 reverse_id_map = {v : k for k, v in id_map.items()} |
| 212 | 227 assert len(id_map) == len(reverse_id_map) |
| 213 def __generate_alphabet(self): | 228 self.__reverse_id_map = reverse_id_map |
| 214 keys = [] | 229 self.__id_map = id_map |
| 215 self.__dfa.visit_all_states(lambda s, acc: keys.append(s.key_iter())) | 230 self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys))) |
| 216 return TransitionKey.disjoint_keys(chain(*keys)) | 231 # map transitions wrt alphabet mapping |
| 217 | 232 transitions = {} |
| 218 @staticmethod | 233 for state_id, state in id_map.items(): |
| 219 def __find_partition(partitions, id): | 234 def f((key_id, key)): |
| 220 for p in partitions: | 235 transition = state.key_matches(key) |
| 221 if id in p: | 236 if transition: |
| 222 return p | 237 return reverse_id_map[transition] |
| 223 assert False | 238 return None |
| 224 | 239 transitions[state_id] = map(f, enumerate(self.__alphabet)) |
| 225 def __verify_partitions(self, id_map, partitions): | 240 self.__transitions = transitions |
| 226 assert len(partitions) <= len(id_map) | 241 # verify created structures |
| 227 alphabet = self.__generate_alphabet() | 242 if self.__verify: |
| 228 for partition in partitions: | 243 for partition in partitions: |
| 229 for key in alphabet: | |
| 230 first = True | |
| 231 mapped_partition = None | |
| 232 for state_id in partition: | 244 for state_id in partition: |
| 233 s = id_map[state_id].key_matches(key) | 245 transition_array = transitions[state_id] |
| 234 if s: | 246 state = id_map[state_id] |
| 235 s = self.__find_partition(partitions, s.node_number()) | 247 for key_id, key in enumerate(self.__alphabet): |
| 236 if first: | 248 transition_id = transition_array[key_id] |
| 237 first = False | 249 transition_state = state.key_matches(key) |
| 238 mapped_partition = s | 250 if not transition_state: |
| 239 assert mapped_partition == s | 251 assert transition_id == None |
| 252 else: |
| 253 assert transition_id != None |
| 254 assert transition_id == reverse_id_map[transition_state] |
| 255 return partitions |
| 240 | 256 |
| 241 @staticmethod | 257 @staticmethod |
| 242 def __partition_count(partitions): | 258 def __partition_count(partitions): |
| 243 return len(reduce(lambda acc, p: acc | p.set(), partitions, set())) | 259 return len(reduce(lambda acc, p: acc | p.set(), partitions, set())) |
| 244 | 260 |
| 245 def __merge_partitions(self, id_map, partitions): | 261 def __merge_partitions(self, partitions): |
| 262 id_map = self.__id_map |
| 263 reverse_id_map = self.__reverse_id_map |
| 264 verify = self.__verify |
| 246 mapping = {} | 265 mapping = {} |
| 247 name_map = {} | 266 name_map = {} |
| 267 reverse_partition_map = {} |
| 248 for partition in partitions: | 268 for partition in partitions: |
| 249 name_map[partition] = str(partition) | 269 name_map[partition] = str(partition) |
| 250 alphabet = self.__generate_alphabet() | 270 for state_id in partition: |
| 251 reverse_id_map = {v:k for k, v in id_map.items()} | 271 reverse_partition_map[state_id] = partition |
| 272 transitions = self.__transitions |
| 252 for partition in partitions: | 273 for partition in partitions: |
| 253 state_id = iter(partition).next() | 274 state_ids = list(partition) |
| 275 state_id = state_ids[0] |
| 254 state = id_map[state_id] | 276 state = id_map[state_id] |
| 255 node = { | 277 node = { |
| 256 'transitions' : {}, | 278 'transitions' : {}, |
| 257 'terminal' : state in self.__dfa.terminal_set(), | 279 'terminal' : state in self.__dfa.terminal_set(), |
| 258 'action' : state.action(), | 280 'action' : state.action(), |
| 259 } | 281 } |
| 260 mapping[str(partition)] = node | 282 mapping[str(partition)] = node |
| 261 for key in alphabet: | 283 transition_array = transitions[state_id] |
| 262 transition_state = state.key_matches(key) | 284 for key_id, key in enumerate(self.__alphabet): |
| 263 if not transition_state: | 285 transition_id = transition_array[key_id] |
| 286 if transition_id == None: |
| 287 if verify: |
| 288 assert not state.key_matches(key) |
| 289 for s_id in state_ids: |
| 290 assert not id_map[s_id].key_matches(key) |
| 264 continue | 291 continue |
| 265 transition_id = reverse_id_map[transition_state] | 292 transition_partition = reverse_partition_map[transition_id] |
| 266 transition_partition = self.__find_partition(partitions, transition_id) | |
| 267 assert transition_partition | 293 assert transition_partition |
| 294 if verify: |
| 295 for s_id in state_ids: |
| 296 transition = id_map[s_id].key_matches(key) |
| 297 assert transition |
| 298 test_partition = reverse_partition_map[reverse_id_map[transition]] |
| 299 assert test_partition == transition_partition |
| 268 node['transitions'][key] = name_map[transition_partition] | 300 node['transitions'][key] = name_map[transition_partition] |
| 269 start_id = reverse_id_map[self.__dfa.start_state()] | 301 start_id = reverse_id_map[self.__dfa.start_state()] |
| 270 start_name = name_map[self.__find_partition(partitions, start_id)] | 302 start_name = name_map[reverse_partition_map[start_id]] |
| 271 return (start_name, mapping) | 303 return (start_name, mapping) |
| 272 | 304 |
| 273 def minimize(self): | 305 def minimize(self): |
| 274 (id_map, partitions) = self.__partition() | 306 partitions = self.__partition() |
| 275 node_count = self.__dfa.node_count() | 307 node_count = self.__dfa.node_count() |
| 276 assert self.__partition_count(partitions) == node_count | 308 id_map = self.__id_map |
| 277 if len(partitions) == 1: | 309 reverse_id_map = self.__reverse_id_map |
| 278 return self.__dfa | 310 transitions = self.__transitions |
| 311 key_range = range(0, len(self.__alphabet)) |
| 279 working_set = set(partitions) | 312 working_set = set(partitions) |
| 280 # map alphabet | |
| 281 alphabet_mapping = {} | |
| 282 for i, key in enumerate(self.__generate_alphabet()): | |
| 283 alphabet_mapping[key] = i | |
| 284 key_range = range(0, len(alphabet_mapping)) | |
| 285 # map transitions wrt alphabet mapping | |
| 286 reverse_id_map = {v:k for k, v in id_map.items()} | |
| 287 transitions = {} | |
| 288 for id, state in id_map.items(): | |
| 289 def f((key, key_id)): | |
| 290 transition = state.key_matches(key) | |
| 291 if transition: | |
| 292 return reverse_id_map[transition] | |
| 293 return None | |
| 294 transitions[id] = map(f, alphabet_mapping.items()) | |
| 295 | |
| 296 while working_set: | 313 while working_set: |
| 297 # print "working_set %s partitions %s nodes %s" % (len(working_set), | |
| 298 # len(partitions), | |
| 299 # node_count) | |
| 300 assert working_set <= partitions | 314 assert working_set <= partitions |
| 301 assert self.__partition_count(partitions) == node_count | 315 assert self.__partition_count(partitions) == node_count |
| 302 test_partition = iter(working_set).next() | 316 test_partition = iter(working_set).next() |
| 303 working_set.remove(test_partition) | 317 working_set.remove(test_partition) |
| 304 test_array = [False for x in range(0, len(id_map))] | 318 test_array = [False for x in range(0, len(id_map))] |
| 305 for x in test_partition: | 319 for x in test_partition: |
| 306 test_array[x] = True | 320 test_array[x] = True |
| 307 for key_index in key_range: | 321 for key_index in key_range: |
| 308 map_into_partition = set() | 322 map_into_partition = set() |
| 309 for state_id, transition_array in transitions.items(): | 323 for state_id, transition_array in transitions.items(): |
| 310 transition_id = transition_array[key_index] | 324 transition_id = transition_array[key_index] |
| 311 if transition_id and test_array[transition_id]: | 325 if transition_id != None and test_array[transition_id]: |
| 312 map_into_partition.add(state_id) | 326 map_into_partition.add(state_id) |
| 313 if not map_into_partition: | 327 if not map_into_partition: |
| 314 continue | 328 continue |
| 315 new_partitions = set() | 329 new_partitions = set() |
| 316 old_partitions = set() | 330 old_partitions = set() |
| 317 for p in partitions: | 331 for p in partitions: |
| 318 intersection = p.set().intersection(map_into_partition) | 332 intersection = p.set().intersection(map_into_partition) |
| 319 if not intersection: | 333 if not intersection: |
| 320 continue | 334 continue |
| 321 difference = p.set().difference(map_into_partition) | 335 difference = p.set().difference(map_into_partition) |
| 322 if not difference: | 336 if not difference: |
| 323 continue | 337 continue |
| 324 intersection = StatePartition(intersection) | 338 intersection = StatePartition(intersection) |
| 325 difference = StatePartition(difference) | 339 difference = StatePartition(difference) |
| 326 old_partitions.add(p) | 340 old_partitions.add(p) |
| 327 new_partitions.add(intersection) | 341 new_partitions.add(intersection) |
| 328 new_partitions.add(difference) | 342 new_partitions.add(difference) |
| 329 if p in working_set: | 343 if p in working_set: |
| 330 working_set.remove(p) | 344 working_set.remove(p) |
| 331 working_set.add(intersection) | 345 working_set.add(intersection) |
| 332 working_set.add(difference) | 346 working_set.add(difference) |
| 333 elif len(intersection) <= len(difference): | 347 elif len(intersection) <= len(difference): |
| 334 working_set.add(intersection) | 348 working_set.add(intersection) |
| 335 else: | 349 else: |
| 336 working_set.add(difference) | 350 working_set.add(difference) |
| 337 if old_partitions: | 351 if old_partitions: |
| 338 partitions -= old_partitions | 352 partitions -= old_partitions |
| 339 if new_partitions: | 353 if new_partitions: |
| 340 partitions |= new_partitions | 354 partitions |= new_partitions |
| 341 # self.__verify_partitions(id_map, partitions) | 355 (start_name, mapping) = self.__merge_partitions(partitions) |
| 342 if len(partitions) == len(id_map): | |
| 343 return self.__dfa | |
| 344 (start_name, mapping) = self.__merge_partitions(id_map, partitions) | |
| 345 return Dfa(start_name, mapping) | 356 return Dfa(start_name, mapping) |
| OLD | NEW |