| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 from types import TupleType | 28 from types import TupleType |
| 29 from transition_keys import TransitionKey |
| 29 from inspect import getmembers | 30 from inspect import getmembers |
| 30 | 31 |
| 31 | |
| 32 class NfaState: | 32 class NfaState: |
| 33 | 33 |
| 34 __epsilon_key = "epsilon" | |
| 35 __any_key = "any" | |
| 36 | |
| 37 @staticmethod | |
| 38 def epsilon_key(): | |
| 39 return NfaState.__epsilon_key | |
| 40 | |
| 41 @staticmethod | |
| 42 def any_key(): | |
| 43 return NfaState.__any_key | |
| 44 | |
| 45 def __init__(self, node_number): | 34 def __init__(self, node_number): |
| 46 self.__transitions = {} | 35 self.__transitions = {} |
| 47 self.__unclosed = set() | 36 self.__unclosed = set() |
| 48 self.__node_number = node_number | 37 self.__node_number = node_number |
| 49 self.__epsilon_closure = None | 38 self.__epsilon_closure = None |
| 50 | 39 |
| 51 def node_number(self): | 40 def node_number(self): |
| 52 return self.__node_number | 41 return self.__node_number |
| 53 | 42 |
| 54 def epsilon_closure(self): | 43 def epsilon_closure(self): |
| 55 return self.__epsilon_closure | 44 return self.__epsilon_closure |
| 56 | 45 |
| 57 def set_epsilon_closure(self, closure): | 46 def set_epsilon_closure(self, closure): |
| 58 assert self.is_closed() | 47 assert self.is_closed() |
| 59 assert self.__epsilon_closure == None | 48 assert self.__epsilon_closure == None |
| 60 self.__epsilon_closure = frozenset(closure) | 49 self.__epsilon_closure = frozenset(closure) |
| 61 | 50 |
| 62 def transitions(self): | 51 def transitions(self): |
| 63 assert self.is_closed() | 52 assert self.is_closed() |
| 64 return self.__transitions | 53 return self.__transitions |
| 65 | 54 |
| 66 def get_transition_set(self, key): | 55 def get_epsilon_transitions(self): |
| 56 key = TransitionKey.epsilon(); |
| 67 if not self.__transitions.has_key(key): return frozenset() | 57 if not self.__transitions.has_key(key): return frozenset() |
| 68 return frozenset(self.__transitions[key]) | 58 return frozenset(self.__transitions[key]) |
| 69 | 59 |
| 70 def __add_transition(self, key, value): | 60 def __add_transition(self, key, value): |
| 71 if value == None: | 61 if value == None: |
| 72 assert not self.is_closed(), "already closed" | 62 assert not self.is_closed(), "already closed" |
| 73 self.__unclosed.add(key) | 63 self.__unclosed.add(key) |
| 74 return | 64 return |
| 75 if not self.__transitions.has_key(key): | 65 if not self.__transitions.has_key(key): |
| 76 self.__transitions[key] = set() | 66 self.__transitions[key] = set() |
| 77 self.__transitions[key].add(value) | 67 self.__transitions[key].add(value) |
| 78 | 68 |
| 79 def add_character_transition(self, char): | 69 def add_unclosed_transition(self, key): |
| 80 self.__add_transition(char, None) | 70 assert key != TransitionKey.epsilon() |
| 81 | 71 self.__add_transition(key, None) |
| 82 def add_any_transition(self): | |
| 83 self.__add_transition(self.__any_key, None) | |
| 84 | |
| 85 def add_class_transition(self, class_data, inverted): | |
| 86 self.__add_transition('class_data', None) | |
| 87 | 72 |
| 88 def add_epsilon_transition(self, state): | 73 def add_epsilon_transition(self, state): |
| 89 assert state != None | 74 assert state != None |
| 90 self.__add_transition(self.__epsilon_key, state) | 75 self.__add_transition(TransitionKey.epsilon(), state) |
| 91 | 76 |
| 92 def is_closed(self): | 77 def is_closed(self): |
| 93 return self.__unclosed == None | 78 return self.__unclosed == None |
| 94 | 79 |
| 95 def close(self, end): | 80 def close(self, end): |
| 96 assert not self.is_closed() | 81 assert not self.is_closed() |
| 97 if end == None: | 82 if end == None: |
| 98 assert not self.__unclosed | 83 assert not self.__unclosed |
| 99 else: | 84 else: |
| 100 for key in self.__unclosed: | 85 for key in self.__unclosed: |
| 101 self.__add_transition(key, end) | 86 self.__add_transition(key, end) |
| 102 if not self.__unclosed: | 87 if not self.__unclosed: |
| 103 self.add_epsilon_transition(end) | 88 self.add_epsilon_transition(end) |
| 104 self.__unclosed = None | 89 self.__unclosed = None |
| 105 | 90 |
| 106 def matches(self, char): | 91 def __matches(self, match_func, value): |
| 107 matches = self.get_transition_set(char) | 92 matches = set() |
| 108 matches = matches.union(self.get_transition_set(self.__any_key)) | 93 for key, values in self.__transitions.items(): |
| 109 # TODO class matches | 94 if match_func(key, value): |
| 95 matches |= values |
| 110 return matches | 96 return matches |
| 111 | 97 |
| 98 def char_matches(self, value): |
| 99 return self.__matches((lambda k, v : k.matches_char(v)), value) |
| 100 |
| 101 def key_matches(self, value): |
| 102 return self.__matches((lambda k, v : k.matches_key(v)), value) |
| 103 |
| 112 @staticmethod | 104 @staticmethod |
| 113 def gather_transition_keys(state_set): | 105 def gather_transition_keys(state_set): |
| 114 keys = set() | 106 keys = set() |
| 115 for state in state_set: | 107 for state in state_set: |
| 116 for key, values in state.transitions().items(): | 108 keys |= set(state.__transitions.keys()) |
| 117 if key == NfaState.__epsilon_key: continue | 109 return TransitionKey.merge_key_set(keys) |
| 118 keys.add(key) | |
| 119 return keys | |
| 120 | 110 |
| 121 class NfaBuilder: | 111 class NfaBuilder: |
| 122 | 112 |
| 123 def __init__(self): | 113 def __init__(self): |
| 124 self.__operation_map = {} | 114 self.__operation_map = {} |
| 125 self.__members = getmembers(self) | 115 self.__members = getmembers(self) |
| 126 | 116 |
| 127 def __new_state(self): | 117 def __new_state(self): |
| 128 node_number = self.node_number | 118 node_number = self.node_number |
| 129 self.node_number = self.node_number + 1 | 119 self.node_number = self.node_number + 1 |
| (...skipping 26 matching lines...) Expand all Loading... |
| 156 (node, ends) = self.__process(graph[1]) | 146 (node, ends) = self.__process(graph[1]) |
| 157 start = self.__new_state() | 147 start = self.__new_state() |
| 158 start.add_epsilon_transition(node) | 148 start.add_epsilon_transition(node) |
| 159 return (start, ends + [start]) | 149 return (start, ends + [start]) |
| 160 | 150 |
| 161 def __cat(self, graph): | 151 def __cat(self, graph): |
| 162 (left, right) = (self.__process(graph[1]), self.__process(graph[2])) | 152 (left, right) = (self.__process(graph[1]), self.__process(graph[2])) |
| 163 self.__patch_ends(left[1], right[0]) | 153 self.__patch_ends(left[1], right[0]) |
| 164 return (left[0], right[1]) | 154 return (left[0], right[1]) |
| 165 | 155 |
| 166 def __literal(self, graph): | 156 def __key_state(self, key): |
| 167 contents = graph[1] | |
| 168 state = self.__new_state() | 157 state = self.__new_state() |
| 169 state.add_character_transition(contents) | 158 state.add_unclosed_transition(key) |
| 170 return (state, [state]) | 159 return (state, [state]) |
| 171 | 160 |
| 161 def __literal(self, graph): |
| 162 return self.__key_state(TransitionKey.single_char(graph[1])) |
| 163 |
| 172 def __class(self, graph): | 164 def __class(self, graph): |
| 173 state = self.__new_state() | 165 return self.__key_state(TransitionKey.character_class(False, graph[1])) |
| 174 state.add_class_transition(graph[1], False) | |
| 175 return (state, [state]) | |
| 176 | 166 |
| 177 def __not_class(self, graph): | 167 def __not_class(self, graph): |
| 178 state = self.__new_state() | 168 return self.__key_state(TransitionKey.character_class(True, graph[1])) |
| 179 state.add_class_transition(graph[1], True) | |
| 180 return (state, [state]) | |
| 181 | 169 |
| 182 def __any(self, graph): | 170 def __any(self, graph): |
| 183 state = self.__new_state() | 171 return self.__key_state(TransitionKey.any()) |
| 184 state.add_any_transition() | |
| 185 return (state, [state]) | |
| 186 | 172 |
| 187 def __process(self, graph): | 173 def __process(self, graph): |
| 188 assert type(graph) == TupleType | 174 assert type(graph) == TupleType |
| 189 method = "_NfaBuilder__" + graph[0].lower() | 175 method = "_NfaBuilder__" + graph[0].lower() |
| 190 if (not self.__operation_map.has_key(method)): | 176 if (not self.__operation_map.has_key(method)): |
| 191 matches = [func for (name, func) in self.__members if name == method] | 177 matches = [func for (name, func) in self.__members if name == method] |
| 192 assert len(matches) == 1 | 178 assert len(matches) == 1 |
| 193 self.__operation_map[method] = matches[0] | 179 self.__operation_map[method] = matches[0] |
| 194 return self.__operation_map[method](graph) | 180 return self.__operation_map[method](graph) |
| 195 | 181 |
| 196 def __patch_ends(self, ends, new_end): | 182 def __patch_ends(self, ends, new_end): |
| 197 for end in ends: | 183 for end in ends: |
| 198 end.close(new_end) | 184 end.close(new_end) |
| 199 | 185 |
| 200 def nfa(self, graph): | 186 def nfa(self, graph): |
| 201 self.node_number = 0 | 187 self.node_number = 0 |
| 202 (start, ends) = self.__process(graph) | 188 (start, ends) = self.__process(graph) |
| 203 end = self.__new_state() | 189 end = self.__new_state() |
| 204 self.__patch_ends(ends, end) | 190 self.__patch_ends(ends, end) |
| 205 end.close(None) | 191 end.close(None) |
| 206 return Nfa(start, end, self.node_number) | 192 return Nfa(start, end, self.node_number) |
| 207 | 193 |
| 208 | |
| 209 class Nfa: | 194 class Nfa: |
| 210 | 195 |
| 211 def __init__(self, start, end, nodes_created): | 196 def __init__(self, start, end, nodes_created): |
| 212 self.__start = start | 197 self.__start = start |
| 213 self.__end = end | 198 self.__end = end |
| 214 self.__epsilon_closure_computed = False | 199 self.__epsilon_closure_computed = False |
| 215 self.__verify(nodes_created) | 200 self.__verify(nodes_created) |
| 216 | 201 |
| 217 @staticmethod | 202 @staticmethod |
| 218 def __visit_edges(start, include_start, just_epsilon, function, state): | 203 def __visit_edges(edge, compute_next_edge, visitor, state): |
| 219 | |
| 220 def compute_next_edge(node, next_edge): | |
| 221 if just_epsilon: | |
| 222 return next_edge.union(node.get_transition_set(node.epsilon_key())) | |
| 223 else: | |
| 224 for key, values in node.transitions().items(): | |
| 225 next_edge = next_edge.union(values) | |
| 226 return next_edge | |
| 227 | |
| 228 edge = set() | |
| 229 if include_start: | |
| 230 edge.add(start) | |
| 231 else: | |
| 232 edge = compute_next_edge(start, edge) | |
| 233 | |
| 234 visited = set() | 204 visited = set() |
| 235 while edge: | 205 while edge: |
| 236 next_edge = set() | 206 next_edge = set() |
| 237 for node in edge: | 207 for node in edge: |
| 238 next_edge = compute_next_edge(node, next_edge) | 208 next_edge |= compute_next_edge(node) |
| 239 state = function(node, state) | 209 state = visitor(node, state) |
| 240 visited = visited.union(edge) | 210 visited |= edge |
| 241 edge = next_edge.difference(visited) | 211 edge = next_edge - visited |
| 242 return state | 212 return state |
| 243 | 213 |
| 244 def __visit_all_edges(self, function, state): | 214 def __visit_all_edges(self, function, state): |
| 245 return Nfa.__visit_edges(self.__start, True, False, function, state) | 215 edge = set([self.__start]) |
| 216 def next_edge(node): |
| 217 next = set() |
| 218 for values in node.transitions().values(): |
| 219 next |= values |
| 220 return next |
| 221 return Nfa.__visit_edges(edge, next_edge, function, state) |
| 246 | 222 |
| 247 def __verify(self, nodes_created): | 223 def __verify(self, nodes_created): |
| 248 def f(node, node_list): | 224 def f(node, node_list): |
| 249 assert node.is_closed() | 225 assert node.is_closed() |
| 250 node_list.append(node) | 226 node_list.append(node) |
| 251 return node_list | 227 return node_list |
| 252 node_list = self.__visit_all_edges(f, []) | 228 node_list = self.__visit_all_edges(f, []) |
| 253 assert len(node_list) == nodes_created | 229 assert len(node_list) == nodes_created |
| 254 | 230 |
| 255 def __compute_epsilon_closures(self): | 231 def __compute_epsilon_closures(self): |
| 256 if self.__epsilon_closure_computed: | 232 if self.__epsilon_closure_computed: |
| 257 return | 233 return |
| 258 self.__epsilon_closure_computed = True | 234 self.__epsilon_closure_computed = True |
| 259 def outer(node, state): | 235 def outer(node, state): |
| 260 def inner(node, closure): | 236 def inner(node, closure): |
| 261 closure.add(node) | 237 closure.add(node) |
| 262 return closure | 238 return closure |
| 263 closure = Nfa.__visit_edges(node, False, True, inner, set()) | 239 next_edge = lambda node : node.get_epsilon_transitions() |
| 240 edge = next_edge(node) |
| 241 closure = Nfa.__visit_edges(edge, next_edge, inner, set()) |
| 264 node.set_epsilon_closure(closure) | 242 node.set_epsilon_closure(closure) |
| 265 self.__visit_all_edges(outer, None) | 243 self.__visit_all_edges(outer, None) |
| 266 | 244 |
| 267 @staticmethod | 245 @staticmethod |
| 268 def __close(states): | 246 def __close(states): |
| 247 closure = set(states) |
| 269 for node in states: | 248 for node in states: |
| 270 states = states.union(node.epsilon_closure()) | 249 closure |= node.epsilon_closure() |
| 271 return states | 250 return closure |
| 272 | 251 |
| 273 def matches(self, string): | 252 def matches(self, string): |
| 274 self.__compute_epsilon_closures() | 253 self.__compute_epsilon_closures() |
| 275 valid_states = Nfa.__close(set([self.__start])) | 254 valid_states = Nfa.__close(set([self.__start])) |
| 276 for c in string: | 255 for c in string: |
| 277 next_valid_states = set() | 256 next_valid_states = set() |
| 278 for valid_state in valid_states: | 257 for valid_state in valid_states: |
| 279 next_valid_states = next_valid_states.union(valid_state.matches(c)) | 258 next_valid_states |= valid_state.char_matches(c) |
| 280 valid_states = Nfa.__close(next_valid_states) | 259 valid_states = Nfa.__close(next_valid_states) |
| 281 if not valid_states: | 260 if not valid_states: |
| 282 return False | 261 return False |
| 283 return self.__end in valid_states | 262 return self.__end in valid_states |
| 284 | 263 |
| 285 @staticmethod | 264 @staticmethod |
| 286 def __to_dfa(nfa_state_set, dfa_builder): | 265 def __to_dfa(nfa_state_set, dfa_builder): |
| 287 nfa_state_set = Nfa.__close(nfa_state_set) | 266 nfa_state_set = Nfa.__close(nfa_state_set) |
| 288 name = str([x.node_number() for x in nfa_state_set]) | 267 name = str([x.node_number() for x in nfa_state_set]) |
| 289 (dfa_nodes, end_nodes, end_node) = dfa_builder | 268 (dfa_nodes, end_nodes, end_node) = dfa_builder |
| 290 if dfa_nodes.has_key(name): | 269 if dfa_nodes.has_key(name): |
| 291 return name | 270 return name |
| 292 dfa_node = {} | 271 dfa_node = {} |
| 293 dfa_nodes[name] = dfa_node | 272 dfa_nodes[name] = dfa_node |
| 294 for key in NfaState.gather_transition_keys(nfa_state_set): | 273 for key in NfaState.gather_transition_keys(nfa_state_set): |
| 295 reachable_set = set() | 274 reachable_set = set() |
| 296 for nfa_state in nfa_state_set: | 275 for nfa_state in nfa_state_set: |
| 297 reachable_set = reachable_set | nfa_state.matches(key) | 276 reachable_set |= nfa_state.key_matches(key) |
| 298 dfa_node[key] = Nfa.__to_dfa(reachable_set, dfa_builder) | 277 dfa_node[key] = Nfa.__to_dfa(reachable_set, dfa_builder) |
| 299 if end_node in nfa_state_set: | 278 if end_node in nfa_state_set: |
| 300 end_nodes.add(name) | 279 end_nodes.add(name) |
| 301 return name | 280 return name |
| 302 | 281 |
| 303 def compute_dfa(self): | 282 def compute_dfa(self): |
| 304 self.__compute_epsilon_closures() | 283 self.__compute_epsilon_closures() |
| 305 dfa_nodes = {} | 284 dfa_nodes = {} |
| 306 end_nodes = set() | 285 end_nodes = set() |
| 307 dfa_builder = (dfa_nodes, end_nodes, self.__end) | 286 dfa_builder = (dfa_nodes, end_nodes, self.__end) |
| 308 start_name = self.__to_dfa(set([self.__start]), dfa_builder) | 287 start_name = self.__to_dfa(set([self.__start]), dfa_builder) |
| 309 return (start_name, dfa_nodes, end_nodes) | 288 return (start_name, dfa_nodes, end_nodes) |
| 310 | 289 |
| 311 def to_dot(self): | 290 def to_dot(self): |
| 312 | 291 |
| 313 def f(node, node_content): | 292 def f(node, node_content): |
| 314 for key, values in node.transitions().items(): | 293 for key, values in node.transitions().items(): |
| 315 if key == node.epsilon_key(): key = "ε" | 294 if key == TransitionKey.epsilon(): |
| 316 if key != node.epsilon_key() and len(key) == 1: | 295 key = "ε" |
| 317 key = "'%s'" % key | |
| 318 for value in values: | 296 for value in values: |
| 319 node_content.append( | 297 node_content.append( |
| 320 " S_%d -> S_%d [ label = \"%s\" ];" % | 298 " S_%d -> S_%d [ label = \"%s\" ];" % |
| 321 (node.node_number(), value.node_number(), key)) | 299 (node.node_number(), value.node_number(), key)) |
| 322 return node_content | 300 return node_content |
| 323 | 301 |
| 324 node_content = self.__visit_all_edges(f, []) | 302 node_content = self.__visit_all_edges(f, []) |
| 325 | 303 |
| 326 return ''' | 304 return ''' |
| 327 digraph finite_state_machine { | 305 digraph finite_state_machine { |
| 328 rankdir=LR; | 306 rankdir=LR; |
| 329 node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s | 307 node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s |
| 330 node [shape = doublecircle, style=unfilled]; S_%s | 308 node [shape = doublecircle, style=unfilled]; S_%s |
| 331 node [shape = circle]; | 309 node [shape = circle]; |
| 332 %s | 310 %s |
| 333 } | 311 } |
| 334 ''' % (self.__start.node_number(), self.__end.node_number(), "\n".join(node_
content)) | 312 ''' % (self.__start.node_number(), |
| 335 | 313 self.__end.node_number(), |
| 314 "\n".join(node_content)) |
| OLD | NEW |