| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 18 matching lines...) Expand all Loading... |
| 29 from transition_keys import TransitionKey | 29 from transition_keys import TransitionKey |
| 30 from inspect import getmembers | 30 from inspect import getmembers |
| 31 | 31 |
| 32 class NfaState: | 32 class NfaState: |
| 33 | 33 |
| 34 def __init__(self, node_number): | 34 def __init__(self, node_number): |
| 35 self.__transitions = {} | 35 self.__transitions = {} |
| 36 self.__unclosed = set() | 36 self.__unclosed = set() |
| 37 self.__node_number = node_number | 37 self.__node_number = node_number |
| 38 self.__epsilon_closure = None | 38 self.__epsilon_closure = None |
| 39 self.__transition_action = None |
| 39 | 40 |
| 40 def node_number(self): | 41 def node_number(self): |
| 41 return self.__node_number | 42 return self.__node_number |
| 42 | 43 |
| 43 def epsilon_closure(self): | 44 def epsilon_closure(self): |
| 44 return self.__epsilon_closure | 45 return self.__epsilon_closure |
| 45 | 46 |
| 46 def set_epsilon_closure(self, closure): | 47 def set_epsilon_closure(self, closure): |
| 47 assert self.is_closed() | 48 assert self.is_closed() |
| 48 assert self.__epsilon_closure == None | 49 assert self.__epsilon_closure == None |
| 49 self.__epsilon_closure = frozenset(closure) | 50 self.__epsilon_closure = frozenset(closure) |
| 50 | 51 |
| 52 def set_transition_action(self, action): |
| 53 assert not self.is_closed() |
| 54 assert self.__transition_action == None |
| 55 self.__transition_action = action |
| 56 |
| 51 def transitions(self): | 57 def transitions(self): |
| 52 assert self.is_closed() | 58 assert self.is_closed() |
| 53 return self.__transitions | 59 return self.__transitions |
| 54 | 60 |
| 55 def get_epsilon_transitions(self): | 61 def next_states(self, key_filter): |
| 56 key = TransitionKey.epsilon(); | 62 assert self.is_closed() |
| 57 if not key in self.__transitions: return frozenset() | 63 first = lambda v: [x[0] for x in v] |
| 58 return frozenset(self.__transitions[key]) | 64 f = lambda acc, (k, v) : acc if not key_filter(k) else acc | set(first(v)) |
| 65 return reduce(f, self.__transitions.items(), set()) |
| 59 | 66 |
| 60 def __add_transition(self, key, value): | 67 def __add_transition(self, key, action, next_state): |
| 61 if value == None: | 68 if next_state == None: |
| 69 assert not action |
| 62 assert not self.is_closed(), "already closed" | 70 assert not self.is_closed(), "already closed" |
| 63 self.__unclosed.add(key) | 71 self.__unclosed.add(key) |
| 64 return | 72 return |
| 65 if not key in self.__transitions: | 73 if not key in self.__transitions: |
| 66 self.__transitions[key] = set() | 74 self.__transitions[key] = set() |
| 67 self.__transitions[key].add(value) | 75 self.__transitions[key].add((next_state, action)) |
| 68 | 76 |
| 69 def add_unclosed_transition(self, key): | 77 def add_unclosed_transition(self, key): |
| 70 assert key != TransitionKey.epsilon() | 78 assert key != TransitionKey.epsilon() |
| 71 self.__add_transition(key, None) | 79 self.__add_transition(key, None, None) |
| 72 | 80 |
| 73 def add_epsilon_transition(self, state): | 81 def add_epsilon_transition(self, state): |
| 74 assert state != None | 82 assert state != None |
| 75 self.__add_transition(TransitionKey.epsilon(), state) | 83 self.__add_transition(TransitionKey.epsilon(), None, state) |
| 76 | 84 |
| 77 def is_closed(self): | 85 def is_closed(self): |
| 78 return self.__unclosed == None | 86 return self.__unclosed == None |
| 79 | 87 |
| 80 def close(self, end): | 88 def close(self, end): |
| 81 assert not self.is_closed() | 89 assert not self.is_closed() |
| 90 unclosed, self.__unclosed = self.__unclosed, None |
| 91 action, self.__transition_action = self.__transition_action, None |
| 82 if end == None: | 92 if end == None: |
| 83 assert not self.__unclosed | 93 assert not unclosed |
| 84 else: | 94 assert not action |
| 85 for key in self.__unclosed: | 95 return |
| 86 self.__add_transition(key, end) | 96 for key in unclosed: |
| 87 if not self.__unclosed: | 97 self.__add_transition(key, action, end) |
| 88 self.add_epsilon_transition(end) | 98 if not unclosed: |
| 89 self.__unclosed = None | 99 self.__add_transition(TransitionKey.epsilon(), action, end) |
| 90 | 100 |
| 91 def __matches(self, match_func, value): | 101 def __matches(self, match_func, value): |
| 92 f = lambda acc, (k, vs): acc | vs if match_func(k, value) else acc | 102 f = lambda acc, (k, vs): acc | vs if match_func(k, value) else acc |
| 93 return reduce(f, self.__transitions.items(), set()) | 103 return reduce(f, self.__transitions.items(), set()) |
| 94 | 104 |
| 95 def char_matches(self, value): | 105 def char_matches(self, value): |
| 96 return self.__matches(lambda k, v : k.matches_char(v), value) | 106 return self.__matches(lambda k, v : k.matches_char(v), value) |
| 97 | 107 |
| 98 def key_matches(self, value): | 108 def key_matches(self, value): |
| 99 return self.__matches(lambda k, v : k.matches_key(v), value) | 109 return self.__matches(lambda k, v : k.matches_key(v), value) |
| 100 | 110 |
| 101 @staticmethod | 111 @staticmethod |
| 102 def gather_transition_keys(state_set): | 112 def gather_transition_keys(state_set): |
| 103 f = lambda acc, state: acc | set(state.__transitions.keys()) | 113 f = lambda acc, state: acc | set(state.__transitions.keys()) |
| 104 return TransitionKey.disjoint_keys(reduce(f, state_set, set())) | 114 return TransitionKey.disjoint_keys(reduce(f, state_set, set())) |
| 105 | 115 |
| 106 class NfaBuilder: | 116 class NfaBuilder: |
| 107 | 117 |
| 108 def __init__(self): | 118 def __init__(self): |
| 119 self.__node_number = 0 |
| 109 self.__operation_map = {} | 120 self.__operation_map = {} |
| 110 self.__members = getmembers(self) | 121 self.__members = getmembers(self) |
| 111 | 122 |
| 112 def __new_state(self): | 123 def __new_state(self): |
| 113 node_number = self.node_number | 124 self.__node_number += 1 |
| 114 self.node_number = self.node_number + 1 | 125 return NfaState(self.__node_number - 1) |
| 115 return NfaState(node_number) | |
| 116 | 126 |
| 117 def __or(self, graph): | 127 def __or(self, graph): |
| 118 start = self.__new_state() | 128 start = self.__new_state() |
| 119 ends = [] | 129 ends = [] |
| 120 for x in [self.__process(graph[1]), self.__process(graph[2])]: | 130 for x in [self.__process(graph[1]), self.__process(graph[2])]: |
| 121 start.add_epsilon_transition(x[0]) | 131 start.add_epsilon_transition(x[0]) |
| 122 ends += x[1] | 132 ends += x[1] |
| 123 start.close(None) | 133 start.close(None) |
| 124 return (start, ends) | 134 return (start, ends) |
| 125 | 135 |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 158 | 168 |
| 159 def __class(self, graph): | 169 def __class(self, graph): |
| 160 return self.__key_state(TransitionKey.character_class(False, graph[1])) | 170 return self.__key_state(TransitionKey.character_class(False, graph[1])) |
| 161 | 171 |
| 162 def __not_class(self, graph): | 172 def __not_class(self, graph): |
| 163 return self.__key_state(TransitionKey.character_class(True, graph[1])) | 173 return self.__key_state(TransitionKey.character_class(True, graph[1])) |
| 164 | 174 |
| 165 def __any(self, graph): | 175 def __any(self, graph): |
| 166 return self.__key_state(TransitionKey.any()) | 176 return self.__key_state(TransitionKey.any()) |
| 167 | 177 |
| 178 def __action(self, graph): |
| 179 result = self.__process(graph[1]) |
| 180 for end in result[1]: |
| 181 end.set_transition_action(graph[2]) |
| 182 return result |
| 183 |
| 168 def __process(self, graph): | 184 def __process(self, graph): |
| 169 assert type(graph) == TupleType | 185 assert type(graph) == TupleType |
| 170 method = "_NfaBuilder__" + graph[0].lower() | 186 method = "_NfaBuilder__" + graph[0].lower() |
| 171 if not method in self.__operation_map: | 187 if not method in self.__operation_map: |
| 172 matches = filter(lambda (name, func): name == method, self.__members) | 188 matches = filter(lambda (name, func): name == method, self.__members) |
| 173 assert len(matches) == 1 | 189 assert len(matches) == 1 |
| 174 self.__operation_map[method] = matches[0][1] | 190 self.__operation_map[method] = matches[0][1] |
| 175 return self.__operation_map[method](graph) | 191 return self.__operation_map[method](graph) |
| 176 | 192 |
| 177 def __patch_ends(self, ends, new_end): | 193 def __patch_ends(self, ends, new_end): |
| 178 for end in ends: | 194 for end in ends: |
| 179 end.close(new_end) | 195 end.close(new_end) |
| 180 | 196 |
| 181 def nfa(self, graph): | 197 def nfa(self, graph): |
| 182 self.node_number = 0 | 198 start_node_number = self.__node_number |
| 183 (start, ends) = self.__process(graph) | 199 (start, ends) = self.__process(graph) |
| 184 end = self.__new_state() | 200 end = self.__new_state() |
| 185 self.__patch_ends(ends, end) | 201 self.__patch_ends(ends, end) |
| 186 end.close(None) | 202 end.close(None) |
| 187 return Nfa(start, end, self.node_number) | 203 return Nfa(start, end, self.__node_number - start_node_number) |
| 204 |
| 205 @staticmethod |
| 206 def add_action(graph, action): |
| 207 return ('ACTION', graph, action) |
| 208 |
| 209 @staticmethod |
| 210 def or_graphs(graphs): |
| 211 return reduce(lambda acc, g: ('OR', acc, g), graphs) |
| 212 |
| 213 @staticmethod |
| 214 def cat_graphs(graphs): |
| 215 return reduce(lambda acc, g: ('CAT', acc, g), graphs) |
| 188 | 216 |
| 189 class Nfa: | 217 class Nfa: |
| 190 | 218 |
| 191 def __init__(self, start, end, nodes_created): | 219 def __init__(self, start, end, nodes_created): |
| 192 self.__start = start | 220 self.__start = start |
| 193 self.__end = end | 221 self.__end = end |
| 194 self.__epsilon_closure_computed = False | 222 self.__epsilon_closure_computed = False |
| 195 self.__verify(nodes_created) | 223 self.__verify(nodes_created) |
| 196 | 224 |
| 197 @staticmethod | 225 @staticmethod |
| 198 def __visit_edges(edge, compute_next_edge, visitor, state): | 226 def __visit_edges(edge, compute_next_edge, visitor, state): |
| 199 visited = set() | 227 visited = set() |
| 200 while edge: | 228 while edge: |
| 201 f = lambda (next_edge, state), node: ( | 229 f = lambda (next_edge, state), node: ( |
| 202 next_edge | compute_next_edge(node), | 230 next_edge | compute_next_edge(node), |
| 203 visitor(node, state)) | 231 visitor(node, state)) |
| 204 (next_edge, state) = reduce(f, edge, (set(), state)) | 232 (next_edge, state) = reduce(f, edge, (set(), state)) |
| 205 visited |= edge | 233 visited |= edge |
| 206 edge = next_edge - visited | 234 edge = next_edge - visited |
| 207 return state | 235 return state |
| 208 | 236 |
| 209 def __visit_all_edges(self, visitor, state): | 237 def __visit_all_edges(self, visitor, state): |
| 210 edge = set([self.__start]) | 238 edge = set([self.__start]) |
| 211 def next_edge(node): | 239 next_edge = lambda node: node.next_states(lambda x : True) |
| 212 f = lambda acc, values: acc | values | |
| 213 return reduce(f, node.transitions().values(), set()) | |
| 214 return Nfa.__visit_edges(edge, next_edge, visitor, state) | 240 return Nfa.__visit_edges(edge, next_edge, visitor, state) |
| 215 | 241 |
| 216 def __verify(self, nodes_created): | 242 def __verify(self, nodes_created): |
| 217 def f(node, node_list): | 243 def f(node, node_list): |
| 218 assert node.is_closed() | 244 assert node.is_closed() |
| 219 node_list.append(node) | 245 node_list.append(node) |
| 220 return node_list | 246 return node_list |
| 221 node_list = self.__visit_all_edges(f, []) | 247 node_list = self.__visit_all_edges(f, []) |
| 222 assert len(node_list) == nodes_created | 248 assert len(node_list) == nodes_created |
| 223 | 249 |
| 224 def __compute_epsilon_closures(self): | 250 def __compute_epsilon_closures(self): |
| 225 if self.__epsilon_closure_computed: | 251 if self.__epsilon_closure_computed: |
| 226 return | 252 return |
| 227 self.__epsilon_closure_computed = True | 253 self.__epsilon_closure_computed = True |
| 228 def outer(node, state): | 254 def outer(node, state): |
| 229 def inner(node, closure): | 255 def inner(node, closure): |
| 230 closure.add(node) | 256 closure.add(node) |
| 231 return closure | 257 return closure |
| 232 next_edge = lambda node : node.get_epsilon_transitions() | 258 is_epsilon = lambda k: k == TransitionKey.epsilon() |
| 259 next_edge = lambda node : node.next_states(is_epsilon) |
| 233 edge = next_edge(node) | 260 edge = next_edge(node) |
| 234 closure = Nfa.__visit_edges(edge, next_edge, inner, set()) | 261 closure = Nfa.__visit_edges(edge, next_edge, inner, set()) |
| 235 node.set_epsilon_closure(closure) | 262 node.set_epsilon_closure(closure) |
| 236 self.__visit_all_edges(outer, None) | 263 self.__visit_all_edges(outer, None) |
| 237 | 264 |
| 238 @staticmethod | 265 @staticmethod |
| 239 def __close(states): | 266 def __close(states): |
| 240 f = lambda acc, node: acc | node.epsilon_closure() | 267 f = lambda acc, node: acc | node.epsilon_closure() |
| 241 return reduce(f, states, set(states)) | 268 return reduce(f, states, set(states)) |
| 242 | 269 |
| 243 def matches(self, string): | 270 def matches(self, string): |
| 244 self.__compute_epsilon_closures() | 271 self.__compute_epsilon_closures() |
| 245 valid_states = Nfa.__close(set([self.__start])) | 272 valid_states = Nfa.__close(set([self.__start])) |
| 246 for c in string: | 273 for c in string: |
| 247 f = lambda acc, state: acc | state.char_matches(c) | 274 f = lambda acc, state: acc | state.char_matches(c) |
| 248 valid_states = Nfa.__close(reduce(f, valid_states, set())) | 275 transitions = reduce(f, valid_states, set()) |
| 249 if not valid_states: | 276 if not transitions: |
| 250 return False | 277 return False |
| 278 valid_states = Nfa.__close(set([x[0] for x in transitions])) |
| 251 return self.__end in valid_states | 279 return self.__end in valid_states |
| 252 | 280 |
| 253 @staticmethod | 281 @staticmethod |
| 254 def __to_dfa(nfa_state_set, builder): | 282 def __to_dfa(nfa_state_set, dfa_nodes, end_node): |
| 255 nfa_state_set = Nfa.__close(nfa_state_set) | 283 nfa_state_set = Nfa.__close(nfa_state_set) |
| 256 assert nfa_state_set | 284 assert nfa_state_set |
| 257 name = str([x.node_number() for x in nfa_state_set]) | 285 name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set)) |
| 258 (dfa_nodes, end_nodes, end_node) = builder | |
| 259 if name in dfa_nodes: | 286 if name in dfa_nodes: |
| 260 return name | 287 return name |
| 261 dfa_node = {} | 288 dfa_nodes[name] = { |
| 262 dfa_nodes[name] = dfa_node | 289 'transitions': {}, |
| 290 'terminal': end_node in nfa_state_set} |
| 263 for key in NfaState.gather_transition_keys(nfa_state_set): | 291 for key in NfaState.gather_transition_keys(nfa_state_set): |
| 264 f = lambda acc, state: acc | state.key_matches(key) | 292 f = lambda acc, state: acc | state.key_matches(key) |
| 265 dfa_node[key] = Nfa.__to_dfa(reduce(f, nfa_state_set, set()), builder) | 293 transitions = reduce(f, nfa_state_set, set()) |
| 266 if end_node in nfa_state_set: | 294 match_states = set() |
| 267 end_nodes.add(name) | 295 actions = set() |
| 296 for (state, action) in transitions: |
| 297 match_states.add(state) |
| 298 if action: |
| 299 actions.add(action) |
| 300 assert len(match_states) == len(transitions) |
| 301 assert not actions or len(actions) == 1 |
| 302 action = iter(actions).next() if actions else None |
| 303 transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node) |
| 304 dfa_nodes[name]['transitions'][key] = (transition_state, action) |
| 268 return name | 305 return name |
| 269 | 306 |
| 270 def compute_dfa(self): | 307 def compute_dfa(self): |
| 271 self.__compute_epsilon_closures() | 308 self.__compute_epsilon_closures() |
| 272 dfa_nodes = {} | 309 dfa_nodes = {} |
| 273 end_nodes = set() | 310 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end) |
| 274 dfa_builder = (dfa_nodes, end_nodes, self.__end) | 311 return (start_name, dfa_nodes) |
| 275 start_name = self.__to_dfa(set([self.__start]), dfa_builder) | |
| 276 return (start_name, dfa_nodes, end_nodes) | |
| 277 | 312 |
| 278 def to_dot(self): | 313 def to_dot(self): |
| 279 | 314 |
| 280 def f(node, node_content): | 315 def f(node, node_content): |
| 281 for key, values in node.transitions().items(): | 316 for key, values in node.transitions().items(): |
| 282 if key == TransitionKey.epsilon(): | 317 if key == TransitionKey.epsilon(): |
| 283 key = "ε" | 318 key = "ε" |
| 284 for value in values: | 319 for value in values: |
| 285 node_content.append( | 320 node_content.append( |
| 286 " S_%d -> S_%d [ label = \"%s\" ];" % | 321 " S_%d -> S_%d [ label = \"%s\" ];" % |
| 287 (node.node_number(), value.node_number(), key)) | 322 (node.node_number(), value[0].node_number(), key)) |
| 288 return node_content | 323 return node_content |
| 289 | 324 |
| 290 node_content = self.__visit_all_edges(f, []) | 325 node_content = self.__visit_all_edges(f, []) |
| 291 | 326 |
| 292 return ''' | 327 return ''' |
| 293 digraph finite_state_machine { | 328 digraph finite_state_machine { |
| 294 rankdir=LR; | 329 rankdir=LR; |
| 295 node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s | 330 node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s |
| 296 node [shape = doublecircle, style=unfilled]; S_%s | 331 node [shape = doublecircle, style=unfilled]; S_%s |
| 297 node [shape = circle]; | 332 node [shape = circle]; |
| 298 %s | 333 %s |
| 299 } | 334 } |
| 300 ''' % (self.__start.node_number(), | 335 ''' % (self.__start.node_number(), |
| 301 self.__end.node_number(), | 336 self.__end.node_number(), |
| 302 "\n".join(node_content)) | 337 "\n".join(node_content)) |
| OLD | NEW |