| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 20 matching lines...) Expand all Loading... |
| 31 | 31 |
| 32 class NfaBuilder(object): | 32 class NfaBuilder(object): |
| 33 | 33 |
| 34 def __init__(self, encoding, character_classes = {}): | 34 def __init__(self, encoding, character_classes = {}): |
| 35 self.__node_number = 0 | 35 self.__node_number = 0 |
| 36 self.__operation_map = {} | 36 self.__operation_map = {} |
| 37 self.__members = getmembers(self) | 37 self.__members = getmembers(self) |
| 38 self.__encoding = encoding | 38 self.__encoding = encoding |
| 39 self.__character_classes = character_classes | 39 self.__character_classes = character_classes |
| 40 self.__states = [] | 40 self.__states = [] |
| 41 self.__global_end_node = None |
| 41 | 42 |
| 42 def __new_state(self): | 43 def __new_state(self): |
| 43 self.__node_number += 1 | 44 self.__node_number += 1 |
| 44 return NfaState() | 45 return NfaState() |
| 45 | 46 |
| 47 def __global_end(self): |
| 48 if not self.__global_end_node: |
| 49 self.__global_end_node = self.__new_state() |
| 50 return self.__global_end_node |
| 51 |
| 46 def __or(self, graph): | 52 def __or(self, graph): |
| 47 start = self.__new_state() | 53 start = self.__new_state() |
| 48 ends = [] | 54 ends = [] |
| 49 for x in [self.__process(graph[1]), self.__process(graph[2])]: | 55 for x in [self.__process(graph[1]), self.__process(graph[2])]: |
| 50 start.add_epsilon_transition(x[0]) | 56 start.add_epsilon_transition(x[0]) |
| 51 ends += x[1] | 57 ends += x[1] |
| 52 start.close(None) | 58 start.close(None) |
| 53 return (start, ends) | 59 return (start, ends) |
| 54 | 60 |
| 55 def __one_or_more(self, graph): | 61 def __one_or_more(self, graph): |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 124 end = self.__new_state() | 130 end = self.__new_state() |
| 125 start.close(end) | 131 start.close(end) |
| 126 return (start, [end]) | 132 return (start, [end]) |
| 127 | 133 |
| 128 def __action(self, graph): | 134 def __action(self, graph): |
| 129 (start, ends) = self.__process(graph[1]) | 135 (start, ends) = self.__process(graph[1]) |
| 130 action = graph[2] | 136 action = graph[2] |
| 131 end = self.__new_state() | 137 end = self.__new_state() |
| 132 self.__patch_ends(ends, end) | 138 self.__patch_ends(ends, end) |
| 133 end.set_action(action) | 139 end.set_action(action) |
| 140 if action.match_action(): |
| 141 global_end = self.__global_end() |
| 142 end.add_epsilon_transition(global_end) |
| 134 return (start, [end]) | 143 return (start, [end]) |
| 135 | 144 |
| 136 def __continue(self, graph): | 145 def __continue(self, graph): |
| 137 (start, ends) = self.__process(graph[1]) | 146 (start, ends) = self.__process(graph[1]) |
| 138 state = self.__peek_state() | 147 state = self.__peek_state() |
| 139 if not state['start_node']: | 148 if not state['start_node']: |
| 140 state['start_node'] = self.__new_state() | 149 state['start_node'] = self.__new_state() |
| 141 self.__patch_ends(ends, state['start_node']) | 150 self.__patch_ends(ends, state['start_node']) |
| 142 return (start, []) | 151 return (start, []) |
| 143 | 152 |
| 144 def __catch_all(self, graph): | 153 def __catch_all(self, graph): |
| 145 return self.__key_state(TransitionKey.unique('catch_all')) | 154 return self.__key_state(TransitionKey.unique('catch_all')) |
| 146 | 155 |
| 147 def __join(self, graph): | 156 def __join(self, graph): |
| 148 (graph, name, subgraph) = graph[1:] | 157 (graph, name, subgraph) = graph[1:] |
| 149 subgraphs = self.__peek_state()['subgraphs'] | 158 subgraphs = self.__peek_state()['subgraphs'] |
| 150 if not name in subgraphs: | 159 if not name in subgraphs: |
| 151 subgraphs[name] = self.__nfa(subgraph) | 160 subgraphs[name] = self.__nfa(subgraph) |
| 152 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name] | 161 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name] |
| 153 (start, ends) = self.__process(graph) | 162 (start, ends) = self.__process(graph) |
| 154 self.__patch_ends(ends, subgraph_start) | 163 self.__patch_ends(ends, subgraph_start) |
| 155 end = self.__new_state() | 164 if subgraph_end: |
| 156 subgraph_end.add_epsilon_transition(end) | 165 end = self.__new_state() |
| 157 return (start, [end]) | 166 subgraph_end.add_epsilon_transition(end) |
| 167 return (start, [end]) |
| 168 else: |
| 169 return (start, []) |
| 158 | 170 |
| 159 def __process(self, graph): | 171 def __process(self, graph): |
| 160 assert type(graph) == TupleType | 172 assert type(graph) == TupleType |
| 161 method = "_NfaBuilder__" + graph[0].lower() | 173 method = "_NfaBuilder__" + graph[0].lower() |
| 162 if not method in self.__operation_map: | 174 if not method in self.__operation_map: |
| 163 matches = filter(lambda (name, func): name == method, self.__members) | 175 matches = filter(lambda (name, func): name == method, self.__members) |
| 164 assert len(matches) == 1 | 176 assert len(matches) == 1 |
| 165 self.__operation_map[method] = matches[0][1] | 177 self.__operation_map[method] = matches[0][1] |
| 166 return self.__operation_map[method](graph) | 178 return self.__operation_map[method](graph) |
| 167 | 179 |
| 168 def __patch_ends(self, ends, new_end): | 180 def __patch_ends(self, ends, new_end): |
| 169 for end in ends: | 181 for end in ends: |
| 170 end.close(new_end) | 182 end.close(new_end) |
| 171 | 183 |
| 172 def __push_state(self): | 184 def __push_state(self): |
| 173 self.__states.append({ | 185 self.__states.append({ |
| 174 'start_node' : None, | 186 'start_node' : None, |
| 175 'subgraphs' : {}, | 187 'subgraphs' : {}, |
| 176 'unpatched_ends' : [], | 188 'unpatched_ends' : [], |
| 177 }) | 189 }) |
| 178 | 190 |
| 179 def __pop_state(self): | 191 def __pop_state(self): |
| 180 return self.__states.pop() | 192 return self.__states.pop() |
| 181 | 193 |
| 182 def __peek_state(self): | 194 def __peek_state(self): |
| 183 return self.__states[len(self.__states) - 1] | 195 return self.__states[-1] |
| 184 | 196 |
| 185 def __nfa(self, graph): | 197 def __nfa(self, graph): |
| 186 start_node_number = self.__node_number | 198 start_node_number = self.__node_number |
| 187 self.__push_state() | 199 self.__push_state() |
| 188 (start, ends) = self.__process(graph) | 200 (start, ends) = self.__process(graph) |
| 189 state = self.__pop_state() | 201 state = self.__pop_state() |
| 190 if state['start_node']: | 202 if state['start_node']: |
| 191 state['start_node'].close(start) | 203 state['start_node'].close(start) |
| 192 start = state['start_node'] | 204 start = state['start_node'] |
| 193 for k, subgraph in state['subgraphs'].items(): | 205 for k, subgraph in state['subgraphs'].items(): |
| 194 subgraph[1].close(None) | 206 if subgraph[1]: |
| 195 end = self.__new_state() | 207 subgraph[1].close(None) |
| 196 if self.__states: | 208 |
| 197 self.__peek_state()['unpatched_ends'] += state['unpatched_ends'] | 209 # Don't create an end node for the subgraph if it would be unused (ends can |
| 198 else: | 210 # be an empty list e.g., in the case when everything inside the subgraph is |
| 199 self.__patch_ends(state['unpatched_ends'], end) | 211 # "continue"). |
| 200 self.__patch_ends(ends, end) | 212 end = None |
| 213 if ends: |
| 214 end = self.__new_state() |
| 215 self.__patch_ends(ends, end) |
| 216 |
| 201 return (start, end, self.__node_number - start_node_number) | 217 return (start, end, self.__node_number - start_node_number) |
| 202 | 218 |
| 203 @staticmethod | 219 @staticmethod |
| 204 def __compute_epsilon_closures(start_state): | 220 def __compute_epsilon_closures(start_state): |
| 205 def outer(node, state): | 221 def outer(node, state): |
| 206 def inner(node, closure): | 222 def inner(node, closure): |
| 207 closure.add(node) | 223 closure.add(node) |
| 208 return closure | 224 return closure |
| 209 is_epsilon = lambda k: k == TransitionKey.epsilon() | 225 is_epsilon = lambda k: k == TransitionKey.epsilon() |
| 210 state_iter = lambda node : node.state_iter(key_filter = is_epsilon) | 226 state_iter = lambda node : node.state_iter(key_filter = is_epsilon) |
| (...skipping 15 matching lines...) Expand all Loading... |
| 226 keys = reduce(f, reachable_states, set()) | 242 keys = reduce(f, reachable_states, set()) |
| 227 keys.discard(TransitionKey.epsilon()) | 243 keys.discard(TransitionKey.epsilon()) |
| 228 keys.discard(catch_all) | 244 keys.discard(catch_all) |
| 229 inverse_key = TransitionKey.inverse_key(encoding, keys) | 245 inverse_key = TransitionKey.inverse_key(encoding, keys) |
| 230 if inverse_key: | 246 if inverse_key: |
| 231 transitions[inverse_key] = transitions[catch_all] | 247 transitions[inverse_key] = transitions[catch_all] |
| 232 del transitions[catch_all] | 248 del transitions[catch_all] |
| 233 | 249 |
| 234 def nfa(self, graph): | 250 def nfa(self, graph): |
| 235 (start, end, nodes_created) = self.__nfa(graph) | 251 (start, end, nodes_created) = self.__nfa(graph) |
| 236 end.close(None) | 252 |
| 253 global_end_to_return = self.__global_end_node or end |
| 254 assert global_end_to_return |
| 255 if end and self.__global_end_node: |
| 256 end.close(self.__global_end_node) |
| 257 |
| 258 global_end_to_return.close(None) |
| 259 |
| 260 # FIXME: the same NfaBuilder should not be called twice, so this cleanup |
| 261 # should be unnecessary. |
| 262 self.__global_end_node = None |
| 263 |
| 237 self.__compute_epsilon_closures(start) | 264 self.__compute_epsilon_closures(start) |
| 238 f = lambda node, state: self.__replace_catch_all(self.__encoding, node) | 265 f = lambda node, state: self.__replace_catch_all(self.__encoding, node) |
| 239 Automaton.visit_states(set([start]), f) | 266 Automaton.visit_states(set([start]), f) |
| 240 return Nfa(self.__encoding, start, end, nodes_created) | 267 |
| 268 return Nfa(self.__encoding, start, global_end_to_return, nodes_created) |
| 241 | 269 |
| 242 @staticmethod | 270 @staticmethod |
| 243 def rule_tree_dot(graph): | 271 def rule_tree_dot(graph): |
| 244 node_ix = [0] | 272 node_ix = [0] |
| 245 node_template = 'node [label="%s"]; N_%d;' | 273 node_template = 'node [label="%s"]; N_%d;' |
| 246 edge_template = 'N_%d -> N_%d' | 274 edge_template = 'N_%d -> N_%d' |
| 247 nodes = [] | 275 nodes = [] |
| 248 edges = [] | 276 edges = [] |
| 249 | 277 |
| 250 def escape(v): | 278 def escape(v): |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 304 | 332 |
| 305 __modifer_map = { | 333 __modifer_map = { |
| 306 '+': 'ONE_OR_MORE', | 334 '+': 'ONE_OR_MORE', |
| 307 '?': 'ZERO_OR_ONE', | 335 '?': 'ZERO_OR_ONE', |
| 308 '*': 'ZERO_OR_MORE', | 336 '*': 'ZERO_OR_MORE', |
| 309 } | 337 } |
| 310 | 338 |
| 311 @staticmethod | 339 @staticmethod |
| 312 def apply_modifier(modifier, graph): | 340 def apply_modifier(modifier, graph): |
| 313 return (NfaBuilder.__modifer_map[modifier], graph) | 341 return (NfaBuilder.__modifer_map[modifier], graph) |
| OLD | NEW |