| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 28 matching lines...) Expand all Loading... |
| 39 # of NfaBuilder called __x(). See __process() for mapping from | 39 # of NfaBuilder called __x(). See __process() for mapping from |
| 40 # operation to processing methods. | 40 # operation to processing methods. |
| 41 | 41 |
| 42 class NfaBuilder(object): | 42 class NfaBuilder(object): |
| 43 | 43 |
| 44 def __init__(self, encoding, character_classes, subtree_map): | 44 def __init__(self, encoding, character_classes, subtree_map): |
| 45 self.__node_number = 0 | 45 self.__node_number = 0 |
| 46 self.__encoding = encoding | 46 self.__encoding = encoding |
| 47 self.__character_classes = character_classes | 47 self.__character_classes = character_classes |
| 48 self.__states = [] | 48 self.__states = [] |
| 49 self.__global_end_node = None | 49 self.__global_end = self.__new_state() |
| 50 self.__operation_map = None | 50 self.__operation_map = None |
| 51 self.__subtree_map = subtree_map | 51 self.__subtree_map = subtree_map |
| 52 | 52 |
| 53 def __new_state(self): | 53 def __new_state(self): |
| 54 self.__node_number += 1 | 54 self.__node_number += 1 |
| 55 return NfaState() | 55 return NfaState() |
| 56 | 56 |
| 57 def __global_end(self): | |
| 58 if not self.__global_end_node: | |
| 59 self.__global_end_node = self.__new_state() | |
| 60 return self.__global_end_node | |
| 61 | |
| 62 def __or(self, *trees): | 57 def __or(self, *trees): |
| 63 start = self.__new_state() | 58 start = self.__new_state() |
| 64 ends = [] | 59 ends = [] |
| 65 for tree in trees: | 60 for tree in trees: |
| 66 (sub_start, sub_end) = self.__process(tree) | 61 (sub_start, sub_end) = self.__process(tree) |
| 67 start.add_epsilon_transition(sub_start) | 62 start.add_epsilon_transition(sub_start) |
| 68 ends += sub_end | 63 ends += sub_end |
| 69 start.close(None) | 64 start.close(None) |
| 70 return (start, ends) | 65 return (start, ends) |
| 71 | 66 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 82 start.add_epsilon_transition(node) | 77 start.add_epsilon_transition(node) |
| 83 self.__patch_ends(ends, start) | 78 self.__patch_ends(ends, start) |
| 84 return (start, [start]) | 79 return (start, [start]) |
| 85 | 80 |
| 86 def __zero_or_one(self, subtree): | 81 def __zero_or_one(self, subtree): |
| 87 (node, ends) = self.__process(subtree) | 82 (node, ends) = self.__process(subtree) |
| 88 start = self.__new_state() | 83 start = self.__new_state() |
| 89 start.add_epsilon_transition(node) | 84 start.add_epsilon_transition(node) |
| 90 return (start, ends + [start]) | 85 return (start, ends + [start]) |
| 91 | 86 |
| 92 def __repeat(self, param_min, param_max, subtree): | 87 def __repeat(self, subtree, param_min, param_max): |
| 93 'process regex of form subtree{param_min, param_max}' | 88 'process regex of form subtree{param_min, param_max}' |
| 94 (param_min, param_max) = (int(param_min), int(param_max)) | 89 (param_min, param_max) = (int(param_min), int(param_max)) |
| 95 assert param_min > 1 and param_min <= param_max | 90 assert param_min > 1 and param_min <= param_max |
| 96 (start, ends) = self.__process(subtree) | 91 (start, ends) = self.__process(subtree) |
| 97 # create a chain of param_min subtrees | 92 # create a chain of param_min subtrees |
| 98 for i in xrange(1, param_min): | 93 for i in xrange(1, param_min): |
| 99 (start2, ends2) = self.__process(subtree) | 94 (start2, ends2) = self.__process(subtree) |
| 100 self.__patch_ends(ends, start2) | 95 self.__patch_ends(ends, start2) |
| 101 ends = ends2 | 96 ends = ends2 |
| 102 if param_min == param_max: | 97 if param_min == param_max: |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 140 return self.__key_state(TransitionKey.character_class( | 135 return self.__key_state(TransitionKey.character_class( |
| 141 self.__encoding, Term('CLASS', subtree), self.__character_classes)) | 136 self.__encoding, Term('CLASS', subtree), self.__character_classes)) |
| 142 | 137 |
| 143 def __not_class(self, subtree): | 138 def __not_class(self, subtree): |
| 144 return self.__key_state(TransitionKey.character_class( | 139 return self.__key_state(TransitionKey.character_class( |
| 145 self.__encoding, Term('NOT_CLASS', subtree), self.__character_classes)) | 140 self.__encoding, Term('NOT_CLASS', subtree), self.__character_classes)) |
| 146 | 141 |
| 147 def __any(self): | 142 def __any(self): |
| 148 return self.__key_state(TransitionKey.any(self.__encoding)) | 143 return self.__key_state(TransitionKey.any(self.__encoding)) |
| 149 | 144 |
| 150 def __action(self, subtree, action_term): | 145 def __unique_key(self, name): |
| 146 return self.__key_state(TransitionKey.unique(name)) |
| 147 |
| 148 def __entry_action(self, action, precedence, subtree): |
| 151 (start, ends) = self.__process(subtree) | 149 (start, ends) = self.__process(subtree) |
| 152 action = Action.from_term(action_term) | 150 node = self.__new_state() |
| 153 end = self.__new_state() | 151 node.set_action(Action(action, precedence)) |
| 154 self.__patch_ends(ends, end) | 152 self.__patch_ends(ends, node) |
| 155 end.set_action(action) | 153 return (start, [node]) |
| 154 |
| 155 def __match_action(self, action, precedence, subtree): |
| 156 (start, ends) = self.__process(subtree) |
| 157 node = self.__new_state() |
| 158 node.set_action(Action(action, precedence)) |
| 156 # Force all match actions to be terminal. | 159 # Force all match actions to be terminal. |
| 157 if action.match_action(): | 160 node.close(self.__global_end) |
| 158 global_end = self.__global_end() | 161 # patch via a match edge into existing graph |
| 159 end.add_epsilon_transition(global_end) | 162 match_node = self.__new_state() |
| 160 return (start, [end]) | 163 match_node.add_transition(TransitionKey.omega(), node) |
| 164 self.__patch_ends(ends, match_node) |
| 165 return (start, [match_node]) |
| 161 | 166 |
| 162 def __continue(self, subtree, depth): | 167 def __continue(self, subtree, depth): |
| 163 'add an epsilon transitions to the start node of the current subtree' | 168 'add an epsilon transitions to the start node of the current subtree' |
| 164 (start, ends) = self.__process(subtree) | 169 (start, ends) = self.__process(subtree) |
| 165 index = -1 - min(int(depth), len(self.__states) - 1) | 170 index = -1 - min(int(depth), len(self.__states) - 1) |
| 166 state = self.__states[index] | 171 state = self.__states[index] |
| 167 if not state['start_node']: | 172 if not state['start_node']: |
| 168 state['start_node'] = self.__new_state() | 173 state['start_node'] = self.__new_state() |
| 169 self.__patch_ends(ends, state['start_node']) | 174 self.__patch_ends(ends, state['start_node']) |
| 170 return (start, []) | 175 return (start, []) |
| 171 | 176 |
| 172 def __unique_key(self, name): | |
| 173 return self.__key_state(TransitionKey.unique(name)) | |
| 174 | |
| 175 def __join(self, tree, name): | 177 def __join(self, tree, name): |
| 176 (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(name) | 178 (start, ends) = self.__subgraph(name) |
| 177 if tree: | 179 (new_start, new_ends) = self.__process(tree) |
| 178 (start, ends) = self.__process(tree) | 180 self.__patch_ends(new_ends, start) |
| 179 self.__patch_ends(ends, subtree_start) | 181 return (new_start, ends) |
| 180 else: | |
| 181 start = subtree_start | |
| 182 if subtree_end: | |
| 183 return (start, [subtree_end]) | |
| 184 else: | |
| 185 return (start, []) | |
| 186 | 182 |
| 187 def __get_method(self, name): | 183 def __get_method(self, name): |
| 188 'lazily build map of all private bound methods and return the one requested' | 184 'lazily build map of all private bound methods and return the one requested' |
| 189 if not self.__operation_map: | 185 if not self.__operation_map: |
| 190 prefix = "_NfaBuilder__" | 186 prefix = "_NfaBuilder__" |
| 191 self.__operation_map = {name[len(prefix):] : f for (name, f) in | 187 self.__operation_map = {name[len(prefix):] : f for (name, f) in |
| 192 filter(lambda (name, f): name.startswith(prefix), getmembers(self))} | 188 filter(lambda (name, f): name.startswith(prefix), getmembers(self))} |
| 193 return self.__operation_map[name] | 189 return self.__operation_map[name] |
| 194 | 190 |
| 195 def __process(self, term): | 191 def __process(self, term): |
| 196 assert isinstance(term, Term) | 192 assert isinstance(term, Term) |
| 197 method = self.__get_method(term.name().lower()) | 193 method = self.__get_method(term.name().lower()) |
| 198 return method(*term.args()) | 194 return method(*term.args()) |
| 199 | 195 |
| 200 def __patch_ends(self, ends, new_end): | 196 def __patch_ends(self, ends, new_end): |
| 201 for end in ends: | 197 for end in ends: |
| 202 end.close(new_end) | 198 end.close(new_end) |
| 203 | 199 |
| 204 def __push_state(self, name): | 200 def __push_state(self, name): |
| 205 for state in self.__states: | 201 for state in self.__states: |
| 206 assert state['name'] != name, 'recursive subgraph' | 202 assert state['name'] != name, 'recursive subgraph' |
| 207 self.__states.append({ | 203 self.__states.append({ |
| 208 'start_node' : None, | 204 'start_node' : None, |
| 209 'name' : name | 205 'name' : name |
| 210 }) | 206 }) |
| 211 | 207 |
| 212 def __pop_state(self): | 208 def __subgraph(self, name): |
| 213 return self.__states.pop() | |
| 214 | |
| 215 def __nfa(self, name): | |
| 216 tree = self.__subtree_map[name] | 209 tree = self.__subtree_map[name] |
| 217 start_node_number = self.__node_number | |
| 218 self.__push_state(name) | 210 self.__push_state(name) |
| 219 (start, ends) = self.__process(tree) | 211 (start, ends) = self.__process(tree) |
| 220 state = self.__pop_state() | 212 state = self.__states.pop() |
| 221 # move saved start node into start | 213 # move saved start node into start |
| 222 if state['start_node']: | 214 if state['start_node']: |
| 223 state['start_node'].close(start) | 215 state['start_node'].close(start) |
| 224 start = state['start_node'] | 216 start = state['start_node'] |
| 225 # Don't create an end node for the subgraph if it would be unused (ends can | 217 return (start, ends) |
| 226 # be an empty list e.g., in the case when everything inside the subgraph is | |
| 227 # "continue"). | |
| 228 end = None | |
| 229 if ends: | |
| 230 end = self.__new_state() | |
| 231 self.__patch_ends(ends, end) | |
| 232 return (start, end, self.__node_number - start_node_number) | |
| 233 | 218 |
| 234 @staticmethod | 219 @staticmethod |
| 235 def __compute_epsilon_closures(start_state): | 220 def add_action(tree, entry_action, match_action, precedence): |
| 236 def outer(node, state): | 221 if entry_action: |
| 237 def inner(node, closure): | 222 tree = Term('ENTRY_ACTION', entry_action, precedence, tree) |
| 238 closure.add(node) | 223 if match_action: |
| 239 return closure | 224 tree = Term('MATCH_ACTION', match_action, precedence, tree) |
| 240 is_epsilon = lambda k: k == TransitionKey.epsilon() | 225 return tree |
| 241 state_iter = lambda node : node.state_iter(key_filter = is_epsilon) | |
| 242 edge = set(state_iter(node)) | |
| 243 closure = Automaton.visit_states( | |
| 244 edge, inner, state_iter=state_iter, visit_state=set()) | |
| 245 node.set_epsilon_closure(closure) | |
| 246 Automaton.visit_states(set([start_state]), outer) | |
| 247 | |
| 248 @staticmethod | |
| 249 def __replace_catch_all(encoding, state): | |
| 250 catch_all = TransitionKey.unique('catch_all') | |
| 251 states = list(state.state_iter_for_key(catch_all)) | |
| 252 if not states: | |
| 253 return | |
| 254 f = lambda acc, state: acc | set(state.epsilon_closure_iter()) | |
| 255 reachable_states = reduce(f, states, set()) | |
| 256 f = lambda acc, state: acc | set(state.key_iter()) | |
| 257 keys = reduce(f, reachable_states, set()) | |
| 258 keys.discard(TransitionKey.epsilon()) | |
| 259 keys.discard(catch_all) | |
| 260 keys.discard(TransitionKey.unique('eos')) | |
| 261 inverse_key = TransitionKey.inverse_key(encoding, keys) | |
| 262 if not inverse_key: | |
| 263 inverse_key = TransitionKey.unique('no_match') | |
| 264 state.swap_key(catch_all, inverse_key) | |
| 265 | |
| 266 @staticmethod | |
| 267 def nfa(encoding, character_classes, subtree_map, name): | |
| 268 self = NfaBuilder(encoding, character_classes, subtree_map) | |
| 269 (start, end, nodes_created) = self.__nfa(name) | |
| 270 # close all ends | |
| 271 global_end_to_return = self.__global_end_node or end | |
| 272 assert global_end_to_return, "no terminal nodes, default tree continues" | |
| 273 if end and self.__global_end_node: | |
| 274 end.close(self.__global_end_node) | |
| 275 global_end_to_return.close(None) | |
| 276 # Prepare the nfa | |
| 277 self.__compute_epsilon_closures(start) | |
| 278 f = lambda node, state: self.__replace_catch_all(self.__encoding, node) | |
| 279 Automaton.visit_states(set([start]), f) | |
| 280 return Nfa(self.__encoding, start, global_end_to_return, nodes_created) | |
| 281 | |
| 282 @staticmethod | |
| 283 def add_action(term, action): | |
| 284 return Term('ACTION', term, action.to_term()) | |
| 285 | 226 |
| 286 @staticmethod | 227 @staticmethod |
| 287 def add_continue(tree, depth): | 228 def add_continue(tree, depth): |
| 288 return Term('CONTINUE', tree, depth) | 229 return Term('CONTINUE', tree, depth) |
| 289 | 230 |
| 290 @staticmethod | 231 @staticmethod |
| 291 def unique_key(name): | 232 def unique_key(name): |
| 292 return Term('UNIQUE_KEY', name) | 233 return Term('UNIQUE_KEY', name) |
| 293 | 234 |
| 294 @staticmethod | 235 @staticmethod |
| 295 def join_subtree(tree, subtree_name): | 236 def join_subtree(tree, subtree_name): |
| 237 if not tree: |
| 238 return Term('SUBGRAPH', subtree_name) |
| 296 return Term('JOIN', tree, subtree_name) | 239 return Term('JOIN', tree, subtree_name) |
| 297 | 240 |
| 298 __modifer_map = { | 241 __modifer_map = { |
| 299 '+': 'ONE_OR_MORE', | 242 '+': 'ONE_OR_MORE', |
| 300 '?': 'ZERO_OR_ONE', | 243 '?': 'ZERO_OR_ONE', |
| 301 '*': 'ZERO_OR_MORE', | 244 '*': 'ZERO_OR_MORE', |
| 302 } | 245 } |
| 303 | 246 |
| 304 @staticmethod | 247 @staticmethod |
| 305 def apply_modifier(modifier, term): | 248 def apply_modifier(modifier, term): |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 338 terms = list(NfaBuilder.__flatten_terms(terms, 'OR')) | 281 terms = list(NfaBuilder.__flatten_terms(terms, 'OR')) |
| 339 assert terms | 282 assert terms |
| 340 return terms[0] if len(terms) == 1 else Term('OR', *terms) | 283 return terms[0] if len(terms) == 1 else Term('OR', *terms) |
| 341 | 284 |
| 342 @staticmethod | 285 @staticmethod |
| 343 def cat_terms(terms): | 286 def cat_terms(terms): |
| 344 terms = NfaBuilder.__flatten_terms(terms, 'CAT') | 287 terms = NfaBuilder.__flatten_terms(terms, 'CAT') |
| 345 terms = list(NfaBuilder.__flatten_literals(terms)) | 288 terms = list(NfaBuilder.__flatten_literals(terms)) |
| 346 assert terms | 289 assert terms |
| 347 return terms[0] if len(terms) == 1 else Term('CAT', *terms) | 290 return terms[0] if len(terms) == 1 else Term('CAT', *terms) |
| 291 |
| 292 @staticmethod |
| 293 def nfa(encoding, character_classes, subtree_map, name): |
| 294 self = NfaBuilder(encoding, character_classes, subtree_map) |
| 295 (start, ends) = self.__subgraph(name) |
| 296 # close all ends |
| 297 end = self.__global_end |
| 298 end.close(None) |
| 299 # TODO(dcarney): patch in default action |
| 300 self.__patch_ends(ends, end) |
| 301 # Prepare the nfa |
| 302 self.__compute_epsilon_closures(start) |
| 303 f = lambda node, state: self.__replace_catch_all(self.__encoding, node) |
| 304 Automaton.visit_states(set([start]), f) |
| 305 return Nfa(self.__encoding, start, end, self.__node_number) |
| 306 |
| 307 @staticmethod |
| 308 def __compute_epsilon_closures(start_state): |
| 309 def outer(node, state): |
| 310 def inner(node, closure): |
| 311 closure.add(node) |
| 312 return closure |
| 313 is_epsilon = lambda k: k == TransitionKey.epsilon() |
| 314 state_iter = lambda node : node.state_iter(key_filter = is_epsilon) |
| 315 edge = set(state_iter(node)) |
| 316 closure = Automaton.visit_states( |
| 317 edge, inner, state_iter=state_iter, visit_state=set()) |
| 318 node.set_epsilon_closure(closure) |
| 319 Automaton.visit_states(set([start_state]), outer) |
| 320 |
| 321 @staticmethod |
| 322 def __replace_catch_all(encoding, state): |
| 323 catch_all = TransitionKey.unique('catch_all') |
| 324 states = list(state.state_iter_for_key(catch_all)) |
| 325 if not states: |
| 326 return |
| 327 f = lambda acc, state: acc | set(state.epsilon_closure_iter()) |
| 328 reachable_states = reduce(f, states, set()) |
| 329 f = lambda acc, state: acc | set(state.key_iter()) |
| 330 keys = reduce(f, reachable_states, set()) |
| 331 keys.discard(TransitionKey.epsilon()) |
| 332 keys.discard(catch_all) |
| 333 keys.discard(TransitionKey.unique('eos')) |
| 334 inverse_key = TransitionKey.inverse_key(encoding, keys) |
| 335 if not inverse_key: |
| 336 inverse_key = TransitionKey.unique('no_match') |
| 337 state.swap_key(catch_all, inverse_key) |
| 338 |
| 339 @staticmethod |
| 340 def __install_omega_transitions(start_state, default_action): |
| 341 '''install a match transition, a backtrack transition, |
| 342 or a default transition into all nodes''' |
| 343 pass |
| OLD | NEW |