| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 23 matching lines...) Expand all Loading... |
| 34 # 1. Build a tree of operations on rules. | 34 # 1. Build a tree of operations on rules. |
| 35 # Each node in the tree is a tuple (operation, subtree1, ... subtreeN). | 35 # Each node in the tree is a tuple (operation, subtree1, ... subtreeN). |
| 36 # Rule parser builds this tree by invoking static methods of NfaBuilder. | 36 # Rule parser builds this tree by invoking static methods of NfaBuilder. |
| 37 # 2. For each node, perform the operation of the node to produce an Nfa. | 37 # 2. For each node, perform the operation of the node to produce an Nfa. |
| 38 # If an operation is called X, then it is performed by the method | 38 # If an operation is called X, then it is performed by the method |
| 39 # of NfaBuilder called __x(). See __process() for mapping from | 39 # of NfaBuilder called __x(). See __process() for mapping from |
| 40 # operation to processing methods. | 40 # operation to processing methods. |
| 41 | 41 |
| 42 class NfaBuilder(object): | 42 class NfaBuilder(object): |
| 43 | 43 |
| 44 def __init__(self, encoding, character_classes): | 44 def __init__(self, encoding, character_classes, subtree_map): |
| 45 self.__node_number = 0 | 45 self.__node_number = 0 |
| 46 self.__operation_map = {} | |
| 47 self.__members = getmembers(self) | |
| 48 self.__encoding = encoding | 46 self.__encoding = encoding |
| 49 self.__character_classes = character_classes | 47 self.__character_classes = character_classes |
| 50 self.__states = [] | 48 self.__states = [] |
| 51 self.__global_end_node = None | 49 self.__global_end_node = None |
| 50 self.__operation_map = None |
| 51 self.__subtree_map = subtree_map |
| 52 | 52 |
| 53 def __new_state(self): | 53 def __new_state(self): |
| 54 self.__node_number += 1 | 54 self.__node_number += 1 |
| 55 return NfaState() | 55 return NfaState() |
| 56 | 56 |
| 57 def __global_end(self): | 57 def __global_end(self): |
| 58 if not self.__global_end_node: | 58 if not self.__global_end_node: |
| 59 self.__global_end_node = self.__new_state() | 59 self.__global_end_node = self.__new_state() |
| 60 return self.__global_end_node | 60 return self.__global_end_node |
| 61 | 61 |
| (...skipping 20 matching lines...) Expand all Loading... |
| 82 self.__patch_ends(ends, start) | 82 self.__patch_ends(ends, start) |
| 83 return (start, [start]) | 83 return (start, [start]) |
| 84 | 84 |
| 85 def __zero_or_one(self, subtree): | 85 def __zero_or_one(self, subtree): |
| 86 (node, ends) = self.__process(subtree) | 86 (node, ends) = self.__process(subtree) |
| 87 start = self.__new_state() | 87 start = self.__new_state() |
| 88 start.add_epsilon_transition(node) | 88 start.add_epsilon_transition(node) |
| 89 return (start, ends + [start]) | 89 return (start, ends + [start]) |
| 90 | 90 |
| 91 def __repeat(self, param_min, param_max, subtree): | 91 def __repeat(self, param_min, param_max, subtree): |
| 92 param_min = int(param_min) | 92 'process regex of form subtree{param_min, param_max}' |
| 93 param_max = int(param_max) | 93 (param_min, param_max) = (int(param_min), int(param_max)) |
| 94 assert param_min > 1 and param_min <= param_max |
| 94 (start, ends) = self.__process(subtree) | 95 (start, ends) = self.__process(subtree) |
| 96 # create a chain of param_min subtrees |
| 95 for i in xrange(1, param_min): | 97 for i in xrange(1, param_min): |
| 96 (start2, ends2) = self.__process(subtree) | 98 (start2, ends2) = self.__process(subtree) |
| 97 self.__patch_ends(ends, start2) | 99 self.__patch_ends(ends, start2) |
| 98 ends = ends2 | 100 ends = ends2 |
| 99 if param_min == param_max: | 101 if param_min == param_max: |
| 100 return (start, ends) | 102 return (start, ends) |
| 101 | 103 # join in (param_max - param_min) optional subtrees |
| 102 midpoints = [] | 104 midpoints = [] |
| 103 for i in xrange(param_min, param_max): | 105 for i in xrange(param_min, param_max): |
| 104 midpoint = self.__new_state() | 106 midpoint = self.__new_state() |
| 105 self.__patch_ends(ends, midpoint) | 107 self.__patch_ends(ends, midpoint) |
| 106 (start2, ends) = self.__process(subtree) | 108 (start2, ends) = self.__process(subtree) |
| 107 midpoint.add_epsilon_transition(start2) | 109 midpoint.add_epsilon_transition(start2) |
| 108 midpoints.append(midpoint) | 110 midpoints.append(midpoint) |
| 109 | |
| 110 return (start, ends + midpoints) | 111 return (start, ends + midpoints) |
| 111 | 112 |
| 112 def __cat(self, left_tree, right_tree): | 113 def __cat(self, left_tree, right_tree): |
| 113 (left, right) = (self.__process(left_tree), self.__process(right_tree)) | 114 (left, right) = (self.__process(left_tree), self.__process(right_tree)) |
| 114 self.__patch_ends(left[1], right[0]) | 115 self.__patch_ends(left[1], right[0]) |
| 115 return (left[0], right[1]) | 116 return (left[0], right[1]) |
| 116 | 117 |
| 117 def __key_state(self, key): | 118 def __key_state(self, key): |
| 118 state = self.__new_state() | 119 state = self.__new_state() |
| 119 state.add_unclosed_transition(key) | 120 state.add_unclosed_transition(key) |
| (...skipping 13 matching lines...) Expand all Loading... |
| 133 | 134 |
| 134 def __any(self): | 135 def __any(self): |
| 135 return self.__key_state(TransitionKey.any(self.__encoding)) | 136 return self.__key_state(TransitionKey.any(self.__encoding)) |
| 136 | 137 |
| 137 def __action(self, subtree, action_term): | 138 def __action(self, subtree, action_term): |
| 138 (start, ends) = self.__process(subtree) | 139 (start, ends) = self.__process(subtree) |
| 139 action = Action.from_term(action_term) | 140 action = Action.from_term(action_term) |
| 140 end = self.__new_state() | 141 end = self.__new_state() |
| 141 self.__patch_ends(ends, end) | 142 self.__patch_ends(ends, end) |
| 142 end.set_action(action) | 143 end.set_action(action) |
| 144 # Force all match actions to be terminal. |
| 143 if action.match_action(): | 145 if action.match_action(): |
| 144 global_end = self.__global_end() | 146 global_end = self.__global_end() |
| 145 end.add_epsilon_transition(global_end) | 147 end.add_epsilon_transition(global_end) |
| 146 return (start, [end]) | 148 return (start, [end]) |
| 147 | 149 |
| 148 def __continue(self, subtree): | 150 def __continue(self, subtree): |
| 151 'add an epsilon transitions to the start node of the current subtree' |
| 149 (start, ends) = self.__process(subtree) | 152 (start, ends) = self.__process(subtree) |
| 150 state = self.__peek_state() | 153 state = self.__peek_state() |
| 151 if not state['start_node']: | 154 if not state['start_node']: |
| 152 state['start_node'] = self.__new_state() | 155 state['start_node'] = self.__new_state() |
| 153 self.__patch_ends(ends, state['start_node']) | 156 self.__patch_ends(ends, state['start_node']) |
| 154 return (start, []) | 157 return (start, []) |
| 155 | 158 |
| 156 def __unique_key(self, name): | 159 def __unique_key(self, name): |
| 157 return self.__key_state(TransitionKey.unique(name)) | 160 return self.__key_state(TransitionKey.unique(name)) |
| 158 | 161 |
| 159 def __join(self, tree, subtree): | 162 def __join(self, tree, name): |
| 160 (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(subtree) | 163 (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(name) |
| 161 (start, ends) = self.__process(tree) | 164 (start, ends) = self.__process(tree) |
| 162 self.__patch_ends(ends, subtree_start) | 165 self.__patch_ends(ends, subtree_start) |
| 163 if subtree_end: | 166 if subtree_end: |
| 164 return (start, [subtree_end]) | 167 return (start, [subtree_end]) |
| 165 else: | 168 else: |
| 166 return (start, []) | 169 return (start, []) |
| 167 | 170 |
| 171 def __get_method(self, name): |
| 172 'lazily build map of all private bound methods and return the one requested' |
| 173 if not self.__operation_map: |
| 174 prefix = "_NfaBuilder__" |
| 175 self.__operation_map = {name[len(prefix):] : f for (name, f) in |
| 176 filter(lambda (name, f): name.startswith(prefix), getmembers(self))} |
| 177 return self.__operation_map[name] |
| 178 |
| 168 def __process(self, term): | 179 def __process(self, term): |
| 169 assert isinstance(term, Term) | 180 assert isinstance(term, Term) |
| 170 method = "_NfaBuilder__" + term.name().lower() | 181 method = self.__get_method(term.name().lower()) |
| 171 if not method in self.__operation_map: | 182 return method(*term.args()) |
| 172 matches = filter(lambda (name, func): name == method, self.__members) | |
| 173 assert len(matches) == 1 | |
| 174 self.__operation_map[method] = matches[0][1] | |
| 175 return self.__operation_map[method](*term.args()) | |
| 176 | 183 |
| 177 def __patch_ends(self, ends, new_end): | 184 def __patch_ends(self, ends, new_end): |
| 178 for end in ends: | 185 for end in ends: |
| 179 end.close(new_end) | 186 end.close(new_end) |
| 180 | 187 |
| 181 def __push_state(self): | 188 def __push_state(self, name): |
| 189 for state in self.__states: |
| 190 assert state['name'] != name, 'recursive subgraph' |
| 182 self.__states.append({ | 191 self.__states.append({ |
| 183 'start_node' : None, | 192 'start_node' : None, |
| 193 'name' : name |
| 184 }) | 194 }) |
| 185 | 195 |
| 186 def __pop_state(self): | 196 def __pop_state(self): |
| 187 return self.__states.pop() | 197 return self.__states.pop() |
| 188 | 198 |
| 189 def __peek_state(self): | 199 def __peek_state(self): |
| 190 return self.__states[-1] | 200 return self.__states[-1] |
| 191 | 201 |
| 192 def __nfa(self, term): | 202 def __nfa(self, name): |
| 203 tree = self.__subtree_map[name] |
| 193 start_node_number = self.__node_number | 204 start_node_number = self.__node_number |
| 194 self.__push_state() | 205 self.__push_state(name) |
| 195 (start, ends) = self.__process(term) | 206 (start, ends) = self.__process(tree) |
| 196 state = self.__pop_state() | 207 state = self.__pop_state() |
| 208 # move saved start node into start |
| 197 if state['start_node']: | 209 if state['start_node']: |
| 198 state['start_node'].close(start) | 210 state['start_node'].close(start) |
| 199 start = state['start_node'] | 211 start = state['start_node'] |
| 200 # Don't create an end node for the subgraph if it would be unused (ends can | 212 # Don't create an end node for the subgraph if it would be unused (ends can |
| 201 # be an empty list e.g., in the case when everything inside the subgraph is | 213 # be an empty list e.g., in the case when everything inside the subgraph is |
| 202 # "continue"). | 214 # "continue"). |
| 203 end = None | 215 end = None |
| 204 if ends: | 216 if ends: |
| 205 end = self.__new_state() | 217 end = self.__new_state() |
| 206 self.__patch_ends(ends, end) | 218 self.__patch_ends(ends, end) |
| (...skipping 25 matching lines...) Expand all Loading... |
| 232 keys = reduce(f, reachable_states, set()) | 244 keys = reduce(f, reachable_states, set()) |
| 233 keys.discard(TransitionKey.epsilon()) | 245 keys.discard(TransitionKey.epsilon()) |
| 234 keys.discard(catch_all) | 246 keys.discard(catch_all) |
| 235 keys.discard(TransitionKey.unique('eos')) | 247 keys.discard(TransitionKey.unique('eos')) |
| 236 inverse_key = TransitionKey.inverse_key(encoding, keys) | 248 inverse_key = TransitionKey.inverse_key(encoding, keys) |
| 237 if inverse_key: | 249 if inverse_key: |
| 238 transitions[inverse_key] = transitions[catch_all] | 250 transitions[inverse_key] = transitions[catch_all] |
| 239 del transitions[catch_all] | 251 del transitions[catch_all] |
| 240 | 252 |
| 241 @staticmethod | 253 @staticmethod |
| 242 def nfa(encoding, character_classes, rule_term): | 254 def nfa(encoding, character_classes, subtree_map, name): |
| 243 | 255 self = NfaBuilder(encoding, character_classes, subtree_map) |
| 244 self = NfaBuilder(encoding, character_classes) | 256 (start, end, nodes_created) = self.__nfa(name) |
| 245 (start, end, nodes_created) = self.__nfa(rule_term) | 257 # close all ends |
| 246 | |
| 247 global_end_to_return = self.__global_end_node or end | 258 global_end_to_return = self.__global_end_node or end |
| 248 assert global_end_to_return | 259 assert global_end_to_return, "no terminal nodes, default tree continues" |
| 249 if end and self.__global_end_node: | 260 if end and self.__global_end_node: |
| 250 end.close(self.__global_end_node) | 261 end.close(self.__global_end_node) |
| 251 | |
| 252 global_end_to_return.close(None) | 262 global_end_to_return.close(None) |
| 253 | 263 # Prepare the nfa |
| 254 self.__compute_epsilon_closures(start) | 264 self.__compute_epsilon_closures(start) |
| 255 f = lambda node, state: self.__replace_catch_all(self.__encoding, node) | 265 f = lambda node, state: self.__replace_catch_all(self.__encoding, node) |
| 256 Automaton.visit_states(set([start]), f) | 266 Automaton.visit_states(set([start]), f) |
| 257 | |
| 258 return Nfa(self.__encoding, start, global_end_to_return, nodes_created) | 267 return Nfa(self.__encoding, start, global_end_to_return, nodes_created) |
| 259 | 268 |
| 260 @staticmethod | 269 @staticmethod |
| 261 def add_action(term, action): | 270 def add_action(term, action): |
| 262 return Term('ACTION', term, action.to_term()) | 271 return Term('ACTION', term, action.to_term()) |
| 263 | 272 |
| 264 @staticmethod | 273 @staticmethod |
| 265 def add_continue(term): | 274 def add_continue(term): |
| 266 return Term('CONTINUE', term) | 275 return Term('CONTINUE', term) |
| 267 | 276 |
| 268 @staticmethod | 277 @staticmethod |
| 269 def unique_key(name): | 278 def unique_key(name): |
| 270 return Term('UNIQUE_KEY', name) | 279 return Term('UNIQUE_KEY', name) |
| 271 | 280 |
| 272 @staticmethod | 281 @staticmethod |
| 273 def join_subgraph(tree, subtree): | 282 def join_subtree(tree, subtree_name): |
| 274 return Term('JOIN', tree, subtree) | 283 return Term('JOIN', tree, subtree_name) |
| 275 | 284 |
| 276 @staticmethod | 285 @staticmethod |
| 277 def or_terms(terms): | 286 def or_terms(terms): |
| 278 return reduce(lambda acc, g: Term('OR', acc, g), terms) | 287 return reduce(lambda acc, g: Term('OR', acc, g), terms) |
| 279 | 288 |
| 280 @staticmethod | 289 @staticmethod |
| 281 def cat_terms(terms): | 290 def cat_terms(terms): |
| 282 return reduce(lambda acc, g: Term('CAT', acc, g), terms) | 291 return reduce(lambda acc, g: Term('CAT', acc, g), terms) |
| 283 | 292 |
| 284 __modifer_map = { | 293 __modifer_map = { |
| 285 '+': 'ONE_OR_MORE', | 294 '+': 'ONE_OR_MORE', |
| 286 '?': 'ZERO_OR_ONE', | 295 '?': 'ZERO_OR_ONE', |
| 287 '*': 'ZERO_OR_MORE', | 296 '*': 'ZERO_OR_MORE', |
| 288 } | 297 } |
| 289 | 298 |
| 290 @staticmethod | 299 @staticmethod |
| 291 def apply_modifier(modifier, term): | 300 def apply_modifier(modifier, term): |
| 292 return Term(NfaBuilder.__modifer_map[modifier], term) | 301 return Term(NfaBuilder.__modifer_map[modifier], term) |
| OLD | NEW |