| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 from transition_keys import TransitionKey | 28 from transition_keys import TransitionKey |
| 29 from automaton import Term, Action | 29 from automaton import Term, Action, Automaton |
| 30 from dfa import Dfa | 30 from dfa import Dfa |
| 31 | 31 |
| 32 # --- Optimization: Replacing tokens with gotos --- | 32 # --- Optimization: Replacing tokens with gotos --- |
| 33 # | 33 # |
| 34 # This optimization reduces the code size for grammars which have constructs | 34 # This optimization reduces the code size for grammars which have constructs |
| 35 # similar to keywords and identifiers. Consider the following grammar: | 35 # similar to keywords and identifiers. Consider the following grammar: |
| 36 # | 36 # |
| 37 # "baz" <|token(BAZ)|> | 37 # "baz" <|token(BAZ)|> |
| 38 # "bazz" <|token(BAZZ)|> | 38 # "bazz" <|token(BAZZ)|> |
| 39 # | 39 # |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 136 # both states transition to s3. Note that [a-z] is a superkey of [b-z], and the | 136 # both states transition to s3. Note that [a-z] is a superkey of [b-z], and the |
| 137 # transition from s2 is defined for the superkey, but that is ok. | 137 # transition from s2 is defined for the superkey, but that is ok. |
| 138 | 138 |
| 139 class DfaOptimizer(object): | 139 class DfaOptimizer(object): |
| 140 | 140 |
| 141 def __init__(self, dfa, log): | 141 def __init__(self, dfa, log): |
| 142 self.__dfa = dfa | 142 self.__dfa = dfa |
| 143 self.__log = log | 143 self.__log = log |
| 144 | 144 |
| 145 @staticmethod | 145 @staticmethod |
| 146 def transistions_match(encoding, incoming_key, incoming_state, state): | 146 def __transistions_match(encoding, incoming_key, incoming_state, state): |
| 147 keys = set(state.key_iter()) | 147 keys = set(state.key_iter()) |
| 148 keys.add(incoming_key) | 148 keys.add(incoming_key) |
| 149 disjoint_keys = TransitionKey.disjoint_keys(encoding, keys) | 149 disjoint_keys = TransitionKey.disjoint_keys(encoding, keys) |
| 150 for key in disjoint_keys: | 150 for key in disjoint_keys: |
| 151 if not incoming_key.is_superset_of_key(key): | 151 if not incoming_key.is_superset_of_key(key): |
| 152 continue | 152 continue |
| 153 transition_state = state.transition_state_for_key(key) | 153 transition_state = state.transition_state_for_key(key) |
| 154 if incoming_state.transition_state_for_key(key) != transition_state: | 154 if incoming_state.transition_state_for_key(key) != transition_state: |
| 155 return False | 155 return False |
| 156 return True | 156 return True |
| 157 | 157 |
| 158 def replace_tokens_with_gotos(self): | 158 @staticmethod |
| 159 def __build_incoming_transitions(dfa): |
| 160 incoming_transitions = {} |
| 161 def f(state, visitor_state): |
| 162 for key, transition_state in state.key_state_iter(): |
| 163 if not transition_state in incoming_transitions: |
| 164 incoming_transitions[transition_state] = [] |
| 165 incoming_transitions[transition_state].append((key, state)) |
| 166 dfa.visit_all_states(f) |
| 167 return incoming_transitions |
| 168 |
| 169 @staticmethod |
| 170 def __match_action(state): |
| 171 state = state.omega_transition() |
| 172 state = state if state and state.transition_count() == 0 else None |
| 173 return Action.empty_action() if not state else state.action() |
| 174 |
| 175 @staticmethod |
| 176 def __build_replacement_map(dfa): |
| 177 replacements = {} |
| 178 encoding = dfa.encoding() |
| 179 incoming_transitions = DfaOptimizer.__build_incoming_transitions(dfa) |
| 180 replacement_targets = set([]) |
| 181 # TODO(dcarney): this should be an ordered walk |
| 182 for state, incoming in incoming_transitions.items(): |
| 183 if len(incoming) < 10: |
| 184 continue |
| 185 # the states action will be replaced, so we just don't optimize if there |
| 186 # is one |
| 187 if state.action(): # TODO(dcarney): allow this via action chaining |
| 188 continue |
| 189 # We only perform this optimization on 'token' actions |
| 190 match_action = DfaOptimizer.__match_action(state) |
| 191 if not match_action.name() == 'token': |
| 192 continue |
| 193 assert state.omega_transition() in dfa.terminal_set() |
| 194 # we can drop incoming edges for states with a match action |
| 195 # of either 'token' or 'harmony_token' |
| 196 def is_replacement_candidate(state): |
| 197 action = DfaOptimizer.__match_action(state) |
| 198 return action.name() == 'token' or action.name() == 'harmony_token' |
| 199 for (incoming_key, incoming_state) in incoming: |
| 200 # check to see if incoming edges are matched by an outgoing edge |
| 201 if (incoming_state == state or # drop self edges |
| 202 incoming_key == TransitionKey.omega() or |
| 203 not is_replacement_candidate(incoming_state) or |
| 204 not DfaOptimizer.__transistions_match( |
| 205 encoding, incoming_key, incoming_state, state)): |
| 206 continue |
| 207 assert (not incoming_state in replacements or |
| 208 replacements[incoming_state][0] == state) |
| 209 # set this transition as to be removed |
| 210 if not incoming_state in replacements: |
| 211 replacements[incoming_state] = (state, set()) |
| 212 replacements[incoming_state][1].add(incoming_key) |
| 213 replacement_targets.add(state) |
| 214 # now see if we can gather other transistions into the goto |
| 215 for key in incoming_state.key_iter(): |
| 216 if (key != TransitionKey.omega() and |
| 217 key not in replacements[incoming_state][1] and |
| 218 DfaOptimizer.__transistions_match( |
| 219 encoding, key, incoming_state, state)): |
| 220 # this transition can be removed |
| 221 replacements[incoming_state][1].add(key) |
| 222 return (replacement_targets, replacements) |
| 223 |
| 224 @staticmethod |
| 225 def __new_state(is_terminal, action): |
| 226 return { |
| 227 'transitions' : {}, |
| 228 'terminal' : is_terminal, |
| 229 'action' : action, |
| 230 } |
| 231 |
| 232 @staticmethod |
| 233 def __new_state_name(state): |
| 234 return str(state.node_number()) |
| 235 |
| 236 @staticmethod |
| 237 def __split_target_state(state): |
| 238 old_name = DfaOptimizer.__new_state_name(state) |
| 239 old_match_action = DfaOptimizer.__match_action(state) |
| 240 assert old_match_action.name() == 'token', 'unimplemented' |
| 241 precedence = old_match_action.precedence() |
| 242 match_action = Action(Term('do_stored_token'), precedence) |
| 243 head_action = Action( |
| 244 Term('store_token', old_match_action.term().args()[0]), precedence) |
| 245 tail_action = Action(Term('no_op'), precedence) |
| 246 head = DfaOptimizer.__new_state(False, head_action) |
| 247 tail = DfaOptimizer.__new_state(False, tail_action) |
| 248 match = DfaOptimizer.__new_state(True, match_action) |
| 249 head['transitions'][TransitionKey.omega()] = old_name + '_tail' |
| 250 tail['transitions'][TransitionKey.omega()] = old_name + '_match' |
| 251 return (head, tail, match) |
| 252 |
| 253 # generate a store action to replace an existing action |
| 254 @staticmethod |
| 255 def __replacement_action(state, transition_state): |
| 256 old_action = DfaOptimizer.__match_action(state) |
| 257 assert old_action |
| 258 transition_action = DfaOptimizer.__match_action(transition_state).term() |
| 259 if old_action.term() == transition_action: |
| 260 # no need to store token |
| 261 return Action.empty_action() |
| 262 new_name = 'store_' + old_action.name() |
| 263 return Action( |
| 264 Term(new_name, *old_action.term().args()), old_action.precedence()) |
| 265 |
| 266 @staticmethod |
| 267 def __apply_jump(counters, state, new_state, target_state): |
| 268 # generate a new action for the new omega transition |
| 269 new_action = DfaOptimizer.__replacement_action(state, target_state) |
| 270 # determine new jump target |
| 271 jump_target = DfaOptimizer.__new_state_name(target_state) |
| 272 if not new_action: |
| 273 # just jump to entry of target_state, which will be split |
| 274 new_state['transitions'][TransitionKey.omega()] = jump_target |
| 275 counters['goto_start'] += 1 |
| 276 return None |
| 277 counters[new_action.name()] += 1 |
| 278 # install new match state |
| 279 match_state_id = DfaOptimizer.__new_state_name(state) + '_match' |
| 280 new_state['transitions'][TransitionKey.omega()] = match_state_id |
| 281 match_state = DfaOptimizer.__new_state(False, new_action) |
| 282 match_state['transitions'][TransitionKey.omega()] = jump_target + '_tail' |
| 283 return match_state |
| 284 |
| 285 @staticmethod |
| 286 def __remove_orphaned_states(states, orphanable, start_name): |
| 287 seen = set([]) |
| 288 def visit(state_id, acc): |
| 289 seen.add(state_id) |
| 290 def state_iter(state_id): |
| 291 return states[state_id]['transitions'].itervalues() |
| 292 Automaton.visit_states(set([start_name]), visit, state_iter=state_iter) |
| 293 def f(name, state): |
| 294 assert name in seen or name in orphanable |
| 295 return name in seen |
| 296 return {k: v for k, v in states.iteritems() if f(k, v)} |
| 297 |
| 298 def __replace_tokens_with_gotos(self): |
| 159 '''replace states with no entry action and match action of type 'token(X)' | 299 '''replace states with no entry action and match action of type 'token(X)' |
| 160 with new states with entry action store_token(X) and match action | 300 with new states with entry action store_token(X) and match action |
| 161 goto(state_id) which has (far) fewer transitions''' | 301 goto(state_id) which has (far) fewer transitions''' |
| 162 | 302 |
| 163 dfa = self.__dfa | 303 dfa = self.__dfa |
| 164 encoding = dfa.encoding() | 304 (replacement_targets, replacements) = self.__build_replacement_map(dfa) |
| 165 | 305 if not replacement_targets: |
| 166 incoming_transitions = {} | 306 return dfa |
| 167 def build_incoming_transitions(state, visitor_state): | |
| 168 for key, transition_state in state.key_state_iter(): | |
| 169 if not transition_state in incoming_transitions: | |
| 170 incoming_transitions[transition_state] = [] | |
| 171 incoming_transitions[transition_state].append((key,state)) | |
| 172 dfa.visit_all_states(build_incoming_transitions) | |
| 173 | |
| 174 def is_replacement_candidate(state): | |
| 175 return (state.action().match_action().name() == 'token' or | |
| 176 state.action().match_action().name() == 'harmony_token') | |
| 177 | |
| 178 replacements = {} | |
| 179 for state, incoming in incoming_transitions.items(): | |
| 180 if len(incoming) < 10: | |
| 181 continue | |
| 182 if not is_replacement_candidate(state): | |
| 183 continue | |
| 184 # check to see if incoming edges are matched by an outgoing edge | |
| 185 def match_filter((incoming_key, incoming_state)): | |
| 186 return (incoming_state != state and # drop self transitions | |
| 187 is_replacement_candidate(incoming_state) and | |
| 188 self.transistions_match( | |
| 189 encoding, incoming_key, incoming_state, state)) | |
| 190 for incoming_key_state in incoming: | |
| 191 if not match_filter(incoming_key_state): | |
| 192 continue | |
| 193 (incoming_key, incoming_state) = incoming_key_state | |
| 194 # set this transition as to be replaced | |
| 195 if not incoming_state in replacements: | |
| 196 replacements[incoming_state] = {} | |
| 197 replacements[incoming_state][incoming_key] = state | |
| 198 # now see if we can gather other transistions into the goto | |
| 199 for key in incoming_state.key_iter(): | |
| 200 if key == incoming_key: | |
| 201 continue | |
| 202 if not self.transistions_match( | |
| 203 encoding, key, incoming_state, state): | |
| 204 continue | |
| 205 # this transition can be removed | |
| 206 replacements[incoming_state][key] = None | |
| 207 if not replacements: | |
| 208 return | |
| 209 # perform replacement | 307 # perform replacement |
| 210 states = {} | 308 states = {} |
| 211 name = lambda state : str(state.node_number()) | 309 name = DfaOptimizer.__new_state_name |
| 212 counters = { | 310 counters = { |
| 213 'removals' : 0, | 311 'removals' : 0, |
| 214 'goto_start' : 0, | 312 'goto_start' : 0, |
| 215 'store_token_and_goto' : 0, | 313 'store_token' : 0, |
| 216 'store_harmony_token_and_goto' : 0, | 314 'store_harmony_token' : 0, |
| 315 'split_state' : 0 |
| 217 } | 316 } |
| 218 store_states = set([]) | |
| 219 # generate a store action to replace an existing action | |
| 220 def replacement_action(old_action, transition_state): | |
| 221 assert old_action.match_action() | |
| 222 state_id = name(transition_state) | |
| 223 if old_action.match_action().name() == 'token': | |
| 224 old_token = old_action.match_action().args()[0] | |
| 225 if (transition_state.action().match_action().name() == 'token' and | |
| 226 transition_state.action().match_action().args()[0] == old_token): | |
| 227 # no need to store token | |
| 228 match_action = Term('goto_start', state_id) | |
| 229 counters['goto_start'] += 1 | |
| 230 else: | |
| 231 counters['store_token_and_goto'] += 1 | |
| 232 match_action = Term('store_token_and_goto', old_token, state_id) | |
| 233 elif old_action.match_action().name() == 'harmony_token': | |
| 234 new_args = list(old_action.match_action().args()) + [state_id] | |
| 235 match_action = Term('store_harmony_token_and_goto', *new_args) | |
| 236 counters['store_harmony_token_and_goto'] += 1 | |
| 237 else: | |
| 238 raise Exception(old_action.match_action()) | |
| 239 return Action(old_action.entry_action(), | |
| 240 match_action, | |
| 241 old_action.precedence()) | |
| 242 # map the old state to the new state, with fewer transitions and | 317 # map the old state to the new state, with fewer transitions and |
| 243 # goto actions | 318 # goto actions |
| 319 orphanable = set([]) |
| 244 def replace_state(state, acc): | 320 def replace_state(state, acc): |
| 245 new_state = { | 321 if state in replacements: |
| 246 'transitions' : {}, | 322 target_state = replacements[state][0] |
| 247 'terminal' : state in self.__dfa.terminal_set(), | 323 deletions = replacements[state][1] |
| 248 'action' : state.action(), | 324 else: |
| 249 } | 325 deletions = {} |
| 250 states[name(state)] = new_state | 326 # this is a replacement target, so we split the state |
| 251 state_replacements = replacements[state] if state in replacements else {} | 327 if state in replacement_targets: |
| 328 assert not deletions |
| 329 (head, tail, match) = DfaOptimizer.__split_target_state(state) |
| 330 new_state = tail |
| 331 states[name(state)] = head |
| 332 states[name(state) + '_tail'] = tail |
| 333 states[name(state) + '_match'] = match |
| 334 counters['split_state'] += 1 |
| 335 else: |
| 336 new_state = self.__new_state(state in self.__dfa.terminal_set(), |
| 337 state.action()) |
| 338 states[name(state)] = new_state |
| 339 assert not TransitionKey.omega() in deletions |
| 252 for (transition_key, transition_state) in state.key_state_iter(): | 340 for (transition_key, transition_state) in state.key_state_iter(): |
| 253 if not transition_key in state_replacements: | 341 # just copy these transitions |
| 342 if transition_key != TransitionKey.omega(): |
| 343 if not transition_key in deletions: |
| 344 new_state['transitions'][transition_key] = name(transition_state) |
| 345 continue |
| 346 else: |
| 347 # drop these transitions |
| 348 deletions.remove(transition_key) |
| 349 counters['removals'] += 1 |
| 350 continue |
| 351 if transition_key in new_state['transitions']: |
| 352 # this is a split state, omega has already been assigned |
| 353 # mark old match state as orphanable |
| 354 orphanable.add(name(transition_state)) |
| 355 elif not state in replacements: |
| 356 # no replacement |
| 254 new_state['transitions'][transition_key] = name(transition_state) | 357 new_state['transitions'][transition_key] = name(transition_state) |
| 255 continue | 358 else: |
| 256 replacement = state_replacements[transition_key] | 359 # mark old omega state as orphanable |
| 257 del state_replacements[transition_key] | 360 orphanable.add(name(transition_state)) |
| 258 # just drop the transition | 361 match_state = DfaOptimizer.__apply_jump( |
| 259 if replacement == None: | 362 counters, state, new_state, target_state) |
| 260 counters['removals'] += 1 | 363 if match_state: |
| 261 continue | 364 states[name(state) + '_match'] = match_state |
| 262 assert replacement == transition_state | 365 assert not deletions |
| 263 # do goto replacement | |
| 264 new_state['action'] = replacement_action(state.action(), replacement) | |
| 265 # will need to patch up transition_state | |
| 266 store_states.add(name(transition_state)) | |
| 267 assert not state_replacements | |
| 268 self.__dfa.visit_all_states(replace_state) | 366 self.__dfa.visit_all_states(replace_state) |
| 269 # now patch up all states with stores | |
| 270 start_name = name(self.__dfa.start_state()) | 367 start_name = name(self.__dfa.start_state()) |
| 271 for state_id in store_states: | 368 states = self.__remove_orphaned_states(states, orphanable, start_name) |
| 272 old_action = states[state_id]['action'] | 369 # dump stats |
| 273 assert not old_action.entry_action() | |
| 274 assert old_action.match_action().name() == 'token', 'unimplemented' | |
| 275 entry_action = Term('store_token', old_action.match_action().args()[0]) | |
| 276 match_action = Term('do_stored_token', state_id) | |
| 277 precedence = old_action.precedence() | |
| 278 states[state_id]['action'] = Action( | |
| 279 entry_action, match_action, precedence) | |
| 280 # The state might be only reachable via gotos; make sure it's connected in | |
| 281 # the state graph by adding a bogus transition from the start state. This | |
| 282 # transition doens't match any character. | |
| 283 states[start_name]['transitions'][ | |
| 284 TransitionKey.unique('no_match')] = state_id | |
| 285 | |
| 286 if self.__log: | 370 if self.__log: |
| 287 print 'goto_start inserted %s' % counters['goto_start'] | 371 print 'goto_start inserted %s' % counters['goto_start'] |
| 288 print 'store_token_and_goto inserted %s' % ( | 372 print 'store_token inserted %s' % ( |
| 289 counters['store_token_and_goto']) | 373 counters['store_token']) |
| 290 print 'store_harmony_token_and_goto %s' % ( | 374 print 'store_harmony_token %s' % ( |
| 291 counters['store_harmony_token_and_goto']) | 375 counters['store_harmony_token']) |
| 292 print 'transitions removed %s' % counters['removals'] | 376 print 'transitions removed %s' % counters['removals'] |
| 293 print 'do_stored_token actions added %s' % len(store_states) | 377 print 'states split %s' % counters['split_state'] |
| 294 self.__dfa = Dfa(self.__dfa.encoding(), start_name, states) | 378 return Dfa(self.__dfa.encoding(), start_name, states) |
| 295 | 379 |
| 296 @staticmethod | 380 @staticmethod |
| 297 def optimize(dfa, log): | 381 def optimize(dfa, log): |
| 298 optimizer = DfaOptimizer(dfa, log) | 382 optimizer = DfaOptimizer(dfa, log) |
| 299 optimizer.replace_tokens_with_gotos() | 383 return optimizer.__replace_tokens_with_gotos() |
| 300 return optimizer.__dfa | |
| OLD | NEW |