| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 19 matching lines...) Expand all Loading... |
| 30 import jinja2 | 30 import jinja2 |
| 31 from dfa import Dfa | 31 from dfa import Dfa |
| 32 from transition_keys import TransitionKey | 32 from transition_keys import TransitionKey |
| 33 | 33 |
| 34 class CodeGenerator: | 34 class CodeGenerator: |
| 35 | 35 |
| 36 def __init__(self, | 36 def __init__(self, |
| 37 rule_processor, | 37 rule_processor, |
| 38 minimize_default = True, | 38 minimize_default = True, |
| 39 inline = True, | 39 inline = True, |
| 40 switching = True, |
| 40 debug_print = False, | 41 debug_print = False, |
| 41 log = False): | 42 log = False): |
| 42 if minimize_default: | 43 if minimize_default: |
| 43 dfa = rule_processor.default_automata().minimal_dfa() | 44 dfa = rule_processor.default_automata().minimal_dfa() |
| 44 else: | 45 else: |
| 45 dfa = rule_processor.default_automata().dfa() | 46 dfa = rule_processor.default_automata().dfa() |
| 46 self.__dfa = dfa | 47 self.__dfa = dfa |
| 47 self.__default_action = rule_processor.default_action | 48 self.__default_action = rule_processor.default_action |
| 48 self.__debug_print = debug_print | 49 self.__debug_print = debug_print |
| 49 self.__start_node_number = self.__dfa.start_state().node_number() | 50 self.__start_node_number = self.__dfa.start_state().node_number() |
| 50 self.__log = log | 51 self.__log = log |
| 51 self.__inline = inline | 52 self.__inline = inline |
| 53 self.__switching = switching |
| 52 | 54 |
| 53 def __state_cmp(self, left, right): | 55 def __state_cmp(self, left, right): |
| 54 if left['original_node_number'] == self.__start_node_number: | 56 if left['original_node_number'] == self.__start_node_number: |
| 55 return -1 | 57 return -1 |
| 56 if right['original_node_number'] == self.__start_node_number: | 58 if right['original_node_number'] == self.__start_node_number: |
| 57 return 1 | 59 return 1 |
| 58 c = cmp(len(left['disjoint_keys']), len(right['disjoint_keys'])) | 60 c = cmp(len(left['disjoint_keys']), len(right['disjoint_keys'])) |
| 59 if c: | 61 if c: |
| 60 return c | 62 return c |
| 61 c = cmp(left['disjoint_keys'], right['disjoint_keys']) | 63 c = cmp(left['disjoint_keys'], right['disjoint_keys']) |
| (...skipping 28 matching lines...) Expand all Loading... |
| 90 return 1 | 92 return 1 |
| 91 # TODO store numeric values and cmp | 93 # TODO store numeric values and cmp |
| 92 return cmp(left[1], right[1]) | 94 return cmp(left[1], right[1]) |
| 93 | 95 |
| 94 @staticmethod | 96 @staticmethod |
| 95 def __transform_state(state): | 97 def __transform_state(state): |
| 96 # action data | 98 # action data |
| 97 action = state.action() | 99 action = state.action() |
| 98 entry_action = None if not action else action.entry_action() | 100 entry_action = None if not action else action.entry_action() |
| 99 match_action = None if not action else action.match_action() | 101 match_action = None if not action else action.match_action() |
| 100 # compute disjoint ranges | |
| 101 keys = TransitionKey.disjoint_keys(list(state.key_iter())) | |
| 102 def f(key): | |
| 103 ranges = list(key.range_iter()) | |
| 104 assert len(ranges) == 1 | |
| 105 return ranges[0] | |
| 106 keys = sorted(map(f, keys), CodeGenerator.__range_cmp) | |
| 107 # generate ordered transitions | 102 # generate ordered transitions |
| 108 transitions = map(lambda (k, v) : (k, v.node_number()), | 103 transitions = map(lambda (k, v) : (k, v.node_number()), |
| 109 state.transitions().items()) | 104 state.transitions().items()) |
| 110 def cmp(left, right): | 105 def cmp(left, right): |
| 111 return TransitionKey.canonical_compare(left[0], right[0]) | 106 return TransitionKey.canonical_compare(left[0], right[0]) |
| 112 transitions = sorted(transitions, cmp) | 107 transitions = sorted(transitions, cmp) |
| 108 # map transition keys to disjoint ranges |
| 109 disjoint_keys = {'value' : []} |
| 110 def f((key, state)): |
| 111 ranges = list(key.range_iter()) |
| 112 disjoint_keys['value'] += ranges |
| 113 return (ranges, state) |
| 114 transitions = map(f, transitions) |
| 115 disjoint_keys = sorted(disjoint_keys['value'], CodeGenerator.__range_cmp) |
| 113 # dictionary object representing state | 116 # dictionary object representing state |
| 114 return { | 117 return { |
| 115 'node_number' : state.node_number(), | 118 'node_number' : state.node_number(), |
| 116 'original_node_number' : state.node_number(), | 119 'original_node_number' : state.node_number(), |
| 117 'transitions' : transitions, | 120 'transitions' : transitions, |
| 118 'disjoint_keys' : keys, | 121 'switch_transitions' : [], |
| 122 'disjoint_keys' : disjoint_keys, |
| 119 'inline' : None, | 123 'inline' : None, |
| 120 'depth' : None, | 124 'depth' : None, |
| 121 'action' : action, | 125 'action' : action, |
| 122 'entry_action' : entry_action, | 126 'entry_action' : entry_action, |
| 123 'match_action' : match_action, | 127 'match_action' : match_action, |
| 124 } | 128 } |
| 125 | 129 |
| 126 @staticmethod | 130 @staticmethod |
| 127 def __compute_depths(node_number, depth, id_map): | 131 def __compute_depths(node_number, depth, id_map): |
| 128 state = id_map[node_number] | 132 state = id_map[node_number] |
| 129 if state['depth'] != None: | 133 if state['depth'] != None: |
| 130 return | 134 return |
| 131 state['depth'] = depth | 135 state['depth'] = depth |
| 132 for (k, transition_node) in state['transitions']: | 136 for (k, transition_node) in state['transitions']: |
| 133 CodeGenerator.__compute_depths(transition_node, depth + 1, id_map) | 137 CodeGenerator.__compute_depths(transition_node, depth + 1, id_map) |
| 134 | 138 |
| 135 @staticmethod | 139 @staticmethod |
| 136 def __set_inline(count, state): | 140 def __set_inline(count, state): |
| 137 assert state['inline'] == None | 141 assert state['inline'] == None |
| 138 inline = False | 142 inline = False |
| 139 if not state['transitions']: | 143 if not state['transitions']: |
| 140 inline = True | 144 inline = True |
| 145 # TODO add in 1 or 2 element if blocks |
| 141 state['inline'] = inline | 146 state['inline'] = inline |
| 142 return count + 1 if inline else count | 147 return count + 1 if inline else count |
| 143 | 148 |
| 149 @staticmethod |
| 150 def __split_transitions(split_count, state): |
| 151 assert not state['switch_transitions'] |
| 152 (class_keys, distinct_keys, ranges) = (0, 0, 0) |
| 153 for (t, r) in state['disjoint_keys']: |
| 154 if t == 'CLASS': |
| 155 class_keys += 1 |
| 156 elif t == 'LATIN_1': |
| 157 distinct_keys += r[1] - r[0] + 1 |
| 158 ranges += 1 |
| 159 else: |
| 160 raise Exception() |
| 161 if distinct_keys <= 7 or float(distinct_keys)/float(ranges) >= 7.0: |
| 162 return split_count |
| 163 switch_transitions = [] |
| 164 if_transitions = [] |
| 165 for (ranges, node_id) in state['transitions']: |
| 166 i = [] |
| 167 for r in ranges: |
| 168 if r[0] == 'CLASS': |
| 169 i.append(r) |
| 170 else: |
| 171 for x in range(r[1][0], r[1][1] + 1): |
| 172 switch_transitions.append((x, node_id)) |
| 173 if i: |
| 174 if_transitions.append((i, node_id)) |
| 175 state['transitions'] = if_transitions |
| 176 state['switch_transitions'] = switch_transitions |
| 177 return split_count + 1 |
| 178 |
| 144 def __canonicalize_traversal(self): | 179 def __canonicalize_traversal(self): |
| 145 dfa_states = [] | 180 dfa_states = [] |
| 146 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state)) | 181 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state)) |
| 147 dfa_states = map(CodeGenerator.__transform_state, dfa_states) | 182 dfa_states = map(CodeGenerator.__transform_state, dfa_states) |
| 148 id_map = {x['original_node_number'] : x for x in dfa_states} | 183 id_map = {x['original_node_number'] : x for x in dfa_states} |
| 149 CodeGenerator.__compute_depths(self.__start_node_number, 1, id_map) | 184 CodeGenerator.__compute_depths(self.__start_node_number, 1, id_map) |
| 150 dfa_states = sorted(dfa_states, cmp=self.__state_cmp) | 185 dfa_states = sorted(dfa_states, cmp=self.__state_cmp) |
| 151 # remap all node numbers | 186 # remap all node numbers |
| 152 for i, state in enumerate(dfa_states): | 187 for i, state in enumerate(dfa_states): |
| 153 state['node_number'] = i | 188 state['node_number'] = i |
| 154 def f((key, original_node_number)): | 189 def f((key, original_node_number)): |
| 155 return (key, id_map[original_node_number]['node_number']) | 190 return (key, id_map[original_node_number]['node_number']) |
| 156 for state in dfa_states: | 191 for state in dfa_states: |
| 157 state['transitions'] = map(f, state['transitions']) | 192 state['transitions'] = map(f, state['transitions']) |
| 158 assert id_map[self.__start_node_number]['node_number'] == 0 | 193 assert id_map[self.__start_node_number]['node_number'] == 0 |
| 159 assert len(dfa_states) == self.__dfa.node_count() | 194 assert len(dfa_states) == self.__dfa.node_count() |
| 160 # set nodes to inline | 195 # set nodes to inline |
| 161 if self.__inline: | 196 if self.__inline: |
| 162 inlined = reduce(CodeGenerator.__set_inline, dfa_states, 0) | 197 inlined = reduce(CodeGenerator.__set_inline, dfa_states, 0) |
| 163 if self.__log: | 198 if self.__log: |
| 164 print "inlined %s" % inlined | 199 print "inlined %s" % inlined |
| 165 elif self.__log: | 200 elif self.__log: |
| 166 print "no inlining" | 201 print "no inlining" |
| 202 # split transitions |
| 203 if self.__switching: |
| 204 switched = reduce(CodeGenerator.__split_transitions, dfa_states, 0) |
| 205 if self.__log: |
| 206 print "switched states %s" % inlined |
| 207 elif self.__log: |
| 208 print "no switching" |
| 209 # store states |
| 167 self.__dfa_states = dfa_states | 210 self.__dfa_states = dfa_states |
| 168 | 211 |
| 169 def process(self): | 212 def process(self): |
| 170 | 213 |
| 171 self.__canonicalize_traversal() | 214 self.__canonicalize_traversal() |
| 172 | 215 |
| 173 dfa_states = self.__dfa_states | 216 dfa_states = self.__dfa_states |
| 174 | 217 |
| 175 default_action = self.__default_action | 218 default_action = self.__default_action |
| 176 assert(default_action and default_action.match_action()) | 219 assert(default_action and default_action.match_action()) |
| 177 default_action = default_action.match_action() | 220 default_action = default_action.match_action() |
| 178 | 221 |
| 179 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | 222 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| 180 template_env = jinja2.Environment( | 223 template_env = jinja2.Environment( |
| 181 loader = jinja2.PackageLoader('lexer_generator', '.'), | 224 loader = jinja2.PackageLoader('lexer_generator', '.'), |
| 182 undefined = jinja2.StrictUndefined) | 225 undefined = jinja2.StrictUndefined) |
| 183 template = template_env.get_template('code_generator.jinja') | 226 template = template_env.get_template('code_generator.jinja') |
| 184 | 227 |
| 185 return template.render( | 228 return template.render( |
| 186 start_node_number = 0, | 229 start_node_number = 0, |
| 187 debug_print = self.__debug_print, | 230 debug_print = self.__debug_print, |
| 188 default_action = default_action, | 231 default_action = default_action, |
| 189 dfa_states = dfa_states) | 232 dfa_states = dfa_states) |
| OLD | NEW |