| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 122 distinct_keys += r[1] - r[0] + 1 | 122 distinct_keys += r[1] - r[0] + 1 |
| 123 ranges += 1 | 123 ranges += 1 |
| 124 else: | 124 else: |
| 125 raise Exception() | 125 raise Exception() |
| 126 return { | 126 return { |
| 127 'node_number' : state.node_number(), | 127 'node_number' : state.node_number(), |
| 128 'original_node_number' : state.node_number(), | 128 'original_node_number' : state.node_number(), |
| 129 'transitions' : transitions, | 129 'transitions' : transitions, |
| 130 'switch_transitions' : [], | 130 'switch_transitions' : [], |
| 131 'deferred_transitions' : [], | 131 'deferred_transitions' : [], |
| 132 'long_char_transitions' : [], |
| 132 'disjoint_keys' : disjoint_keys, | 133 'disjoint_keys' : disjoint_keys, |
| 133 'inline' : None, | 134 'inline' : None, |
| 134 'depth' : None, | 135 'depth' : None, |
| 135 'action' : action, | 136 'action' : action, |
| 136 'entry_action' : entry_action, | 137 'entry_action' : entry_action, |
| 137 'match_action' : match_action, | 138 'match_action' : match_action, |
| 138 'class_keys' : class_keys, | 139 'class_keys' : class_keys, |
| 139 'distinct_keys' : distinct_keys, | 140 'distinct_keys' : distinct_keys, |
| 140 'ranges' : ranges | 141 'ranges' : ranges |
| 141 } | 142 } |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 191 if_transitions.append((i, node_id)) | 192 if_transitions.append((i, node_id)) |
| 192 if s: | 193 if s: |
| 193 switch_transitions.append((s, node_id)) | 194 switch_transitions.append((s, node_id)) |
| 194 if d: | 195 if d: |
| 195 deferred_transitions.append((d, node_id)) | 196 deferred_transitions.append((d, node_id)) |
| 196 state['transitions'] = if_transitions | 197 state['transitions'] = if_transitions |
| 197 state['switch_transitions'] = switch_transitions | 198 state['switch_transitions'] = switch_transitions |
| 198 state['deferred_transitions'] = deferred_transitions | 199 state['deferred_transitions'] = deferred_transitions |
| 199 return split_count + (0 if no_switch else 1) | 200 return split_count + (0 if no_switch else 1) |
| 200 | 201 |
| 202 __call_map = { |
| 203 'non_primary_whitespace' : 'IsWhiteSpace', |
| 204 'non_primary_letter' : 'IsLetter', |
| 205 'non_primary_identifier_part_not_letter' : 'IsIdentifierPartNotLetter', |
| 206 'non_primary_line_terminator' : 'IsLineTerminator', |
| 207 } |
| 208 |
| 209 def __rewrite_deferred_transitions(self, state): |
| 210 assert not state['long_char_transitions'] |
| 211 transitions = state['deferred_transitions'] |
| 212 if not transitions: |
| 213 return |
| 214 encoding = self.__dfa.encoding() |
| 215 bom = 'byte_order_mark' |
| 216 catch_all = 'non_primary_everything_else' |
| 217 all_classes = set(encoding.class_name_iter()) |
| 218 fast_classes = set(['eos', 'zero']) |
| 219 call_classes = all_classes - fast_classes - set([bom, catch_all]) |
| 220 def remap_transition(class_name): |
| 221 if class_name in call_classes: |
| 222 return ('LONG_CHAR_CLASS', 'call', self.__call_map[class_name]) |
| 223 if class_name == bom: |
| 224 return ('LONG_CHAR_CLASS', class_name) |
| 225 raise Exception(class_name) |
| 226 fast_transitions = [] |
| 227 long_class_transitions = [] |
| 228 long_class_map = {} |
| 229 catchall_transition = None |
| 230 # loop through and remove catch_all_transitions |
| 231 for (classes, transition_node_id) in transitions: |
| 232 ft = [] |
| 233 lct = [] |
| 234 has_catch_all = False |
| 235 for (class_type, class_name) in classes: |
| 236 if class_name in fast_classes: |
| 237 ft.append((class_type, class_name)) |
| 238 else: |
| 239 assert not class_name in long_class_map |
| 240 long_class_map[class_name] = transition_node_id |
| 241 if class_name == catch_all: |
| 242 assert not has_catch_all |
| 243 assert catchall_transition == None |
| 244 has_catch_all = True |
| 245 else: |
| 246 lct.append(remap_transition(class_name)) |
| 247 if ft: |
| 248 fast_transitions.append((ft, transition_node_id)) |
| 249 if has_catch_all: |
| 250 catchall_transition = (lct, transition_node_id) |
| 251 elif lct: |
| 252 long_class_transitions.append((lct, transition_node_id)) |
| 253 # all transitions are fast |
| 254 if not long_class_map: |
| 255 return |
| 256 if catchall_transition: |
| 257 catchall_transitions = all_classes - fast_classes |
| 258 for class_name in long_class_map.iterkeys(): |
| 259 catchall_transitions.remove(class_name) |
| 260 assert not catchall_transitions, "class inversion not unimplemented" |
| 261 # split deferred transitions |
| 262 state['deferred_transitions'] = fast_transitions |
| 263 if catchall_transition: |
| 264 catchall_transition = [ |
| 265 ([('LONG_CHAR_CLASS', 'catch_all')], catchall_transition[1])] |
| 266 else: |
| 267 catchall_transition = [] |
| 268 state['long_char_transitions'] = (long_class_transitions + |
| 269 catchall_transition) # must be last |
| 270 |
| 201 def __canonicalize_traversal(self): | 271 def __canonicalize_traversal(self): |
| 202 dfa_states = [] | 272 dfa_states = [] |
| 203 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state)) | 273 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state)) |
| 204 encoding = self.__dfa.encoding() | 274 encoding = self.__dfa.encoding() |
| 205 f = lambda state : CodeGenerator.__transform_state(encoding, state) | 275 f = lambda state : CodeGenerator.__transform_state(encoding, state) |
| 206 dfa_states = map(f, dfa_states) | 276 dfa_states = map(f, dfa_states) |
| 207 id_map = {x['original_node_number'] : x for x in dfa_states} | 277 id_map = {x['original_node_number'] : x for x in dfa_states} |
| 208 CodeGenerator.__compute_depths(self.__start_node_number, 1, id_map) | 278 CodeGenerator.__compute_depths(self.__start_node_number, 1, id_map) |
| 209 dfa_states = sorted(dfa_states, cmp=self.__state_cmp) | 279 dfa_states = sorted(dfa_states, cmp=self.__state_cmp) |
| 210 # remap all node numbers | 280 # remap all node numbers |
| (...skipping 11 matching lines...) Expand all Loading... |
| 222 if self.__inline: | 292 if self.__inline: |
| 223 inlined = reduce(self.__set_inline, dfa_states, 0) | 293 inlined = reduce(self.__set_inline, dfa_states, 0) |
| 224 if self.__log: | 294 if self.__log: |
| 225 print "%s states inlined" % inlined | 295 print "%s states inlined" % inlined |
| 226 elif self.__log: | 296 elif self.__log: |
| 227 print "no inlining" | 297 print "no inlining" |
| 228 # split transitions | 298 # split transitions |
| 229 switched = reduce(self.__split_transitions, dfa_states, 0) | 299 switched = reduce(self.__split_transitions, dfa_states, 0) |
| 230 if self.__log: | 300 if self.__log: |
| 231 print "%s states use switch (instead of if)" % switched | 301 print "%s states use switch (instead of if)" % switched |
| 302 # rewrite deferred transitions |
| 303 for state in dfa_states: |
| 304 self.__rewrite_deferred_transitions(state) |
| 232 | 305 |
| 233 def process(self): | 306 def process(self): |
| 234 | 307 |
| 235 self.__canonicalize_traversal() | 308 self.__canonicalize_traversal() |
| 236 | 309 |
| 237 dfa_states = self.__dfa_states | 310 dfa_states = self.__dfa_states |
| 238 | 311 |
| 239 default_action = self.__default_action | 312 default_action = self.__default_action |
| 240 assert(default_action and default_action.match_action()) | 313 assert(default_action and default_action.match_action()) |
| 241 default_action = default_action.match_action() | 314 default_action = default_action.match_action() |
| 242 | 315 |
| 243 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | 316 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| 244 template_env = jinja2.Environment( | 317 template_env = jinja2.Environment( |
| 245 loader = jinja2.PackageLoader('lexer_generator', '.'), | 318 loader = jinja2.PackageLoader('lexer_generator', '.'), |
| 246 undefined = jinja2.StrictUndefined) | 319 undefined = jinja2.StrictUndefined) |
| 247 template = template_env.get_template('code_generator.jinja') | 320 template = template_env.get_template('code_generator.jinja') |
| 248 | 321 |
| 249 encoding = self.__dfa.encoding() | 322 encoding = self.__dfa.encoding() |
| 250 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'} | 323 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'} |
| 251 char_type = char_types[encoding.name()] | 324 char_type = char_types[encoding.name()] |
| 252 | 325 |
| 253 return template.render( | 326 return template.render( |
| 254 start_node_number = 0, | 327 start_node_number = 0, |
| 255 debug_print = self.__debug_print, | 328 debug_print = self.__debug_print, |
| 256 default_action = default_action, | 329 default_action = default_action, |
| 257 dfa_states = dfa_states, | 330 dfa_states = dfa_states, |
| 258 encoding = encoding.name(), | 331 encoding = encoding.name(), |
| 259 char_type = char_type, | 332 char_type = char_type, |
| 260 upper_bound = encoding.primary_range()[1]) | 333 upper_bound = encoding.primary_range()[1]) |
| OLD | NEW |