| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 10 matching lines...) Expand all Loading... |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import os | 28 import os |
| 29 import sys | 29 import sys |
| 30 import jinja2 | 30 import jinja2 |
| 31 from copy import deepcopy |
| 31 from dfa import Dfa | 32 from dfa import Dfa |
| 32 from transition_keys import TransitionKey | 33 from transition_keys import TransitionKey |
| 33 | 34 |
| 34 class CodeGenerator: | 35 class CodeGenerator: |
| 35 | 36 |
| 36 def __init__(self, | 37 def __init__(self, |
| 37 rule_processor, | 38 rule_processor, |
| 38 minimize_default = True, | 39 minimize_default = True, |
| 39 inline = True, | 40 inline = True, |
| 40 switching = True, | 41 switching = True, |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 93 transitions.append((keys, transition_id)) | 94 transitions.append((keys, transition_id)) |
| 94 # eos_transitions is for a followup cl | 95 # eos_transitions is for a followup cl |
| 95 return { | 96 return { |
| 96 'node_number' : None, | 97 'node_number' : None, |
| 97 'original_node_number' : state.node_number(), | 98 'original_node_number' : state.node_number(), |
| 98 'transitions' : transitions, | 99 'transitions' : transitions, |
| 99 # flags for code generator | 100 # flags for code generator |
| 100 'can_elide_read' : len(transitions) == 0, | 101 'can_elide_read' : len(transitions) == 0, |
| 101 'is_eos_handler' : False, | 102 'is_eos_handler' : False, |
| 102 'inline' : None, | 103 'inline' : None, |
| 104 'must_not_inline' : False, |
| 103 # transitions for code generator | 105 # transitions for code generator |
| 104 'if_transitions' : [], | 106 'if_transitions' : [], |
| 105 'switch_transitions' : [], | 107 'switch_transitions' : [], |
| 106 'deferred_transitions' : [], | 108 'deferred_transitions' : [], |
| 107 'long_char_transitions' : [], | 109 'long_char_transitions' : [], |
| 108 'unique_transitions' : unique_transitions, | 110 'unique_transitions' : unique_transitions, |
| 109 # state actions | 111 # state actions |
| 110 'entry_action' : entry_action, | 112 'entry_action' : entry_action, |
| 111 'match_action' : match_action, | 113 'match_action' : match_action, |
| 112 # statistics for states | 114 # statistics for states |
| 113 'class_keys' : class_keys, | 115 'class_keys' : class_keys, |
| 114 'distinct_keys' : distinct_keys, | 116 'distinct_keys' : distinct_keys, |
| 115 'ranges' : ranges, | 117 'ranges' : ranges, |
| 116 # record of which entry points will be needed | 118 # record of which entry points will be needed |
| 117 'entry_points' : {k : False for k in CodeGenerator.__jump_labels} | 119 'entry_points' : {k : False for k in CodeGenerator.__jump_labels} |
| 118 } | 120 } |
| 119 | 121 |
| 120 def __register_jump(self, node_id, label): | 122 def __register_jump(self, node_id, label): |
| 121 assert label in CodeGenerator.__jump_labels | 123 if label != 'inline': |
| 122 state = self.__dfa_states[node_id]['entry_points'][label] = True | 124 assert label in CodeGenerator.__jump_labels |
| 125 state = self.__dfa_states[node_id]['entry_points'][label] = True |
| 123 self.__jump_table.append((node_id, label)) | 126 self.__jump_table.append((node_id, label)) |
| 124 return len(self.__jump_table) - 1 | 127 return len(self.__jump_table) - 1 |
| 125 | 128 |
| 126 def __set_inline(self, count, state): | 129 def __set_inline(self, count, state): |
| 127 assert state['inline'] == None | 130 assert state['inline'] == None |
| 128 inline = False | 131 inline = False |
| 129 # inline terminal states | 132 # inline terminal states |
| 130 if not state['transitions']: | 133 if state['must_not_inline']: |
| 134 inline = False |
| 135 elif not state['transitions']: |
| 131 inline = True | 136 inline = True |
| 132 # inline next to terminal states with 1 or 2 transitions | 137 # inline next to terminal states with 1 or 2 transitions |
| 133 elif state['distinct_keys'] < 3 and state['class_keys'] == 0: | 138 elif state['distinct_keys'] < 3 and state['class_keys'] == 0: |
| 134 inline = True | 139 inline = True |
| 135 # ensure state terminates in 1 step | 140 # ensure state terminates in 1 step |
| 136 for key, state_id in state['transitions']: | 141 for key, state_id in state['transitions']: |
| 137 if self.__dfa_states[state_id]['transitions']: | 142 if self.__dfa_states[state_id]['transitions']: |
| 138 inline = False | 143 inline = False |
| 139 break | 144 break |
| 140 state['inline'] = inline | 145 state['inline'] = inline |
| (...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 283 'store_harmony_token_and_goto', | 288 'store_harmony_token_and_goto', |
| 284 'store_token_and_goto', | 289 'store_token_and_goto', |
| 285 'goto_start']) | 290 'goto_start']) |
| 286 for state in states: | 291 for state in states: |
| 287 if not state['match_action']: | 292 if not state['match_action']: |
| 288 continue | 293 continue |
| 289 action = state['match_action'][0] | 294 action = state['match_action'][0] |
| 290 if action in mapped_actions: | 295 if action in mapped_actions: |
| 291 value = state['match_action'][1] | 296 value = state['match_action'][1] |
| 292 target_id = goto_map[value[-1]] | 297 target_id = goto_map[value[-1]] |
| 298 states[target_id]['must_not_inline'] = True |
| 293 if action != 'goto_start': | 299 if action != 'goto_start': |
| 294 state['can_elide_read'] = False | 300 state['can_elide_read'] = False |
| 295 label = 'after_entry_code' | 301 label = 'after_entry_code' |
| 296 else: | 302 else: |
| 297 states[target_id]['can_elide_read'] = False | 303 states[target_id]['can_elide_read'] = False |
| 298 label = 'state_entry' | 304 label = 'state_entry' |
| 299 jump_label = self.__register_jump(target_id, label) | 305 jump_label = self.__register_jump(target_id, label) |
| 300 state['match_action'] = (action, tuple(list(value[:-1]) + [jump_label])) | 306 state['match_action'] = (action, tuple(list(value[:-1]) + [jump_label])) |
| 301 | 307 |
| 302 def __rewrite_transitions_to_jumps(self): | 308 def __inlined_state(self, target_id): |
| 309 state = deepcopy(self.__dfa_states[target_id]) |
| 310 state['node_number'] = len(self.__dfa_states) |
| 311 self.__dfa_states.append(state) |
| 312 # mark inline as none, to be correctly handled below |
| 313 state['inline'] = None |
| 314 # clear transitions, they shouldn't be used anymore |
| 315 state['transitions'] = [] |
| 316 # clear entry points |
| 317 state['entry_points'] = {k : False for k in CodeGenerator.__jump_labels} |
| 318 return state['node_number'] |
| 319 |
| 320 def __rewrite_transitions_to_jumps(self, start_id, count, inline_mapping_in): |
| 321 # order here should match the order of code generation |
| 303 transition_names = [ | 322 transition_names = [ |
| 323 'switch_transitions', |
| 304 'if_transitions', | 324 'if_transitions', |
| 305 'switch_transitions', | 325 'long_char_transitions', |
| 306 'deferred_transitions', | 326 'deferred_transitions'] |
| 307 'long_char_transitions'] | 327 end_offset = start_id + count |
| 308 def f((key, target_id)): | 328 assert len(self.__dfa_states) == end_offset |
| 309 return (key, self.__register_jump(target_id, 'state_entry')) | 329 total_nodes_created = 0 |
| 310 for state in self.__dfa_states: | 330 for state_id in range(start_id, end_offset): |
| 331 state = self.__dfa_states[state_id] |
| 332 # this will be ignored during code generation, |
| 333 # and it's needed as a template, so don't rewrite |
| 334 if state['inline'] == True: |
| 335 continue |
| 336 # these is a new inline state, now mark as inline and rewrite |
| 337 if state['inline'] == None: |
| 338 state['inline'] = True |
| 339 inline_mapping = inline_mapping_in.copy() |
| 340 def generate_jump((key, target_id)): |
| 341 jump_type = 'state_entry' |
| 342 if self.__dfa_states[target_id]['inline']: |
| 343 # generate at most one inline state for all transitions |
| 344 if not target_id in inline_mapping: |
| 345 inline_mapping[target_id] = self.__inlined_state(target_id) |
| 346 jump_type = 'inline' |
| 347 target_id = inline_mapping[target_id] |
| 348 else: |
| 349 assert not target_id in inline_mapping |
| 350 return (key, self.__register_jump(target_id, jump_type)) |
| 311 for name in transition_names: | 351 for name in transition_names: |
| 312 state[name] = map(f, state[name]) | 352 state[name] = map(generate_jump, state[name]) |
| 313 | 353 # now rewrite all nodes created |
| 314 def __mark_entry_points(self): | 354 nodes_created = len(inline_mapping) - len(inline_mapping_in) |
| 315 # mark the entry point in case there are implicit jumps to it | 355 assert len(self.__dfa_states) == ( |
| 316 self.__dfa_states[0]['entry_points']['state_entry'] = True | 356 end_offset + total_nodes_created + nodes_created) |
| 317 # inlined states can write no labels | 357 if nodes_created == 0: |
| 318 for state in self.__dfa_states: | 358 continue |
| 319 entry_points = state['entry_points'] | 359 created = self.__rewrite_transitions_to_jumps( |
| 320 if state['inline']: | 360 end_offset + total_nodes_created, nodes_created, inline_mapping) |
| 321 for k in entry_points.keys(): | 361 total_nodes_created += nodes_created + created |
| 322 entry_points[k] = False | 362 return total_nodes_created |
| 323 | 363 |
| 324 def process(self): | 364 def process(self): |
| 325 | 365 |
| 326 self.__build_dfa_states() | 366 self.__build_dfa_states() |
| 327 dfa_states = self.__dfa_states | 367 dfa_states = self.__dfa_states |
| 328 # split transitions | 368 # split transitions |
| 329 switched = reduce(self.__split_transitions, dfa_states, 0) | 369 switched = reduce(self.__split_transitions, dfa_states, 0) |
| 330 if self.__log: | 370 if self.__log: |
| 331 print "%s states use switch (instead of if)" % switched | 371 print "%s states use switch (instead of if)" % switched |
| 332 # rewrite deferred transitions | 372 # rewrite deferred transitions |
| 333 for state in dfa_states: | 373 for state in dfa_states: |
| 334 self.__rewrite_deferred_transitions(state) | 374 self.__rewrite_deferred_transitions(state) |
| 335 # rewrite gotos | 375 # rewrite gotos |
| 336 self.__rewrite_gotos() | 376 self.__rewrite_gotos() |
| 337 # rewrite transitions to use jumps | |
| 338 self.__rewrite_transitions_to_jumps() | |
| 339 # set nodes to inline | 377 # set nodes to inline |
| 340 if self.__inline: | 378 if self.__inline: |
| 341 inlined = reduce(self.__set_inline, dfa_states, 0) | 379 inlined = reduce(self.__set_inline, dfa_states, 0) |
| 342 if self.__log: | 380 if self.__log: |
| 343 print "%s states inlined" % inlined | 381 print "%s states inlined" % inlined |
| 344 elif self.__log: | 382 # rewrite transitions to use jumps |
| 345 print "no inlining" | 383 inlined_nodes = self.__rewrite_transitions_to_jumps(0, len(dfa_states), {}) |
| 346 # mark all the entry points that will be used | 384 if self.__log: |
| 347 self.__mark_entry_points() | 385 print "%s inlined nodes created" % inlined_nodes |
| 386 # mark the entry point in case there are implicit jumps to it |
| 387 self.__dfa_states[0]['entry_points']['state_entry'] = True |
| 348 | 388 |
| 349 default_action = self.__default_action | 389 default_action = self.__default_action |
| 350 assert(default_action and default_action.match_action()) | 390 assert(default_action and default_action.match_action()) |
| 351 default_action = default_action.match_action() | 391 default_action = default_action.match_action() |
| 352 | 392 |
| 353 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | 393 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| 354 template_env = jinja2.Environment( | 394 template_env = jinja2.Environment( |
| 355 loader = jinja2.PackageLoader('lexer_generator', '.'), | 395 loader = jinja2.PackageLoader('lexer_generator', '.'), |
| 356 undefined = jinja2.StrictUndefined) | 396 undefined = jinja2.StrictUndefined) |
| 357 template = template_env.get_template('code_generator.jinja') | 397 template = template_env.get_template('code_generator.jinja') |
| 358 | 398 |
| 359 encoding = self.__dfa.encoding() | 399 encoding = self.__dfa.encoding() |
| 360 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'} | 400 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'} |
| 361 char_type = char_types[encoding.name()] | 401 char_type = char_types[encoding.name()] |
| 362 | 402 |
| 363 return template.render( | 403 return template.render( |
| 364 start_node_number = 0, | 404 start_node_number = 0, |
| 365 debug_print = self.__debug_print, | 405 debug_print = self.__debug_print, |
| 366 default_action = default_action, | 406 default_action = default_action, |
| 367 dfa_states = dfa_states, | 407 dfa_states = dfa_states, |
| 368 jump_table = self.__jump_table, | 408 jump_table = self.__jump_table, |
| 369 encoding = encoding.name(), | 409 encoding = encoding.name(), |
| 370 char_type = char_type, | 410 char_type = char_type, |
| 371 upper_bound = encoding.primary_range()[1]) | 411 upper_bound = encoding.primary_range()[1]) |
| OLD | NEW |