tools/lexer_generator/code_generator.py - Issue 145583002: Experimental parser: perform inlining in python

Side by Side Diff: tools/lexer_generator/code_generator.py

Issue 145583002: Experimental parser: perform inlining in python (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 10 matching lines...) Expand all Loading...
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 import os	28 import os

29 import sys	29 import sys

30 import jinja2	30 import jinja2

	31 from copy import deepcopy

31 from dfa import Dfa	32 from dfa import Dfa

32 from transition_keys import TransitionKey	33 from transition_keys import TransitionKey

33	34

34 class CodeGenerator:	35 class CodeGenerator:

35	36

36 def __init__(self,	37 def __init__(self,

37 rule_processor,	38 rule_processor,

38 minimize_default = True,	39 minimize_default = True,

39 inline = True,	40 inline = True,

40 switching = True,	41 switching = True,

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
93 transitions.append((keys, transition_id))	94 transitions.append((keys, transition_id))

94 # eos_transitions is for a followup cl	95 # eos_transitions is for a followup cl

95 return {	96 return {

96 'node_number' : None,	97 'node_number' : None,

97 'original_node_number' : state.node_number(),	98 'original_node_number' : state.node_number(),

98 'transitions' : transitions,	99 'transitions' : transitions,

99 # flags for code generator	100 # flags for code generator

100 'can_elide_read' : len(transitions) == 0,	101 'can_elide_read' : len(transitions) == 0,

101 'is_eos_handler' : False,	102 'is_eos_handler' : False,

102 'inline' : None,	103 'inline' : None,

	104 'must_not_inline' : False,

103 # transitions for code generator	105 # transitions for code generator

104 'if_transitions' : [],	106 'if_transitions' : [],

105 'switch_transitions' : [],	107 'switch_transitions' : [],

106 'deferred_transitions' : [],	108 'deferred_transitions' : [],

107 'long_char_transitions' : [],	109 'long_char_transitions' : [],

108 'unique_transitions' : unique_transitions,	110 'unique_transitions' : unique_transitions,

109 # state actions	111 # state actions

110 'entry_action' : entry_action,	112 'entry_action' : entry_action,

111 'match_action' : match_action,	113 'match_action' : match_action,

112 # statistics for states	114 # statistics for states

113 'class_keys' : class_keys,	115 'class_keys' : class_keys,

114 'distinct_keys' : distinct_keys,	116 'distinct_keys' : distinct_keys,

115 'ranges' : ranges,	117 'ranges' : ranges,

116 # record of which entry points will be needed	118 # record of which entry points will be needed

117 'entry_points' : {k : False for k in CodeGenerator.__jump_labels}	119 'entry_points' : {k : False for k in CodeGenerator.__jump_labels}

118 }	120 }

119	121

120 def __register_jump(self, node_id, label):	122 def __register_jump(self, node_id, label):

121 assert label in CodeGenerator.__jump_labels	123 if label != 'inline':

122 state = self.__dfa_states[node_id]['entry_points'][label] = True	124 assert label in CodeGenerator.__jump_labels

	125 state = self.__dfa_states[node_id]['entry_points'][label] = True

123 self.__jump_table.append((node_id, label))	126 self.__jump_table.append((node_id, label))

124 return len(self.__jump_table) - 1	127 return len(self.__jump_table) - 1

125	128

126 def __set_inline(self, count, state):	129 def __set_inline(self, count, state):

127 assert state['inline'] == None	130 assert state['inline'] == None

128 inline = False	131 inline = False

129 # inline terminal states	132 # inline terminal states

130 if not state['transitions']:	133 if state['must_not_inline']:

	134 inline = False

	135 elif not state['transitions']:

131 inline = True	136 inline = True

132 # inline next to terminal states with 1 or 2 transitions	137 # inline next to terminal states with 1 or 2 transitions

133 elif state['distinct_keys'] < 3 and state['class_keys'] == 0:	138 elif state['distinct_keys'] < 3 and state['class_keys'] == 0:

134 inline = True	139 inline = True

135 # ensure state terminates in 1 step	140 # ensure state terminates in 1 step

136 for key, state_id in state['transitions']:	141 for key, state_id in state['transitions']:

137 if self.__dfa_states[state_id]['transitions']:	142 if self.__dfa_states[state_id]['transitions']:

138 inline = False	143 inline = False

139 break	144 break

140 state['inline'] = inline	145 state['inline'] = inline

(...skipping 142 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
283 'store_harmony_token_and_goto',	288 'store_harmony_token_and_goto',

284 'store_token_and_goto',	289 'store_token_and_goto',

285 'goto_start'])	290 'goto_start'])

286 for state in states:	291 for state in states:

287 if not state['match_action']:	292 if not state['match_action']:

288 continue	293 continue

289 action = state['match_action'][0]	294 action = state['match_action'][0]

290 if action in mapped_actions:	295 if action in mapped_actions:

291 value = state['match_action'][1]	296 value = state['match_action'][1]

292 target_id = goto_map[value[-1]]	297 target_id = goto_map[value[-1]]

	298 states[target_id]['must_not_inline'] = True

293 if action != 'goto_start':	299 if action != 'goto_start':

294 state['can_elide_read'] = False	300 state['can_elide_read'] = False

295 label = 'after_entry_code'	301 label = 'after_entry_code'

296 else:	302 else:

297 states[target_id]['can_elide_read'] = False	303 states[target_id]['can_elide_read'] = False

298 label = 'state_entry'	304 label = 'state_entry'

299 jump_label = self.__register_jump(target_id, label)	305 jump_label = self.__register_jump(target_id, label)

300 state['match_action'] = (action, tuple(list(value[:-1]) + [jump_label]))	306 state['match_action'] = (action, tuple(list(value[:-1]) + [jump_label]))

301	307

302 def __rewrite_transitions_to_jumps(self):	308 def __inlined_state(self, target_id):

	309 state = deepcopy(self.__dfa_states[target_id])

	310 state['node_number'] = len(self.__dfa_states)

	311 self.__dfa_states.append(state)

	312 # mark inline as none, to be correctly handled below

	313 state['inline'] = None

	314 # clear transitions, they shouldn't be used anymore

	315 state['transitions'] = []

	316 # clear entry points

	317 state['entry_points'] = {k : False for k in CodeGenerator.__jump_labels}

	318 return state['node_number']

	319

	320 def __rewrite_transitions_to_jumps(self, start_id, count, inline_mapping_in):

	321 # order here should match the order of code generation

303 transition_names = [	322 transition_names = [

	323 'switch_transitions',

304 'if_transitions',	324 'if_transitions',

305 'switch_transitions',	325 'long_char_transitions',

306 'deferred_transitions',	326 'deferred_transitions']

307 'long_char_transitions']	327 end_offset = start_id + count

308 def f((key, target_id)):	328 assert len(self.__dfa_states) == end_offset

309 return (key, self.__register_jump(target_id, 'state_entry'))	329 total_nodes_created = 0

310 for state in self.__dfa_states:	330 for state_id in range(start_id, end_offset):

	331 state = self.__dfa_states[state_id]

	332 # this will be ignored during code generation,

	333 # and it's needed as a template, so don't rewrite

	334 if state['inline'] == True:

	335 continue

	336 # these is a new inline state, now mark as inline and rewrite

	337 if state['inline'] == None:

	338 state['inline'] = True

	339 inline_mapping = inline_mapping_in.copy()

	340 def generate_jump((key, target_id)):

	341 jump_type = 'state_entry'

	342 if self.__dfa_states[target_id]['inline']:

	343 # generate at most one inline state for all transitions

	344 if not target_id in inline_mapping:

	345 inline_mapping[target_id] = self.__inlined_state(target_id)

	346 jump_type = 'inline'

	347 target_id = inline_mapping[target_id]

	348 else:

	349 assert not target_id in inline_mapping

	350 return (key, self.__register_jump(target_id, jump_type))

311 for name in transition_names:	351 for name in transition_names:

312 state[name] = map(f, state[name])	352 state[name] = map(generate_jump, state[name])

313	353 # now rewrite all nodes created

314 def __mark_entry_points(self):	354 nodes_created = len(inline_mapping) - len(inline_mapping_in)

315 # mark the entry point in case there are implicit jumps to it	355 assert len(self.__dfa_states) == (

316 self.__dfa_states[0]['entry_points']['state_entry'] = True	356 end_offset + total_nodes_created + nodes_created)

317 # inlined states can write no labels	357 if nodes_created == 0:

318 for state in self.__dfa_states:	358 continue

319 entry_points = state['entry_points']	359 created = self.__rewrite_transitions_to_jumps(

320 if state['inline']:	360 end_offset + total_nodes_created, nodes_created, inline_mapping)

321 for k in entry_points.keys():	361 total_nodes_created += nodes_created + created

322 entry_points[k] = False	362 return total_nodes_created

323	363

324 def process(self):	364 def process(self):

325	365

326 self.__build_dfa_states()	366 self.__build_dfa_states()

327 dfa_states = self.__dfa_states	367 dfa_states = self.__dfa_states

328 # split transitions	368 # split transitions

329 switched = reduce(self.__split_transitions, dfa_states, 0)	369 switched = reduce(self.__split_transitions, dfa_states, 0)

330 if self.__log:	370 if self.__log:

331 print "%s states use switch (instead of if)" % switched	371 print "%s states use switch (instead of if)" % switched

332 # rewrite deferred transitions	372 # rewrite deferred transitions

333 for state in dfa_states:	373 for state in dfa_states:

334 self.__rewrite_deferred_transitions(state)	374 self.__rewrite_deferred_transitions(state)

335 # rewrite gotos	375 # rewrite gotos

336 self.__rewrite_gotos()	376 self.__rewrite_gotos()

337 # rewrite transitions to use jumps

338 self.__rewrite_transitions_to_jumps()

339 # set nodes to inline	377 # set nodes to inline

340 if self.__inline:	378 if self.__inline:

341 inlined = reduce(self.__set_inline, dfa_states, 0)	379 inlined = reduce(self.__set_inline, dfa_states, 0)

342 if self.__log:	380 if self.__log:

343 print "%s states inlined" % inlined	381 print "%s states inlined" % inlined

344 elif self.__log:	382 # rewrite transitions to use jumps

345 print "no inlining"	383 inlined_nodes = self.__rewrite_transitions_to_jumps(0, len(dfa_states), {})

346 # mark all the entry points that will be used	384 if self.__log:

347 self.__mark_entry_points()	385 print "%s inlined nodes created" % inlined_nodes

	386 # mark the entry point in case there are implicit jumps to it

	387 self.__dfa_states[0]['entry_points']['state_entry'] = True

348	388

349 default_action = self.__default_action	389 default_action = self.__default_action

350 assert(default_action and default_action.match_action())	390 assert(default_action and default_action.match_action())

351 default_action = default_action.match_action()	391 default_action = default_action.match_action()

352	392

353 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))	393 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

354 template_env = jinja2.Environment(	394 template_env = jinja2.Environment(

355 loader = jinja2.PackageLoader('lexer_generator', '.'),	395 loader = jinja2.PackageLoader('lexer_generator', '.'),

356 undefined = jinja2.StrictUndefined)	396 undefined = jinja2.StrictUndefined)

357 template = template_env.get_template('code_generator.jinja')	397 template = template_env.get_template('code_generator.jinja')

358	398

359 encoding = self.__dfa.encoding()	399 encoding = self.__dfa.encoding()

360 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'}	400 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'}

361 char_type = char_types[encoding.name()]	401 char_type = char_types[encoding.name()]

362	402

363 return template.render(	403 return template.render(

364 start_node_number = 0,	404 start_node_number = 0,

365 debug_print = self.__debug_print,	405 debug_print = self.__debug_print,

366 default_action = default_action,	406 default_action = default_action,

367 dfa_states = dfa_states,	407 dfa_states = dfa_states,

368 jump_table = self.__jump_table,	408 jump_table = self.__jump_table,

369 encoding = encoding.name(),	409 encoding = encoding.name(),

370 char_type = char_type,	410 char_type = char_type,

371 upper_bound = encoding.primary_range()[1])	411 upper_bound = encoding.primary_range()[1])

OLD	NEW

« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | no next file » | no next file with comments »