tools/lexer_generator/code_generator.py - Issue 144603003: Experimental parser: better eos handling

Side by Side Diff: tools/lexer_generator/code_generator.py

Issue 144603003: Experimental parser: better eos handling (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
47 dfa = rule_processor.default_automata().dfa()	47 dfa = rule_processor.default_automata().dfa()

48 self.__dfa = dfa	48 self.__dfa = dfa

49 self.__default_action = rule_processor.default_action	49 self.__default_action = rule_processor.default_action

50 self.__debug_print = debug_print	50 self.__debug_print = debug_print

51 self.__start_node_number = self.__dfa.start_state().node_number()	51 self.__start_node_number = self.__dfa.start_state().node_number()

52 self.__log = log	52 self.__log = log

53 self.__inline = inline	53 self.__inline = inline

54 self.__switching = switching	54 self.__switching = switching

55 self.__jump_table = []	55 self.__jump_table = []

56	56

57 __jump_labels = ['state_entry', 'after_entry_code']	57 __jump_labels = ['state_entry', 'after_entry_code', 'before_match']

58	58

59 @staticmethod	59 @staticmethod

60 def __transform_state(encoding, state):	60 def __transform_state(encoding, state):

61 # action data	61 # action data

62 action = state.action()	62 action = state.action()

63 entry_action = None if not action else action.entry_action()	63 entry_action = None if not action else action.entry_action()

64 match_action = None if not action else action.match_action()	64 match_action = None if not action else action.match_action()

65 # generate ordered transitions	65 # generate ordered transitions

66 transitions = map(lambda (k, v) : (k, v.node_number()),	66 transitions = map(lambda (k, v) : (k, v.node_number()),

67 state.transitions().items())	67 state.transitions().items())

(...skipping 17 matching lines...) Expand all Loading...
85 ranges += 1	85 ranges += 1

86 keys.append((t, r))	86 keys.append((t, r))

87 elif t == 'UNIQUE':	87 elif t == 'UNIQUE':

88 assert r == 'no_match' or r == 'eos'	88 assert r == 'no_match' or r == 'eos'

89 assert r not in unique_transitions	89 assert r not in unique_transitions

90 unique_transitions[r] = transition_id	90 unique_transitions[r] = transition_id

91 else:	91 else:

92 raise Exception()	92 raise Exception()

93 if keys:	93 if keys:

94 transitions.append((keys, transition_id))	94 transitions.append((keys, transition_id))

95 # eos_transitions is for a followup cl

96 return {	95 return {

97 'node_number' : None,	96 'node_number' : None,

98 'original_node_number' : state.node_number(),	97 'original_node_number' : state.node_number(),

99 'transitions' : transitions,	98 'transitions' : transitions,

100 # flags for code generator	99 # flags for code generator

101 'can_elide_read' : len(transitions) == 0,	100 'can_elide_read' : len(transitions) == 0,

102 'is_eos_handler' : False,	101 'is_eos_handler' : False,

103 'inline' : None,	102 'inline' : None,

104 'must_not_inline' : False,	103 'must_not_inline' : False,

105 # transitions for code generator	104 # transitions for code generator

(...skipping 16 matching lines...) Expand all Loading...
122 def __register_jump(self, node_id, label):	121 def __register_jump(self, node_id, label):

123 if label != 'inline':	122 if label != 'inline':

124 assert label in CodeGenerator.__jump_labels	123 assert label in CodeGenerator.__jump_labels

125 state = self.__dfa_states[node_id]['entry_points'][label] = True	124 state = self.__dfa_states[node_id]['entry_points'][label] = True

126 self.__jump_table.append((node_id, label))	125 self.__jump_table.append((node_id, label))

127 return len(self.__jump_table) - 1	126 return len(self.__jump_table) - 1

128	127

129 def __set_inline(self, count, state):	128 def __set_inline(self, count, state):

130 assert state['inline'] == None	129 assert state['inline'] == None

131 inline = False	130 inline = False

132 # inline terminal states

133 if state['must_not_inline']:	131 if state['must_not_inline']:

134 inline = False	132 inline = False

	133 # inline terminal states

135 elif not state['transitions']:	134 elif not state['transitions']:

136 inline = True	135 inline = True

137 # inline next to terminal states with 1 or 2 transitions	136 # inline next to terminal states with 1 or 2 transitions

138 elif state['distinct_keys'] < 3 and state['class_keys'] == 0:	137 elif state['distinct_keys'] < 3 and state['class_keys'] == 0:

139 inline = True	138 inline = True

140 # ensure state terminates in 1 step	139 # ensure state terminates in 1 step

141 for key, state_id in state['transitions']:	140 for key, state_id in state['transitions']:

142 if self.__dfa_states[state_id]['transitions']:	141 if self.__dfa_states[state_id]['transitions']:

143 inline = False	142 inline = False

144 break	143 break

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
186	185

187 def __rewrite_deferred_transitions(self, state):	186 def __rewrite_deferred_transitions(self, state):

188 assert not state['long_char_transitions']	187 assert not state['long_char_transitions']

189 transitions = state['deferred_transitions']	188 transitions = state['deferred_transitions']

190 if not transitions:	189 if not transitions:

191 return	190 return

192 encoding = self.__dfa.encoding()	191 encoding = self.__dfa.encoding()

193 bom = 'byte_order_mark'	192 bom = 'byte_order_mark'

194 catch_all = 'non_primary_everything_else'	193 catch_all = 'non_primary_everything_else'

195 all_classes = set(encoding.class_name_iter())	194 all_classes = set(encoding.class_name_iter())

196 fast_classes = set(['eos', 'zero'])	195 fast_classes = set([])

197 call_classes = all_classes - fast_classes - set([bom, catch_all])	196 call_classes = all_classes - fast_classes - set([bom, catch_all])

198 def remap_transition(class_name):	197 def remap_transition(class_name):

199 if class_name in call_classes:	198 if class_name in call_classes:

200 return ('LONG_CHAR_CLASS', 'call', self.__call_map[class_name])	199 return ('LONG_CHAR_CLASS', 'call', self.__call_map[class_name])

201 if class_name == bom:	200 if class_name == bom:

202 return ('LONG_CHAR_CLASS', class_name)	201 return ('LONG_CHAR_CLASS', class_name)

203 raise Exception(class_name)	202 raise Exception(class_name)

204 fast_transitions = []	203 fast_transitions = []

205 long_class_transitions = []	204 long_class_transitions = []

206 long_class_map = {}	205 long_class_map = {}

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
251 current_node = id_map[current_node_number]	250 current_node = id_map[current_node_number]

252 if current_node['node_number'] != None:	251 if current_node['node_number'] != None:

253 return	252 return

254 current_node['node_number'] = len(dfa_states)	253 current_node['node_number'] = len(dfa_states)

255 dfa_states.append(current_node)	254 dfa_states.append(current_node)

256 for (key, node_number) in current_node['transitions']:	255 for (key, node_number) in current_node['transitions']:

257 CodeGenerator.__reorder(node_number, id_map, dfa_states)	256 CodeGenerator.__reorder(node_number, id_map, dfa_states)

258 for node_number in current_node['unique_transitions'].values():	257 for node_number in current_node['unique_transitions'].values():

259 CodeGenerator.__reorder(node_number, id_map, dfa_states)	258 CodeGenerator.__reorder(node_number, id_map, dfa_states)

260	259

	260 @staticmethod

	261 def __mark_eos_states(dfa_states, eos_states):

	262 for state_id in eos_states:

	263 state = dfa_states[state_id]

	264 state['is_eos_handler'] = True

	265 state['must_not_inline'] = True

	266 assert state['match_action']

	267 assert not state['entry_action']

	268 assert not state['transitions']

	269 assert not state['unique_transitions']

	270

261 def __build_dfa_states(self):	271 def __build_dfa_states(self):

262 dfa_states = []	272 dfa_states = []

263 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state))	273 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state))

264 encoding = self.__dfa.encoding()	274 encoding = self.__dfa.encoding()

265 f = lambda state : CodeGenerator.__transform_state(encoding, state)	275 f = lambda state : CodeGenerator.__transform_state(encoding, state)

266 dfa_states = map(f, dfa_states)	276 dfa_states = map(f, dfa_states)

267 id_map = {x['original_node_number'] : x for x in dfa_states}	277 id_map = {x['original_node_number'] : x for x in dfa_states}

268 dfa_states = []	278 dfa_states = []

269 CodeGenerator.__reorder(self.__start_node_number, id_map, dfa_states)	279 CodeGenerator.__reorder(self.__start_node_number, id_map, dfa_states)

	280 # store states

	281 eos_states = set([])

270 def f((key, original_node_number)):	282 def f((key, original_node_number)):

271 return (key, id_map[original_node_number]['node_number'])	283 return (key, id_map[original_node_number]['node_number'])

272 for state in dfa_states:	284 for state in dfa_states:

273 state['transitions'] = map(f, state['transitions'])	285 state['transitions'] = map(f, state['transitions'])

	286 state['unique_transitions'] = {k : id_map[v]['node_number']

	287 for k, v in state['unique_transitions'].items()}

	288 if 'eos' in state['unique_transitions']:

	289 eos_states.add(state['unique_transitions']['eos'])

274 assert id_map[self.__start_node_number]['node_number'] == 0	290 assert id_map[self.__start_node_number]['node_number'] == 0

275 assert len(dfa_states) == self.__dfa.node_count()	291 assert len(dfa_states) == self.__dfa.node_count()

276 # store states	292 # mark eos states

	293 self.__mark_eos_states(dfa_states, eos_states)

277 self.__dfa_states = dfa_states	294 self.__dfa_states = dfa_states

278	295

279 def __rewrite_gotos(self):	296 def __rewrite_gotos(self):

280 goto_map = {}	297 goto_map = {}

281 states = self.__dfa_states	298 states = self.__dfa_states

282 for state in states:	299 for state in states:

283 if (state['match_action'] and	300 if (state['match_action'] and

284 state['match_action'][0] == 'do_stored_token'):	301 state['match_action'][0] == 'do_stored_token'):

285 assert not state['match_action'][1] in goto_map	302 assert not state['match_action'][1] in goto_map

286 goto_map[state['match_action'][1]] = state['node_number']	303 goto_map[state['match_action'][1]] = state['node_number']

(...skipping 17 matching lines...) Expand all Loading...
304 label = 'state_entry'	321 label = 'state_entry'

305 jump_label = self.__register_jump(target_id, label)	322 jump_label = self.__register_jump(target_id, label)

306 state['match_action'] = (action, tuple(list(value[:-1]) + [jump_label]))	323 state['match_action'] = (action, tuple(list(value[:-1]) + [jump_label]))

307	324

308 def __inlined_state(self, target_id):	325 def __inlined_state(self, target_id):

309 state = deepcopy(self.__dfa_states[target_id])	326 state = deepcopy(self.__dfa_states[target_id])

310 state['node_number'] = len(self.__dfa_states)	327 state['node_number'] = len(self.__dfa_states)

311 self.__dfa_states.append(state)	328 self.__dfa_states.append(state)

312 # mark inline as none, to be correctly handled below	329 # mark inline as none, to be correctly handled below

313 state['inline'] = None	330 state['inline'] = None

314 # clear transitions, they shouldn't be used anymore

315 state['transitions'] = []

316 # clear entry points	331 # clear entry points

317 state['entry_points'] = {k : False for k in CodeGenerator.__jump_labels}	332 state['entry_points'] = {k : False for k in CodeGenerator.__jump_labels}

318 return state['node_number']	333 return state['node_number']

319	334

320 def __rewrite_transitions_to_jumps(self, start_id, count, inline_mapping_in):	335 def __rewrite_transitions_to_jumps(self, start_id, count, inline_mapping_in):

321 # order here should match the order of code generation	336 # order here should match the order of code generation

322 transition_names = [	337 transition_names = [

323 'switch_transitions',	338 'switch_transitions',

324 'if_transitions',	339 'if_transitions',

325 'long_char_transitions',	340 'long_char_transitions',

(...skipping 17 matching lines...) Expand all Loading...
343 # generate at most one inline state for all transitions	358 # generate at most one inline state for all transitions

344 if not target_id in inline_mapping:	359 if not target_id in inline_mapping:

345 inline_mapping[target_id] = self.__inlined_state(target_id)	360 inline_mapping[target_id] = self.__inlined_state(target_id)

346 jump_type = 'inline'	361 jump_type = 'inline'

347 target_id = inline_mapping[target_id]	362 target_id = inline_mapping[target_id]

348 else:	363 else:

349 assert not target_id in inline_mapping	364 assert not target_id in inline_mapping

350 return (key, self.__register_jump(target_id, jump_type))	365 return (key, self.__register_jump(target_id, jump_type))

351 for name in transition_names:	366 for name in transition_names:

352 state[name] = map(generate_jump, state[name])	367 state[name] = map(generate_jump, state[name])

	368 if 'eos' in state['unique_transitions']:

	369 # eos state is not inlined

	370 eos_state_id = state['unique_transitions']['eos']

	371 jump = self.__register_jump(eos_state_id, 'state_entry')

	372 state['unique_transitions']['eos'] = jump

	373 elif state['transitions']:

	374 jump = self.__register_jump(state_id, 'before_match')

	375 state['jump_before_match'] = jump

353 # now rewrite all nodes created	376 # now rewrite all nodes created

354 nodes_created = len(inline_mapping) - len(inline_mapping_in)	377 nodes_created = len(inline_mapping) - len(inline_mapping_in)

355 assert len(self.__dfa_states) == (	378 assert len(self.__dfa_states) == (

356 end_offset + total_nodes_created + nodes_created)	379 end_offset + total_nodes_created + nodes_created)

357 if nodes_created == 0:	380 if nodes_created == 0:

358 continue	381 continue

359 created = self.__rewrite_transitions_to_jumps(	382 created = self.__rewrite_transitions_to_jumps(

360 end_offset + total_nodes_created, nodes_created, inline_mapping)	383 end_offset + total_nodes_created, nodes_created, inline_mapping)

361 total_nodes_created += nodes_created + created	384 total_nodes_created += nodes_created + created

362 return total_nodes_created	385 return total_nodes_created

(...skipping 39 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
402	425

403 return template.render(	426 return template.render(

404 start_node_number = 0,	427 start_node_number = 0,

405 debug_print = self.__debug_print,	428 debug_print = self.__debug_print,

406 default_action = default_action,	429 default_action = default_action,

407 dfa_states = dfa_states,	430 dfa_states = dfa_states,

408 jump_table = self.__jump_table,	431 jump_table = self.__jump_table,

409 encoding = encoding.name(),	432 encoding = encoding.name(),

410 char_type = char_type,	433 char_type = char_type,

411 upper_bound = encoding.primary_range()[1])	434 upper_bound = encoding.primary_range()[1])

OLD	NEW

« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | tools/lexer_generator/code_generator_test.py » ('j') | no next file with comments »