tools/lexer_generator/code_generator.py - Issue 160443002: Experimental parser: remove match actions

Side by Side Diff: tools/lexer_generator/code_generator.py

Issue 160443002: Experimental parser: remove match actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
42 switching = True,	42 switching = True,

43 debug_print = False,	43 debug_print = False,

44 log = False):	44 log = False):

45 if minimize_default:	45 if minimize_default:

46 dfa = rule_processor.default_automata().minimal_dfa()	46 dfa = rule_processor.default_automata().minimal_dfa()

47 else:	47 else:

48 dfa = rule_processor.default_automata().dfa()	48 dfa = rule_processor.default_automata().dfa()

49 self.__dfa = dfa	49 self.__dfa = dfa

50 self.__default_action = rule_processor.default_action()	50 self.__default_action = rule_processor.default_action()

51 self.__debug_print = debug_print	51 self.__debug_print = debug_print

52 self.__start_node_number = self.__dfa.start_state().node_number()

53 self.__log = log	52 self.__log = log

54 self.__inline = inline	53 self.__inline = inline

55 self.__switching = switching	54 self.__switching = switching

56 self.__jump_table = []	55 self.__jump_table = []

57	56

58 __jump_labels = ['state_entry', 'after_entry_code']	57 __jump_labels = ['state_entry', 'after_entry_code']

59	58

60 @staticmethod	59 @staticmethod

61 def __transform_state(encoding, state):	60 def __transform_state(encoding, state):

62 # action data	61 # action data

63 action = state.action()

64 entry_action = action.entry_action()

65 match_action = action.match_action()

66 # generate ordered transitions	62 # generate ordered transitions

67 transitions = map(lambda (k, v) : (k, v.node_number()),	63 transitions = map(lambda (k, v) : (k, v.node_number()),

68 state.key_state_iter())	64 state.key_state_iter())

69 transitions = sorted(	65 transitions = sorted(

70 transitions, cmp = TransitionKey.compare, key = lambda x : x[0])	66 transitions, cmp = TransitionKey.compare, key = lambda x : x[0])

71 # map transition keys to disjoint ranges and collect stats	67 # map transition keys to disjoint ranges and collect stats

72 disjoint_keys = []	68 disjoint_keys = []

73 unique_transitions = {}	69 unique_transitions = {}

74 old_transitions = transitions	70 old_transitions = transitions

75 transitions = []	71 transitions = []

76 (class_keys, distinct_keys, ranges) = (0, 0, 0)	72 (class_keys, distinct_keys, ranges) = (0, 0, 0)

77 zero_transition = None	73 zero_transition = None

	74 omega_transition = None

78 total_transitions = 0	75 total_transitions = 0

79 for key, transition_id in old_transitions:	76 for key, transition_id in old_transitions:

80 keys = []	77 keys = []

81 for (t, r) in key.range_iter(encoding):	78 for (t, r) in key.range_iter(encoding):

82 if t == 'CLASS':	79 if t == 'CLASS':

83 class_keys += 1	80 class_keys += 1

84 keys.append((t, r))	81 keys.append((t, r))

85 elif t == 'PRIMARY_RANGE':	82 elif t == 'PRIMARY_RANGE':

86 distinct_keys += r[1] - r[0] + 1	83 distinct_keys += r[1] - r[0] + 1

87 ranges += 1	84 ranges += 1

88 # split 0 out of range	85 # split 0 out of range

89 assert r[0] >= 0	86 assert r[0] >= 0

90 if r[0] == 0:	87 if r[0] == 0:

91 assert zero_transition == None	88 assert zero_transition == None

92 zero_transition = transition_id	89 zero_transition = transition_id

93 if r[0] == r[1]:	90 if r[0] == r[1]:

94 continue	91 continue

95 r = (r[0] + 1, r[1])	92 r = (r[0] + 1, r[1])

96 keys.append((t, r))	93 keys.append((t, r))

97 elif t == 'UNIQUE':	94 elif t == 'UNIQUE':

98 assert r == 'no_match' or r == 'eos'	95 assert r == 'eos'

99 assert r not in unique_transitions	96 assert r not in unique_transitions

100 unique_transitions[r] = transition_id	97 unique_transitions[r] = transition_id

101 if r != 'no_match':	98 if r != 'no_match':

102 total_transitions += 1	99 total_transitions += 1

	100 elif t == 'OMEGA':

	101 assert not omega_transition

	102 omega_transition = transition_id

	103 assert transition_id # TODO(dcarney): fix

	104 total_transitions += 1

103 else:	105 else:

104 raise Exception()	106 raise Exception()

105 if keys:	107 if keys:

106 transitions.append((keys, transition_id))	108 transitions.append((keys, transition_id))

107 # delay zero transition until last	109 # delay zero transition until after all encoded keys

108 if zero_transition != None:	110 if zero_transition != None:

109 transitions.append(([('PRIMARY_RANGE', (0, 0))], transition_id))	111 transitions.append(([('PRIMARY_RANGE', (0, 0))], transition_id))

110 ranges += 1	112 ranges += 1

111 total_transitions += len(transitions)	113 total_transitions += len(transitions)

112 return {	114 return {

113 'node_number' : None,	115 'node_number' : None,

114 'original_node_number' : state.node_number(),	116 'original_node_number' : state.node_number(),

115 'transitions' : transitions,	117 'transitions' : transitions,

116 # flags for code generator	118 # flags for code generator

117 'can_elide_read' : total_transitions == 0,	119 'elide_read' : (total_transitions == 0 or

	120 (total_transitions == 1 and omega_transition)),

118 'is_eos_handler' : False,	121 'is_eos_handler' : False,

119 'inline' : False,	122 'inline' : False,

120 'must_not_inline' : False,	123 'must_not_inline' : False,

121 # transitions for code generator	124 # transitions for code generator

122 'if_transitions' : [],	125 'if_transitions' : [],

123 'switch_transitions' : [],	126 'switch_transitions' : [],

124 'deferred_transitions' : [],	127 'deferred_transitions' : [],

125 'unique_transitions' : unique_transitions,	128 'unique_transitions' : unique_transitions,

	129 'omega_transition' : omega_transition,

126 # state actions	130 # state actions

127 'entry_action' : entry_action,	131 'action' : state.action(),

128 'match_action' : match_action,

129 # statistics for state	132 # statistics for state

130 'total_transitions' : total_transitions,	133 'total_transitions' : total_transitions,

131 'class_keys' : class_keys,	134 'class_keys' : class_keys,

132 'distinct_keys' : distinct_keys,	135 'distinct_keys' : distinct_keys,

133 'ranges' : ranges,	136 'ranges' : ranges,

134 # record of which entry points will be needed	137 # record of which entry points will be needed

135 'entry_points' : {k : False for k in CodeGenerator.__jump_labels}	138 'entry_points' : {k : False for k in CodeGenerator.__jump_labels}

136 }	139 }

137	140

138 def __register_jump(self, node_id, label):	141 def __register_jump(self, node_id, label):

139 if label != 'inline':	142 if label != 'inline':

140 assert label in CodeGenerator.__jump_labels	143 assert label in CodeGenerator.__jump_labels

141 state = self.__dfa_states[node_id]['entry_points'][label] = True	144 state = self.__dfa_states[node_id]['entry_points'][label] = True

142 self.__jump_table.append((node_id, label))	145 self.__jump_table.append((node_id, label))

143 return len(self.__jump_table) - 1	146 return len(self.__jump_table) - 1

144	147

	148 def __terminates_immediately(self, state_id):

	149 transition_count = self.__dfa_states[state_id]['total_transitions']

	150 if transition_count == 0:

	151 return True

	152 omega_transition_id = self.__dfa_states[state_id]['omega_transition']

	153 if transition_count == 1 and omega_transition_id:

	154 return self.__terminates_immediately(omega_transition_id)

	155 return False

	156

145 def __set_inline(self, count, state):	157 def __set_inline(self, count, state):

146 inline = False	158 inline = False

147 if state['must_not_inline']:	159 if state['must_not_inline']:

148 inline = False	160 inline = False

149 # inline terminal states	161 # inline terminal states

150 elif not state['total_transitions']:	162 elif self.__terminates_immediately(state['node_number']):

151 inline = True	163 inline = True

152 # inline next to terminal states with 1 or 2 transitions	164 # inline next to terminal states with 1 or 2 transitions

153 elif state['distinct_keys'] < 3 and state['class_keys'] == 0:	165 elif state['distinct_keys'] < 3 and state['class_keys'] == 0:

154 inline = True	166 inline = True

155 # ensure state terminates in 1 step	167 # ensure state terminates in 1 step, excluding omega transitions

156 for key, state_id in state['transitions']:	168 for key, state_id in state['transitions']:

157 if self.__dfa_states[state_id]['total_transitions']:	169 if not self.__terminates_immediately(state_id):

158 inline = False	170 inline = False

159 break	171 break

160 state['inline'] = inline	172 state['inline'] = inline

161 return count + 1 if inline else count	173 return count + 1 if inline else count

162	174

163 def __split_transitions(self, split_count, state):	175 def __split_transitions(self, split_count, state):

164 '''Goes through the transitions for 'state' and decides which of them should	176 '''Goes through the transitions for 'state' and decides which of them should

165 use the if statement and which should use the switch statement.'''	177 use the if statement and which should use the switch statement.'''

166 assert not state['switch_transitions']	178 assert not state['switch_transitions']

167 (distinct_keys, ranges) = (state['distinct_keys'], state['ranges'])	179 (distinct_keys, ranges) = (state['distinct_keys'], state['ranges'])

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
249 def __reorder(current_node_number, id_map, dfa_states):	261 def __reorder(current_node_number, id_map, dfa_states):

250 current_node = id_map[current_node_number]	262 current_node = id_map[current_node_number]

251 if current_node['node_number'] != None:	263 if current_node['node_number'] != None:

252 return	264 return

253 current_node['node_number'] = len(dfa_states)	265 current_node['node_number'] = len(dfa_states)

254 dfa_states.append(current_node)	266 dfa_states.append(current_node)

255 for (key, node_number) in current_node['transitions']:	267 for (key, node_number) in current_node['transitions']:

256 CodeGenerator.__reorder(node_number, id_map, dfa_states)	268 CodeGenerator.__reorder(node_number, id_map, dfa_states)

257 for node_number in current_node['unique_transitions'].values():	269 for node_number in current_node['unique_transitions'].values():

258 CodeGenerator.__reorder(node_number, id_map, dfa_states)	270 CodeGenerator.__reorder(node_number, id_map, dfa_states)

	271 if current_node['omega_transition'] != None:

	272 CodeGenerator.__reorder(

	273 current_node['omega_transition'], id_map, dfa_states)

259	274

260 @staticmethod	275 @staticmethod

261 def __mark_eos_states(dfa_states, eos_states):	276 def __mark_eos_states(dfa_states, eos_states):

262 for state_id in eos_states:	277 for state_id in eos_states:

263 state = dfa_states[state_id]	278 state = dfa_states[state_id]

264 state['is_eos_handler'] = True	279 state['is_eos_handler'] = True

265 state['must_not_inline'] = True	280 state['must_not_inline'] = True

266 assert state['match_action']	281 # TODO(dcarney): bring back

267 assert not state['entry_action']	282 # assert state['action']

268 assert not state['total_transitions']	283 # assert not state['total_transitions']

269	284

270 def __build_dfa_states(self):	285 def __build_dfa_states(self):

271 dfa_states = []	286 dfa_states = []

272 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state))	287 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state))

273 encoding = self.__dfa.encoding()	288 encoding = self.__dfa.encoding()

274 f = lambda state : CodeGenerator.__transform_state(encoding, state)	289 f = lambda state : CodeGenerator.__transform_state(encoding, state)

275 dfa_states = map(f, dfa_states)	290 dfa_states = map(f, dfa_states)

276 id_map = {x['original_node_number'] : x for x in dfa_states}	291 id_map = {x['original_node_number'] : x for x in dfa_states}

277 dfa_states = []	292 dfa_states = []

278 CodeGenerator.__reorder(self.__start_node_number, id_map, dfa_states)	293 start_node_number = self.__dfa.start_state().node_number()

	294 CodeGenerator.__reorder(start_node_number, id_map, dfa_states)

279 # store states	295 # store states

280 eos_states = set([])	296 eos_states = set([])

	297 remap = lambda state_id : id_map[state_id]['node_number']

281 def f((key, original_node_number)):	298 def f((key, original_node_number)):

282 return (key, id_map[original_node_number]['node_number'])	299 return (key, remap(original_node_number))

283 for state in dfa_states:	300 for state in dfa_states:

284 state['transitions'] = map(f, state['transitions'])	301 state['transitions'] = map(f, state['transitions'])

285 state['unique_transitions'] = {k : id_map[v]['node_number']	302 state['unique_transitions'] = {k : remap(v)

286 for k, v in state['unique_transitions'].items()}	303 for k, v in state['unique_transitions'].items()}

	304 if state['omega_transition'] != None:

	305 state['omega_transition'] = remap(state['omega_transition'])

287 if 'eos' in state['unique_transitions']:	306 if 'eos' in state['unique_transitions']:

288 eos_states.add(state['unique_transitions']['eos'])	307 eos_states.add(state['unique_transitions']['eos'])

289 assert id_map[self.__start_node_number]['node_number'] == 0	308 assert id_map[start_node_number]['node_number'] == 0

290 assert len(dfa_states) == self.__dfa.node_count()	309 assert len(dfa_states) == self.__dfa.node_count()

291 # mark eos states	310 # mark eos states

292 self.__mark_eos_states(dfa_states, eos_states)	311 self.__mark_eos_states(dfa_states, eos_states)

293 self.__dfa_states = dfa_states	312 self.__dfa_states = dfa_states

294	313

295 def __rewrite_gotos(self):

296 goto_map = {}

297 states = self.__dfa_states

298 for state in states:

299 if (state['match_action'].name() == 'do_stored_token'):

300 assert not state['match_action'].args()[0] in goto_map

301 goto_map[state['match_action'].args()[0]] = state['node_number']

302 mapped_actions = set([

303 'store_harmony_token_and_goto',

304 'store_token_and_goto',

305 'goto_start'])

306 for state in states:

307 if not state['match_action']:

308 continue

309 action = state['match_action'].name()

310 if action in mapped_actions:

311 value = state['match_action'].args()

312 target_id = goto_map[value[-1]]

313 states[target_id]['must_not_inline'] = True

314 if action != 'goto_start':

315 state['can_elide_read'] = False

316 label = 'after_entry_code'

317 else:

318 states[target_id]['can_elide_read'] = False

319 label = 'state_entry'

320 jump_label = self.__register_jump(target_id, label)

321 new_args = list(value[:-1]) + [str(jump_label)]

322 state['match_action'] = Term(action, *new_args)

323

324 def __inlined_state(self, target_id):	314 def __inlined_state(self, target_id):

325 state = deepcopy(self.__dfa_states[target_id])	315 state = deepcopy(self.__dfa_states[target_id])

326 state['node_number'] = len(self.__dfa_states)	316 state['node_number'] = len(self.__dfa_states)

327 self.__dfa_states.append(state)	317 self.__dfa_states.append(state)

328 # mark as just generated, so it will correctly rewritten	318 # mark as just generated, so it will correctly rewritten

329 state['just_generated_inline_state'] = True	319 state['just_generated_inline_state'] = True

330 # clear entry points	320 # clear entry points

331 state['entry_points'] = {k : False for k in CodeGenerator.__jump_labels}	321 state['entry_points'] = {k : False for k in CodeGenerator.__jump_labels}

332 return state['node_number']	322 return state['node_number']

333	323

(...skipping 23 matching lines...) Expand all Loading...
357 # generate at most one inline state for all transitions	347 # generate at most one inline state for all transitions

358 if not target_id in inline_mapping:	348 if not target_id in inline_mapping:

359 inline_mapping[target_id] = self.__inlined_state(target_id)	349 inline_mapping[target_id] = self.__inlined_state(target_id)

360 jump_type = 'inline'	350 jump_type = 'inline'

361 target_id = inline_mapping[target_id]	351 target_id = inline_mapping[target_id]

362 else:	352 else:

363 assert not target_id in inline_mapping	353 assert not target_id in inline_mapping

364 return (key, self.__register_jump(target_id, jump_type))	354 return (key, self.__register_jump(target_id, jump_type))

365 for name in transition_names:	355 for name in transition_names:

366 state[name] = map(generate_jump, state[name])	356 state[name] = map(generate_jump, state[name])

	357 if state['omega_transition'] != None:

	358 state['omega_transition'] = generate_jump(

	359 (None, state['omega_transition']))[1]

367 if 'eos' in state['unique_transitions']:	360 if 'eos' in state['unique_transitions']:

368 eos_state_id = state['unique_transitions']['eos']	361 eos_state_id = state['unique_transitions']['eos']

369 # eos state is not inlined, don't need to look in map	362 # eos state is not inlined, don't need to look in map

370 assert not self.__dfa_states[eos_state_id]['inline']	363 assert not self.__dfa_states[eos_state_id]['inline']

371 jump = self.__register_jump(eos_state_id, 'state_entry')	364 jump = self.__register_jump(eos_state_id, 'state_entry')

372 state['unique_transitions']['eos'] = jump	365 state['unique_transitions']['eos'] = jump

373 # now rewrite all nodes created	366 # now rewrite all nodes created

374 nodes_created = len(inline_mapping) - len(inline_mapping_in)	367 nodes_created = len(inline_mapping) - len(inline_mapping_in)

375 assert len(self.__dfa_states) == (	368 assert len(self.__dfa_states) == (

376 end_offset + total_nodes_created + nodes_created)	369 end_offset + total_nodes_created + nodes_created)

377 if nodes_created == 0:	370 if nodes_created == 0:

378 continue	371 continue

379 created = self.__rewrite_transitions_to_jumps(	372 created = self.__rewrite_transitions_to_jumps(

380 end_offset + total_nodes_created, nodes_created, inline_mapping)	373 end_offset + total_nodes_created, nodes_created, inline_mapping)

381 total_nodes_created += nodes_created + created	374 total_nodes_created += nodes_created + created

382 return total_nodes_created	375 return total_nodes_created

383	376

384 def process(self):	377 def process(self):

385	378

386 self.__build_dfa_states()	379 self.__build_dfa_states()

387 dfa_states = self.__dfa_states	380 dfa_states = self.__dfa_states

388 # split transitions	381 # split transitions

389 switched = reduce(self.__split_transitions, dfa_states, 0)	382 switched = reduce(self.__split_transitions, dfa_states, 0)

390 if self.__log:	383 if self.__log:

391 print "%s states use switch (instead of if)" % switched	384 print "%s states use switch (instead of if)" % switched

392 # rewrite deferred transitions	385 # rewrite deferred transitions

393 for state in dfa_states:	386 for state in dfa_states:

394 self.__rewrite_deferred_transitions(state)	387 self.__rewrite_deferred_transitions(state)

395 # rewrite gotos

396 self.__rewrite_gotos()

397 # set nodes to inline	388 # set nodes to inline

398 if self.__inline:	389 if self.__inline:

399 inlined = reduce(self.__set_inline, dfa_states, 0)	390 inlined = reduce(self.__set_inline, dfa_states, 0)

400 if self.__log:	391 if self.__log:

401 print "%s states inlined" % inlined	392 print "%s states inlined" % inlined

402 # rewrite transitions to use jumps	393 # rewrite transitions to use jumps

403 inlined_nodes = self.__rewrite_transitions_to_jumps(0, len(dfa_states), {})	394 inlined_nodes = self.__rewrite_transitions_to_jumps(0, len(dfa_states), {})

404 if self.__log:	395 if self.__log:

405 print "%s inlined nodes created" % inlined_nodes	396 print "%s inlined nodes created" % inlined_nodes

406 # mark the entry point in case there are implicit jumps to it	397 # mark the entry point in case there are implicit jumps to it

407 self.__dfa_states[0]['entry_points']['state_entry'] = True	398 self.__dfa_states[0]['entry_points']['state_entry'] = True

408	399

409 default_action = self.__default_action	400 default_action = self.__default_action

410 assert(default_action and default_action.match_action())	401 assert default_action

411 default_action = default_action.match_action()

412	402

413 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))	403 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

414 template_env = jinja2.Environment(	404 template_env = jinja2.Environment(

415 loader = jinja2.PackageLoader('lexer_generator', '.'),	405 loader = jinja2.PackageLoader('lexer_generator', '.'),

416 undefined = jinja2.StrictUndefined)	406 undefined = jinja2.StrictUndefined)

417 template = template_env.get_template('code_generator.jinja')	407 template = template_env.get_template('code_generator.jinja')

418	408

419 encoding = self.__dfa.encoding()	409 encoding = self.__dfa.encoding()

420 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'}	410 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'}

421 char_type = char_types[encoding.name()]	411 char_type = char_types[encoding.name()]

422	412

423 return template.render(	413 return template.render(

424 start_node_number = 0,	414 start_node_number = 0,

425 debug_print = self.__debug_print,	415 debug_print = self.__debug_print,

426 default_action = default_action,	416 default_action = default_action,

427 dfa_states = dfa_states,	417 dfa_states = dfa_states,

428 jump_table = self.__jump_table,	418 jump_table = self.__jump_table,

429 encoding = encoding.name(),	419 encoding = encoding.name(),

430 char_type = char_type,	420 char_type = char_type,

431 upper_bound = encoding.primary_range()[1])	421 upper_bound = encoding.primary_range()[1])

OLD	NEW

« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | tools/lexer_generator/dfa.py » ('j') | no next file with comments »