Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(389)

Side by Side Diff: tools/lexer_generator/code_generator.py

Issue 160443002: Experimental parser: remove match actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | tools/lexer_generator/dfa.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
42 switching = True, 42 switching = True,
43 debug_print = False, 43 debug_print = False,
44 log = False): 44 log = False):
45 if minimize_default: 45 if minimize_default:
46 dfa = rule_processor.default_automata().minimal_dfa() 46 dfa = rule_processor.default_automata().minimal_dfa()
47 else: 47 else:
48 dfa = rule_processor.default_automata().dfa() 48 dfa = rule_processor.default_automata().dfa()
49 self.__dfa = dfa 49 self.__dfa = dfa
50 self.__default_action = rule_processor.default_action() 50 self.__default_action = rule_processor.default_action()
51 self.__debug_print = debug_print 51 self.__debug_print = debug_print
52 self.__start_node_number = self.__dfa.start_state().node_number()
53 self.__log = log 52 self.__log = log
54 self.__inline = inline 53 self.__inline = inline
55 self.__switching = switching 54 self.__switching = switching
56 self.__jump_table = [] 55 self.__jump_table = []
57 56
58 __jump_labels = ['state_entry', 'after_entry_code'] 57 __jump_labels = ['state_entry', 'after_entry_code']
59 58
60 @staticmethod 59 @staticmethod
61 def __transform_state(encoding, state): 60 def __transform_state(encoding, state):
62 # action data 61 # action data
63 action = state.action()
64 entry_action = action.entry_action()
65 match_action = action.match_action()
66 # generate ordered transitions 62 # generate ordered transitions
67 transitions = map(lambda (k, v) : (k, v.node_number()), 63 transitions = map(lambda (k, v) : (k, v.node_number()),
68 state.key_state_iter()) 64 state.key_state_iter())
69 transitions = sorted( 65 transitions = sorted(
70 transitions, cmp = TransitionKey.compare, key = lambda x : x[0]) 66 transitions, cmp = TransitionKey.compare, key = lambda x : x[0])
71 # map transition keys to disjoint ranges and collect stats 67 # map transition keys to disjoint ranges and collect stats
72 disjoint_keys = [] 68 disjoint_keys = []
73 unique_transitions = {} 69 unique_transitions = {}
74 old_transitions = transitions 70 old_transitions = transitions
75 transitions = [] 71 transitions = []
76 (class_keys, distinct_keys, ranges) = (0, 0, 0) 72 (class_keys, distinct_keys, ranges) = (0, 0, 0)
77 zero_transition = None 73 zero_transition = None
74 omega_transition = None
78 total_transitions = 0 75 total_transitions = 0
79 for key, transition_id in old_transitions: 76 for key, transition_id in old_transitions:
80 keys = [] 77 keys = []
81 for (t, r) in key.range_iter(encoding): 78 for (t, r) in key.range_iter(encoding):
82 if t == 'CLASS': 79 if t == 'CLASS':
83 class_keys += 1 80 class_keys += 1
84 keys.append((t, r)) 81 keys.append((t, r))
85 elif t == 'PRIMARY_RANGE': 82 elif t == 'PRIMARY_RANGE':
86 distinct_keys += r[1] - r[0] + 1 83 distinct_keys += r[1] - r[0] + 1
87 ranges += 1 84 ranges += 1
88 # split 0 out of range 85 # split 0 out of range
89 assert r[0] >= 0 86 assert r[0] >= 0
90 if r[0] == 0: 87 if r[0] == 0:
91 assert zero_transition == None 88 assert zero_transition == None
92 zero_transition = transition_id 89 zero_transition = transition_id
93 if r[0] == r[1]: 90 if r[0] == r[1]:
94 continue 91 continue
95 r = (r[0] + 1, r[1]) 92 r = (r[0] + 1, r[1])
96 keys.append((t, r)) 93 keys.append((t, r))
97 elif t == 'UNIQUE': 94 elif t == 'UNIQUE':
98 assert r == 'no_match' or r == 'eos' 95 assert r == 'eos'
99 assert r not in unique_transitions 96 assert r not in unique_transitions
100 unique_transitions[r] = transition_id 97 unique_transitions[r] = transition_id
101 if r != 'no_match': 98 if r != 'no_match':
102 total_transitions += 1 99 total_transitions += 1
100 elif t == 'OMEGA':
101 assert not omega_transition
102 omega_transition = transition_id
103 assert transition_id # TODO(dcarney): fix
104 total_transitions += 1
103 else: 105 else:
104 raise Exception() 106 raise Exception()
105 if keys: 107 if keys:
106 transitions.append((keys, transition_id)) 108 transitions.append((keys, transition_id))
107 # delay zero transition until last 109 # delay zero transition until after all encoded keys
108 if zero_transition != None: 110 if zero_transition != None:
109 transitions.append(([('PRIMARY_RANGE', (0, 0))], transition_id)) 111 transitions.append(([('PRIMARY_RANGE', (0, 0))], transition_id))
110 ranges += 1 112 ranges += 1
111 total_transitions += len(transitions) 113 total_transitions += len(transitions)
112 return { 114 return {
113 'node_number' : None, 115 'node_number' : None,
114 'original_node_number' : state.node_number(), 116 'original_node_number' : state.node_number(),
115 'transitions' : transitions, 117 'transitions' : transitions,
116 # flags for code generator 118 # flags for code generator
117 'can_elide_read' : total_transitions == 0, 119 'elide_read' : (total_transitions == 0 or
120 (total_transitions == 1 and omega_transition)),
118 'is_eos_handler' : False, 121 'is_eos_handler' : False,
119 'inline' : False, 122 'inline' : False,
120 'must_not_inline' : False, 123 'must_not_inline' : False,
121 # transitions for code generator 124 # transitions for code generator
122 'if_transitions' : [], 125 'if_transitions' : [],
123 'switch_transitions' : [], 126 'switch_transitions' : [],
124 'deferred_transitions' : [], 127 'deferred_transitions' : [],
125 'unique_transitions' : unique_transitions, 128 'unique_transitions' : unique_transitions,
129 'omega_transition' : omega_transition,
126 # state actions 130 # state actions
127 'entry_action' : entry_action, 131 'action' : state.action(),
128 'match_action' : match_action,
129 # statistics for state 132 # statistics for state
130 'total_transitions' : total_transitions, 133 'total_transitions' : total_transitions,
131 'class_keys' : class_keys, 134 'class_keys' : class_keys,
132 'distinct_keys' : distinct_keys, 135 'distinct_keys' : distinct_keys,
133 'ranges' : ranges, 136 'ranges' : ranges,
134 # record of which entry points will be needed 137 # record of which entry points will be needed
135 'entry_points' : {k : False for k in CodeGenerator.__jump_labels} 138 'entry_points' : {k : False for k in CodeGenerator.__jump_labels}
136 } 139 }
137 140
138 def __register_jump(self, node_id, label): 141 def __register_jump(self, node_id, label):
139 if label != 'inline': 142 if label != 'inline':
140 assert label in CodeGenerator.__jump_labels 143 assert label in CodeGenerator.__jump_labels
141 state = self.__dfa_states[node_id]['entry_points'][label] = True 144 state = self.__dfa_states[node_id]['entry_points'][label] = True
142 self.__jump_table.append((node_id, label)) 145 self.__jump_table.append((node_id, label))
143 return len(self.__jump_table) - 1 146 return len(self.__jump_table) - 1
144 147
148 def __terminates_immediately(self, state_id):
149 transition_count = self.__dfa_states[state_id]['total_transitions']
150 if transition_count == 0:
151 return True
152 omega_transition_id = self.__dfa_states[state_id]['omega_transition']
153 if transition_count == 1 and omega_transition_id:
154 return self.__terminates_immediately(omega_transition_id)
155 return False
156
145 def __set_inline(self, count, state): 157 def __set_inline(self, count, state):
146 inline = False 158 inline = False
147 if state['must_not_inline']: 159 if state['must_not_inline']:
148 inline = False 160 inline = False
149 # inline terminal states 161 # inline terminal states
150 elif not state['total_transitions']: 162 elif self.__terminates_immediately(state['node_number']):
151 inline = True 163 inline = True
152 # inline next to terminal states with 1 or 2 transitions 164 # inline next to terminal states with 1 or 2 transitions
153 elif state['distinct_keys'] < 3 and state['class_keys'] == 0: 165 elif state['distinct_keys'] < 3 and state['class_keys'] == 0:
154 inline = True 166 inline = True
155 # ensure state terminates in 1 step 167 # ensure state terminates in 1 step, excluding omega transitions
156 for key, state_id in state['transitions']: 168 for key, state_id in state['transitions']:
157 if self.__dfa_states[state_id]['total_transitions']: 169 if not self.__terminates_immediately(state_id):
158 inline = False 170 inline = False
159 break 171 break
160 state['inline'] = inline 172 state['inline'] = inline
161 return count + 1 if inline else count 173 return count + 1 if inline else count
162 174
163 def __split_transitions(self, split_count, state): 175 def __split_transitions(self, split_count, state):
164 '''Goes through the transitions for 'state' and decides which of them should 176 '''Goes through the transitions for 'state' and decides which of them should
165 use the if statement and which should use the switch statement.''' 177 use the if statement and which should use the switch statement.'''
166 assert not state['switch_transitions'] 178 assert not state['switch_transitions']
167 (distinct_keys, ranges) = (state['distinct_keys'], state['ranges']) 179 (distinct_keys, ranges) = (state['distinct_keys'], state['ranges'])
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
249 def __reorder(current_node_number, id_map, dfa_states): 261 def __reorder(current_node_number, id_map, dfa_states):
250 current_node = id_map[current_node_number] 262 current_node = id_map[current_node_number]
251 if current_node['node_number'] != None: 263 if current_node['node_number'] != None:
252 return 264 return
253 current_node['node_number'] = len(dfa_states) 265 current_node['node_number'] = len(dfa_states)
254 dfa_states.append(current_node) 266 dfa_states.append(current_node)
255 for (key, node_number) in current_node['transitions']: 267 for (key, node_number) in current_node['transitions']:
256 CodeGenerator.__reorder(node_number, id_map, dfa_states) 268 CodeGenerator.__reorder(node_number, id_map, dfa_states)
257 for node_number in current_node['unique_transitions'].values(): 269 for node_number in current_node['unique_transitions'].values():
258 CodeGenerator.__reorder(node_number, id_map, dfa_states) 270 CodeGenerator.__reorder(node_number, id_map, dfa_states)
271 if current_node['omega_transition'] != None:
272 CodeGenerator.__reorder(
273 current_node['omega_transition'], id_map, dfa_states)
259 274
260 @staticmethod 275 @staticmethod
261 def __mark_eos_states(dfa_states, eos_states): 276 def __mark_eos_states(dfa_states, eos_states):
262 for state_id in eos_states: 277 for state_id in eos_states:
263 state = dfa_states[state_id] 278 state = dfa_states[state_id]
264 state['is_eos_handler'] = True 279 state['is_eos_handler'] = True
265 state['must_not_inline'] = True 280 state['must_not_inline'] = True
266 assert state['match_action'] 281 # TODO(dcarney): bring back
267 assert not state['entry_action'] 282 # assert state['action']
268 assert not state['total_transitions'] 283 # assert not state['total_transitions']
269 284
270 def __build_dfa_states(self): 285 def __build_dfa_states(self):
271 dfa_states = [] 286 dfa_states = []
272 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state)) 287 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state))
273 encoding = self.__dfa.encoding() 288 encoding = self.__dfa.encoding()
274 f = lambda state : CodeGenerator.__transform_state(encoding, state) 289 f = lambda state : CodeGenerator.__transform_state(encoding, state)
275 dfa_states = map(f, dfa_states) 290 dfa_states = map(f, dfa_states)
276 id_map = {x['original_node_number'] : x for x in dfa_states} 291 id_map = {x['original_node_number'] : x for x in dfa_states}
277 dfa_states = [] 292 dfa_states = []
278 CodeGenerator.__reorder(self.__start_node_number, id_map, dfa_states) 293 start_node_number = self.__dfa.start_state().node_number()
294 CodeGenerator.__reorder(start_node_number, id_map, dfa_states)
279 # store states 295 # store states
280 eos_states = set([]) 296 eos_states = set([])
297 remap = lambda state_id : id_map[state_id]['node_number']
281 def f((key, original_node_number)): 298 def f((key, original_node_number)):
282 return (key, id_map[original_node_number]['node_number']) 299 return (key, remap(original_node_number))
283 for state in dfa_states: 300 for state in dfa_states:
284 state['transitions'] = map(f, state['transitions']) 301 state['transitions'] = map(f, state['transitions'])
285 state['unique_transitions'] = {k : id_map[v]['node_number'] 302 state['unique_transitions'] = {k : remap(v)
286 for k, v in state['unique_transitions'].items()} 303 for k, v in state['unique_transitions'].items()}
304 if state['omega_transition'] != None:
305 state['omega_transition'] = remap(state['omega_transition'])
287 if 'eos' in state['unique_transitions']: 306 if 'eos' in state['unique_transitions']:
288 eos_states.add(state['unique_transitions']['eos']) 307 eos_states.add(state['unique_transitions']['eos'])
289 assert id_map[self.__start_node_number]['node_number'] == 0 308 assert id_map[start_node_number]['node_number'] == 0
290 assert len(dfa_states) == self.__dfa.node_count() 309 assert len(dfa_states) == self.__dfa.node_count()
291 # mark eos states 310 # mark eos states
292 self.__mark_eos_states(dfa_states, eos_states) 311 self.__mark_eos_states(dfa_states, eos_states)
293 self.__dfa_states = dfa_states 312 self.__dfa_states = dfa_states
294 313
295 def __rewrite_gotos(self):
296 goto_map = {}
297 states = self.__dfa_states
298 for state in states:
299 if (state['match_action'].name() == 'do_stored_token'):
300 assert not state['match_action'].args()[0] in goto_map
301 goto_map[state['match_action'].args()[0]] = state['node_number']
302 mapped_actions = set([
303 'store_harmony_token_and_goto',
304 'store_token_and_goto',
305 'goto_start'])
306 for state in states:
307 if not state['match_action']:
308 continue
309 action = state['match_action'].name()
310 if action in mapped_actions:
311 value = state['match_action'].args()
312 target_id = goto_map[value[-1]]
313 states[target_id]['must_not_inline'] = True
314 if action != 'goto_start':
315 state['can_elide_read'] = False
316 label = 'after_entry_code'
317 else:
318 states[target_id]['can_elide_read'] = False
319 label = 'state_entry'
320 jump_label = self.__register_jump(target_id, label)
321 new_args = list(value[:-1]) + [str(jump_label)]
322 state['match_action'] = Term(action, *new_args)
323
324 def __inlined_state(self, target_id): 314 def __inlined_state(self, target_id):
325 state = deepcopy(self.__dfa_states[target_id]) 315 state = deepcopy(self.__dfa_states[target_id])
326 state['node_number'] = len(self.__dfa_states) 316 state['node_number'] = len(self.__dfa_states)
327 self.__dfa_states.append(state) 317 self.__dfa_states.append(state)
328 # mark as just generated, so it will correctly rewritten 318 # mark as just generated, so it will correctly rewritten
329 state['just_generated_inline_state'] = True 319 state['just_generated_inline_state'] = True
330 # clear entry points 320 # clear entry points
331 state['entry_points'] = {k : False for k in CodeGenerator.__jump_labels} 321 state['entry_points'] = {k : False for k in CodeGenerator.__jump_labels}
332 return state['node_number'] 322 return state['node_number']
333 323
(...skipping 23 matching lines...) Expand all
357 # generate at most one inline state for all transitions 347 # generate at most one inline state for all transitions
358 if not target_id in inline_mapping: 348 if not target_id in inline_mapping:
359 inline_mapping[target_id] = self.__inlined_state(target_id) 349 inline_mapping[target_id] = self.__inlined_state(target_id)
360 jump_type = 'inline' 350 jump_type = 'inline'
361 target_id = inline_mapping[target_id] 351 target_id = inline_mapping[target_id]
362 else: 352 else:
363 assert not target_id in inline_mapping 353 assert not target_id in inline_mapping
364 return (key, self.__register_jump(target_id, jump_type)) 354 return (key, self.__register_jump(target_id, jump_type))
365 for name in transition_names: 355 for name in transition_names:
366 state[name] = map(generate_jump, state[name]) 356 state[name] = map(generate_jump, state[name])
357 if state['omega_transition'] != None:
358 state['omega_transition'] = generate_jump(
359 (None, state['omega_transition']))[1]
367 if 'eos' in state['unique_transitions']: 360 if 'eos' in state['unique_transitions']:
368 eos_state_id = state['unique_transitions']['eos'] 361 eos_state_id = state['unique_transitions']['eos']
369 # eos state is not inlined, don't need to look in map 362 # eos state is not inlined, don't need to look in map
370 assert not self.__dfa_states[eos_state_id]['inline'] 363 assert not self.__dfa_states[eos_state_id]['inline']
371 jump = self.__register_jump(eos_state_id, 'state_entry') 364 jump = self.__register_jump(eos_state_id, 'state_entry')
372 state['unique_transitions']['eos'] = jump 365 state['unique_transitions']['eos'] = jump
373 # now rewrite all nodes created 366 # now rewrite all nodes created
374 nodes_created = len(inline_mapping) - len(inline_mapping_in) 367 nodes_created = len(inline_mapping) - len(inline_mapping_in)
375 assert len(self.__dfa_states) == ( 368 assert len(self.__dfa_states) == (
376 end_offset + total_nodes_created + nodes_created) 369 end_offset + total_nodes_created + nodes_created)
377 if nodes_created == 0: 370 if nodes_created == 0:
378 continue 371 continue
379 created = self.__rewrite_transitions_to_jumps( 372 created = self.__rewrite_transitions_to_jumps(
380 end_offset + total_nodes_created, nodes_created, inline_mapping) 373 end_offset + total_nodes_created, nodes_created, inline_mapping)
381 total_nodes_created += nodes_created + created 374 total_nodes_created += nodes_created + created
382 return total_nodes_created 375 return total_nodes_created
383 376
384 def process(self): 377 def process(self):
385 378
386 self.__build_dfa_states() 379 self.__build_dfa_states()
387 dfa_states = self.__dfa_states 380 dfa_states = self.__dfa_states
388 # split transitions 381 # split transitions
389 switched = reduce(self.__split_transitions, dfa_states, 0) 382 switched = reduce(self.__split_transitions, dfa_states, 0)
390 if self.__log: 383 if self.__log:
391 print "%s states use switch (instead of if)" % switched 384 print "%s states use switch (instead of if)" % switched
392 # rewrite deferred transitions 385 # rewrite deferred transitions
393 for state in dfa_states: 386 for state in dfa_states:
394 self.__rewrite_deferred_transitions(state) 387 self.__rewrite_deferred_transitions(state)
395 # rewrite gotos
396 self.__rewrite_gotos()
397 # set nodes to inline 388 # set nodes to inline
398 if self.__inline: 389 if self.__inline:
399 inlined = reduce(self.__set_inline, dfa_states, 0) 390 inlined = reduce(self.__set_inline, dfa_states, 0)
400 if self.__log: 391 if self.__log:
401 print "%s states inlined" % inlined 392 print "%s states inlined" % inlined
402 # rewrite transitions to use jumps 393 # rewrite transitions to use jumps
403 inlined_nodes = self.__rewrite_transitions_to_jumps(0, len(dfa_states), {}) 394 inlined_nodes = self.__rewrite_transitions_to_jumps(0, len(dfa_states), {})
404 if self.__log: 395 if self.__log:
405 print "%s inlined nodes created" % inlined_nodes 396 print "%s inlined nodes created" % inlined_nodes
406 # mark the entry point in case there are implicit jumps to it 397 # mark the entry point in case there are implicit jumps to it
407 self.__dfa_states[0]['entry_points']['state_entry'] = True 398 self.__dfa_states[0]['entry_points']['state_entry'] = True
408 399
409 default_action = self.__default_action 400 default_action = self.__default_action
410 assert(default_action and default_action.match_action()) 401 assert default_action
411 default_action = default_action.match_action()
412 402
413 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 403 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
414 template_env = jinja2.Environment( 404 template_env = jinja2.Environment(
415 loader = jinja2.PackageLoader('lexer_generator', '.'), 405 loader = jinja2.PackageLoader('lexer_generator', '.'),
416 undefined = jinja2.StrictUndefined) 406 undefined = jinja2.StrictUndefined)
417 template = template_env.get_template('code_generator.jinja') 407 template = template_env.get_template('code_generator.jinja')
418 408
419 encoding = self.__dfa.encoding() 409 encoding = self.__dfa.encoding()
420 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'} 410 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'}
421 char_type = char_types[encoding.name()] 411 char_type = char_types[encoding.name()]
422 412
423 return template.render( 413 return template.render(
424 start_node_number = 0, 414 start_node_number = 0,
425 debug_print = self.__debug_print, 415 debug_print = self.__debug_print,
426 default_action = default_action, 416 default_action = default_action,
427 dfa_states = dfa_states, 417 dfa_states = dfa_states,
428 jump_table = self.__jump_table, 418 jump_table = self.__jump_table,
429 encoding = encoding.name(), 419 encoding = encoding.name(),
430 char_type = char_type, 420 char_type = char_type,
431 upper_bound = encoding.primary_range()[1]) 421 upper_bound = encoding.primary_range()[1])
OLDNEW
« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | tools/lexer_generator/dfa.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698