Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(205)

Side by Side Diff: tools/lexer_generator/code_generator.py

Issue 144603003: Experimental parser: better eos handling (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
47 dfa = rule_processor.default_automata().dfa() 47 dfa = rule_processor.default_automata().dfa()
48 self.__dfa = dfa 48 self.__dfa = dfa
49 self.__default_action = rule_processor.default_action 49 self.__default_action = rule_processor.default_action
50 self.__debug_print = debug_print 50 self.__debug_print = debug_print
51 self.__start_node_number = self.__dfa.start_state().node_number() 51 self.__start_node_number = self.__dfa.start_state().node_number()
52 self.__log = log 52 self.__log = log
53 self.__inline = inline 53 self.__inline = inline
54 self.__switching = switching 54 self.__switching = switching
55 self.__jump_table = [] 55 self.__jump_table = []
56 56
57 __jump_labels = ['state_entry', 'after_entry_code'] 57 __jump_labels = ['state_entry', 'after_entry_code', 'before_match']
58 58
59 @staticmethod 59 @staticmethod
60 def __transform_state(encoding, state): 60 def __transform_state(encoding, state):
61 # action data 61 # action data
62 action = state.action() 62 action = state.action()
63 entry_action = None if not action else action.entry_action() 63 entry_action = None if not action else action.entry_action()
64 match_action = None if not action else action.match_action() 64 match_action = None if not action else action.match_action()
65 # generate ordered transitions 65 # generate ordered transitions
66 transitions = map(lambda (k, v) : (k, v.node_number()), 66 transitions = map(lambda (k, v) : (k, v.node_number()),
67 state.transitions().items()) 67 state.transitions().items())
(...skipping 17 matching lines...) Expand all
85 ranges += 1 85 ranges += 1
86 keys.append((t, r)) 86 keys.append((t, r))
87 elif t == 'UNIQUE': 87 elif t == 'UNIQUE':
88 assert r == 'no_match' or r == 'eos' 88 assert r == 'no_match' or r == 'eos'
89 assert r not in unique_transitions 89 assert r not in unique_transitions
90 unique_transitions[r] = transition_id 90 unique_transitions[r] = transition_id
91 else: 91 else:
92 raise Exception() 92 raise Exception()
93 if keys: 93 if keys:
94 transitions.append((keys, transition_id)) 94 transitions.append((keys, transition_id))
95 # eos_transitions is for a followup cl
96 return { 95 return {
97 'node_number' : None, 96 'node_number' : None,
98 'original_node_number' : state.node_number(), 97 'original_node_number' : state.node_number(),
99 'transitions' : transitions, 98 'transitions' : transitions,
100 # flags for code generator 99 # flags for code generator
101 'can_elide_read' : len(transitions) == 0, 100 'can_elide_read' : len(transitions) == 0,
102 'is_eos_handler' : False, 101 'is_eos_handler' : False,
103 'inline' : None, 102 'inline' : None,
104 'must_not_inline' : False, 103 'must_not_inline' : False,
105 # transitions for code generator 104 # transitions for code generator
(...skipping 16 matching lines...) Expand all
122 def __register_jump(self, node_id, label): 121 def __register_jump(self, node_id, label):
123 if label != 'inline': 122 if label != 'inline':
124 assert label in CodeGenerator.__jump_labels 123 assert label in CodeGenerator.__jump_labels
125 state = self.__dfa_states[node_id]['entry_points'][label] = True 124 state = self.__dfa_states[node_id]['entry_points'][label] = True
126 self.__jump_table.append((node_id, label)) 125 self.__jump_table.append((node_id, label))
127 return len(self.__jump_table) - 1 126 return len(self.__jump_table) - 1
128 127
129 def __set_inline(self, count, state): 128 def __set_inline(self, count, state):
130 assert state['inline'] == None 129 assert state['inline'] == None
131 inline = False 130 inline = False
132 # inline terminal states
133 if state['must_not_inline']: 131 if state['must_not_inline']:
134 inline = False 132 inline = False
133 # inline terminal states
135 elif not state['transitions']: 134 elif not state['transitions']:
136 inline = True 135 inline = True
137 # inline next to terminal states with 1 or 2 transitions 136 # inline next to terminal states with 1 or 2 transitions
138 elif state['distinct_keys'] < 3 and state['class_keys'] == 0: 137 elif state['distinct_keys'] < 3 and state['class_keys'] == 0:
139 inline = True 138 inline = True
140 # ensure state terminates in 1 step 139 # ensure state terminates in 1 step
141 for key, state_id in state['transitions']: 140 for key, state_id in state['transitions']:
142 if self.__dfa_states[state_id]['transitions']: 141 if self.__dfa_states[state_id]['transitions']:
143 inline = False 142 inline = False
144 break 143 break
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
186 185
187 def __rewrite_deferred_transitions(self, state): 186 def __rewrite_deferred_transitions(self, state):
188 assert not state['long_char_transitions'] 187 assert not state['long_char_transitions']
189 transitions = state['deferred_transitions'] 188 transitions = state['deferred_transitions']
190 if not transitions: 189 if not transitions:
191 return 190 return
192 encoding = self.__dfa.encoding() 191 encoding = self.__dfa.encoding()
193 bom = 'byte_order_mark' 192 bom = 'byte_order_mark'
194 catch_all = 'non_primary_everything_else' 193 catch_all = 'non_primary_everything_else'
195 all_classes = set(encoding.class_name_iter()) 194 all_classes = set(encoding.class_name_iter())
196 fast_classes = set(['eos', 'zero']) 195 fast_classes = set([])
197 call_classes = all_classes - fast_classes - set([bom, catch_all]) 196 call_classes = all_classes - fast_classes - set([bom, catch_all])
198 def remap_transition(class_name): 197 def remap_transition(class_name):
199 if class_name in call_classes: 198 if class_name in call_classes:
200 return ('LONG_CHAR_CLASS', 'call', self.__call_map[class_name]) 199 return ('LONG_CHAR_CLASS', 'call', self.__call_map[class_name])
201 if class_name == bom: 200 if class_name == bom:
202 return ('LONG_CHAR_CLASS', class_name) 201 return ('LONG_CHAR_CLASS', class_name)
203 raise Exception(class_name) 202 raise Exception(class_name)
204 fast_transitions = [] 203 fast_transitions = []
205 long_class_transitions = [] 204 long_class_transitions = []
206 long_class_map = {} 205 long_class_map = {}
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
251 current_node = id_map[current_node_number] 250 current_node = id_map[current_node_number]
252 if current_node['node_number'] != None: 251 if current_node['node_number'] != None:
253 return 252 return
254 current_node['node_number'] = len(dfa_states) 253 current_node['node_number'] = len(dfa_states)
255 dfa_states.append(current_node) 254 dfa_states.append(current_node)
256 for (key, node_number) in current_node['transitions']: 255 for (key, node_number) in current_node['transitions']:
257 CodeGenerator.__reorder(node_number, id_map, dfa_states) 256 CodeGenerator.__reorder(node_number, id_map, dfa_states)
258 for node_number in current_node['unique_transitions'].values(): 257 for node_number in current_node['unique_transitions'].values():
259 CodeGenerator.__reorder(node_number, id_map, dfa_states) 258 CodeGenerator.__reorder(node_number, id_map, dfa_states)
260 259
260 @staticmethod
261 def __mark_eos_states(dfa_states, eos_states):
262 for state_id in eos_states:
263 state = dfa_states[state_id]
264 state['is_eos_handler'] = True
265 state['must_not_inline'] = True
266 assert state['match_action']
267 assert not state['entry_action']
268 assert not state['transitions']
269 assert not state['unique_transitions']
270
261 def __build_dfa_states(self): 271 def __build_dfa_states(self):
262 dfa_states = [] 272 dfa_states = []
263 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state)) 273 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state))
264 encoding = self.__dfa.encoding() 274 encoding = self.__dfa.encoding()
265 f = lambda state : CodeGenerator.__transform_state(encoding, state) 275 f = lambda state : CodeGenerator.__transform_state(encoding, state)
266 dfa_states = map(f, dfa_states) 276 dfa_states = map(f, dfa_states)
267 id_map = {x['original_node_number'] : x for x in dfa_states} 277 id_map = {x['original_node_number'] : x for x in dfa_states}
268 dfa_states = [] 278 dfa_states = []
269 CodeGenerator.__reorder(self.__start_node_number, id_map, dfa_states) 279 CodeGenerator.__reorder(self.__start_node_number, id_map, dfa_states)
280 # store states
281 eos_states = set([])
270 def f((key, original_node_number)): 282 def f((key, original_node_number)):
271 return (key, id_map[original_node_number]['node_number']) 283 return (key, id_map[original_node_number]['node_number'])
272 for state in dfa_states: 284 for state in dfa_states:
273 state['transitions'] = map(f, state['transitions']) 285 state['transitions'] = map(f, state['transitions'])
286 state['unique_transitions'] = {k : id_map[v]['node_number']
287 for k, v in state['unique_transitions'].items()}
288 if 'eos' in state['unique_transitions']:
289 eos_states.add(state['unique_transitions']['eos'])
274 assert id_map[self.__start_node_number]['node_number'] == 0 290 assert id_map[self.__start_node_number]['node_number'] == 0
275 assert len(dfa_states) == self.__dfa.node_count() 291 assert len(dfa_states) == self.__dfa.node_count()
276 # store states 292 # mark eos states
293 self.__mark_eos_states(dfa_states, eos_states)
277 self.__dfa_states = dfa_states 294 self.__dfa_states = dfa_states
278 295
279 def __rewrite_gotos(self): 296 def __rewrite_gotos(self):
280 goto_map = {} 297 goto_map = {}
281 states = self.__dfa_states 298 states = self.__dfa_states
282 for state in states: 299 for state in states:
283 if (state['match_action'] and 300 if (state['match_action'] and
284 state['match_action'][0] == 'do_stored_token'): 301 state['match_action'][0] == 'do_stored_token'):
285 assert not state['match_action'][1] in goto_map 302 assert not state['match_action'][1] in goto_map
286 goto_map[state['match_action'][1]] = state['node_number'] 303 goto_map[state['match_action'][1]] = state['node_number']
(...skipping 17 matching lines...) Expand all
304 label = 'state_entry' 321 label = 'state_entry'
305 jump_label = self.__register_jump(target_id, label) 322 jump_label = self.__register_jump(target_id, label)
306 state['match_action'] = (action, tuple(list(value[:-1]) + [jump_label])) 323 state['match_action'] = (action, tuple(list(value[:-1]) + [jump_label]))
307 324
308 def __inlined_state(self, target_id): 325 def __inlined_state(self, target_id):
309 state = deepcopy(self.__dfa_states[target_id]) 326 state = deepcopy(self.__dfa_states[target_id])
310 state['node_number'] = len(self.__dfa_states) 327 state['node_number'] = len(self.__dfa_states)
311 self.__dfa_states.append(state) 328 self.__dfa_states.append(state)
312 # mark inline as none, to be correctly handled below 329 # mark inline as none, to be correctly handled below
313 state['inline'] = None 330 state['inline'] = None
314 # clear transitions, they shouldn't be used anymore
315 state['transitions'] = []
316 # clear entry points 331 # clear entry points
317 state['entry_points'] = {k : False for k in CodeGenerator.__jump_labels} 332 state['entry_points'] = {k : False for k in CodeGenerator.__jump_labels}
318 return state['node_number'] 333 return state['node_number']
319 334
320 def __rewrite_transitions_to_jumps(self, start_id, count, inline_mapping_in): 335 def __rewrite_transitions_to_jumps(self, start_id, count, inline_mapping_in):
321 # order here should match the order of code generation 336 # order here should match the order of code generation
322 transition_names = [ 337 transition_names = [
323 'switch_transitions', 338 'switch_transitions',
324 'if_transitions', 339 'if_transitions',
325 'long_char_transitions', 340 'long_char_transitions',
(...skipping 17 matching lines...) Expand all
343 # generate at most one inline state for all transitions 358 # generate at most one inline state for all transitions
344 if not target_id in inline_mapping: 359 if not target_id in inline_mapping:
345 inline_mapping[target_id] = self.__inlined_state(target_id) 360 inline_mapping[target_id] = self.__inlined_state(target_id)
346 jump_type = 'inline' 361 jump_type = 'inline'
347 target_id = inline_mapping[target_id] 362 target_id = inline_mapping[target_id]
348 else: 363 else:
349 assert not target_id in inline_mapping 364 assert not target_id in inline_mapping
350 return (key, self.__register_jump(target_id, jump_type)) 365 return (key, self.__register_jump(target_id, jump_type))
351 for name in transition_names: 366 for name in transition_names:
352 state[name] = map(generate_jump, state[name]) 367 state[name] = map(generate_jump, state[name])
368 if 'eos' in state['unique_transitions']:
369 # eos state is not inlined
370 eos_state_id = state['unique_transitions']['eos']
371 jump = self.__register_jump(eos_state_id, 'state_entry')
372 state['unique_transitions']['eos'] = jump
373 elif state['transitions']:
374 jump = self.__register_jump(state_id, 'before_match')
375 state['jump_before_match'] = jump
353 # now rewrite all nodes created 376 # now rewrite all nodes created
354 nodes_created = len(inline_mapping) - len(inline_mapping_in) 377 nodes_created = len(inline_mapping) - len(inline_mapping_in)
355 assert len(self.__dfa_states) == ( 378 assert len(self.__dfa_states) == (
356 end_offset + total_nodes_created + nodes_created) 379 end_offset + total_nodes_created + nodes_created)
357 if nodes_created == 0: 380 if nodes_created == 0:
358 continue 381 continue
359 created = self.__rewrite_transitions_to_jumps( 382 created = self.__rewrite_transitions_to_jumps(
360 end_offset + total_nodes_created, nodes_created, inline_mapping) 383 end_offset + total_nodes_created, nodes_created, inline_mapping)
361 total_nodes_created += nodes_created + created 384 total_nodes_created += nodes_created + created
362 return total_nodes_created 385 return total_nodes_created
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
402 425
403 return template.render( 426 return template.render(
404 start_node_number = 0, 427 start_node_number = 0,
405 debug_print = self.__debug_print, 428 debug_print = self.__debug_print,
406 default_action = default_action, 429 default_action = default_action,
407 dfa_states = dfa_states, 430 dfa_states = dfa_states,
408 jump_table = self.__jump_table, 431 jump_table = self.__jump_table,
409 encoding = encoding.name(), 432 encoding = encoding.name(),
410 char_type = char_type, 433 char_type = char_type,
411 upper_bound = encoding.primary_range()[1]) 434 upper_bound = encoding.primary_range()[1])
OLDNEW
« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | tools/lexer_generator/code_generator_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698