Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Side by Side Diff: tools/lexer_generator/code_generator.py

Issue 83243004: Experimental parser: cleanup code requiring calls to the unicode cache (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
122 distinct_keys += r[1] - r[0] + 1 122 distinct_keys += r[1] - r[0] + 1
123 ranges += 1 123 ranges += 1
124 else: 124 else:
125 raise Exception() 125 raise Exception()
126 return { 126 return {
127 'node_number' : state.node_number(), 127 'node_number' : state.node_number(),
128 'original_node_number' : state.node_number(), 128 'original_node_number' : state.node_number(),
129 'transitions' : transitions, 129 'transitions' : transitions,
130 'switch_transitions' : [], 130 'switch_transitions' : [],
131 'deferred_transitions' : [], 131 'deferred_transitions' : [],
132 'long_char_transitions' : [],
132 'disjoint_keys' : disjoint_keys, 133 'disjoint_keys' : disjoint_keys,
133 'inline' : None, 134 'inline' : None,
134 'depth' : None, 135 'depth' : None,
135 'action' : action, 136 'action' : action,
136 'entry_action' : entry_action, 137 'entry_action' : entry_action,
137 'match_action' : match_action, 138 'match_action' : match_action,
138 'class_keys' : class_keys, 139 'class_keys' : class_keys,
139 'distinct_keys' : distinct_keys, 140 'distinct_keys' : distinct_keys,
140 'ranges' : ranges 141 'ranges' : ranges
141 } 142 }
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
191 if_transitions.append((i, node_id)) 192 if_transitions.append((i, node_id))
192 if s: 193 if s:
193 switch_transitions.append((s, node_id)) 194 switch_transitions.append((s, node_id))
194 if d: 195 if d:
195 deferred_transitions.append((d, node_id)) 196 deferred_transitions.append((d, node_id))
196 state['transitions'] = if_transitions 197 state['transitions'] = if_transitions
197 state['switch_transitions'] = switch_transitions 198 state['switch_transitions'] = switch_transitions
198 state['deferred_transitions'] = deferred_transitions 199 state['deferred_transitions'] = deferred_transitions
199 return split_count + (0 if no_switch else 1) 200 return split_count + (0 if no_switch else 1)
200 201
202 __call_map = {
203 'non_primary_whitespace' : 'IsWhiteSpace',
204 'non_primary_letter' : 'IsLetter',
205 'non_primary_identifier_part_not_letter' : 'IsIdentifierPartNotLetter',
206 'non_primary_line_terminator' : 'IsLineTerminator',
207 }
208
209 def __rewrite_deferred_transitions(self, state):
210 assert not state['long_char_transitions']
211 transitions = state['deferred_transitions']
212 if not transitions:
213 return
214 encoding = self.__dfa.encoding()
215 bom = 'byte_order_mark'
216 catch_all = 'non_primary_everything_else'
217 all_classes = set(encoding.class_name_iter())
218 fast_classes = set(['eos', 'zero'])
219 call_classes = all_classes - fast_classes - set([bom, catch_all])
220 def remap_transition(class_name):
221 if class_name in call_classes:
222 return ('LONG_CHAR_CLASS', 'call', self.__call_map[class_name])
223 if class_name == bom:
224 return ('LONG_CHAR_CLASS', class_name)
225 raise Exception(class_name)
226 fast_transitions = []
227 long_class_transitions = []
228 long_class_map = {}
229 catchall_transition = None
230 # loop through and remove catch_all_transitions
231 for (classes, transition_node_id) in transitions:
232 ft = []
233 lct = []
234 has_catch_all = False
235 for (class_type, class_name) in classes:
236 if class_name in fast_classes:
237 ft.append((class_type, class_name))
238 else:
239 assert not class_name in long_class_map
240 long_class_map[class_name] = transition_node_id
241 if class_name == catch_all:
242 assert not has_catch_all
243 assert catchall_transition == None
244 has_catch_all = True
245 else:
246 lct.append(remap_transition(class_name))
247 if ft:
248 fast_transitions.append((ft, transition_node_id))
249 if has_catch_all:
250 catchall_transition = (lct, transition_node_id)
251 elif lct:
252 long_class_transitions.append((lct, transition_node_id))
253 # all transitions are fast
254 if not long_class_map:
255 return
256 if catchall_transition:
257 catchall_transitions = all_classes - fast_classes
258 for class_name in long_class_map.iterkeys():
259 catchall_transitions.remove(class_name)
260 assert not catchall_transitions, "class inversion not unimplemented"
261 # split deferred transitions
262 state['deferred_transitions'] = fast_transitions
263 if catchall_transition:
264 catchall_transition = [
265 ([('LONG_CHAR_CLASS', 'catch_all')], catchall_transition[1])]
266 else:
267 catchall_transition = []
268 state['long_char_transitions'] = (long_class_transitions +
269 catchall_transition) # must be last
270
201 def __canonicalize_traversal(self): 271 def __canonicalize_traversal(self):
202 dfa_states = [] 272 dfa_states = []
203 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state)) 273 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state))
204 encoding = self.__dfa.encoding() 274 encoding = self.__dfa.encoding()
205 f = lambda state : CodeGenerator.__transform_state(encoding, state) 275 f = lambda state : CodeGenerator.__transform_state(encoding, state)
206 dfa_states = map(f, dfa_states) 276 dfa_states = map(f, dfa_states)
207 id_map = {x['original_node_number'] : x for x in dfa_states} 277 id_map = {x['original_node_number'] : x for x in dfa_states}
208 CodeGenerator.__compute_depths(self.__start_node_number, 1, id_map) 278 CodeGenerator.__compute_depths(self.__start_node_number, 1, id_map)
209 dfa_states = sorted(dfa_states, cmp=self.__state_cmp) 279 dfa_states = sorted(dfa_states, cmp=self.__state_cmp)
210 # remap all node numbers 280 # remap all node numbers
(...skipping 11 matching lines...) Expand all
222 if self.__inline: 292 if self.__inline:
223 inlined = reduce(self.__set_inline, dfa_states, 0) 293 inlined = reduce(self.__set_inline, dfa_states, 0)
224 if self.__log: 294 if self.__log:
225 print "%s states inlined" % inlined 295 print "%s states inlined" % inlined
226 elif self.__log: 296 elif self.__log:
227 print "no inlining" 297 print "no inlining"
228 # split transitions 298 # split transitions
229 switched = reduce(self.__split_transitions, dfa_states, 0) 299 switched = reduce(self.__split_transitions, dfa_states, 0)
230 if self.__log: 300 if self.__log:
231 print "%s states use switch (instead of if)" % switched 301 print "%s states use switch (instead of if)" % switched
302 # rewrite deferred transitions
303 for state in dfa_states:
304 self.__rewrite_deferred_transitions(state)
232 305
233 def process(self): 306 def process(self):
234 307
235 self.__canonicalize_traversal() 308 self.__canonicalize_traversal()
236 309
237 dfa_states = self.__dfa_states 310 dfa_states = self.__dfa_states
238 311
239 default_action = self.__default_action 312 default_action = self.__default_action
240 assert(default_action and default_action.match_action()) 313 assert(default_action and default_action.match_action())
241 default_action = default_action.match_action() 314 default_action = default_action.match_action()
242 315
243 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 316 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
244 template_env = jinja2.Environment( 317 template_env = jinja2.Environment(
245 loader = jinja2.PackageLoader('lexer_generator', '.'), 318 loader = jinja2.PackageLoader('lexer_generator', '.'),
246 undefined = jinja2.StrictUndefined) 319 undefined = jinja2.StrictUndefined)
247 template = template_env.get_template('code_generator.jinja') 320 template = template_env.get_template('code_generator.jinja')
248 321
249 encoding = self.__dfa.encoding() 322 encoding = self.__dfa.encoding()
250 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'} 323 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'}
251 char_type = char_types[encoding.name()] 324 char_type = char_types[encoding.name()]
252 325
253 return template.render( 326 return template.render(
254 start_node_number = 0, 327 start_node_number = 0,
255 debug_print = self.__debug_print, 328 debug_print = self.__debug_print,
256 default_action = default_action, 329 default_action = default_action,
257 dfa_states = dfa_states, 330 dfa_states = dfa_states,
258 encoding = encoding.name(), 331 encoding = encoding.name(),
259 char_type = char_type, 332 char_type = char_type,
260 upper_bound = encoding.primary_range()[1]) 333 upper_bound = encoding.primary_range()[1])
OLDNEW
« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698