tools/lexer_generator/code_generator.py - Issue 83243004: Experimental parser: cleanup code requiring calls to the unicode cache

Side by Side Diff: tools/lexer_generator/code_generator.py

Issue 83243004: Experimental parser: cleanup code requiring calls to the unicode cache (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 111 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
122 distinct_keys += r[1] - r[0] + 1	122 distinct_keys += r[1] - r[0] + 1

123 ranges += 1	123 ranges += 1

124 else:	124 else:

125 raise Exception()	125 raise Exception()

126 return {	126 return {

127 'node_number' : state.node_number(),	127 'node_number' : state.node_number(),

128 'original_node_number' : state.node_number(),	128 'original_node_number' : state.node_number(),

129 'transitions' : transitions,	129 'transitions' : transitions,

130 'switch_transitions' : [],	130 'switch_transitions' : [],

131 'deferred_transitions' : [],	131 'deferred_transitions' : [],

	132 'long_char_transitions' : [],

132 'disjoint_keys' : disjoint_keys,	133 'disjoint_keys' : disjoint_keys,

133 'inline' : None,	134 'inline' : None,

134 'depth' : None,	135 'depth' : None,

135 'action' : action,	136 'action' : action,

136 'entry_action' : entry_action,	137 'entry_action' : entry_action,

137 'match_action' : match_action,	138 'match_action' : match_action,

138 'class_keys' : class_keys,	139 'class_keys' : class_keys,

139 'distinct_keys' : distinct_keys,	140 'distinct_keys' : distinct_keys,

140 'ranges' : ranges	141 'ranges' : ranges

141 }	142 }

(...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
191 if_transitions.append((i, node_id))	192 if_transitions.append((i, node_id))

192 if s:	193 if s:

193 switch_transitions.append((s, node_id))	194 switch_transitions.append((s, node_id))

194 if d:	195 if d:

195 deferred_transitions.append((d, node_id))	196 deferred_transitions.append((d, node_id))

196 state['transitions'] = if_transitions	197 state['transitions'] = if_transitions

197 state['switch_transitions'] = switch_transitions	198 state['switch_transitions'] = switch_transitions

198 state['deferred_transitions'] = deferred_transitions	199 state['deferred_transitions'] = deferred_transitions

199 return split_count + (0 if no_switch else 1)	200 return split_count + (0 if no_switch else 1)

200	201

	202 __call_map = {

	203 'non_primary_whitespace' : 'IsWhiteSpace',

	204 'non_primary_letter' : 'IsLetter',

	205 'non_primary_identifier_part_not_letter' : 'IsIdentifierPartNotLetter',

	206 'non_primary_line_terminator' : 'IsLineTerminator',

	207 }

	208

	209 def __rewrite_deferred_transitions(self, state):

	210 assert not state['long_char_transitions']

	211 transitions = state['deferred_transitions']

	212 if not transitions:

	213 return

	214 encoding = self.__dfa.encoding()

	215 bom = 'byte_order_mark'

	216 catch_all = 'non_primary_everything_else'

	217 all_classes = set(encoding.class_name_iter())

	218 fast_classes = set(['eos', 'zero'])

	219 call_classes = all_classes - fast_classes - set([bom, catch_all])

	220 def remap_transition(class_name):

	221 if class_name in call_classes:

	222 return ('LONG_CHAR_CLASS', 'call', self.__call_map[class_name])

	223 if class_name == bom:

	224 return ('LONG_CHAR_CLASS', class_name)

	225 raise Exception(class_name)

	226 fast_transitions = []

	227 long_class_transitions = []

	228 long_class_map = {}

	229 catchall_transition = None

	230 # loop through and remove catch_all_transitions

	231 for (classes, transition_node_id) in transitions:

	232 ft = []

	233 lct = []

	234 has_catch_all = False

	235 for (class_type, class_name) in classes:

	236 if class_name in fast_classes:

	237 ft.append((class_type, class_name))

	238 else:

	239 assert not class_name in long_class_map

	240 long_class_map[class_name] = transition_node_id

	241 if class_name == catch_all:

	242 assert not has_catch_all

	243 assert catchall_transition == None

	244 has_catch_all = True

	245 else:

	246 lct.append(remap_transition(class_name))

	247 if ft:

	248 fast_transitions.append((ft, transition_node_id))

	249 if has_catch_all:

	250 catchall_transition = (lct, transition_node_id)

	251 elif lct:

	252 long_class_transitions.append((lct, transition_node_id))

	253 # all transitions are fast

	254 if not long_class_map:

	255 return

	256 if catchall_transition:

	257 catchall_transitions = all_classes - fast_classes

	258 for class_name in long_class_map.iterkeys():

	259 catchall_transitions.remove(class_name)

	260 assert not catchall_transitions, "class inversion not unimplemented"

	261 # split deferred transitions

	262 state['deferred_transitions'] = fast_transitions

	263 if catchall_transition:

	264 catchall_transition = [

	265 ([('LONG_CHAR_CLASS', 'catch_all')], catchall_transition[1])]

	266 else:

	267 catchall_transition = []

	268 state['long_char_transitions'] = (long_class_transitions +

	269 catchall_transition) # must be last

	270

201 def __canonicalize_traversal(self):	271 def __canonicalize_traversal(self):

202 dfa_states = []	272 dfa_states = []

203 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state))	273 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state))

204 encoding = self.__dfa.encoding()	274 encoding = self.__dfa.encoding()

205 f = lambda state : CodeGenerator.__transform_state(encoding, state)	275 f = lambda state : CodeGenerator.__transform_state(encoding, state)

206 dfa_states = map(f, dfa_states)	276 dfa_states = map(f, dfa_states)

207 id_map = {x['original_node_number'] : x for x in dfa_states}	277 id_map = {x['original_node_number'] : x for x in dfa_states}

208 CodeGenerator.__compute_depths(self.__start_node_number, 1, id_map)	278 CodeGenerator.__compute_depths(self.__start_node_number, 1, id_map)

209 dfa_states = sorted(dfa_states, cmp=self.__state_cmp)	279 dfa_states = sorted(dfa_states, cmp=self.__state_cmp)

210 # remap all node numbers	280 # remap all node numbers

(...skipping 11 matching lines...) Expand all Loading...
222 if self.__inline:	292 if self.__inline:

223 inlined = reduce(self.__set_inline, dfa_states, 0)	293 inlined = reduce(self.__set_inline, dfa_states, 0)

224 if self.__log:	294 if self.__log:

225 print "%s states inlined" % inlined	295 print "%s states inlined" % inlined

226 elif self.__log:	296 elif self.__log:

227 print "no inlining"	297 print "no inlining"

228 # split transitions	298 # split transitions

229 switched = reduce(self.__split_transitions, dfa_states, 0)	299 switched = reduce(self.__split_transitions, dfa_states, 0)

230 if self.__log:	300 if self.__log:

231 print "%s states use switch (instead of if)" % switched	301 print "%s states use switch (instead of if)" % switched

	302 # rewrite deferred transitions

	303 for state in dfa_states:

	304 self.__rewrite_deferred_transitions(state)

232	305

233 def process(self):	306 def process(self):

234	307

235 self.__canonicalize_traversal()	308 self.__canonicalize_traversal()

236	309

237 dfa_states = self.__dfa_states	310 dfa_states = self.__dfa_states

238	311

239 default_action = self.__default_action	312 default_action = self.__default_action

240 assert(default_action and default_action.match_action())	313 assert(default_action and default_action.match_action())

241 default_action = default_action.match_action()	314 default_action = default_action.match_action()

242	315

243 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))	316 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

244 template_env = jinja2.Environment(	317 template_env = jinja2.Environment(

245 loader = jinja2.PackageLoader('lexer_generator', '.'),	318 loader = jinja2.PackageLoader('lexer_generator', '.'),

246 undefined = jinja2.StrictUndefined)	319 undefined = jinja2.StrictUndefined)

247 template = template_env.get_template('code_generator.jinja')	320 template = template_env.get_template('code_generator.jinja')

248	321

249 encoding = self.__dfa.encoding()	322 encoding = self.__dfa.encoding()

250 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'}	323 char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'}

251 char_type = char_types[encoding.name()]	324 char_type = char_types[encoding.name()]

252	325

253 return template.render(	326 return template.render(

254 start_node_number = 0,	327 start_node_number = 0,

255 debug_print = self.__debug_print,	328 debug_print = self.__debug_print,

256 default_action = default_action,	329 default_action = default_action,

257 dfa_states = dfa_states,	330 dfa_states = dfa_states,

258 encoding = encoding.name(),	331 encoding = encoding.name(),

259 char_type = char_type,	332 char_type = char_type,

260 upper_bound = encoding.primary_range()[1])	333 upper_bound = encoding.primary_range()[1])

OLD	NEW

« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »