tools/lexer_generator/code_generator.py - Issue 73453006: Experimental parser: switch blocks

Side by Side Diff: tools/lexer_generator/code_generator.py

Issue 73453006: Experimental parser: switch blocks (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 19 matching lines...) Expand all Loading...
30 import jinja2	30 import jinja2

31 from dfa import Dfa	31 from dfa import Dfa

32 from transition_keys import TransitionKey	32 from transition_keys import TransitionKey

33	33

34 class CodeGenerator:	34 class CodeGenerator:

35	35

36 def __init__(self,	36 def __init__(self,

37 rule_processor,	37 rule_processor,

38 minimize_default = True,	38 minimize_default = True,

39 inline = True,	39 inline = True,

	40 switching = True,

40 debug_print = False,	41 debug_print = False,

41 log = False):	42 log = False):

42 if minimize_default:	43 if minimize_default:

43 dfa = rule_processor.default_automata().minimal_dfa()	44 dfa = rule_processor.default_automata().minimal_dfa()

44 else:	45 else:

45 dfa = rule_processor.default_automata().dfa()	46 dfa = rule_processor.default_automata().dfa()

46 self.__dfa = dfa	47 self.__dfa = dfa

47 self.__default_action = rule_processor.default_action	48 self.__default_action = rule_processor.default_action

48 self.__debug_print = debug_print	49 self.__debug_print = debug_print

49 self.__start_node_number = self.__dfa.start_state().node_number()	50 self.__start_node_number = self.__dfa.start_state().node_number()

50 self.__log = log	51 self.__log = log

51 self.__inline = inline	52 self.__inline = inline

	53 self.__switching = switching

52	54

53 def __state_cmp(self, left, right):	55 def __state_cmp(self, left, right):

54 if left['original_node_number'] == self.__start_node_number:	56 if left['original_node_number'] == self.__start_node_number:

55 return -1	57 return -1

56 if right['original_node_number'] == self.__start_node_number:	58 if right['original_node_number'] == self.__start_node_number:

57 return 1	59 return 1

58 c = cmp(len(left['disjoint_keys']), len(right['disjoint_keys']))	60 c = cmp(len(left['disjoint_keys']), len(right['disjoint_keys']))

59 if c:	61 if c:

60 return c	62 return c

61 c = cmp(left['disjoint_keys'], right['disjoint_keys'])	63 c = cmp(left['disjoint_keys'], right['disjoint_keys'])

(...skipping 28 matching lines...) Expand all Loading...
90 return 1	92 return 1

91 # TODO store numeric values and cmp	93 # TODO store numeric values and cmp

92 return cmp(left[1], right[1])	94 return cmp(left[1], right[1])

93	95

94 @staticmethod	96 @staticmethod

95 def __transform_state(state):	97 def __transform_state(state):

96 # action data	98 # action data

97 action = state.action()	99 action = state.action()

98 entry_action = None if not action else action.entry_action()	100 entry_action = None if not action else action.entry_action()

99 match_action = None if not action else action.match_action()	101 match_action = None if not action else action.match_action()

100 # compute disjoint ranges

101 keys = TransitionKey.disjoint_keys(list(state.key_iter()))

102 def f(key):

103 ranges = list(key.range_iter())

104 assert len(ranges) == 1

105 return ranges[0]

106 keys = sorted(map(f, keys), CodeGenerator.__range_cmp)

107 # generate ordered transitions	102 # generate ordered transitions

108 transitions = map(lambda (k, v) : (k, v.node_number()),	103 transitions = map(lambda (k, v) : (k, v.node_number()),

109 state.transitions().items())	104 state.transitions().items())

110 def cmp(left, right):	105 def cmp(left, right):

111 return TransitionKey.canonical_compare(left[0], right[0])	106 return TransitionKey.canonical_compare(left[0], right[0])

112 transitions = sorted(transitions, cmp)	107 transitions = sorted(transitions, cmp)

	108 # map transition keys to disjoint ranges

	109 disjoint_keys = {'value' : []}

	110 def f((key, state)):

	111 ranges = list(key.range_iter())

	112 disjoint_keys['value'] += ranges

	113 return (ranges, state)

	114 transitions = map(f, transitions)

	115 disjoint_keys = sorted(disjoint_keys['value'], CodeGenerator.__range_cmp)

113 # dictionary object representing state	116 # dictionary object representing state

114 return {	117 return {

115 'node_number' : state.node_number(),	118 'node_number' : state.node_number(),

116 'original_node_number' : state.node_number(),	119 'original_node_number' : state.node_number(),

117 'transitions' : transitions,	120 'transitions' : transitions,

118 'disjoint_keys' : keys,	121 'switch_transitions' : [],

	122 'disjoint_keys' : disjoint_keys,

119 'inline' : None,	123 'inline' : None,

120 'depth' : None,	124 'depth' : None,

121 'action' : action,	125 'action' : action,

122 'entry_action' : entry_action,	126 'entry_action' : entry_action,

123 'match_action' : match_action,	127 'match_action' : match_action,

124 }	128 }

125	129

126 @staticmethod	130 @staticmethod

127 def __compute_depths(node_number, depth, id_map):	131 def __compute_depths(node_number, depth, id_map):

128 state = id_map[node_number]	132 state = id_map[node_number]

129 if state['depth'] != None:	133 if state['depth'] != None:

130 return	134 return

131 state['depth'] = depth	135 state['depth'] = depth

132 for (k, transition_node) in state['transitions']:	136 for (k, transition_node) in state['transitions']:

133 CodeGenerator.__compute_depths(transition_node, depth + 1, id_map)	137 CodeGenerator.__compute_depths(transition_node, depth + 1, id_map)

134	138

135 @staticmethod	139 @staticmethod

136 def __set_inline(count, state):	140 def __set_inline(count, state):

137 assert state['inline'] == None	141 assert state['inline'] == None

138 inline = False	142 inline = False

139 if not state['transitions']:	143 if not state['transitions']:

140 inline = True	144 inline = True

	145 # TODO add in 1 or 2 element if blocks

141 state['inline'] = inline	146 state['inline'] = inline

142 return count + 1 if inline else count	147 return count + 1 if inline else count

143	148

	149 @staticmethod

	150 def __split_transitions(split_count, state):

	151 assert not state['switch_transitions']

	152 (class_keys, distinct_keys, ranges) = (0, 0, 0)

	153 for (t, r) in state['disjoint_keys']:

	154 if t == 'CLASS':

	155 class_keys += 1

	156 elif t == 'LATIN_1':

	157 distinct_keys += r[1] - r[0] + 1

	158 ranges += 1

	159 else:

	160 raise Exception()

	161 if distinct_keys <= 7 or float(distinct_keys)/float(ranges) >= 7.0:

	162 return split_count

	163 switch_transitions = []

	164 if_transitions = []

	165 for (ranges, node_id) in state['transitions']:

	166 i = []

	167 for r in ranges:

	168 if r[0] == 'CLASS':

	169 i.append(r)

	170 else:

	171 for x in range(r[1][0], r[1][1] + 1):

	172 switch_transitions.append((x, node_id))

	173 if i:

	174 if_transitions.append((i, node_id))

	175 state['transitions'] = if_transitions

	176 state['switch_transitions'] = switch_transitions

	177 return split_count + 1

	178

144 def __canonicalize_traversal(self):	179 def __canonicalize_traversal(self):

145 dfa_states = []	180 dfa_states = []

146 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state))	181 self.__dfa.visit_all_states(lambda state, acc: dfa_states.append(state))

147 dfa_states = map(CodeGenerator.__transform_state, dfa_states)	182 dfa_states = map(CodeGenerator.__transform_state, dfa_states)

148 id_map = {x['original_node_number'] : x for x in dfa_states}	183 id_map = {x['original_node_number'] : x for x in dfa_states}

149 CodeGenerator.__compute_depths(self.__start_node_number, 1, id_map)	184 CodeGenerator.__compute_depths(self.__start_node_number, 1, id_map)

150 dfa_states = sorted(dfa_states, cmp=self.__state_cmp)	185 dfa_states = sorted(dfa_states, cmp=self.__state_cmp)

151 # remap all node numbers	186 # remap all node numbers

152 for i, state in enumerate(dfa_states):	187 for i, state in enumerate(dfa_states):

153 state['node_number'] = i	188 state['node_number'] = i

154 def f((key, original_node_number)):	189 def f((key, original_node_number)):

155 return (key, id_map[original_node_number]['node_number'])	190 return (key, id_map[original_node_number]['node_number'])

156 for state in dfa_states:	191 for state in dfa_states:

157 state['transitions'] = map(f, state['transitions'])	192 state['transitions'] = map(f, state['transitions'])

158 assert id_map[self.__start_node_number]['node_number'] == 0	193 assert id_map[self.__start_node_number]['node_number'] == 0

159 assert len(dfa_states) == self.__dfa.node_count()	194 assert len(dfa_states) == self.__dfa.node_count()

160 # set nodes to inline	195 # set nodes to inline

161 if self.__inline:	196 if self.__inline:

162 inlined = reduce(CodeGenerator.__set_inline, dfa_states, 0)	197 inlined = reduce(CodeGenerator.__set_inline, dfa_states, 0)

163 if self.__log:	198 if self.__log:

164 print "inlined %s" % inlined	199 print "inlined %s" % inlined

165 elif self.__log:	200 elif self.__log:

166 print "no inlining"	201 print "no inlining"

	202 # split transitions

	203 if self.__switching:

	204 switched = reduce(CodeGenerator.__split_transitions, dfa_states, 0)

	205 if self.__log:

	206 print "switched states %s" % inlined

	207 elif self.__log:

	208 print "no switching"

	209 # store states

167 self.__dfa_states = dfa_states	210 self.__dfa_states = dfa_states

168	211

169 def process(self):	212 def process(self):

170	213

171 self.__canonicalize_traversal()	214 self.__canonicalize_traversal()

172	215

173 dfa_states = self.__dfa_states	216 dfa_states = self.__dfa_states

174	217

175 default_action = self.__default_action	218 default_action = self.__default_action

176 assert(default_action and default_action.match_action())	219 assert(default_action and default_action.match_action())

177 default_action = default_action.match_action()	220 default_action = default_action.match_action()

178	221

179 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))	222 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

180 template_env = jinja2.Environment(	223 template_env = jinja2.Environment(

181 loader = jinja2.PackageLoader('lexer_generator', '.'),	224 loader = jinja2.PackageLoader('lexer_generator', '.'),

182 undefined = jinja2.StrictUndefined)	225 undefined = jinja2.StrictUndefined)

183 template = template_env.get_template('code_generator.jinja')	226 template = template_env.get_template('code_generator.jinja')

184	227

185 return template.render(	228 return template.render(

186 start_node_number = 0,	229 start_node_number = 0,

187 debug_print = self.__debug_print,	230 debug_print = self.__debug_print,

188 default_action = default_action,	231 default_action = default_action,

189 dfa_states = dfa_states)	232 dfa_states = dfa_states)

OLD	NEW

« no previous file with comments | « tools/lexer_generator/code_generator.jinja ('k') | no next file » | no next file with comments »