tools/lexer_generator/dfa_optimizer.py - Issue 160443002: Experimental parser: remove match actions

Side by Side Diff: tools/lexer_generator/dfa_optimizer.py

Issue 160443002: Experimental parser: remove match actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

11 # with the distribution.	11 # with the distribution.

12 # * Neither the name of Google Inc. nor the names of its	12 # * Neither the name of Google Inc. nor the names of its

13 # contributors may be used to endorse or promote products derived	13 # contributors may be used to endorse or promote products derived

14 # from this software without specific prior written permission.	14 # from this software without specific prior written permission.

15 #	15 #

16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS	16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT	17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR	18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT	19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,	20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 from transition_keys import TransitionKey	28 from transition_keys import TransitionKey

29 from automaton import Term, Action	29 from automaton import Term, Action, Automaton

30 from dfa import Dfa	30 from dfa import Dfa

31	31

32 # --- Optimization: Replacing tokens with gotos ---	32 # --- Optimization: Replacing tokens with gotos ---

33 #	33 #

34 # This optimization reduces the code size for grammars which have constructs	34 # This optimization reduces the code size for grammars which have constructs

35 # similar to keywords and identifiers. Consider the following grammar:	35 # similar to keywords and identifiers. Consider the following grammar:

36 #	36 #

37 # "baz" <\|token(BAZ)\|>	37 # "baz" <\|token(BAZ)\|>

38 # "bazz" <\|token(BAZZ)\|>	38 # "bazz" <\|token(BAZZ)\|>

39 #	39 #

(...skipping 96 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
136 # both states transition to s3. Note that [a-z] is a superkey of [b-z], and the	136 # both states transition to s3. Note that [a-z] is a superkey of [b-z], and the

137 # transition from s2 is defined for the superkey, but that is ok.	137 # transition from s2 is defined for the superkey, but that is ok.

138	138

139 class DfaOptimizer(object):	139 class DfaOptimizer(object):

140	140

141 def __init__(self, dfa, log):	141 def __init__(self, dfa, log):

142 self.__dfa = dfa	142 self.__dfa = dfa

143 self.__log = log	143 self.__log = log

144	144

145 @staticmethod	145 @staticmethod

146 def transistions_match(encoding, incoming_key, incoming_state, state):	146 def __transistions_match(encoding, incoming_key, incoming_state, state):

147 keys = set(state.key_iter())	147 keys = set(state.key_iter())

148 keys.add(incoming_key)	148 keys.add(incoming_key)

149 disjoint_keys = TransitionKey.disjoint_keys(encoding, keys)	149 disjoint_keys = TransitionKey.disjoint_keys(encoding, keys)

150 for key in disjoint_keys:	150 for key in disjoint_keys:

151 if not incoming_key.is_superset_of_key(key):	151 if not incoming_key.is_superset_of_key(key):

152 continue	152 continue

153 transition_state = state.transition_state_for_key(key)	153 transition_state = state.transition_state_for_key(key)

154 if incoming_state.transition_state_for_key(key) != transition_state:	154 if incoming_state.transition_state_for_key(key) != transition_state:

155 return False	155 return False

156 return True	156 return True

157	157

158 def replace_tokens_with_gotos(self):	158 @staticmethod

	159 def __build_incoming_transitions(dfa):

	160 incoming_transitions = {}

	161 def f(state, visitor_state):

	162 for key, transition_state in state.key_state_iter():

	163 if not transition_state in incoming_transitions:

	164 incoming_transitions[transition_state] = []

	165 incoming_transitions[transition_state].append((key, state))

	166 dfa.visit_all_states(f)

	167 return incoming_transitions

	168

	169 @staticmethod

	170 def __match_action(state):

	171 state = state.omega_transition()

	172 state = state if state and state.transition_count() == 0 else None

	173 return Action.empty_action() if not state else state.action()

	174

	175 @staticmethod

	176 def __build_replacement_map(dfa):

	177 replacements = {}

	178 encoding = dfa.encoding()

	179 incoming_transitions = DfaOptimizer.__build_incoming_transitions(dfa)

	180 replacement_targets = set([])

	181 # TODO(dcarney): this should be an ordered walk

	182 for state, incoming in incoming_transitions.items():

	183 if len(incoming) < 10:

	184 continue

	185 # the states action will be replaced, so we just don't optimize if there

	186 # is one

	187 if state.action(): # TODO(dcarney): allow this via action chaining

	188 continue

	189 # We only perform this optimization on 'token' actions

	190 match_action = DfaOptimizer.__match_action(state)

	191 if not match_action.name() == 'token':

	192 continue

	193 assert state.omega_transition() in dfa.terminal_set()

	194 # we can drop incoming edges for states with a match action

	195 # of either 'token' or 'harmony_token'

	196 def is_replacement_candidate(state):

	197 action = DfaOptimizer.__match_action(state)

	198 return action.name() == 'token' or action.name() == 'harmony_token'

	199 for (incoming_key, incoming_state) in incoming:

	200 # check to see if incoming edges are matched by an outgoing edge

	201 if (incoming_state == state or # drop self edges

	202 incoming_key == TransitionKey.omega() or

	203 not is_replacement_candidate(incoming_state) or

	204 not DfaOptimizer.__transistions_match(

	205 encoding, incoming_key, incoming_state, state)):

	206 continue

	207 assert (not incoming_state in replacements or

	208 replacements[incoming_state][0] == state)

	209 # set this transition as to be removed

	210 if not incoming_state in replacements:

	211 replacements[incoming_state] = (state, set())

	212 replacements[incoming_state][1].add(incoming_key)

	213 replacement_targets.add(state)

	214 # now see if we can gather other transistions into the goto

	215 for key in incoming_state.key_iter():

	216 if (key != TransitionKey.omega() and

	217 key not in replacements[incoming_state][1] and

	218 DfaOptimizer.__transistions_match(

	219 encoding, key, incoming_state, state)):

	220 # this transition can be removed

	221 replacements[incoming_state][1].add(key)

	222 return (replacement_targets, replacements)

	223

	224 @staticmethod

	225 def __new_state(is_terminal, action):

	226 return {

	227 'transitions' : {},

	228 'terminal' : is_terminal,

	229 'action' : action,

	230 }

	231

	232 @staticmethod

	233 def __new_state_name(state):

	234 return str(state.node_number())

	235

	236 @staticmethod

	237 def __split_target_state(state):

	238 old_name = DfaOptimizer.__new_state_name(state)

	239 old_match_action = DfaOptimizer.__match_action(state)

	240 assert old_match_action.name() == 'token', 'unimplemented'

	241 precedence = old_match_action.precedence()

	242 match_action = Action(Term('do_stored_token'), precedence)

	243 head_action = Action(

	244 Term('store_token', old_match_action.term().args()[0]), precedence)

	245 tail_action = Action(Term('no_op'), precedence)

	246 head = DfaOptimizer.__new_state(False, head_action)

	247 tail = DfaOptimizer.__new_state(False, tail_action)

	248 match = DfaOptimizer.__new_state(True, match_action)

	249 head['transitions'][TransitionKey.omega()] = old_name + '_tail'

	250 tail['transitions'][TransitionKey.omega()] = old_name + '_match'

	251 return (head, tail, match)

	252

	253 # generate a store action to replace an existing action

	254 @staticmethod

	255 def __replacement_action(state, transition_state):

	256 old_action = DfaOptimizer.__match_action(state)

	257 assert old_action

	258 transition_action = DfaOptimizer.__match_action(transition_state).term()

	259 if old_action.term() == transition_action:

	260 # no need to store token

	261 return Action.empty_action()

	262 new_name = 'store_' + old_action.name()

	263 return Action(

	264 Term(new_name, *old_action.term().args()), old_action.precedence())

	265

	266 @staticmethod

	267 def __apply_jump(counters, state, new_state, target_state):

	268 # generate a new action for the new omega transition

	269 new_action = DfaOptimizer.__replacement_action(state, target_state)

	270 # determine new jump target

	271 jump_target = DfaOptimizer.__new_state_name(target_state)

	272 if not new_action:

	273 # just jump to entry of target_state, which will be split

	274 new_state['transitions'][TransitionKey.omega()] = jump_target

	275 counters['goto_start'] += 1

	276 return None

	277 counters[new_action.name()] += 1

	278 # install new match state

	279 match_state_id = DfaOptimizer.__new_state_name(state) + '_match'

	280 new_state['transitions'][TransitionKey.omega()] = match_state_id

	281 match_state = DfaOptimizer.__new_state(False, new_action)

	282 match_state['transitions'][TransitionKey.omega()] = jump_target + '_tail'

	283 return match_state

	284

	285 @staticmethod

	286 def __remove_orphaned_states(states, orphanable, start_name):

	287 seen = set([])

	288 def visit(state_id, acc):

	289 seen.add(state_id)

	290 def state_iter(state_id):

	291 return states[state_id]['transitions'].itervalues()

	292 Automaton.visit_states(set([start_name]), visit, state_iter=state_iter)

	293 def f(name, state):

	294 assert name in seen or name in orphanable

	295 return name in seen

	296 return {k: v for k, v in states.iteritems() if f(k, v)}

	297

	298 def __replace_tokens_with_gotos(self):

159 '''replace states with no entry action and match action of type 'token(X)'	299 '''replace states with no entry action and match action of type 'token(X)'

160 with new states with entry action store_token(X) and match action	300 with new states with entry action store_token(X) and match action

161 goto(state_id) which has (far) fewer transitions'''	301 goto(state_id) which has (far) fewer transitions'''

162	302

163 dfa = self.__dfa	303 dfa = self.__dfa

164 encoding = dfa.encoding()	304 (replacement_targets, replacements) = self.__build_replacement_map(dfa)

165	305 if not replacement_targets:

166 incoming_transitions = {}	306 return dfa

167 def build_incoming_transitions(state, visitor_state):

168 for key, transition_state in state.key_state_iter():

169 if not transition_state in incoming_transitions:

170 incoming_transitions[transition_state] = []

171 incoming_transitions[transition_state].append((key,state))

172 dfa.visit_all_states(build_incoming_transitions)

173

174 def is_replacement_candidate(state):

175 return (state.action().match_action().name() == 'token' or

176 state.action().match_action().name() == 'harmony_token')

177

178 replacements = {}

179 for state, incoming in incoming_transitions.items():

180 if len(incoming) < 10:

181 continue

182 if not is_replacement_candidate(state):

183 continue

184 # check to see if incoming edges are matched by an outgoing edge

185 def match_filter((incoming_key, incoming_state)):

186 return (incoming_state != state and # drop self transitions

187 is_replacement_candidate(incoming_state) and

188 self.transistions_match(

189 encoding, incoming_key, incoming_state, state))

190 for incoming_key_state in incoming:

191 if not match_filter(incoming_key_state):

192 continue

193 (incoming_key, incoming_state) = incoming_key_state

194 # set this transition as to be replaced

195 if not incoming_state in replacements:

196 replacements[incoming_state] = {}

197 replacements[incoming_state][incoming_key] = state

198 # now see if we can gather other transistions into the goto

199 for key in incoming_state.key_iter():

200 if key == incoming_key:

201 continue

202 if not self.transistions_match(

203 encoding, key, incoming_state, state):

204 continue

205 # this transition can be removed

206 replacements[incoming_state][key] = None

207 if not replacements:

208 return

209 # perform replacement	307 # perform replacement

210 states = {}	308 states = {}

211 name = lambda state : str(state.node_number())	309 name = DfaOptimizer.__new_state_name

212 counters = {	310 counters = {

213 'removals' : 0,	311 'removals' : 0,

214 'goto_start' : 0,	312 'goto_start' : 0,

215 'store_token_and_goto' : 0,	313 'store_token' : 0,

216 'store_harmony_token_and_goto' : 0,	314 'store_harmony_token' : 0,

	315 'split_state' : 0

217 }	316 }

218 store_states = set([])

219 # generate a store action to replace an existing action

220 def replacement_action(old_action, transition_state):

221 assert old_action.match_action()

222 state_id = name(transition_state)

223 if old_action.match_action().name() == 'token':

224 old_token = old_action.match_action().args()[0]

225 if (transition_state.action().match_action().name() == 'token' and

226 transition_state.action().match_action().args()[0] == old_token):

227 # no need to store token

228 match_action = Term('goto_start', state_id)

229 counters['goto_start'] += 1

230 else:

231 counters['store_token_and_goto'] += 1

232 match_action = Term('store_token_and_goto', old_token, state_id)

233 elif old_action.match_action().name() == 'harmony_token':

234 new_args = list(old_action.match_action().args()) + [state_id]

235 match_action = Term('store_harmony_token_and_goto', *new_args)

236 counters['store_harmony_token_and_goto'] += 1

237 else:

238 raise Exception(old_action.match_action())

239 return Action(old_action.entry_action(),

240 match_action,

241 old_action.precedence())

242 # map the old state to the new state, with fewer transitions and	317 # map the old state to the new state, with fewer transitions and

243 # goto actions	318 # goto actions

	319 orphanable = set([])

244 def replace_state(state, acc):	320 def replace_state(state, acc):

245 new_state = {	321 if state in replacements:

246 'transitions' : {},	322 target_state = replacements[state][0]

247 'terminal' : state in self.__dfa.terminal_set(),	323 deletions = replacements[state][1]

248 'action' : state.action(),	324 else:

249 }	325 deletions = {}

250 states[name(state)] = new_state	326 # this is a replacement target, so we split the state

251 state_replacements = replacements[state] if state in replacements else {}	327 if state in replacement_targets:

	328 assert not deletions

	329 (head, tail, match) = DfaOptimizer.__split_target_state(state)

	330 new_state = tail

	331 states[name(state)] = head

	332 states[name(state) + '_tail'] = tail

	333 states[name(state) + '_match'] = match

	334 counters['split_state'] += 1

	335 else:

	336 new_state = self.__new_state(state in self.__dfa.terminal_set(),

	337 state.action())

	338 states[name(state)] = new_state

	339 assert not TransitionKey.omega() in deletions

252 for (transition_key, transition_state) in state.key_state_iter():	340 for (transition_key, transition_state) in state.key_state_iter():

253 if not transition_key in state_replacements:	341 # just copy these transitions

	342 if transition_key != TransitionKey.omega():

	343 if not transition_key in deletions:

	344 new_state['transitions'][transition_key] = name(transition_state)

	345 continue

	346 else:

	347 # drop these transitions

	348 deletions.remove(transition_key)

	349 counters['removals'] += 1

	350 continue

	351 if transition_key in new_state['transitions']:

	352 # this is a split state, omega has already been assigned

	353 # mark old match state as orphanable

	354 orphanable.add(name(transition_state))

	355 elif not state in replacements:

	356 # no replacement

254 new_state['transitions'][transition_key] = name(transition_state)	357 new_state['transitions'][transition_key] = name(transition_state)

255 continue	358 else:

256 replacement = state_replacements[transition_key]	359 # mark old omega state as orphanable

257 del state_replacements[transition_key]	360 orphanable.add(name(transition_state))

258 # just drop the transition	361 match_state = DfaOptimizer.__apply_jump(

259 if replacement == None:	362 counters, state, new_state, target_state)

260 counters['removals'] += 1	363 if match_state:

261 continue	364 states[name(state) + '_match'] = match_state

262 assert replacement == transition_state	365 assert not deletions

263 # do goto replacement

264 new_state['action'] = replacement_action(state.action(), replacement)

265 # will need to patch up transition_state

266 store_states.add(name(transition_state))

267 assert not state_replacements

268 self.__dfa.visit_all_states(replace_state)	366 self.__dfa.visit_all_states(replace_state)

269 # now patch up all states with stores

270 start_name = name(self.__dfa.start_state())	367 start_name = name(self.__dfa.start_state())

271 for state_id in store_states:	368 states = self.__remove_orphaned_states(states, orphanable, start_name)

272 old_action = states[state_id]['action']	369 # dump stats

273 assert not old_action.entry_action()

274 assert old_action.match_action().name() == 'token', 'unimplemented'

275 entry_action = Term('store_token', old_action.match_action().args()[0])

276 match_action = Term('do_stored_token', state_id)

277 precedence = old_action.precedence()

278 states[state_id]['action'] = Action(

279 entry_action, match_action, precedence)

280 # The state might be only reachable via gotos; make sure it's connected in

281 # the state graph by adding a bogus transition from the start state. This

282 # transition doens't match any character.

283 states[start_name]['transitions'][

284 TransitionKey.unique('no_match')] = state_id

285

286 if self.__log:	370 if self.__log:

287 print 'goto_start inserted %s' % counters['goto_start']	371 print 'goto_start inserted %s' % counters['goto_start']

288 print 'store_token_and_goto inserted %s' % (	372 print 'store_token inserted %s' % (

289 counters['store_token_and_goto'])	373 counters['store_token'])

290 print 'store_harmony_token_and_goto %s' % (	374 print 'store_harmony_token %s' % (

291 counters['store_harmony_token_and_goto'])	375 counters['store_harmony_token'])

292 print 'transitions removed %s' % counters['removals']	376 print 'transitions removed %s' % counters['removals']

293 print 'do_stored_token actions added %s' % len(store_states)	377 print 'states split %s' % counters['split_state']

294 self.__dfa = Dfa(self.__dfa.encoding(), start_name, states)	378 return Dfa(self.__dfa.encoding(), start_name, states)

295	379

296 @staticmethod	380 @staticmethod

297 def optimize(dfa, log):	381 def optimize(dfa, log):

298 optimizer = DfaOptimizer(dfa, log)	382 optimizer = DfaOptimizer(dfa, log)

299 optimizer.replace_tokens_with_gotos()	383 return optimizer.__replace_tokens_with_gotos()

300 return optimizer.__dfa

OLD	NEW

« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/dot_utilities.py » ('j') | no next file with comments »