| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 120 matching lines...) |
| 131 # | v | 131 # | v |
| 132 # \--[A-Z]---> [s3] | 132 # \--[A-Z]---> [s3] |
| 133 # | 133 # |
| 134 # We can add a goto from s1 to s2 (after checking for "a"), because the | 134 # We can add a goto from s1 to s2 (after checking for "a"), because the |
| 135 # transitions match: with [b-z], both states transition to s2, and with [A-Z], | 135 # transitions match: with [b-z], both states transition to s2, and with [A-Z], |
| 136 # both states transition to s3. Note that [a-z] is a superkey of [b-z], and the | 136 # both states transition to s3. Note that [a-z] is a superkey of [b-z], and the |
| 137 # transition from s2 is defined for the superkey, but that is ok. | 137 # transition from s2 is defined for the superkey, but that is ok. |
| 138 | 138 |
| 139 class DfaOptimizer(object): | 139 class DfaOptimizer(object): |
| 140 | 140 |
| 141 @staticmethod |
| 142 def optimize(dfa, log): |
| 143 return DfaOptimizer(dfa, log).__replace_tokens_with_gotos() |
| 144 |
| 141 def __init__(self, dfa, log): | 145 def __init__(self, dfa, log): |
| 142 self.__dfa = dfa | 146 self.__dfa = dfa |
| 143 self.__log = log | 147 self.__log = log |
| 144 | 148 |
| 145 @staticmethod | 149 @staticmethod |
| 146 def __transistions_match(encoding, incoming_key, incoming_state, state): | 150 def __transistions_match(encoding, incoming_key, incoming_state, state): |
| 147 keys = set(state.key_iter()) | 151 keys = set(state.key_iter()) |
| 148 keys.add(incoming_key) | 152 keys.add(incoming_key) |
| 149 disjoint_keys = TransitionKey.disjoint_keys(encoding, keys) | 153 disjoint_keys = TransitionKey.disjoint_keys(encoding, keys) |
| 150 for key in disjoint_keys: | 154 for key in disjoint_keys: |
| 151 if not incoming_key.is_superset_of_key(key): | 155 if not incoming_key.is_superset_of_key(key): |
| 152 continue | 156 continue |
| 153 transition_state = state.transition_state_for_key(key) | 157 transition_state = state.transition_state_for_key(key) |
| 154 if incoming_state.transition_state_for_key(key) != transition_state: | 158 if incoming_state.transition_state_for_key(key) != transition_state: |
| 155 return False | 159 return False |
| 156 return True | 160 return True |
| 157 | 161 |
| 158 @staticmethod | 162 @staticmethod |
| 159 def __build_incoming_transitions(dfa): | |
| 160 incoming_transitions = {} | |
| 161 def f(state, visitor_state): | |
| 162 for key, transition_state in state.key_state_iter(): | |
| 163 if not transition_state in incoming_transitions: | |
| 164 incoming_transitions[transition_state] = [] | |
| 165 incoming_transitions[transition_state].append((key, state)) | |
| 166 dfa.visit_all_states(f) | |
| 167 return incoming_transitions | |
| 168 | |
| 169 @staticmethod | |
| 170 def __build_replacement_map(dfa): | 163 def __build_replacement_map(dfa): |
| 171 replacements = {} | 164 replacements = {} |
| 172 encoding = dfa.encoding() | 165 encoding = dfa.encoding() |
| 173 incoming_transitions = DfaOptimizer.__build_incoming_transitions(dfa) | 166 incoming_transitions = dfa.build_incoming_transitions_map() |
| 174 replacement_targets = set([]) | 167 replacement_targets = set() |
| 175 # TODO(dcarney): this should be an ordered walk | 168 # TODO(dcarney): this should be an ordered walk |
| 176 for state, incoming in incoming_transitions.items(): | 169 for state, incoming in incoming_transitions.items(): |
| 177 if len(incoming) < 10: | 170 if len(incoming) < 10: |
| 178 continue | 171 continue |
| 179 # the state's action will be replaced, so we just don't optimize if there | 172 # the state's action will be replaced, so we just don't optimize if there |
| 180 # is one | 173 # is one |
| 181 if state.action(): # TODO(dcarney): allow this via action chaining | 174 if state.action(): # TODO(dcarney): allow this via action chaining |
| 182 continue | 175 continue |
| 183 # We only perform this optimization on 'token' actions | 176 # We only perform this optimization on 'token' actions |
| 184 match_action = state.match_action() | 177 match_action = state.match_action() |
| (...skipping 86 matching lines...) |
| 271 counters[new_action.name()] += 1 | 264 counters[new_action.name()] += 1 |
| 272 # install new match state | 265 # install new match state |
| 273 match_state_id = DfaOptimizer.__new_state_name(state) + '_match' | 266 match_state_id = DfaOptimizer.__new_state_name(state) + '_match' |
| 274 new_state['transitions'][TransitionKey.omega()] = match_state_id | 267 new_state['transitions'][TransitionKey.omega()] = match_state_id |
| 275 match_state = DfaOptimizer.__new_state(False, new_action) | 268 match_state = DfaOptimizer.__new_state(False, new_action) |
| 276 match_state['transitions'][TransitionKey.omega()] = jump_target + '_tail' | 269 match_state['transitions'][TransitionKey.omega()] = jump_target + '_tail' |
| 277 return match_state | 270 return match_state |
| 278 | 271 |
| 279 @staticmethod | 272 @staticmethod |
| 280 def __remove_orphaned_states(states, orphanable, start_name): | 273 def __remove_orphaned_states(states, orphanable, start_name): |
| 281 seen = set([]) | 274 seen = set() |
| 282 def visit(state_id, acc): | 275 def visit(state_id, acc): |
| 283 seen.add(state_id) | 276 seen.add(state_id) |
| 284 def state_iter(state_id): | 277 def state_iter(state_id): |
| 285 return states[state_id]['transitions'].itervalues() | 278 return states[state_id]['transitions'].itervalues() |
| 286 Automaton.visit_states(set([start_name]), visit, state_iter=state_iter) | 279 Automaton.visit_states(set([start_name]), visit, state_iter=state_iter) |
| 287 def f(name, state): | 280 def f(name, state): |
| 288 assert name in seen or name in orphanable | 281 assert name in seen or name in orphanable |
| 289 return name in seen | 282 return name in seen |
| 290 return {k: v for k, v in states.iteritems() if f(k, v)} | 283 return {k: v for k, v in states.iteritems() if f(k, v)} |
| 291 | 284 |
| (...skipping 11 matching lines...) |
| 303 name = DfaOptimizer.__new_state_name | 296 name = DfaOptimizer.__new_state_name |
| 304 counters = { | 297 counters = { |
| 305 'removals' : 0, | 298 'removals' : 0, |
| 306 'goto_start' : 0, | 299 'goto_start' : 0, |
| 307 'store_token' : 0, | 300 'store_token' : 0, |
| 308 'store_harmony_token' : 0, | 301 'store_harmony_token' : 0, |
| 309 'split_state' : 0 | 302 'split_state' : 0 |
| 310 } | 303 } |
| 311 # map the old state to the new state, with fewer transitions and | 304 # map the old state to the new state, with fewer transitions and |
| 312 # goto actions | 305 # goto actions |
| 313 orphanable = set([]) | 306 orphanable = set() |
| 314 def replace_state(state, acc): | 307 def replace_state(state, acc): |
| 315 if state in replacements: | 308 if state in replacements: |
| 316 target_state = replacements[state][0] | 309 target_state = replacements[state][0] |
| 317 deletions = replacements[state][1] | 310 deletions = replacements[state][1] |
| 318 else: | 311 else: |
| 319 deletions = {} | 312 deletions = {} |
| 320 # this is a replacement target, so we split the state | 313 # this is a replacement target, so we split the state |
| 321 if state in replacement_targets: | 314 if state in replacement_targets: |
| 322 assert not deletions | 315 assert not deletions |
| 323 (head, tail, match) = DfaOptimizer.__split_target_state(state) | 316 (head, tail, match) = DfaOptimizer.__split_target_state(state) |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 363 # dump stats | 356 # dump stats |
| 364 if self.__log: | 357 if self.__log: |
| 365 print 'goto_start inserted %s' % counters['goto_start'] | 358 print 'goto_start inserted %s' % counters['goto_start'] |
| 366 print 'store_token inserted %s' % ( | 359 print 'store_token inserted %s' % ( |
| 367 counters['store_token']) | 360 counters['store_token']) |
| 368 print 'store_harmony_token %s' % ( | 361 print 'store_harmony_token %s' % ( |
| 369 counters['store_harmony_token']) | 362 counters['store_harmony_token']) |
| 370 print 'transitions removed %s' % counters['removals'] | 363 print 'transitions removed %s' % counters['removals'] |
| 371 print 'states split %s' % counters['split_state'] | 364 print 'states split %s' % counters['split_state'] |
| 372 return Dfa(self.__dfa.encoding(), start_name, states) | 365 return Dfa(self.__dfa.encoding(), start_name, states) |
| 373 | |
| 374 @staticmethod | |
| 375 def optimize(dfa, log): | |
| 376 optimizer = DfaOptimizer(dfa, log) | |
| 377 return optimizer.__replace_tokens_with_gotos() | |
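A note for readers of this diff: the comment block at lines 131-137 (unchanged by the patch) is the heart of the optimization. A goto from s1 to s2 after consuming "a" is only legal when every remaining character class behaves identically in both states, with s2 allowed to define its transitions on superset keys. The sketch below is a toy illustration of that check and assumes nothing from the real lexer generator: frozensets of characters stand in for TransitionKey, and a state is just a dict from character class to target name.

```python
# Toy model: a state is {frozenset_of_chars: target_name}; frozensets stand in
# for the real TransitionKey objects.
def target_for(state, chars):
  """Target of `state` for a character class, allowing superset keys."""
  for key, target in state.items():
    if chars <= key:
      return target
  return None

def goto_is_safe(s1, s2, goto_chars):
  """True if s1 may jump into s2 after consuming goto_chars: every other
  character class must lead both states to the same target."""
  for key, target in s1.items():
    if key == goto_chars:
      continue  # this is the transition being replaced by the goto
    if target_for(s2, key) != target:
      return False
  return True

LOWER = frozenset('abcdefghijklmnopqrstuvwxyz')
UPPER = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

s1 = {frozenset('a'): 'token', LOWER - frozenset('a'): 's2', UPPER: 's3'}
s2 = {LOWER: 's2', UPPER: 's3'}  # [a-z] is a superset of [b-z]; that is fine

print(goto_is_safe(s1, s2, frozenset('a')))  # True: the goto is safe to insert
```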
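The main refactoring in the patch replaces DfaOptimizer's private __build_incoming_transitions helper (old lines 159-167) with a call to dfa.build_incoming_transitions_map() (new line 166), and hoists the optimize() entry point to the top of the class. The new Dfa method is not part of this file, so the following is only a guess at its shape, written as a free function and reusing the visit_all_states and key_state_iter calls from the deleted helper:

```python
def build_incoming_transitions_map(dfa):
  """Probable shape of the new Dfa.build_incoming_transitions_map(): for each
  state, collect the (key, predecessor) pairs that transition into it."""
  incoming = {}
  def visit(state, acc):
    for key, transition_state in state.key_state_iter():
      incoming.setdefault(transition_state, []).append((key, state))
  dfa.visit_all_states(visit)
  return incoming
```

Moving this bookkeeping onto Dfa keeps the optimizer focused on rewriting transitions, and presumably lets other passes reuse the same incoming-transitions map.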
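Finally, __remove_orphaned_states (new lines 273-283) is a straightforward reachability prune over the rewritten state table: anything no longer reachable from the start state is dropped, and the assert checks that only states we expected to orphan actually disappear. A self-contained version of the same idea, using toy dict-of-dicts states rather than the generator's real state objects:

```python
# Reachability prune over states shaped like {name: {'transitions': {key: target}}},
# the same dict layout the optimizer builds for the new Dfa.
def remove_orphaned_states(states, start_name):
  seen = set()
  work = [start_name]
  while work:
    name = work.pop()
    if name in seen:
      continue
    seen.add(name)
    work.extend(states[name]['transitions'].values())
  return {name: state for name, state in states.items() if name in seen}

states = {
  'start': {'transitions': {'a': 's1'}},
  's1':    {'transitions': {}},
  'dead':  {'transitions': {'b': 's1'}},  # orphaned once its incoming edge is rewritten
}
print(sorted(remove_orphaned_states(states, 'start')))  # ['s1', 'start']
```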