Chromium Code Reviews

Unified Diff: tools/lexer_generator/dfa.py

Issue 78053002: Experimental lexer generator: rename_things(Dfa minimization) += comments part 2 (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: new try Created 7 years, 1 month ago
 # Copyright 2013 the V8 project authors. All rights reserved.
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met:
 #
 #     * Redistributions of source code must retain the above copyright
 #       notice, this list of conditions and the following disclaimer.
 #     * Redistributions in binary form must reproduce the above
 #       copyright notice, this list of conditions and the following
 #       disclaimer in the documentation and/or other materials provided
(...skipping 140 matching lines...)
 class DfaMinimizer:
 
   __verify = True
 
   @staticmethod
   def set_verify(verify):
     DfaMinimizer.__verify = verify
 
   def __init__(self, dfa):
     self.__dfa = dfa
-    self.__id_map = None
-    self.__reverse_id_map = None
+    self.__id_to_state = None
+    self.__state_to_id = None
     self.__alphabet = None
 
   def __create_initial_partitions(self):
-    assert not self.__id_map
-    assert not self.__reverse_id_map
+    assert not self.__id_to_state
+    assert not self.__state_to_id
     assert not self.__alphabet
 
     # For the minimization, we associate each state with an ID. A set of states
     # is represented as a set of state IDs.
-    id_map = {}
+    id_to_state = {}
 
     # First we partition the states into the following groups:
     # - terminal states without action
     # - nonterminal states without action
     # - one group per action: terminal states which have the action
     # - one group per action: nonterminal states which have the action
     # These are the keys of initial_partitions. The values are lists of state
     # IDs.
     initial_partitions = {}
     terminal_set = self.__dfa.terminal_set()
     all_keys = [] # Will contain all TransitionKeys in the dfa.
 
-    # f fills in initial_partitions, id_map and all_keys.
+    # f fills in initial_partitions, id_to_state and all_keys.
     def f(state, visitor_state):
-      state_id = len(id_map)
-      id_map[state_id] = state
+      state_id = len(id_to_state)
+      id_to_state[state_id] = state
       action = state.action()
       all_keys.append(state.key_iter())
       if action:
         if state not in terminal_set:
           assert action.entry_action()
           key = ("nonterminal action", action)
         else:
           key = ("terminal action", action)
       elif state in terminal_set:
         key = ("terminal set",)
       else:
         key = ("nonterminal set",)
       if not key in initial_partitions:
         initial_partitions[key] = set()
       initial_partitions[key].add(state_id)
     self.__dfa.visit_all_states(f)
     partitions = set()
     working_set = set()
 
     # Create StatePartition objects:
     for k, p in initial_partitions.items():
       p = StatePartition(p)
       partitions.add(p)
       working_set.add(p)
-    reverse_id_map = {v : k for k, v in id_map.items()}
-    assert len(id_map) == len(reverse_id_map)
-    self.__reverse_id_map = reverse_id_map
-    self.__id_map = id_map
+    state_to_id = {v : k for k, v in id_to_state.items()}
+    assert len(id_to_state) == len(state_to_id)
+    self.__state_to_id = state_to_id
+    self.__id_to_state = id_to_state
 
     # The alphabet defines the TransitionKeys we need to check when deciding if
-    # to states of the dfa can be in the same partition. See the doc string of
+    # two states of the dfa can be in the same partition. See the doc string of
     # TransitionKey.disjoint_keys.
 
     # For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet is
     # {[a-b], [c-d], [e-h]}. If state S1 has a transition to S2 with
     # TransitionKey [a-d], and state S3 has a transition to S4 with
     # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This will
     # become clear when we check the transition for TransitionKey [c-d] (S1 has
     # a transition to S2, S3 has a transition to S4).
     self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys)))
 
     # For each state and each TransitionKey in the alphabet, find out which
     # transition we take from the state with the TransitionKey.
     transitions = {}
-    for state_id, state in id_map.items():
+    for state_id, state in id_to_state.items():
       def f((key_id, key)):
         transition_state = state.transition_state_for_key(key)
         if transition_state:
-          return reverse_id_map[transition_state]
+          return state_to_id[transition_state]
         return None
       transitions[state_id] = map(f, enumerate(self.__alphabet))
     self.__transitions = transitions
     # verify created structures
     if self.__verify:
       for partition in partitions:
         for state_id in partition:
           transition_state_array = transitions[state_id]
-          state = id_map[state_id]
+          state = id_to_state[state_id]
           for key_id, key in enumerate(self.__alphabet):
             transition_state_id = transition_state_array[key_id]
             transition_state = state.transition_state_for_key(key)
             if not transition_state:
               assert transition_state_id == None
             else:
               assert transition_state_id != None
-              assert transition_state_id == reverse_id_map[transition_state]
+              assert transition_state_id == state_to_id[transition_state]
     return (working_set, partitions)
 
   @staticmethod
   def __partition_count(partitions):
     return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))
 
-  def __merge_partitions(self, partitions):
-    id_map = self.__id_map
-    reverse_id_map = self.__reverse_id_map
+  def __create_states_from_partitions(self, partitions):
+    '''Creates a new set of states where each state corresponds to a
+    partition.'''
+    id_to_state = self.__id_to_state
+    state_to_id = self.__state_to_id
     verify = self.__verify
-    mapping = {}
-    name_map = {}
-    reverse_partition_map = {}
+    partition_to_name = {}
+    state_id_to_partition = {}
+
+    # Fill in partition_to_name and state_id_to_partition.
     for partition in partitions:
-      name_map[partition] = str(partition)
+      partition_to_name[partition] = str(partition)
       for state_id in partition:
-        reverse_partition_map[state_id] = partition
+        state_id_to_partition[state_id] = partition
     transitions = self.__transitions
+
+    # Create nodes for partitions.
+    partition_name_to_node = {}
     for partition in partitions:
       state_ids = list(partition)
+      # state is a representative state for the partition, and state_id is its
+      # ID. To check the transitions between partitions, it's enough to check
+      # transitions from the representative state. All other states will have
+      # equivalent transitions, that is, transitions which transition into the
+      # same partition.
       state_id = state_ids[0]
-      state = id_map[state_id]
+      state = id_to_state[state_id]
       node = {
         'transitions' : {},
         'terminal' : state in self.__dfa.terminal_set(),
         'action' : state.action(),
       }
-      mapping[str(partition)] = node
-      transition_array = transitions[state_id]
+      partition_name_to_node[str(partition)] = node
+      transition_key_to_state_id = transitions[state_id]
       for key_id, key in enumerate(self.__alphabet):
-        transition_id = transition_array[key_id]
-        if transition_id == None:
+        transition_state_id = transition_key_to_state_id[key_id]
+        if transition_state_id == None:
           if verify:
+            # There is no transition for that key from state; check that no
+            # other states in the partition have a transition with that key
+            # either.
             assert not state.transition_state_for_key(key)
             for s_id in state_ids:
-              assert not id_map[s_id].transition_state_for_key(key)
+              assert not id_to_state[s_id].transition_state_for_key(key)
           continue
-        transition_partition = reverse_partition_map[transition_id]
+        transition_partition = state_id_to_partition[transition_state_id]
         assert transition_partition
         if verify:
+          # There is a transition for that key from state; check that all other
+          # states in the partition have an equivalent transition (transition
+          # into the same partition).
           for s_id in state_ids:
-            transition = id_map[s_id].transition_state_for_key(key)
+            transition = id_to_state[s_id].transition_state_for_key(key)
             assert transition
-            test_partition = reverse_partition_map[reverse_id_map[transition]]
+            test_partition = state_id_to_partition[state_to_id[transition]]
             assert test_partition == transition_partition
-        node['transitions'][key] = name_map[transition_partition]
-    start_id = reverse_id_map[self.__dfa.start_state()]
-    start_name = name_map[reverse_partition_map[start_id]]
-    return (start_name, mapping)
+        # Record the transition between partitions.
+        node['transitions'][key] = partition_to_name[transition_partition]
+    start_id = state_to_id[self.__dfa.start_state()]
+    start_name = partition_to_name[state_id_to_partition[start_id]]
+    return (start_name, partition_name_to_node)
 
   def minimize(self):
     '''Minimize the dfa. For the general idea of minimizing automata, see
     http://en.wikipedia.org/wiki/DFA_minimization. In addition, we need to take
     into account the actions associated with states, i.e., we cannot merge two
     states which have different actions.'''
     (working_set, partitions) = self.__create_initial_partitions()
     node_count = self.__dfa.node_count()
-    id_map = self.__id_map
-    reverse_id_map = self.__reverse_id_map
+    id_to_state = self.__id_to_state
+    state_to_id = self.__state_to_id
+    # transitions is a 2-dimensional array indexed by state_id and index of the
+    # TransitionKey in the alphabet.
+    # transitions[state_id][transition_key_ix] = transition_state_id
     transitions = self.__transitions
     key_range = range(0, len(self.__alphabet))
     while working_set:
       assert working_set <= partitions
       assert self.__partition_count(partitions) == node_count
+      # We split other partitions according to test_partition: If a partition
+      # contains two states S1 and S2, such that S1 transitions to a state in
+      # test_partition with a TransitionKey K, and S2 transitions to a state
+      # *not* in test_partition with the same K, then S1 and S2 cannot be in the
+      # same partition.
       test_partition = iter(working_set).next()
       working_set.remove(test_partition)
-      test_array = [False for x in range(0, len(id_map))]
-      for x in test_partition:
-        test_array[x] = True
+      state_in_test_partition = [False] * len(id_to_state)
+      for state_id in test_partition:
+        state_in_test_partition[state_id] = True
       for key_index in key_range:
-        map_into_partition = set()
-        for state_id, transition_array in transitions.items():
-          transition_id = transition_array[key_index]
-          if transition_id != None and test_array[transition_id]:
-            map_into_partition.add(state_id)
-        if not map_into_partition:
+        # states_transitioning_to_test_partition will contain the state_ids of
+        # the states (across all partitions) which transition into
+        # test_partition (any state within test_partition) with that key.
+        states_transitioning_to_test_partition = set()
+        for state_id, transition_key_to_state_id in transitions.items():
+          transition_state_id = transition_key_to_state_id[key_index]
+          if (transition_state_id and
+              state_in_test_partition[transition_state_id]):
+            states_transitioning_to_test_partition.add(state_id)
+        if not states_transitioning_to_test_partition:
           continue
+        # For each partition, we need to split it in two: {states which
+        # transition into test_partition, states which don't}.
         new_partitions = set()
         old_partitions = set()
         for p in partitions:
-          intersection = p.set().intersection(map_into_partition)
+          intersection = p.set().intersection(
+              states_transitioning_to_test_partition)
           if not intersection:
             continue
-          difference = p.set().difference(map_into_partition)
+          difference = p.set().difference(
+              states_transitioning_to_test_partition)
           if not difference:
             continue
           intersection = StatePartition(intersection)
           difference = StatePartition(difference)
           old_partitions.add(p)
           new_partitions.add(intersection)
           new_partitions.add(difference)
           if p in working_set:
             working_set.remove(p)
             working_set.add(intersection)
             working_set.add(difference)
           elif len(intersection) <= len(difference):
             working_set.add(intersection)
           else:
             working_set.add(difference)
         if old_partitions:
           partitions -= old_partitions
         if new_partitions:
           partitions |= new_partitions
-    (start_name, mapping) = self.__merge_partitions(partitions)
+    (start_name, mapping) = self.__create_states_from_partitions(partitions)
     return Dfa(start_name, mapping)
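
The comment in __create_initial_partitions explains the alphabet of disjoint TransitionKeys with the {[a-d], [c-h]} example. A minimal sketch of that idea, using plain inclusive (lo, hi) integer ranges as stand-ins for TransitionKey (an illustration only, not the real TransitionKey.disjoint_keys API and not part of the patch):

def disjoint_ranges(ranges):
  '''Splits overlapping inclusive (lo, hi) ranges into disjoint pieces.'''
  # Collect the boundary points where some range starts or ends.
  points = set()
  for lo, hi in ranges:
    points.add(lo)
    points.add(hi + 1)
  points = sorted(points)
  # Keep each piece between consecutive boundaries that is covered by at
  # least one input range.
  pieces = []
  for lo, next_lo in zip(points, points[1:]):
    hi = next_lo - 1
    if any(r_lo <= lo and hi <= r_hi for r_lo, r_hi in ranges):
      pieces.append((lo, hi))
  return pieces

# [a-d] and [c-h] split into [a-b], [c-d] and [e-h], as in the comment above.
print(disjoint_ranges([(ord('a'), ord('d')), (ord('c'), ord('h'))]))
# -> [(97, 98), (99, 100), (101, 104)]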
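The refinement loop in minimize() follows the splitting rule described in its comments: partitions are repeatedly split against a test_partition until no split changes anything. A self-contained sketch of the same idea on a toy DFA, with plain dicts and frozensets standing in for the Dfa and StatePartition classes (an illustration under those assumptions, not the patch's code):

def minimize_partitions(transitions, accepting, alphabet):
  '''transitions[state][symbol] -> state; returns a set of frozenset blocks.'''
  states = set(transitions)
  # Initial split: accepting vs. non-accepting states.
  partitions = {frozenset(accepting), frozenset(states - accepting)}
  partitions.discard(frozenset())
  working_set = set(partitions)
  while working_set:
    test_partition = working_set.pop()
    for symbol in alphabet:
      # States (in any partition) that transition into test_partition on symbol.
      goes_in = {s for s in states
                 if transitions[s].get(symbol) in test_partition}
      new_partitions = set()
      for p in partitions:
        inter, diff = p & goes_in, p - goes_in
        if inter and diff:
          # p is split in two; the smaller piece goes back on the work list.
          new_partitions.update([inter, diff])
          if p in working_set:
            working_set.remove(p)
            working_set.update([inter, diff])
          else:
            working_set.add(min(inter, diff, key=len))
        else:
          new_partitions.add(p)
      partitions = new_partitions
  return partitions

# Toy DFA over {'0', '1'} with accepting state D: B and C are equivalent.
toy = {
    'A': {'0': 'B', '1': 'C'},
    'B': {'0': 'D', '1': 'D'},
    'C': {'0': 'D', '1': 'D'},
    'D': {'0': 'D', '1': 'D'},
}
print(minimize_partitions(toy, accepting={'D'}, alphabet=['0', '1']))
# -> three blocks: {'A'}, {'B', 'C'} and {'D'}; B and C can be merged.

In the patch itself the same refinement is driven through DfaMinimizer(dfa).minimize(), which additionally keys the initial partitions on per-state actions so that states with different actions are never merged.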