Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(763)

Side by Side Diff: tools/lexer_generator/dfa.py

Issue 77733005: Experimental lexer generator: rename_things(Dfa minimization) += comments (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: comments from dcarney Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
61 matches = reduce(f, self.__transitions.items(), set()) 61 matches = reduce(f, self.__transitions.items(), set())
62 # Since this is a dfa, we should have at most one such state. Return it. 62 # Since this is a dfa, we should have at most one such state. Return it.
63 if not matches: 63 if not matches:
64 return None 64 return None
65 assert len(matches) == 1 65 assert len(matches) == 1
66 return iter(matches).next() 66 return iter(matches).next()
67 67
def next_state_with_char(self, value):
  """Return the unique successor state whose TransitionKey matches the
  character 'value', or None when no transition accepts it."""
  def char_matches(transition_key, char):
    return transition_key.matches_char(char)
  return self.__matches(char_matches, value)
70 70
def transition_state_with_key(self, key):
  """Return the successor state reached via 'key', or None.

  'key' may be a subkey of one of this state's TransitionKeys, but it must
  not partially overlap any TransitionKey of this state."""
  def covers(transition_key, subkey):
    return transition_key.is_superset_of_key(subkey)
  return self.__matches(covers, key)
73 75
74 class Dfa(Automaton): 76 class Dfa(Automaton):
75 77
76 def __init__(self, start_name, mapping): 78 def __init__(self, start_name, mapping):
77 super(Dfa, self).__init__() 79 super(Dfa, self).__init__()
78 self.__terminal_set = set() 80 self.__terminal_set = set()
79 name_map = {} 81 name_map = {}
80 for name, node_data in mapping.items(): 82 for name, node_data in mapping.items():
81 node = DfaState(name, node_data['action']) 83 node = DfaState(name, node_data['action'])
82 name_map[name] = node 84 name_map[name] = node
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
@staticmethod
def set_verify(verify):
  # Enable or disable the (class-wide) consistency checks that the
  # minimizer runs while building and merging partitions.
  DfaMinimizer.__verify = verify
203 205
def __init__(self, dfa):
  """Remember the dfa to minimize; helper structures start out unset."""
  self.__dfa = dfa
  # Populated later, see __create_initial_partitions.
  self.__id_map = self.__reverse_id_map = self.__alphabet = None
209 211
def __create_initial_partitions(self):
  '''Build the initial state partitioning plus the helper structures used by
  minimize(). Returns a pair (working_set, partitions) of StatePartition
  sets; also fills in the id maps, the alphabet and the transition table.'''
  assert not self.__id_map
  assert not self.__reverse_id_map
  assert not self.__alphabet

  # During minimization every state is identified by a small integer ID; a
  # set of states is represented as a set of state IDs.
  id_map = {}

  # Partition the states into the following groups:
  # - terminal states without action
  # - nonterminal states without action
  # - one group per action: terminal states which have the action
  # - one group per action: nonterminal states which have the action
  # The group descriptions are the keys of initial_partitions; the values
  # are sets of state IDs.
  initial_partitions = {}
  terminal_set = self.__dfa.terminal_set()
  all_keys = []  # Will accumulate all TransitionKeys in the dfa.

  # Visitor which fills in id_map, all_keys and initial_partitions.
  def record_state(state, visitor_state):
    state_id = len(id_map)
    id_map[state_id] = state
    action = state.action()
    all_keys.append(state.key_iter())
    if action:
      if state in terminal_set:
        group = ("terminal action", action)
      else:
        assert action.entry_action()
        group = ("nonterminal action", action)
    elif state in terminal_set:
      group = ("terminal set",)
    else:
      group = ("nonterminal set",)
    initial_partitions.setdefault(group, set()).add(state_id)
  self.__dfa.visit_all_states(record_state)

  # Wrap each group in a StatePartition; every partition starts out in the
  # working set.
  partitions = set()
  working_set = set()
  for state_ids in initial_partitions.values():
    partition = StatePartition(state_ids)
    partitions.add(partition)
    working_set.add(partition)

  reverse_id_map = {state: state_id for state_id, state in id_map.items()}
  assert len(id_map) == len(reverse_id_map)
  self.__reverse_id_map = reverse_id_map
  self.__id_map = id_map

  # The alphabet defines the TransitionKeys we need to check when deciding
  # whether two states of the dfa can be in the same partition. See the doc
  # string of TransitionKey.disjoint_keys.
  #
  # For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet is
  # {[a-b], [c-d], [e-h]}. If state S1 has a transition to S2 with
  # TransitionKey [a-d], and state S3 has a transition to S4 with
  # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This
  # becomes visible when we check the transitions for alphabet key [c-d]
  # (S1 steps to S2, S3 steps to S4).
  self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys)))

  # For each state, record per alphabet position the ID of the state
  # reached with that TransitionKey (None when there is no transition).
  transitions = {}
  for state_id, state in id_map.items():
    def target_id(key):
      target = state.transition_state_with_key(key)
      return reverse_id_map[target] if target else None
    transitions[state_id] = [target_id(key) for key in self.__alphabet]
  self.__transitions = transitions

  # Verify the created structures.
  if self.__verify:
    for partition in partitions:
      for state_id in partition:
        transition_state_array = transitions[state_id]
        state = id_map[state_id]
        for key_id, key in enumerate(self.__alphabet):
          transition_state_id = transition_state_array[key_id]
          transition_state = state.transition_state_with_key(key)
          if not transition_state:
            assert transition_state_id == None
          else:
            assert transition_state_id != None
            assert transition_state_id == reverse_id_map[transition_state]
  return (working_set, partitions)
274 303
@staticmethod
def __partition_count(partitions):
  # Total number of distinct state IDs covered by 'partitions'.
  covered = set()
  for partition in partitions:
    covered |= partition.set()
  return len(covered)
278 307
279 def __merge_partitions(self, partitions): 308 def __merge_partitions(self, partitions):
280 id_map = self.__id_map 309 id_map = self.__id_map
281 reverse_id_map = self.__reverse_id_map 310 reverse_id_map = self.__reverse_id_map
282 verify = self.__verify 311 verify = self.__verify
(...skipping 13 matching lines...) Expand all
296 'transitions' : {}, 325 'transitions' : {},
297 'terminal' : state in self.__dfa.terminal_set(), 326 'terminal' : state in self.__dfa.terminal_set(),
298 'action' : state.action(), 327 'action' : state.action(),
299 } 328 }
300 mapping[str(partition)] = node 329 mapping[str(partition)] = node
301 transition_array = transitions[state_id] 330 transition_array = transitions[state_id]
302 for key_id, key in enumerate(self.__alphabet): 331 for key_id, key in enumerate(self.__alphabet):
303 transition_id = transition_array[key_id] 332 transition_id = transition_array[key_id]
304 if transition_id == None: 333 if transition_id == None:
305 if verify: 334 if verify:
306 assert not state.key_matches(key) 335 assert not state.transition_state_with_key(key)
307 for s_id in state_ids: 336 for s_id in state_ids:
308 assert not id_map[s_id].key_matches(key) 337 assert not id_map[s_id].transition_state_with_key(key)
309 continue 338 continue
310 transition_partition = reverse_partition_map[transition_id] 339 transition_partition = reverse_partition_map[transition_id]
311 assert transition_partition 340 assert transition_partition
312 if verify: 341 if verify:
313 for s_id in state_ids: 342 for s_id in state_ids:
314 transition = id_map[s_id].key_matches(key) 343 transition = id_map[s_id].transition_state_with_key(key)
315 assert transition 344 assert transition
316 test_partition = reverse_partition_map[reverse_id_map[transition]] 345 test_partition = reverse_partition_map[reverse_id_map[transition]]
317 assert test_partition == transition_partition 346 assert test_partition == transition_partition
318 node['transitions'][key] = name_map[transition_partition] 347 node['transitions'][key] = name_map[transition_partition]
319 start_id = reverse_id_map[self.__dfa.start_state()] 348 start_id = reverse_id_map[self.__dfa.start_state()]
320 start_name = name_map[reverse_partition_map[start_id]] 349 start_name = name_map[reverse_partition_map[start_id]]
321 return (start_name, mapping) 350 return (start_name, mapping)
322 351
323 def minimize(self): 352 def minimize(self):
324 (working_set, partitions) = self.__partition() 353 '''Minimize the dfa. For the general idea of minimizing automata, see
354 http://en.wikipedia.org/wiki/DFA_minimization. In addition, we need to take
355 into account the actions associated with states, i.e., we cannot merge two
356 states which have different actions.'''
357 (working_set, partitions) = self.__create_initial_partitions()
325 node_count = self.__dfa.node_count() 358 node_count = self.__dfa.node_count()
326 id_map = self.__id_map 359 id_map = self.__id_map
327 reverse_id_map = self.__reverse_id_map 360 reverse_id_map = self.__reverse_id_map
328 transitions = self.__transitions 361 transitions = self.__transitions
329 key_range = range(0, len(self.__alphabet)) 362 key_range = range(0, len(self.__alphabet))
330 while working_set: 363 while working_set:
331 assert working_set <= partitions 364 assert working_set <= partitions
332 assert self.__partition_count(partitions) == node_count 365 assert self.__partition_count(partitions) == node_count
333 test_partition = iter(working_set).next() 366 test_partition = iter(working_set).next()
334 working_set.remove(test_partition) 367 working_set.remove(test_partition)
(...skipping 29 matching lines...) Expand all
364 elif len(intersection) <= len(difference): 397 elif len(intersection) <= len(difference):
365 working_set.add(intersection) 398 working_set.add(intersection)
366 else: 399 else:
367 working_set.add(difference) 400 working_set.add(difference)
368 if old_partitions: 401 if old_partitions:
369 partitions -= old_partitions 402 partitions -= old_partitions
370 if new_partitions: 403 if new_partitions:
371 partitions |= new_partitions 404 partitions |= new_partitions
372 (start_name, mapping) = self.__merge_partitions(partitions) 405 (start_name, mapping) = self.__merge_partitions(partitions)
373 return Dfa(start_name, mapping) 406 return Dfa(start_name, mapping)
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698