Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(612)

Side by Side Diff: tools/lexer_generator/dfa.py

Issue 69953022: Experimental parser: faster dfa minimization (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/lexer_generator/generator.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
152 yield (state.action(), last_position, len(string)) 152 yield (state.action(), last_position, len(string))
153 153
154 def minimize(self): 154 def minimize(self):
155 return DfaMinimizer(self).minimize() 155 return DfaMinimizer(self).minimize()
156 156
class StatePartition(object):
  """A hashable wrapper around a set of state ids, used during minimization.

  Instances are stored in sets and used as dict keys, so the wrapped set
  must not be mutated after construction.  The hash (xor of the element
  hashes) is computed lazily and cached.
  """

  def __init__(self, node_numbers):
    self.__node_numbers = node_numbers
    assert self.__node_numbers  # an empty partition is meaningless
    # Lazy cache for __hash__.  None means "not computed yet".  The sentinel
    # must be None rather than a falsy 0: the xor hash can legitimately be 0
    # (e.g. for the partition {0}), and testing `not self.__hash` would then
    # recompute it on every call.
    self.__hash = None

  def set(self):
    """Returns the underlying set of state ids (callers must not mutate)."""
    return self.__node_numbers

  def __hash__(self):
    if self.__hash is None:
      h = 0
      for x in self.__node_numbers:
        h ^= hash(x)
      self.__hash = h
    return self.__hash

  def __eq__(self, other):
    return (isinstance(other, self.__class__) and
            self.__node_numbers == other.__node_numbers)

  def __ne__(self, other):
    # Python 2 does not derive != from ==; without this, `!=` on two equal
    # partitions would fall back to the default (identity-based) comparison.
    return not self.__eq__(other)

  def __len__(self):
    return len(self.__node_numbers)

  def __iter__(self):
    return iter(self.__node_numbers)

  def __str__(self):
    return str([x for x in self.__node_numbers])
class DfaMinimizer:
  """Minimizes a Dfa via partition refinement (Hopcroft-style).

  States are first grouped into initial partitions by their action (or,
  lacking an action, by terminal-set membership).  The partitions are then
  refined until, for every alphabet key, all states of a partition map into
  the same partition.  Finally the partitions are merged into the states of
  a new Dfa.
  """

  # When True, slow cross-checks are run against direct key matching.
  __verify = True

  @staticmethod
  def set_verify(verify):
    """Enables or disables the (slow) verification passes for all instances."""
    DfaMinimizer.__verify = verify

  def __init__(self, dfa):
    self.__dfa = dfa
    # Built by __partition():
    self.__id_map = None          # state id -> state
    self.__reverse_id_map = None  # state -> state id
    self.__alphabet = None        # list of disjoint transition keys

  def __partition(self):
    """Builds the initial partitions and the id/alphabet/transition tables.

    Also precomputes, for every state, the transition target id for every
    alphabet key, so the refinement loop in minimize() never has to call
    key_matches() again.  Returns the initial set of StatePartitions.
    """
    assert not self.__id_map
    assert not self.__reverse_id_map
    assert not self.__alphabet
    action_map = {}
    id_map = {}
    terminal_set = self.__dfa.terminal_set()
    all_keys = []
    def visit(state, visitor_state):  # visitor_state is unused here
      state_id = len(id_map)
      id_map[state_id] = state
      action = state.action()
      all_keys.append(state.key_iter())
      if action:
        # TODO add this back
        # assert state in self.__terminal_set
        if state not in terminal_set:
          action = "nonterminal action " + str(action)
      elif state in terminal_set:
        action = "terminal_set"
      if action not in action_map:
        action_map[action] = set()
      action_map[action].add(state_id)
    self.__dfa.visit_all_states(visit)
    partitions = set()
    for p in action_map.values():
      partitions.add(StatePartition(p))
    reverse_id_map = {v : k for k, v in id_map.items()}
    assert len(id_map) == len(reverse_id_map)
    self.__reverse_id_map = reverse_id_map
    self.__id_map = id_map
    self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys)))
    # Map transitions wrt the alphabet: transitions[state_id][key_index] is
    # the id of the target state, or None when the key has no match.
    def target_id(state, key):
      transition = state.key_matches(key)
      return reverse_id_map[transition] if transition else None
    transitions = {}
    for state_id, state in id_map.items():
      transitions[state_id] = [target_id(state, key)
                               for key in self.__alphabet]
    self.__transitions = transitions
    # Verify the precomputed tables against direct key matching.
    if self.__verify:
      for partition in partitions:
        for state_id in partition:
          transition_array = transitions[state_id]
          state = id_map[state_id]
          for key_id, key in enumerate(self.__alphabet):
            transition_id = transition_array[key_id]
            transition_state = state.key_matches(key)
            if not transition_state:
              assert transition_id is None
            else:
              assert transition_id is not None
              assert transition_id == reverse_id_map[transition_state]
    return partitions

  @staticmethod
  def __partition_count(partitions):
    """Returns the number of distinct state ids covered by the partitions."""
    covered = set()
    for p in partitions:
      covered |= p.set()
    return len(covered)

  def __merge_partitions(self, partitions):
    """Collapses each partition into one node of the minimized Dfa.

    Returns (start_name, mapping) suitable for the Dfa constructor, where
    each partition becomes a node named str(partition).  When verification
    is enabled, checks that every state of a partition agrees with the
    representative state on every transition.
    """
    id_map = self.__id_map
    reverse_id_map = self.__reverse_id_map
    verify = self.__verify
    mapping = {}
    name_map = {}
    reverse_partition_map = {}
    for partition in partitions:
      name_map[partition] = str(partition)
      for state_id in partition:
        reverse_partition_map[state_id] = partition
    transitions = self.__transitions
    for partition in partitions:
      state_ids = list(partition)
      # Any member works as the representative: the refinement loop has
      # already made all members transition-equivalent.
      state_id = state_ids[0]
      state = id_map[state_id]
      node = {
        'transitions' : {},
        'terminal' : state in self.__dfa.terminal_set(),
        'action' : state.action(),
      }
      mapping[str(partition)] = node
      transition_array = transitions[state_id]
      for key_id, key in enumerate(self.__alphabet):
        transition_id = transition_array[key_id]
        if transition_id is None:
          if verify:
            assert not state.key_matches(key)
            for s_id in state_ids:
              assert not id_map[s_id].key_matches(key)
          continue
        transition_partition = reverse_partition_map[transition_id]
        assert transition_partition
        if verify:
          for s_id in state_ids:
            transition = id_map[s_id].key_matches(key)
            assert transition
            test_partition = reverse_partition_map[reverse_id_map[transition]]
            assert test_partition == transition_partition
        node['transitions'][key] = name_map[transition_partition]
    start_id = reverse_id_map[self.__dfa.start_state()]
    start_name = name_map[reverse_partition_map[start_id]]
    return (start_name, mapping)

  def minimize(self):
    """Returns a new Dfa equivalent to the wrapped one with merged states.

    Refines the initial partitions: repeatedly picks a partition from the
    working set and splits every partition whose members disagree on
    whether they transition into it for some alphabet key.  The smaller
    half of each split is kept in the working set (the Hopcroft trick that
    keeps the refinement fast).
    """
    partitions = self.__partition()
    node_count = self.__dfa.node_count()
    id_map = self.__id_map
    reverse_id_map = self.__reverse_id_map
    transitions = self.__transitions
    key_range = range(0, len(self.__alphabet))
    working_set = set(partitions)
    while working_set:
      assert working_set <= partitions
      # The partitions must always cover every state exactly once.
      assert self.__partition_count(partitions) == node_count
      test_partition = next(iter(working_set))
      working_set.remove(test_partition)
      # Membership array for O(1) "is this id in the test partition" checks.
      test_array = [False for x in range(0, len(id_map))]
      for x in test_partition:
        test_array[x] = True
      for key_index in key_range:
        # All states that transition into the test partition on this key.
        map_into_partition = set()
        for state_id, transition_array in transitions.items():
          transition_id = transition_array[key_index]
          if transition_id is not None and test_array[transition_id]:
            map_into_partition.add(state_id)
        if not map_into_partition:
          continue
        new_partitions = set()
        old_partitions = set()
        for p in partitions:
          intersection = p.set().intersection(map_into_partition)
          if not intersection:
            continue
          difference = p.set().difference(map_into_partition)
          if not difference:
            continue
          # p is split: some members map into the test partition, some not.
          intersection = StatePartition(intersection)
          difference = StatePartition(difference)
          old_partitions.add(p)
          new_partitions.add(intersection)
          new_partitions.add(difference)
          if p in working_set:
            working_set.remove(p)
            working_set.add(intersection)
            working_set.add(difference)
          elif len(intersection) <= len(difference):
            working_set.add(intersection)
          else:
            working_set.add(difference)
        if old_partitions:
          partitions -= old_partitions
        if new_partitions:
          partitions |= new_partitions
    (start_name, mapping) = self.__merge_partitions(partitions)
    return Dfa(start_name, mapping)
OLDNEW
« no previous file with comments | « no previous file | tools/lexer_generator/generator.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698