Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(612)

Side by Side Diff: tools/lexer_generator/dfa.py

Issue 69953022: Experimental parser: faster dfa minimization (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/lexer_generator/generator.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
152 yield (state.action(), last_position, len(string)) 152 yield (state.action(), last_position, len(string))
153 153
154 def minimize(self): 154 def minimize(self):
155 return DfaMinimizer(self).minimize() 155 return DfaMinimizer(self).minimize()
156 156
class StatePartition(object):
  """A hashable wrapper around a set of state ids, used during minimization.

  Instances are stored in sets and used as dict keys, so the wrapped set
  must not be mutated after construction.  The hash (xor of the element
  hashes) is computed lazily and cached.
  """

  def __init__(self, node_numbers):
    self.__node_numbers = node_numbers
    assert self.__node_numbers  # an empty partition is meaningless
    # Lazy cache for __hash__.  None means "not computed yet".  The sentinel
    # must be None rather than a falsy 0: the xor hash can legitimately be 0
    # (e.g. for the partition {0}), and testing `not self.__hash` would then
    # recompute it on every call.
    self.__hash = None

  def set(self):
    """Returns the underlying set of state ids (callers must not mutate)."""
    return self.__node_numbers

  def __hash__(self):
    if self.__hash is None:
      h = 0
      for x in self.__node_numbers:
        h ^= hash(x)
      self.__hash = h
    return self.__hash

  def __eq__(self, other):
    return (isinstance(other, self.__class__) and
            self.__node_numbers == other.__node_numbers)

  def __ne__(self, other):
    # Python 2 does not derive != from ==; without this, `!=` on two equal
    # partitions would fall back to the default (identity-based) comparison.
    return not self.__eq__(other)

  def __len__(self):
    return len(self.__node_numbers)

  def __iter__(self):
    return iter(self.__node_numbers)

  def __str__(self):
    return str([x for x in self.__node_numbers])
class DfaMinimizer:
  """Minimizes a Dfa via partition refinement (Hopcroft-style).

  States are first grouped into initial partitions by their action (or,
  lacking an action, by terminal-set membership).  The partitions are then
  refined until, for every alphabet key, all states of a partition map into
  the same partition.  Finally the partitions are merged into the states of
  a new Dfa.
  """

  # When True, slow cross-checks are run against direct key matching.
  __verify = True

  @staticmethod
  def set_verify(verify):
    """Enables or disables the (slow) verification passes for all instances."""
    DfaMinimizer.__verify = verify

  def __init__(self, dfa):
    self.__dfa = dfa
    # Built by __partition():
    self.__id_map = None          # state id -> state
    self.__reverse_id_map = None  # state -> state id
    self.__alphabet = None        # list of disjoint transition keys

  def __partition(self):
    """Builds the initial partitions and the id/alphabet/transition tables.

    Also precomputes, for every state, the transition target id for every
    alphabet key, so the refinement loop in minimize() never has to call
    key_matches() again.  Returns the initial set of StatePartitions.
    """
    assert not self.__id_map
    assert not self.__reverse_id_map
    assert not self.__alphabet
    action_map = {}
    id_map = {}
    terminal_set = self.__dfa.terminal_set()
    all_keys = []
    def visit(state, visitor_state):  # visitor_state is unused here
      state_id = len(id_map)
      id_map[state_id] = state
      action = state.action()
      all_keys.append(state.key_iter())
      if action:
        # TODO add this back
        # assert state in self.__terminal_set
        if state not in terminal_set:
          action = "nonterminal action " + str(action)
      elif state in terminal_set:
        action = "terminal_set"
      if action not in action_map:
        action_map[action] = set()
      action_map[action].add(state_id)
    self.__dfa.visit_all_states(visit)
    partitions = set()
    for p in action_map.values():
      partitions.add(StatePartition(p))
    reverse_id_map = {v : k for k, v in id_map.items()}
    assert len(id_map) == len(reverse_id_map)
    self.__reverse_id_map = reverse_id_map
    self.__id_map = id_map
    self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys)))
    # Map transitions wrt the alphabet: transitions[state_id][key_index] is
    # the id of the target state, or None when the key has no match.
    def target_id(state, key):
      transition = state.key_matches(key)
      return reverse_id_map[transition] if transition else None
    transitions = {}
    for state_id, state in id_map.items():
      transitions[state_id] = [target_id(state, key)
                               for key in self.__alphabet]
    self.__transitions = transitions
    # Verify the precomputed tables against direct key matching.
    if self.__verify:
      for partition in partitions:
        for state_id in partition:
          transition_array = transitions[state_id]
          state = id_map[state_id]
          for key_id, key in enumerate(self.__alphabet):
            transition_id = transition_array[key_id]
            transition_state = state.key_matches(key)
            if not transition_state:
              assert transition_id is None
            else:
              assert transition_id is not None
              assert transition_id == reverse_id_map[transition_state]
    return partitions

  @staticmethod
  def __partition_count(partitions):
    """Returns the number of distinct state ids covered by the partitions."""
    covered = set()
    for p in partitions:
      covered |= p.set()
    return len(covered)

  def __merge_partitions(self, partitions):
    """Collapses each partition into one node of the minimized Dfa.

    Returns (start_name, mapping) suitable for the Dfa constructor, where
    each partition becomes a node named str(partition).  When verification
    is enabled, checks that every state of a partition agrees with the
    representative state on every transition.
    """
    id_map = self.__id_map
    reverse_id_map = self.__reverse_id_map
    verify = self.__verify
    mapping = {}
    name_map = {}
    reverse_partition_map = {}
    for partition in partitions:
      name_map[partition] = str(partition)
      for state_id in partition:
        reverse_partition_map[state_id] = partition
    transitions = self.__transitions
    for partition in partitions:
      state_ids = list(partition)
      # Any member works as the representative: the refinement loop has
      # already made all members transition-equivalent.
      state_id = state_ids[0]
      state = id_map[state_id]
      node = {
        'transitions' : {},
        'terminal' : state in self.__dfa.terminal_set(),
        'action' : state.action(),
      }
      mapping[str(partition)] = node
      transition_array = transitions[state_id]
      for key_id, key in enumerate(self.__alphabet):
        transition_id = transition_array[key_id]
        if transition_id is None:
          if verify:
            assert not state.key_matches(key)
            for s_id in state_ids:
              assert not id_map[s_id].key_matches(key)
          continue
        transition_partition = reverse_partition_map[transition_id]
        assert transition_partition
        if verify:
          for s_id in state_ids:
            transition = id_map[s_id].key_matches(key)
            assert transition
            test_partition = reverse_partition_map[reverse_id_map[transition]]
            assert test_partition == transition_partition
        node['transitions'][key] = name_map[transition_partition]
    start_id = reverse_id_map[self.__dfa.start_state()]
    start_name = name_map[reverse_partition_map[start_id]]
    return (start_name, mapping)

  def minimize(self):
    """Returns a new Dfa equivalent to the wrapped one with merged states.

    Refines the initial partitions: repeatedly picks a partition from the
    working set and splits every partition whose members disagree on
    whether they transition into it for some alphabet key.  The smaller
    half of each split is kept in the working set (the Hopcroft trick that
    keeps the refinement fast).
    """
    partitions = self.__partition()
    node_count = self.__dfa.node_count()
    id_map = self.__id_map
    reverse_id_map = self.__reverse_id_map
    transitions = self.__transitions
    key_range = range(0, len(self.__alphabet))
    working_set = set(partitions)
    while working_set:
      assert working_set <= partitions
      # The partitions must always cover every state exactly once.
      assert self.__partition_count(partitions) == node_count
      test_partition = next(iter(working_set))
      working_set.remove(test_partition)
      # Membership array for O(1) "is this id in the test partition" checks.
      test_array = [False for x in range(0, len(id_map))]
      for x in test_partition:
        test_array[x] = True
      for key_index in key_range:
        # All states that transition into the test partition on this key.
        map_into_partition = set()
        for state_id, transition_array in transitions.items():
          transition_id = transition_array[key_index]
          if transition_id is not None and test_array[transition_id]:
            map_into_partition.add(state_id)
        if not map_into_partition:
          continue
        new_partitions = set()
        old_partitions = set()
        for p in partitions:
          intersection = p.set().intersection(map_into_partition)
          if not intersection:
            continue
          difference = p.set().difference(map_into_partition)
          if not difference:
            continue
          # p is split: some members map into the test partition, some not.
          intersection = StatePartition(intersection)
          difference = StatePartition(difference)
          old_partitions.add(p)
          new_partitions.add(intersection)
          new_partitions.add(difference)
          if p in working_set:
            working_set.remove(p)
            working_set.add(intersection)
            working_set.add(difference)
          elif len(intersection) <= len(difference):
            working_set.add(intersection)
          else:
            working_set.add(difference)
        if old_partitions:
          partitions -= old_partitions
        if new_partitions:
          partitions |= new_partitions
    (start_name, mapping) = self.__merge_partitions(partitions)
    return Dfa(start_name, mapping)
OLDNEW
« no previous file with comments | « no previous file | tools/lexer_generator/generator.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698