
Unified diff: tools/lexer_generator/dfa.py

Issue 77153005: Experimental parser: abstract lexing (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
 # Copyright 2013 the V8 project authors. All rights reserved.
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met:
 #
 #     * Redistributions of source code must retain the above copyright
 #       notice, this list of conditions and the following disclaimer.
 #     * Redistributions in binary form must reproduce the above
 #       copyright notice, this list of conditions and the following
 #       disclaimer in the documentation and/or other materials provided
(...skipping 29 matching lines...)
   def transitions_to_multiple_states(self):
     return False
 
   def name(self):
     return self.__name
 
   def action(self):
     return self.__action
 
   def add_transition(self, key, state):
+    assert not key == TransitionKey.epsilon()
     assert not self.__transitions.has_key(key)
     self.__transitions[key] = state
 
   def transitions(self):
     return self.__transitions
 
+  def epsilon_closure(self):
+    return set([self])
+
   # TODO abstract state matching
   def __matches(self, match_func, value):
     # f collects states whose corresponding TransitionKey matches 'value'.
     f = (lambda acc, (key, state):
            acc | set([state]) if match_func(key, value) else acc)
-    matches = reduce(f, self.__transitions.items(), set())
-    # Since this is a dfa, we should have at most one such state. Return it.
+    return reduce(f, self.__transitions.items(), set())
+
+  def transition_state_iter_for_char(self, value):
+    return iter(self.__matches(lambda k, v : k.matches_char(v), value))
+
+  def transition_state_iter_for_key(self, value):
+    return iter(self.__matches(lambda k, v : k.is_superset_of_key(v), value))
+
+  def transition_state_for_key(self, value):
+    matches = self.__matches(lambda k, v : k.is_superset_of_key(v), value)
     if not matches:
       return None
+    # Since this is a dfa, we should have at most one such state. Return it.
     assert len(matches) == 1
     return iter(matches).next()
 
-  def next_state_with_char(self, value):
-    return self.__matches(lambda k, v : k.matches_char(v), value)
-
-  def transition_state_with_key(self, key):
-    # 'key' can be a subkey of one of the TransitionKeys for this state, but it
-    # cannot partially overlap a TransitionKey for this state.
-    return self.__matches(lambda k, v : k.is_superset_of_key(v), key)
-
 class Dfa(Automaton):
 
   def __init__(self, start_name, mapping):
     super(Dfa, self).__init__()
     self.__terminal_set = set()
     name_map = {}
     for name, node_data in mapping.items():
       node = DfaState(name, node_data['action'])
       name_map[name] = node
       if node_data['terminal']:
(...skipping 16 matching lines...)
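The new whole-key lookup added to DfaState above relies on the property spelled out in the removed comment: a query key may be a subkey of one of the state's TransitionKeys but may not partially overlap one, and a DFA state's keys are disjoint, so at most one key can match and transition_state_for_key can assert len(matches) == 1. A minimal self-contained sketch of that invariant, using a hypothetical CharRangeKey stand-in rather than the real TransitionKey class:

class CharRangeKey(object):
  # Hypothetical stand-in for TransitionKey: an inclusive character range.
  def __init__(self, lo, hi):
    self.lo, self.hi = lo, hi

  def matches_char(self, char):
    return self.lo <= char <= self.hi

  def is_superset_of_key(self, other):
    return self.lo <= other.lo and other.hi <= self.hi

# The keys on a DFA state are disjoint, so a whole-key lookup matches at most
# one of them.
transitions = {CharRangeKey('a', 'f'): 'S1', CharRangeKey('g', 'z'): 'S2'}

def transition_state_for_key(key):
  matches = [s for k, s in transitions.items() if k.is_superset_of_key(key)]
  if not matches:
    return None
  assert len(matches) == 1  # guaranteed by the disjointness of the keys
  return matches[0]

assert transition_state_for_key(CharRangeKey('c', 'd')) == 'S1'
assert transition_state_for_key(CharRangeKey('x', 'x')) == 'S2'
assert transition_state_for_key(CharRangeKey('0', '9')) is None

A key such as [e-h] that straddled the [a-f]/[g-z] boundary would match neither entry here; that is the partial-overlap case the removed comment says callers must not pass.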
     assert self.__terminal_set
     state_count = self.visit_all_states(lambda state, count: count + 1, 0)
     assert self.__node_count == state_count
 
   def node_count(self):
     return self.__node_count
 
   def start_state(self):
     return self.__start
 
-  def start_set(self):
-    return set([self.__start])
-
   def terminal_set(self):
     return set(self.__terminal_set)
 
-  @staticmethod
-  def __match_char(state, char):
-    match = list(state.state_iter(key_filter = lambda k: k.matches_char(char)))
-    if not match: return None
-    assert len(match) == 1
-    return match[0]
-
-  @staticmethod
-  def terminal_action():
-    return Action(None, ('TERMINATE', None))
-
-  @staticmethod
-  def miss_action():
-    return Action(None, ('Miss', None))
-
-  def collect_actions(self, string):
-    state = self.__start
-    for c in string:
-      state = Dfa.__match_char(state, c)
-      if not state:
-        yield self.miss_action()
-        return
-    if state.action():
-      yield state.action()
-    if state in self.__terminal_set:
-      yield self.terminal_action()
-    else:
-      yield self.miss_action()
-
-  def matches(self, string):
-    actions = list(self.collect_actions(string))
-    return actions and actions[-1] == self.terminal_action()
-
-  def lex(self, string):
-    state = self.__start
-    last_position = 0
-    for pos, c in enumerate(string):
-      next = Dfa.__match_char(state, c)
-      if not next:
-        assert state.action() # must invoke default action here
-        yield (state.action(), last_position, pos)
-        last_position = pos
-        # lex next token
-        next = Dfa.__match_char(self.__start, c)
-        assert next
-      state = next
-    assert state.action() # must invoke default action here
-    yield (state.action(), last_position, len(string))
-
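The removed lex() driver above is a plain scan-until-stuck loop: it advances through the DFA one character at a time, and when no transition applies it emits the current state's action for the span scanned so far and restarts from the start state on the offending character. A self-contained toy version of the same loop over a plain transition dict; the state names, character classes and actions are illustrative and not taken from the real lexer:

# Toy scanner in the same shape as the removed lex(): run the DFA until it
# gets stuck, emit the current state's action, then restart at the start
# state on the offending character.
delta = {
  ('start', 'alpha'): 'ident', ('ident', 'alpha'): 'ident',
  ('start', 'digit'): 'number', ('number', 'digit'): 'number',
}
action = {'ident': 'IDENT', 'number': 'NUMBER'}

def kind(c):
  return 'digit' if c.isdigit() else 'alpha'

def lex(string):
  state, last_position = 'start', 0
  for pos, c in enumerate(string):
    next_state = delta.get((state, kind(c)))
    if next_state is None:
      yield (action[state], last_position, pos)  # emit the finished token
      last_position = pos
      next_state = delta[('start', kind(c))]     # lex next token
    state = next_state
  yield (action[state], last_position, len(string))

assert list(lex('abc123')) == [('IDENT', 0, 3), ('NUMBER', 3, 6)]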
   def minimize(self):
     return DfaMinimizer(self).minimize()
 
 class StatePartition(object):
 
   def __init__(self, node_numbers):
     self.__node_numbers = node_numbers
     assert self.__node_numbers
     self.__hash = None
 
(...skipping 95 matching lines...)
   # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This will
   # become clear when we check the transition for TransitionKey [c-d] (S1 has
   # a transition to S2, S3 has a transition to S4).
   self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys)))
 
   # For each state and each TransitionKey in the alphabet, find out which
   # transition we take from the state with the TransitionKey.
   transitions = {}
   for state_id, state in id_map.items():
     def f((key_id, key)):
-      transition_state = state.transition_state_with_key(key)
+      transition_state = state.transition_state_for_key(key)
       if transition_state:
         return reverse_id_map[transition_state]
       return None
     transitions[state_id] = map(f, enumerate(self.__alphabet))
   self.__transitions = transitions
   # verify created structures
   if self.__verify:
     for partition in partitions:
       for state_id in partition:
         transition_state_array = transitions[state_id]
         state = id_map[state_id]
         for key_id, key in enumerate(self.__alphabet):
           transition_state_id = transition_state_array[key_id]
-          transition_state = state.transition_state_with_key(key)
+          transition_state = state.transition_state_for_key(key)
           if not transition_state:
             assert transition_state_id == None
           else:
             assert transition_state_id != None
             assert transition_state_id == reverse_id_map[transition_state]
   return (working_set, partitions)
 
   @staticmethod
   def __partition_count(partitions):
     return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))
(...skipping 18 matching lines...)
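A minimal self-contained sketch of the splitting step described in the comments above: the states' possibly overlapping TransitionKeys are first split into a disjoint alphabet, then each state gets one row recording which state it reaches on each alphabet key, and states whose rows differ, like S1 and S3 on [c-d], can never share a partition. The ranges and state names are illustrative, and disjoint_keys below is a simplified stand-in for TransitionKey.disjoint_keys:

# Illustrative transitions; keys are inclusive character ranges (lo, hi).
transitions = {
  'S1': {('c', 'd'): 'S2', ('e', 'h'): 'S2'},
  'S3': {('c', 'h'): 'S4'},
}

def disjoint_keys(keys):
  # Simplified stand-in for TransitionKey.disjoint_keys: cut all ranges at
  # every boundary that occurs in the input.
  points = sorted(set(p for lo, hi in keys for p in (ord(lo), ord(hi) + 1)))
  return [(chr(a), chr(b - 1)) for a, b in zip(points, points[1:])]

alphabet = disjoint_keys([k for t in transitions.values() for k in t])
# alphabet == [('c', 'd'), ('e', 'h')]

def row(state):
  # For each alphabet key, the state reached from 'state', or None.
  def target(key):
    for (lo, hi), dest in transitions[state].items():
      if lo <= key[0] and key[1] <= hi:  # the alphabet key is a sub-range
        return dest
    return None
  return [target(key) for key in alphabet]

# S1 goes to S2 on ('c', 'd') while S3 goes to S4, so the rows differ and the
# two states cannot end up in the same partition.
assert row('S1') == ['S2', 'S2']
assert row('S3') == ['S4', 'S4']
assert row('S1') != row('S3')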
         'transitions' : {},
         'terminal' : state in self.__dfa.terminal_set(),
         'action' : state.action(),
       }
       mapping[str(partition)] = node
       transition_array = transitions[state_id]
       for key_id, key in enumerate(self.__alphabet):
         transition_id = transition_array[key_id]
         if transition_id == None:
           if verify:
-            assert not state.transition_state_with_key(key)
+            assert not state.transition_state_for_key(key)
             for s_id in state_ids:
-              assert not id_map[s_id].transition_state_with_key(key)
+              assert not id_map[s_id].transition_state_for_key(key)
           continue
         transition_partition = reverse_partition_map[transition_id]
         assert transition_partition
         if verify:
           for s_id in state_ids:
-            transition = id_map[s_id].transition_state_with_key(key)
+            transition = id_map[s_id].transition_state_for_key(key)
             assert transition
             test_partition = reverse_partition_map[reverse_id_map[transition]]
             assert test_partition == transition_partition
         node['transitions'][key] = name_map[transition_partition]
     start_id = reverse_id_map[self.__dfa.start_state()]
     start_name = name_map[reverse_partition_map[start_id]]
     return (start_name, mapping)
 
   def minimize(self):
     '''Minimize the dfa. For the general idea of minimizing automata, see
(...skipping 43 matching lines...)
         elif len(intersection) <= len(difference):
           working_set.add(intersection)
         else:
           working_set.add(difference)
       if old_partitions:
         partitions -= old_partitions
       if new_partitions:
         partitions |= new_partitions
     (start_name, mapping) = self.__merge_partitions(partitions)
     return Dfa(start_name, mapping)
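The loop above is the standard Hopcroft-style refinement that the minimize() docstring alludes to: when a splitter separates an existing partition into an intersection and a difference, the partition is replaced by both halves, and only the smaller half needs to go back onto the working set. A compact self-contained sketch of the same refinement on a toy four-state DFA, using plain dicts and frozensets instead of StatePartition and TransitionKey:

# Toy DFA over the alphabet {'a'}: 0 -a-> 1, 1 -a-> 2, 2 -a-> 3, 3 -a-> 3,
# with accepting states {2, 3}.  States 2 and 3 are equivalent.
delta = {(0, 'a'): 1, (1, 'a'): 2, (2, 'a'): 3, (3, 'a'): 3}
states, alphabet = {0, 1, 2, 3}, {'a'}
accepting = frozenset([2, 3])

partitions = {accepting, frozenset(states - accepting)}
working_set = set(partitions)

while working_set:
  splitter = working_set.pop()
  for symbol in alphabet:
    # States whose transition on 'symbol' lands inside the splitter.
    goes_in = set(s for s in states if delta.get((s, symbol)) in splitter)
    for part in list(partitions):
      intersection = part & goes_in
      difference = part - goes_in
      if not intersection or not difference:
        continue  # the splitter does not split this partition
      partitions.remove(part)
      partitions |= {intersection, difference}
      if part in working_set:
        # A pending partition is replaced by both halves.
        working_set.remove(part)
        working_set |= {intersection, difference}
      elif len(intersection) <= len(difference):
        working_set.add(intersection)
      else:
        working_set.add(difference)

# States 2 and 3 collapse into a single partition; 0 and 1 stay separate.
assert partitions == {frozenset([0]), frozenset([1]), frozenset([2, 3])}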