
Unified diff: tools/lexer_generator/dfa.py

Issue 77153005: Experimental parser: abstract lexing (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
 # Copyright 2013 the V8 project authors. All rights reserved.
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met:
 #
 #     * Redistributions of source code must retain the above copyright
 #       notice, this list of conditions and the following disclaimer.
 #     * Redistributions in binary form must reproduce the above
 #       copyright notice, this list of conditions and the following
 #       disclaimer in the documentation and/or other materials provided
(...skipping 29 matching lines...)
   def transitions_to_multiple_states(self):
     return False
 
   def name(self):
     return self.__name
 
   def action(self):
     return self.__action
 
   def add_transition(self, key, state):
+    assert not key == TransitionKey.epsilon()
     assert not self.__transitions.has_key(key)
     self.__transitions[key] = state
 
   def transitions(self):
     return self.__transitions
 
+  def epsilon_closure(self):
+    return set([self])
+
   # TODO abstract state matching
   def __matches(self, match_func, value):
     # f collects states whose corresponding TransitionKey matches 'value'.
     f = (lambda acc, (key, state):
            acc | set([state]) if match_func(key, value) else acc)
-    matches = reduce(f, self.__transitions.items(), set())
-    # Since this is a dfa, we should have at most one such state. Return it.
+    return reduce(f, self.__transitions.items(), set())
+
+  def transition_state_iter_for_char(self, value):
+    return iter(self.__matches(lambda k, v : k.matches_char(v), value))
+
+  def transition_state_iter_for_key(self, value):
+    return iter(self.__matches(lambda k, v : k.is_superset_of_key(v), value))
+
+  def transition_state_for_key(self, value):
+    matches = self.__matches(lambda k, v : k.is_superset_of_key(v), value)
     if not matches:
       return None
+    # Since this is a dfa, we should have at most one such state. Return it.
     assert len(matches) == 1
     return iter(matches).next()
 
-  def next_state_with_char(self, value):
-    return self.__matches(lambda k, v : k.matches_char(v), value)
-
-  def transition_state_with_key(self, key):
-    # 'key' can be a subkey of one of the TransitionKeys for this state, but it
-    # cannot partially overlap a TransitionKey for this state.
-    return self.__matches(lambda k, v : k.is_superset_of_key(v), key)
-
 class Dfa(Automaton):
 
   def __init__(self, start_name, mapping):
     super(Dfa, self).__init__()
     self.__terminal_set = set()
     name_map = {}
     for name, node_data in mapping.items():
       node = DfaState(name, node_data['action'])
       name_map[name] = node
       if node_data['terminal']:
(...skipping 16 matching lines...)
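The new whole-key lookup added to DfaState above relies on the property spelled out in the removed comment: a query key may be a subkey of one of the state's TransitionKeys but may not partially overlap one, and a DFA state's keys are disjoint, so at most one key can match and transition_state_for_key can assert len(matches) == 1. A minimal self-contained sketch of that invariant, using a hypothetical CharRangeKey stand-in rather than the real TransitionKey class:

class CharRangeKey(object):
  # Hypothetical stand-in for TransitionKey: an inclusive character range.
  def __init__(self, lo, hi):
    self.lo, self.hi = lo, hi

  def matches_char(self, char):
    return self.lo <= char <= self.hi

  def is_superset_of_key(self, other):
    return self.lo <= other.lo and other.hi <= self.hi

# The keys on a DFA state are disjoint, so a whole-key lookup matches at most
# one of them.
transitions = {CharRangeKey('a', 'f'): 'S1', CharRangeKey('g', 'z'): 'S2'}

def transition_state_for_key(key):
  matches = [s for k, s in transitions.items() if k.is_superset_of_key(key)]
  if not matches:
    return None
  assert len(matches) == 1  # guaranteed by the disjointness of the keys
  return matches[0]

assert transition_state_for_key(CharRangeKey('c', 'd')) == 'S1'
assert transition_state_for_key(CharRangeKey('x', 'x')) == 'S2'
assert transition_state_for_key(CharRangeKey('0', '9')) is None

A key such as [e-h] that straddled the [a-f]/[g-z] boundary would match neither entry here; that is the partial-overlap case the removed comment says callers must not pass.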
     assert self.__terminal_set
     state_count = self.visit_all_states(lambda state, count: count + 1, 0)
     assert self.__node_count == state_count
 
   def node_count(self):
     return self.__node_count
 
   def start_state(self):
     return self.__start
 
-  def start_set(self):
-    return set([self.__start])
-
   def terminal_set(self):
     return set(self.__terminal_set)
 
-  @staticmethod
-  def __match_char(state, char):
-    match = list(state.state_iter(key_filter = lambda k: k.matches_char(char)))
-    if not match: return None
-    assert len(match) == 1
-    return match[0]
-
-  @staticmethod
-  def terminal_action():
-    return Action(None, ('TERMINATE', None))
-
-  @staticmethod
-  def miss_action():
-    return Action(None, ('Miss', None))
-
-  def collect_actions(self, string):
-    state = self.__start
-    for c in string:
-      state = Dfa.__match_char(state, c)
-      if not state:
-        yield self.miss_action()
-        return
-    if state.action():
-      yield state.action()
-    if state in self.__terminal_set:
-      yield self.terminal_action()
-    else:
-      yield self.miss_action()
-
-  def matches(self, string):
-    actions = list(self.collect_actions(string))
-    return actions and actions[-1] == self.terminal_action()
-
-  def lex(self, string):
-    state = self.__start
-    last_position = 0
-    for pos, c in enumerate(string):
-      next = Dfa.__match_char(state, c)
-      if not next:
-        assert state.action() # must invoke default action here
-        yield (state.action(), last_position, pos)
-        last_position = pos
-        # lex next token
-        next = Dfa.__match_char(self.__start, c)
-        assert next
-      state = next
-    assert state.action() # must invoke default action here
-    yield (state.action(), last_position, len(string))
-
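The removed lex() driver above is a plain scan-until-stuck loop: it advances through the DFA one character at a time, and when no transition applies it emits the current state's action for the span scanned so far and restarts from the start state on the offending character. A self-contained toy version of the same loop over a plain transition dict; the state names, character classes and actions are illustrative and not taken from the real lexer:

# Toy scanner in the same shape as the removed lex(): run the DFA until it
# gets stuck, emit the current state's action, then restart at the start
# state on the offending character.
delta = {
  ('start', 'alpha'): 'ident', ('ident', 'alpha'): 'ident',
  ('start', 'digit'): 'number', ('number', 'digit'): 'number',
}
action = {'ident': 'IDENT', 'number': 'NUMBER'}

def kind(c):
  return 'digit' if c.isdigit() else 'alpha'

def lex(string):
  state, last_position = 'start', 0
  for pos, c in enumerate(string):
    next_state = delta.get((state, kind(c)))
    if next_state is None:
      yield (action[state], last_position, pos)  # emit the finished token
      last_position = pos
      next_state = delta[('start', kind(c))]     # lex next token
    state = next_state
  yield (action[state], last_position, len(string))

assert list(lex('abc123')) == [('IDENT', 0, 3), ('NUMBER', 3, 6)]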
   def minimize(self):
     return DfaMinimizer(self).minimize()
 
 class StatePartition(object):
 
   def __init__(self, node_numbers):
     self.__node_numbers = node_numbers
     assert self.__node_numbers
     self.__hash = None
 
(...skipping 95 matching lines...)
   # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This will
   # become clear when we check the transition for TransitionKey [c-d] (S1 has
   # a transition to S2, S3 has a transition to S4).
   self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys)))
 
   # For each state and each TransitionKey in the alphabet, find out which
   # transition we take from the state with the TransitionKey.
   transitions = {}
   for state_id, state in id_map.items():
     def f((key_id, key)):
-      transition_state = state.transition_state_with_key(key)
+      transition_state = state.transition_state_for_key(key)
       if transition_state:
         return reverse_id_map[transition_state]
       return None
     transitions[state_id] = map(f, enumerate(self.__alphabet))
   self.__transitions = transitions
   # verify created structures
   if self.__verify:
     for partition in partitions:
       for state_id in partition:
         transition_state_array = transitions[state_id]
         state = id_map[state_id]
         for key_id, key in enumerate(self.__alphabet):
           transition_state_id = transition_state_array[key_id]
-          transition_state = state.transition_state_with_key(key)
+          transition_state = state.transition_state_for_key(key)
           if not transition_state:
             assert transition_state_id == None
           else:
             assert transition_state_id != None
             assert transition_state_id == reverse_id_map[transition_state]
   return (working_set, partitions)
 
   @staticmethod
   def __partition_count(partitions):
     return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))
(...skipping 18 matching lines...)
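A minimal self-contained sketch of the splitting step described in the comments above: the states' possibly overlapping TransitionKeys are first split into a disjoint alphabet, then each state gets one row recording which state it reaches on each alphabet key, and states whose rows differ, like S1 and S3 on [c-d], can never share a partition. The ranges and state names are illustrative, and disjoint_keys below is a simplified stand-in for TransitionKey.disjoint_keys:

# Illustrative transitions; keys are inclusive character ranges (lo, hi).
transitions = {
  'S1': {('c', 'd'): 'S2', ('e', 'h'): 'S2'},
  'S3': {('c', 'h'): 'S4'},
}

def disjoint_keys(keys):
  # Simplified stand-in for TransitionKey.disjoint_keys: cut all ranges at
  # every boundary that occurs in the input.
  points = sorted(set(p for lo, hi in keys for p in (ord(lo), ord(hi) + 1)))
  return [(chr(a), chr(b - 1)) for a, b in zip(points, points[1:])]

alphabet = disjoint_keys([k for t in transitions.values() for k in t])
# alphabet == [('c', 'd'), ('e', 'h')]

def row(state):
  # For each alphabet key, the state reached from 'state', or None.
  def target(key):
    for (lo, hi), dest in transitions[state].items():
      if lo <= key[0] and key[1] <= hi:  # the alphabet key is a sub-range
        return dest
    return None
  return [target(key) for key in alphabet]

# S1 goes to S2 on ('c', 'd') while S3 goes to S4, so the rows differ and the
# two states cannot end up in the same partition.
assert row('S1') == ['S2', 'S2']
assert row('S3') == ['S4', 'S4']
assert row('S1') != row('S3')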
         'transitions' : {},
         'terminal' : state in self.__dfa.terminal_set(),
         'action' : state.action(),
       }
       mapping[str(partition)] = node
       transition_array = transitions[state_id]
       for key_id, key in enumerate(self.__alphabet):
         transition_id = transition_array[key_id]
         if transition_id == None:
           if verify:
-            assert not state.transition_state_with_key(key)
+            assert not state.transition_state_for_key(key)
             for s_id in state_ids:
-              assert not id_map[s_id].transition_state_with_key(key)
+              assert not id_map[s_id].transition_state_for_key(key)
           continue
         transition_partition = reverse_partition_map[transition_id]
         assert transition_partition
         if verify:
           for s_id in state_ids:
-            transition = id_map[s_id].transition_state_with_key(key)
+            transition = id_map[s_id].transition_state_for_key(key)
             assert transition
             test_partition = reverse_partition_map[reverse_id_map[transition]]
             assert test_partition == transition_partition
         node['transitions'][key] = name_map[transition_partition]
     start_id = reverse_id_map[self.__dfa.start_state()]
     start_name = name_map[reverse_partition_map[start_id]]
     return (start_name, mapping)
 
   def minimize(self):
     '''Minimize the dfa. For the general idea of minimizing automata, see
(...skipping 43 matching lines...)
         elif len(intersection) <= len(difference):
           working_set.add(intersection)
         else:
           working_set.add(difference)
       if old_partitions:
         partitions -= old_partitions
       if new_partitions:
         partitions |= new_partitions
     (start_name, mapping) = self.__merge_partitions(partitions)
     return Dfa(start_name, mapping)
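The loop above is the standard Hopcroft-style refinement that the minimize() docstring alludes to: when a splitter separates an existing partition into an intersection and a difference, the partition is replaced by both halves, and only the smaller half needs to go back onto the working set. A compact self-contained sketch of the same refinement on a toy four-state DFA, using plain dicts and frozensets instead of StatePartition and TransitionKey:

# Toy DFA over the alphabet {'a'}: 0 -a-> 1, 1 -a-> 2, 2 -a-> 3, 3 -a-> 3,
# with accepting states {2, 3}.  States 2 and 3 are equivalent.
delta = {(0, 'a'): 1, (1, 'a'): 2, (2, 'a'): 3, (3, 'a'): 3}
states, alphabet = {0, 1, 2, 3}, {'a'}
accepting = frozenset([2, 3])

partitions = {accepting, frozenset(states - accepting)}
working_set = set(partitions)

while working_set:
  splitter = working_set.pop()
  for symbol in alphabet:
    # States whose transition on 'symbol' lands inside the splitter.
    goes_in = set(s for s in states if delta.get((s, symbol)) in splitter)
    for part in list(partitions):
      intersection = part & goes_in
      difference = part - goes_in
      if not intersection or not difference:
        continue  # the splitter does not split this partition
      partitions.remove(part)
      partitions |= {intersection, difference}
      if part in working_set:
        # A pending partition is replaced by both halves.
        working_set.remove(part)
        working_set |= {intersection, difference}
      elif len(intersection) <= len(difference):
        working_set.add(intersection)
      else:
        working_set.add(difference)

# States 2 and 3 collapse into a single partition; 0 and 1 stay separate.
assert partitions == {frozenset([0]), frozenset([1]), frozenset([2, 3])}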