tools/lexer_generator/nfa.py - Issue 69293005: Experimental parser: add catch all rule

Side by Side Diff: tools/lexer_generator/nfa.py

Issue 69293005: Experimental parser: add catch all rule (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 88 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
99 def __matches(self, match_func, value):	99 def __matches(self, match_func, value):

100 f = lambda acc, (k, vs): acc \| vs if match_func(k, value) else acc	100 f = lambda acc, (k, vs): acc \| vs if match_func(k, value) else acc

101 return reduce(f, self.__transitions.items(), set())	101 return reduce(f, self.__transitions.items(), set())

102	102

103 def char_matches(self, value):	103 def char_matches(self, value):

104 return self.__matches(lambda k, v : k.matches_char(v), value)	104 return self.__matches(lambda k, v : k.matches_char(v), value)

105	105

106 def key_matches(self, value):	106 def key_matches(self, value):

107 return self.__matches(lambda k, v : k.matches_key(v), value)	107 return self.__matches(lambda k, v : k.matches_key(v), value)

108	108

	109 def replace_catch_all(self):

	110 catch_all = TransitionKey.unique('catch_all')

	111 if not catch_all in self.__transitions:

	112 return

	113 f = lambda acc, state: acc \| state.__epsilon_closure

	114 reachable_states = reduce(f, self.__transitions[catch_all], set())

	115 f = lambda acc, state: acc \| set(state.__transitions.keys())

	116 keys = reduce(f, reachable_states, set())

	117 keys.discard(TransitionKey.epsilon())

	118 keys.discard(catch_all)

	119 inverse_key = TransitionKey.inverse_key(keys)

	120 if inverse_key:

	121 self.__transitions[inverse_key] = self.__transitions[catch_all]

	122 del self.__transitions[catch_all]

	123

109 @staticmethod	124 @staticmethod

110 def gather_transition_keys(state_set):	125 def gather_transition_keys(state_set):

111 f = lambda acc, state: acc \| set(state.__transitions.keys())	126 f = lambda acc, state: acc \| set(state.__transitions.keys())

112 return TransitionKey.disjoint_keys(reduce(f, state_set, set()))	127 keys = reduce(f, state_set, set())

	128 keys.discard(TransitionKey.epsilon())

	129 return TransitionKey.disjoint_keys(keys)

113	130

114 class NfaBuilder:	131 class NfaBuilder:

115	132

116 def __init__(self):	133 def __init__(self):

117 self.__node_number = 0	134 self.__node_number = 0

118 self.__operation_map = {}	135 self.__operation_map = {}

119 self.__members = getmembers(self)	136 self.__members = getmembers(self)

120 self.__character_classes = {}	137 self.__character_classes = {}

121 self.__states = []	138 self.__states = []

122	139

(...skipping 72 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
195 return self.__key_state(	212 return self.__key_state(

196 TransitionKey.character_class(graph, self.__character_classes))	213 TransitionKey.character_class(graph, self.__character_classes))

197	214

198 def __not_class(self, graph):	215 def __not_class(self, graph):

199 return self.__key_state(	216 return self.__key_state(

200 TransitionKey.character_class(graph, self.__character_classes))	217 TransitionKey.character_class(graph, self.__character_classes))

201	218

202 def __any(self, graph):	219 def __any(self, graph):

203 return self.__key_state(TransitionKey.any())	220 return self.__key_state(TransitionKey.any())

204	221

	222 def __epsilon(self, graph):

	223 start = self.__new_state()

	224 end = self.__new_state()

	225 start.close(end)

	226 return (start, [end])

	227

205 def __action(self, graph):	228 def __action(self, graph):

206 (start, ends) = self.__process(graph[1])	229 (start, ends) = self.__process(graph[1])

207 action = graph[2]	230 action = graph[2]

208 end = self.__new_state()	231 end = self.__new_state()

209 self.__patch_ends(ends, end)	232 self.__patch_ends(ends, end)

210 end.set_action(action)	233 end.set_action(action)

211 return (start, [end])	234 return (start, [end])

212	235

213 def __continue(self, graph):	236 def __continue(self, graph):

214 (start, ends) = self.__process(graph[1])	237 (start, ends) = self.__process(graph[1])

215 state = self.__peek_state()	238 state = self.__peek_state()

216 if not state['start_node']:	239 if not state['start_node']:

217 state['start_node'] = self.__new_state()	240 state['start_node'] = self.__new_state()

218 end = self.__new_state()	241 self.__patch_ends(ends, state['start_node'])

219 self.__patch_ends(ends, end)	242 return (start, [])

220 ends = [end]

221 end.add_epsilon_transition(state['start_node'])

222 return (start, ends)

223	243

224 def __break(self, graph):	244 def __catch_all(self, graph):

225 (start, ends) = self.__process(graph[1])	245 return self.__key_state(TransitionKey.unique('catch_all'))

226 self.__peek_state()['unpatched_ends'] += ends

227 new_end = self.__new_state()

228 for end in ends:

229 end.add_epsilon_transition(new_end)

230 return (start, [new_end])

231	246

232 def __join(self, graph):	247 def __join(self, graph):

233 (graph, name, subgraph, modifier) = graph[1:]	248 (graph, name, subgraph, modifier) = graph[1:]

234 subgraphs = self.__peek_state()['subgraphs']	249 subgraphs = self.__peek_state()['subgraphs']

235 if not name in subgraphs:	250 if not name in subgraphs:

236 subgraphs[name] = self.__nfa(subgraph)	251 subgraphs[name] = self.__nfa(subgraph)

237 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name]	252 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name]

238 (start, ends) = self.__process(graph)	253 (start, ends) = self.__process(graph)

239 if modifier:	254 if modifier:

240 assert modifier == 'ZERO_OR_MORE'	255 assert modifier == 'ZERO_OR_MORE'

(...skipping 55 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
296	311

297 @staticmethod	312 @staticmethod

298 def add_action(graph, action):	313 def add_action(graph, action):

299 return ('ACTION', graph, action)	314 return ('ACTION', graph, action)

300	315

301 @staticmethod	316 @staticmethod

302 def add_continue(graph):	317 def add_continue(graph):

303 return ('CONTINUE', graph)	318 return ('CONTINUE', graph)

304	319

305 @staticmethod	320 @staticmethod

306 def add_break(graph):	321 def catch_all():

307 return ('BREAK', graph)	322 return ('CATCH_ALL',)

	323

	324 @staticmethod

	325 def epsilon():

	326 return ('EPSILON',)

308	327

309 @staticmethod	328 @staticmethod

310 def join_subgraph(graph, name, subgraph, modifier):	329 def join_subgraph(graph, name, subgraph, modifier):

311 if modifier:	330 if modifier:

312 modifier = NfaBuilder.__modifer_map[modifier]	331 modifier = NfaBuilder.__modifer_map[modifier]

313 return ('JOIN', graph, name, subgraph, modifier)	332 return ('JOIN', graph, name, subgraph, modifier)

314	333

315 @staticmethod	334 @staticmethod

316 def or_graphs(graphs):	335 def or_graphs(graphs):

317 return reduce(lambda acc, g: ('OR', acc, g), graphs)	336 return reduce(lambda acc, g: ('OR', acc, g), graphs)

(...skipping 88 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
406 match_states = set()	425 match_states = set()

407 f = lambda acc, state: acc \| state.key_matches(key)	426 f = lambda acc, state: acc \| state.key_matches(key)

408 for state in reduce(f, nfa_state_set, set()):	427 for state in reduce(f, nfa_state_set, set()):

409 match_states.add(state)	428 match_states.add(state)

410 transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)	429 transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)

411 dfa_nodes[name]['transitions'][key] = transition_state	430 dfa_nodes[name]['transitions'][key] = transition_state

412 return name	431 return name

413	432

414 def compute_dfa(self):	433 def compute_dfa(self):

415 self.__compute_epsilon_closures()	434 self.__compute_epsilon_closures()

	435 self.__visit_all_edges(lambda node, state: node.replace_catch_all(), None)

416 dfa_nodes = {}	436 dfa_nodes = {}

417 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)	437 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)

418 return (start_name, dfa_nodes)	438 return (start_name, dfa_nodes)

419	439

420 def to_dot(self):	440 def to_dot(self):

421 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state)	441 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state)

422 state_iterator = lambda x : x	442 state_iterator = lambda x : x

423 return self.generate_dot(self.__start, set([self.__end]), iterator, state_it erator)	443 return self.generate_dot(self.__start, set([self.__end]), iterator, state_it erator)

OLD	NEW

« src/lexer/lexer_py.re ('K') | « tools/lexer_generator/generator.py ('k') | tools/lexer_generator/regex_lexer.py » ('j') | no next file with comments »