tools/lexer_generator/nfa.py - Issue 68343004: Experimental parser: better actions

Side by Side Diff: tools/lexer_generator/nfa.py

Issue 68343004: Experimental parser: better actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 19 matching lines...) Expand all Loading...
30 from automaton import *	30 from automaton import *

31 from inspect import getmembers	31 from inspect import getmembers

32	32

33 class NfaState(AutomatonState):	33 class NfaState(AutomatonState):

34	34

35 def __init__(self, node_number):	35 def __init__(self, node_number):

36 super(NfaState, self).__init__(node_number)	36 super(NfaState, self).__init__(node_number)

37 self.__transitions = {}	37 self.__transitions = {}

38 self.__unclosed = set()	38 self.__unclosed = set()

39 self.__epsilon_closure = None	39 self.__epsilon_closure = None

40 self.__transition_action = None	40 self.__action = None

41	41

42 def epsilon_closure(self):	42 def epsilon_closure(self):

43 return self.__epsilon_closure	43 return self.__epsilon_closure

44	44

45 def set_epsilon_closure(self, closure):	45 def set_epsilon_closure(self, closure):

46 assert self.is_closed()	46 assert self.is_closed()

47 assert self.__epsilon_closure == None	47 assert self.__epsilon_closure == None

48 self.__epsilon_closure = frozenset(closure)	48 self.__epsilon_closure = frozenset(closure)

49	49

50 def set_transition_action(self, action):	50 def action(self):

	51 assert self.is_closed()

	52 return self.__action

	53

	54 def set_action(self, action):

51 assert not self.is_closed()	55 assert not self.is_closed()

52 assert self.__transition_action == None	56 assert not self.__action

53 self.__transition_action = action	57 self.__action = action

54	58

55 def transitions(self):	59 def transitions(self):

56 assert self.is_closed()	60 assert self.is_closed()

57 return self.__transitions	61 return self.__transitions

58	62

59 def next_states(self, key_filter):	63 def next_states(self, key_filter):

60 assert self.is_closed()	64 assert self.is_closed()

61 first = lambda v: [x[0] for x in v]	65 f = lambda acc, (k, v) : acc if not key_filter(k) else acc \| set(v)

62 f = lambda acc, (k, v) : acc if not key_filter(k) else acc \| set(first(v))

63 return reduce(f, self.__transitions.items(), set())	66 return reduce(f, self.__transitions.items(), set())

64	67

65 def __add_transition(self, key, action, next_state):	68 def __add_transition(self, key, next_state):

66 if next_state == None:	69 if next_state == None:

67 assert not action

68 assert not self.is_closed(), "already closed"	70 assert not self.is_closed(), "already closed"

69 self.__unclosed.add(key)	71 self.__unclosed.add(key)

70 return	72 return

71 if not key in self.__transitions:	73 if not key in self.__transitions:

72 self.__transitions[key] = set()	74 self.__transitions[key] = set()

73 self.__transitions[key].add((next_state, action))	75 self.__transitions[key].add(next_state)

74	76

75 def add_unclosed_transition(self, key):	77 def add_unclosed_transition(self, key):

76 assert key != TransitionKey.epsilon()	78 assert key != TransitionKey.epsilon()

77 self.__add_transition(key, None, None)	79 self.__add_transition(key, None)

78	80

79 def add_epsilon_transition(self, state):	81 def add_epsilon_transition(self, state):

80 assert state != None	82 assert state != None

81 self.__add_transition(TransitionKey.epsilon(), None, state)	83 self.__add_transition(TransitionKey.epsilon(), state)

82	84

83 def is_closed(self):	85 def is_closed(self):

84 return self.__unclosed == None	86 return self.__unclosed == None

85	87

86 def close(self, end):	88 def close(self, end):

87 assert not self.is_closed()	89 assert not self.is_closed()

88 unclosed, self.__unclosed = self.__unclosed, None	90 unclosed, self.__unclosed = self.__unclosed, None

89 action, self.__transition_action = self.__transition_action, None

90 if end == None:	91 if end == None:

91 assert not unclosed and not action	92 assert not unclosed

92 return	93 return

93 for key in unclosed:	94 for key in unclosed:

94 self.__add_transition(key, action, end)	95 self.__add_transition(key, end)

95 if not unclosed:	96 if not unclosed:

96 self.__add_transition(TransitionKey.epsilon(), action, end)	97 self.__add_transition(TransitionKey.epsilon(), end)

97	98

98 def __matches(self, match_func, value):	99 def __matches(self, match_func, value):

99 f = lambda acc, (k, vs): acc \| vs if match_func(k, value) else acc	100 f = lambda acc, (k, vs): acc \| vs if match_func(k, value) else acc

100 return reduce(f, self.__transitions.items(), set())	101 return reduce(f, self.__transitions.items(), set())

101	102

102 def char_matches(self, value):	103 def char_matches(self, value):

103 return self.__matches(lambda k, v : k.matches_char(v), value)	104 return self.__matches(lambda k, v : k.matches_char(v), value)

104	105

105 def key_matches(self, value):	106 def key_matches(self, value):

106 return self.__matches(lambda k, v : k.matches_key(v), value)	107 return self.__matches(lambda k, v : k.matches_key(v), value)

107	108

108 def __str__(self):

109 return "NfaState(" + str(self.node_number()) + ")"

110

111 @staticmethod	109 @staticmethod

112 def gather_transition_keys(state_set):	110 def gather_transition_keys(state_set):

113 f = lambda acc, state: acc \| set(state.__transitions.keys())	111 f = lambda acc, state: acc \| set(state.__transitions.keys())

114 return TransitionKey.disjoint_keys(reduce(f, state_set, set()))	112 return TransitionKey.disjoint_keys(reduce(f, state_set, set()))

115	113

116 class NfaBuilder:	114 class NfaBuilder:

117	115

118 def __init__(self):	116 def __init__(self):

119 self.__node_number = 0	117 self.__node_number = 0

120 self.__operation_map = {}	118 self.__operation_map = {}

(...skipping 77 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
198 TransitionKey.character_class(graph, self.__character_classes))	196 TransitionKey.character_class(graph, self.__character_classes))

199	197

200 def __not_class(self, graph):	198 def __not_class(self, graph):

201 return self.__key_state(	199 return self.__key_state(

202 TransitionKey.character_class(graph, self.__character_classes))	200 TransitionKey.character_class(graph, self.__character_classes))

203	201

204 def __any(self, graph):	202 def __any(self, graph):

205 return self.__key_state(TransitionKey.any())	203 return self.__key_state(TransitionKey.any())

206	204

207 def __action(self, graph):	205 def __action(self, graph):

208 incoming = graph[3]

209 (start, ends) = self.__process(graph[1])	206 (start, ends) = self.__process(graph[1])

210 action = graph[2]	207 action = graph[2]

211 if incoming:	208 end = self.__new_state()

212 new_start = self.__new_state()	209 self.__patch_ends(ends, end)

213 new_start.set_transition_action(action)	210 end.set_action(action)

214 new_start.close(start)	211 return (start, [end])

215 start = new_start

216 else:

217 new_end = self.__new_state()

218 for end in ends:

219 end.set_transition_action(action)

220 self.__patch_ends(ends, new_end)

221 ends = [new_end]

222 return (start, ends)

223	212

224 def __continue(self, graph):	213 def __continue(self, graph):

225 (start, ends) = self.__process(graph[1])	214 (start, ends) = self.__process(graph[1])

226 state = self.__peek_state()	215 state = self.__peek_state()

227 if not state['start_node']:	216 if not state['start_node']:

228 state['start_node'] = self.__new_state()	217 state['start_node'] = self.__new_state()

229 end = self.__new_state()	218 end = self.__new_state()

230 self.__patch_ends(ends, end)	219 self.__patch_ends(ends, end)

231 ends = [end]	220 ends = [end]

232 end.add_epsilon_transition(state['start_node'])	221 end.add_epsilon_transition(state['start_node'])

233 return (start, ends)	222 return (start, ends)

234	223

235 def __join(self, graph):	224 def __join(self, graph):

236 (graph, name, subgraph) = graph[1:]	225 (graph, name, subgraph, modifier) = graph[1:]

237 subgraphs = self.__peek_state()['subgraphs']	226 subgraphs = self.__peek_state()['subgraphs']

238 if not name in subgraphs:	227 if not name in subgraphs:

239 subgraphs[name] = self.__nfa(subgraph)	228 subgraphs[name] = self.__nfa(subgraph)

240 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name]	229 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name]

241 (start, ends) = self.__process(graph)	230 (start, ends) = self.__process(graph)

	231 if modifier:

	232 assert modifier == 'ZERO_OR_MORE'

	233 for end in ends:

	234 end.add_epsilon_transition(subgraph_end)

242 self.__patch_ends(ends, subgraph_start)	235 self.__patch_ends(ends, subgraph_start)

243 end = self.__new_state()	236 end = self.__new_state()

244 subgraph_end.add_epsilon_transition(end)	237 subgraph_end.add_epsilon_transition(end)

245 return (start, [end])	238 return (start, [end])

246	239

247 def __process(self, graph):	240 def __process(self, graph):

248 assert type(graph) == TupleType	241 assert type(graph) == TupleType

249 method = "_NfaBuilder__" + graph[0].lower()	242 method = "_NfaBuilder__" + graph[0].lower()

250 if not method in self.__operation_map:	243 if not method in self.__operation_map:

251 matches = filter(lambda (name, func): name == method, self.__members)	244 matches = filter(lambda (name, func): name == method, self.__members)

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
283 self.__patch_ends(ends, end)	276 self.__patch_ends(ends, end)

284 return (start, end, self.__node_number - start_node_number)	277 return (start, end, self.__node_number - start_node_number)

285	278

286 def nfa(self, graph):	279 def nfa(self, graph):

287 (start, end, nodes_created) = self.__nfa(graph)	280 (start, end, nodes_created) = self.__nfa(graph)

288 end.close(None)	281 end.close(None)

289 return Nfa(start, end, nodes_created)	282 return Nfa(start, end, nodes_created)

290	283

291 @staticmethod	284 @staticmethod

292 def add_action(graph, action):	285 def add_action(graph, action):

293 return ('ACTION', graph, action, False)	286 return ('ACTION', graph, action)

294

295 @staticmethod

296 def add_incoming_action(graph, action):

297 return ('ACTION', graph, action, True)

298	287

299 @staticmethod	288 @staticmethod

300 def add_continue(graph):	289 def add_continue(graph):

301 return ('CONTINUE', graph)	290 return ('CONTINUE', graph)

302	291

303 @staticmethod	292 @staticmethod

304 def join_subgraph(graph, name, subgraph):	293 def join_subgraph(graph, name, subgraph, modifier):

305 return ('JOIN', graph, name, subgraph)	294 if modifier:

	295 modifier = NfaBuilder.__modifer_map[modifier]

	296 return ('JOIN', graph, name, subgraph, modifier)

306	297

307 @staticmethod	298 @staticmethod

308 def or_graphs(graphs):	299 def or_graphs(graphs):

309 return reduce(lambda acc, g: ('OR', acc, g), graphs)	300 return reduce(lambda acc, g: ('OR', acc, g), graphs)

310	301

311 @staticmethod	302 @staticmethod

312 def cat_graphs(graphs):	303 def cat_graphs(graphs):

313 return reduce(lambda acc, g: ('CAT', acc, g), graphs)	304 return reduce(lambda acc, g: ('CAT', acc, g), graphs)

314	305

315 __modifer_map = {	306 __modifer_map = {

(...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
365 return reduce(f, states, set(states))	356 return reduce(f, states, set(states))

366	357

367 def matches(self, string):	358 def matches(self, string):

368 self.__compute_epsilon_closures()	359 self.__compute_epsilon_closures()

369 valid_states = Nfa.__close(set([self.__start]))	360 valid_states = Nfa.__close(set([self.__start]))

370 for c in string:	361 for c in string:

371 f = lambda acc, state: acc \| state.char_matches(c)	362 f = lambda acc, state: acc \| state.char_matches(c)

372 transitions = reduce(f, valid_states, set())	363 transitions = reduce(f, valid_states, set())

373 if not transitions:	364 if not transitions:

374 return False	365 return False

375 valid_states = Nfa.__close(set([x[0] for x in transitions]))	366 valid_states = Nfa.__close(transitions)

376 return self.__end in valid_states	367 return self.__end in valid_states

377	368

378 @staticmethod	369 @staticmethod

379 def __to_dfa(nfa_state_set, dfa_nodes, end_node):	370 def __to_dfa(nfa_state_set, dfa_nodes, end_node):

380 nfa_state_set = Nfa.__close(nfa_state_set)	371 nfa_state_set = Nfa.__close(nfa_state_set)

381 assert nfa_state_set	372 assert nfa_state_set

382 name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set))	373 name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set))

383 if name in dfa_nodes:	374 if name in dfa_nodes:

384 return name	375 return name

	376 def gather_actions(states):

	377 actions = set()

	378 for state in states:

	379 if state.action():

	380 actions.add(state.action())

	381 actions = list(actions)

	382 actions.sort()

	383 return actions

385 dfa_nodes[name] = {	384 dfa_nodes[name] = {

386 'transitions': {},	385 'transitions': {},

387 'terminal': end_node in nfa_state_set}	386 'terminal': end_node in nfa_state_set,

	387 'actions' : gather_actions(nfa_state_set)}

388 for key in NfaState.gather_transition_keys(nfa_state_set):	388 for key in NfaState.gather_transition_keys(nfa_state_set):

	389 match_states = set()

389 f = lambda acc, state: acc \| state.key_matches(key)	390 f = lambda acc, state: acc \| state.key_matches(key)

390 transitions = reduce(f, nfa_state_set, set())	391 for state in reduce(f, nfa_state_set, set()):

391 match_states = set()

392 actions = []

393 for (state, action) in transitions:

394 match_states.add(state)	392 match_states.add(state)

395 if action:

396 actions.append(action)

397

398 # Pull in actions which can be taken with an epsilon transition from the

399 # match state.

400 e = TransitionKey.epsilon()

401 if e in state.transitions():

402 for e_trans in state.transitions()[e]:

403 if e_trans[1]:

404 actions.append(e_trans[1])

405 for s in state.epsilon_closure():

406 if e in s.transitions():

407 for e_trans in s.transitions()[e]:

408 if e_trans[1]:

409 actions.append(e_trans[1])

410

411 assert len(match_states) == len(transitions)

412

413 actions.sort()

414 action = actions[0] if actions else None

415 transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)	393 transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)

416 dfa_nodes[name]['transitions'][key] = (transition_state, action)	394 dfa_nodes[name]['transitions'][key] = transition_state

417 return name	395 return name

418	396

419 def compute_dfa(self):	397 def compute_dfa(self):

420 self.__compute_epsilon_closures()	398 self.__compute_epsilon_closures()

421 dfa_nodes = {}	399 dfa_nodes = {}

422 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)	400 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)

423 return (start_name, dfa_nodes)	401 return (start_name, dfa_nodes)

424	402

425 def to_dot(self):	403 def to_dot(self):

426 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state)	404 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state)

427 return self.generate_dot(self.__start, set([self.__end]), iterator)	405 state_iterator = lambda x : x

	406 return self.generate_dot(self.__start, set([self.__end]), iterator, state_it erator)

OLD	NEW

« no previous file with comments | « tools/lexer_generator/lexer_test.py ('k') | tools/lexer_generator/rule_lexer.py » ('j') | no next file with comments »