tools/lexer_generator/dfa.py - Issue 68343004: Experimental parser: better actions

Side by Side Diff: tools/lexer_generator/dfa.py

Issue 68343004: Experimental parser: better actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 13 matching lines...) Expand all Loading...
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 from automaton import *	28 from automaton import *

29 from nfa import Nfa	29 from nfa import Nfa

30 from transition_keys import TransitionKey	30 from transition_keys import TransitionKey

31	31

32 class DfaState(AutomatonState):	32 class DfaState(AutomatonState):

33	33

34 def __init__(self, name, node_number):	34 def __init__(self, name, node_number, actions):

35 super(DfaState, self).__init__(node_number)	35 super(DfaState, self).__init__(node_number)

36 self.__name = name	36 self.__name = name

37 self.__transitions = {}	37 self.__transitions = {}

	38 self.__actions = actions

38	39

39 def name(self):	40 def name(self):

40 return self.__name	41 return self.__name

41	42

42 def add_transition(self, key, action, state):	43 def action(self):

	44 return self.__actions[0] if self.__actions else None

	45

	46 def actions(self):

	47 return self.__actions

	48

	49 def add_transition(self, key, state):

43 assert not self.__transitions.has_key(key)	50 assert not self.__transitions.has_key(key)

44 self.__transitions[key] = (state, action)	51 self.__transitions[key] = state

45	52

46 def transitions(self):	53 def transitions(self):

47 return self.__transitions	54 return self.__transitions

48	55

49 def __str__(self):

50 return str(self.node_number())

51

52 class Dfa(Automaton):	56 class Dfa(Automaton):

53	57

54 def __init__(self, start_name, mapping):	58 def __init__(self, start_name, mapping):

55 super(Dfa, self).__init__()	59 super(Dfa, self).__init__()

56 self.__terminal_set = set()	60 self.__terminal_set = set()

57 name_map = {}	61 name_map = {}

58 action_map = {}	62 for i, (name, node_data) in enumerate(mapping.items()):

59 for i, name in enumerate(mapping.keys()):	63 node = DfaState(name, i, node_data['actions'])

60 name_map[name] = DfaState(name, i)	64 name_map[name] = node

	65 if node_data['terminal']:

	66 self.__terminal_set.add(node)

61 for name, node_data in mapping.items():	67 for name, node_data in mapping.items():

62 node = name_map[name]	68 node = name_map[name]

63 if node_data['terminal']:

64 self.__terminal_set.add(node)

65 inversion = {}	69 inversion = {}

66 for key, (state, action) in node_data['transitions'].items():	70 for key, state in node_data['transitions'].items():

67 if not state in inversion:	71 if not state in inversion:

68 inversion[state] = {}	72 inversion[state] = []

69 # TODO fix this	73 inversion[state].append(key)

70 action_key = str(action)	74 for state, keys in inversion.items():

71 if not action_key in action_map:	75 merged_key = TransitionKey.merged_key(keys)

72 action_map[action_key] = action	76 node.add_transition(merged_key, name_map[state])

73 if not action_key in inversion[state]:

74 inversion[state][action_key] = []

75 inversion[state][action_key].append(key)

76 for state, values in inversion.items():

77 for action_key, keys in values.items():

78 merged_key = TransitionKey.merged_key(keys)

79 action = action_map[action_key]

80 node.add_transition(merged_key, action, name_map[state])

81 self.__start = name_map[start_name]	77 self.__start = name_map[start_name]

82 assert self.__terminal_set	78 assert self.__terminal_set

83	79

	80 @staticmethod

	81 def __match_char(state, char):

	82 match = [s for k, s in state.transitions().items() if k.matches_char(char)]

	83 if not match: return None

	84 assert len(match) == 1

	85 return match[0]

	86

84 def collect_actions(self, string):	87 def collect_actions(self, string):

85 state = self.__start	88 state = self.__start

86 for c in string:	89 for c in string:

87 next = [s for k, s in state.transitions().items() if k.matches_char(c)]	90 state = Dfa.__match_char(state, c)

88 if not next:	91 if not state:

89 yield ('MISS',)	92 yield ('MISS',)

90 return	93 return

91 assert len(next) == 1	94 if state.action():

92 (state, action) = next[0]	95 yield state.action()

93 if action:

94 yield action

95 if state in self.__terminal_set:	96 if state in self.__terminal_set:

96 yield ('TERMINATE', )	97 yield ('TERMINATE', )

97 else:	98 else:

98 yield ('MISS',)	99 yield ('MISS',)

99	100

100 def matches(self, string):	101 def matches(self, string):

101 actions = list(self.collect_actions(string))	102 actions = list(self.collect_actions(string))

102 return actions and actions[-1][0] == 'TERMINATE'	103 return actions and actions[-1][0] == 'TERMINATE'

103	104

104 def lex(self, string):	105 def lex(self, string):

105 state = self.__start	106 state = self.__start

106 stored_action = None	107 last_position = 0

107 pos = 0	108 for pos, c in enumerate(string):

108 while pos < len(string):	109 next = Dfa.__match_char(state, c)

109 c = string[pos]

110 next = [s for k, s in state.transitions().items() if k.matches_char(c)]

111 if not next:	110 if not next:

112 # Maybe we have a stored action before. Take it and backtrack to the	111 assert state.action() # must invoke default action here

113 # position where that action was.	112 yield (state.action()[1], last_position, pos)

114 if stored_action:	113 last_position = pos

115 yield stored_action	114 # lex next token

116 return	115 next = Dfa.__match_char(self.__start, c)

117 # FIXME: Otherwise, use the default rule if this happens at the start	116 assert next

118 # state of the automaton.	117 state = next

119	118 assert state.action() # must invoke default action here

120 assert len(next) == 1	119 yield (state.action()[1], last_position, len(string))

121 (state, action) = next[0]

122

123 # Special actions: terminate

124 if action and action[2] == 'terminate':

125 if stored_action:

126 yield stored_action

127 yield (action, pos)

128 return

129

130 # Normally we don't know yet whether to take the action - it depends on

131 # what comes next.

132 if action:

133 stored_action = (action, pos)

134

135 pos += 1

136	120

137 def __visit_all_edges(self, visitor, state):	121 def __visit_all_edges(self, visitor, state):

138 edge = set([self.__start])	122 edge = set([self.__start])

139 first = lambda v: set([x[0] for x in v])	123 next_edge = lambda node: set(node.transitions().values())

140 next_edge = lambda node: first(node.transitions().values())

141 return self.visit_edges(edge, next_edge, visitor, state)	124 return self.visit_edges(edge, next_edge, visitor, state)

142	125

143 def to_dot(self):	126 def to_dot(self):

144 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state)	127 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state)

145 return self.generate_dot(self.__start, self.__terminal_set, iterator)	128 state_iterator = lambda x : [x]

	129 return self.generate_dot(self.__start, self.__terminal_set, iterator, state_ iterator)

OLD	NEW

« no previous file with comments | « tools/lexer_generator/automaton.py ('k') | tools/lexer_generator/generator.py » ('j') | no next file with comments »