tools/lexer_generator/generator.py - Issue 68343004: Experimental parser: better actions

Side by Side Diff: tools/lexer_generator/generator.py

Issue 68343004: Experimental parser: better actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 72 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
83 loads.append(load_template % ("dfa [%s]" % name, dfa_i))	83 loads.append(load_template % ("dfa [%s]" % name, dfa_i))

84 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))	84 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))

85 return file_template % body	85 return file_template % body

86	86

87 def process_rules(self, parser_state):	87 def process_rules(self, parser_state):

88 rule_map = {}	88 rule_map = {}

89 builder = NfaBuilder()	89 builder = NfaBuilder()

90 builder.set_character_classes(parser_state.character_classes)	90 builder.set_character_classes(parser_state.character_classes)

91 assert 'default' in parser_state.rules	91 assert 'default' in parser_state.rules

92 def process(k, v):	92 def process(k, v):

93 assert 'default' in v

94 graphs = []	93 graphs = []

95 for (graph, action) in v['regex']:	94 for (graph, (precedence, code, transition)) in v['regex']:

96 (precedence, code, transition) = action	95 default_code = v['default_action']

97 if code:	96 action = code if code else default_code

98 graph = NfaBuilder.add_action(graph, (precedence, code, None))	97 if action:

	98 graph = NfaBuilder.add_action(graph, (precedence, action))

99 if transition == 'continue':	99 if transition == 'continue':

100 if not v['default'][1][2] == 'continue':	100 assert not k == 'default'

101 graph = NfaBuilder.add_continue(graph)	101 graph = NfaBuilder.add_continue(graph)

102 else:	102 elif transition == 'break':

103 pass # TODO null key	103 pass

104 elif (transition == 'break' or	104 elif (transition == 'terminate' or

105 transition == 'terminate' or

106 transition == 'terminate_illegal'):	105 transition == 'terminate_illegal'):

107 graph = NfaBuilder.add_action(graph, (10000, transition, None))	106 assert not code

	107 graph = NfaBuilder.add_action(graph, (-1, transition))

108 else:	108 else:

109 assert k == 'default'	109 assert k == 'default'

110 graph = NfaBuilder.join_subgraph(graph, transition, rule_map[transition])	110 subgraph_modifier = '*' if code else None

	111 graph = NfaBuilder.join_subgraph(

	112 graph, transition, rule_map[transition], subgraph_modifier)

111 graphs.append(graph)	113 graphs.append(graph)

112 graph = NfaBuilder.or_graphs(graphs)	114 graph = NfaBuilder.or_graphs(graphs)

113 # merge default action

114 (precedence, code, transition) = v['default'][1]

115 assert transition == 'continue' or transition == 'break'

116 if transition == 'continue':

117 assert k != 'default'

118 graph = NfaBuilder.add_incoming_action(graph, (10000, k, None))

119 if code:

120 graph = NfaBuilder.add_incoming_action(graph, (precedence, code, None))

121 rule_map[k] = graph	115 rule_map[k] = graph

	116 # process first the subgraphs, then the default graph

122 for k, v in parser_state.rules.items():	117 for k, v in parser_state.rules.items():

123 if k == 'default': continue	118 if k == 'default': continue

124 process(k, v)	119 process(k, v)

125 process('default', parser_state.rules['default'])	120 process('default', parser_state.rules['default'])

	121 # build the automata

126 for rule_name, graph in rule_map.items():	122 for rule_name, graph in rule_map.items():

127 nfa = builder.nfa(graph)	123 nfa = builder.nfa(graph)

128 (start, dfa_nodes) = nfa.compute_dfa()	124 (start, dfa_nodes) = nfa.compute_dfa()

129 dfa = Dfa(start, dfa_nodes)	125 dfa = Dfa(start, dfa_nodes)

130 self.__automata[rule_name] = (nfa, dfa)	126 self.__automata[rule_name] = (nfa, dfa)

131	127

132 # Lexes strings with the help of DFAs produced by the grammar. For sanity	128 # Lexes strings with the help of DFAs produced by the grammar. For sanity

133 # checking the automata.	129 # checking the automata.

134 def lex(self, string):	130 def lex(self, string):

135 (nfa, dfa) = self.__automata['default'] # FIXME	131 (nfa, dfa) = self.__automata['default']

136	132 return dfa.lex(string)

137 action_stream = []

138 terminate_seen = False

139 offset = 0

140 while not terminate_seen and string:

141 result = list(dfa.lex(string))

142 last_position = 0

143 for (action, position) in result:

144 action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)]))

145 last_position = position

146 if action[2] == 'terminate':

147 terminate_seen = True

148 string = string[(last_position + 1):]

149 offset += last_position

150 return action_stream

151	133

152 if __name__ == '__main__':	134 if __name__ == '__main__':

153	135

154 parser = argparse.ArgumentParser()	136 parser = argparse.ArgumentParser()

155 parser.add_argument('--html')	137 parser.add_argument('--html')

156 parser.add_argument('--re', default='src/lexer/lexer_py.re')	138 parser.add_argument('--re', default='src/lexer/lexer_py.re')

157 parser.add_argument('--input')	139 parser.add_argument('--input')

158 args = parser.parse_args()	140 args = parser.parse_args()

159	141

160 re_file = args.re	142 re_file = args.re

161 parser_state = RuleParserState()	143 parser_state = RuleParserState()

162 print "parsing %s" % re_file	144 print "parsing %s" % re_file

163 with open(re_file, 'r') as f:	145 with open(re_file, 'r') as f:

164 generator = Generator(f.read())	146 generator = Generator(f.read())

165	147

166 html_file = args.html	148 html_file = args.html

167 if html_file:	149 if html_file:

168 html = generator.generate_html()	150 html = generator.generate_html()

169 with open(args.html, 'w') as f:	151 with open(args.html, 'w') as f:

170 f.write(html)	152 f.write(html)

171 print "wrote html to %s" % html_file	153 print "wrote html to %s" % html_file

172	154

173 input_file = args.input	155 input_file = args.input

174 if input_file:	156 if input_file:

175 with open(input_file, 'r') as f:	157 with open(input_file, 'r') as f:

176 input_text = f.read() + '\0'	158 input_text = f.read() + '\0'

177 for t in generator.lex(input_text):	159 for t in generator.lex(input_text):

178 print t	160 print t

OLD	NEW

« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/lexer_test.py » ('j') | no next file with comments »