tools/lexer_generator/generator.py - Issue 68683007: Parser generator: more layering refactoring

Side by Side Diff: tools/lexer_generator/generator.py

Issue 68683007: Parser generator: more layering refactoring (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 11 matching lines...) Expand all Loading...
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 import argparse	28 import argparse

29 from nfa import Nfa	29 from nfa import Nfa

30 from nfa_builder import NfaBuilder	30 from nfa_builder import NfaBuilder

31 from dfa import Dfa	31 from dfa import Dfa

32 from rule_parser import RuleParser, RuleParserState	32 from rule_parser import RuleParser, RuleParserState, RuleProcessor

33 from code_generator import CodeGenerator	33 from code_generator import CodeGenerator

34	34

35 file_template = '''	35 file_template = '''

36 <html>	36 <html>

37 <head>	37 <head>

38 <script src="viz.js"></script>	38 <script src="viz.js"></script>

39 <script>	39 <script>

40 function draw(name, id) {	40 function draw(name, id) {

41 code = document.getElementById(id).innerHTML	41 code = document.getElementById(id).innerHTML

42 document.body.innerHTML += "<h1>" + name + "</h1>";	42 document.body.innerHTML += "<h1>" + name + "</h1>";

(...skipping 14 matching lines...) Expand all Loading...
57 %s	57 %s

58 </script>	58 </script>

59 '''	59 '''

60	60

61 load_template = ''' draw('%s', '%s');'''	61 load_template = ''' draw('%s', '%s');'''

62	62

63 load_outer_template = ''' <script>	63 load_outer_template = ''' <script>

64 %s	64 %s

65 </script>'''	65 </script>'''

66	66

67 class Generator(object):	67 def generate_html(rule_processor):

	68 scripts = []

	69 loads = []

	70 for i, (name, (nfa, dfa)) in enumerate(list(rule_processor.automata_iter())):

	71 if name == 'Normal': continue

	72 (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)

	73 scripts.append(script_template % (nfa_i, nfa.to_dot()))

	74 scripts.append(script_template % (dfa_i, dfa.to_dot()))

	75 loads.append(load_template % ("nfa [%s]" % name, nfa_i))

	76 loads.append(load_template % ("dfa [%s]" % name, dfa_i))

	77 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))

	78 return file_template % body

68	79

69 def __init__(self, rules):	80 def generate_code(rule_processor):

70 parser_state = RuleParserState()	81 (nfa, dfa) = rule_processor.default_automata()

71 RuleParser.parse(rules, parser_state)	82 return CodeGenerator.dfa_to_code(dfa)

72 self.__automata = {}

73 self.process_rules(parser_state)

74	83

75 def generate_html(self):	84 def lex(rule_processor, string):

76 scripts = []	85 for t in rule_processor.lex(string + '\0'):

77 loads = []	86 print t

78 for i, name in enumerate(self.__automata):

79 (nfa, dfa) = self.__automata[name]

80 if name == 'Normal': continue

81 (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)

82 scripts.append(script_template % (nfa_i, nfa.to_dot()))

83 scripts.append(script_template % (dfa_i, dfa.to_dot()))

84 loads.append(load_template % ("nfa [%s]" % name, nfa_i))

85 loads.append(load_template % ("dfa [%s]" % name, dfa_i))

86 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))

87 return file_template % body

88

89 def process_rules(self, parser_state):

90 rule_map = {}

91 builder = NfaBuilder()

92 builder.set_character_classes(parser_state.character_classes)

93 assert 'default' in parser_state.rules

94 def process(k, v):

95 graphs = []

96 continues = 0

97 for (graph, (precedence, code, transition)) in v['regex']:

98 default_code = v['default_action']

99 action = code if code else default_code

100 if action:

101 graph = NfaBuilder.add_action(graph, (precedence, action))

102 if not transition or transition == 'break':

103 pass

104 elif transition == 'continue':

105 assert not k == 'default'

106 continues += 1

107 graph = NfaBuilder.add_continue(graph)

108 elif (transition == 'terminate' or

109 transition == 'terminate_illegal'):

110 assert not code

111 graph = NfaBuilder.add_action(graph, (-1, transition))

112 else:

113 assert k == 'default'

114 subgraph_modifier = '*' if code else None

115 graph = NfaBuilder.join_subgraph(

116 graph, transition, rule_map[transition], subgraph_modifier)

117 graphs.append(graph)

118 if continues == len(graphs):

119 graphs.append(NfaBuilder.epsilon())

120 if v['catch_all']:

121 assert v['catch_all'] == 'continue'

122 graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all()))

123 graph = NfaBuilder.or_graphs(graphs)

124 rule_map[k] = graph

125 # process first the subgraphs, then the default graph

126 for k, v in parser_state.rules.items():

127 if k == 'default': continue

128 process(k, v)

129 process('default', parser_state.rules['default'])

130 # build the automata

131 for rule_name, graph in rule_map.items():

132 nfa = builder.nfa(graph)

133 (start, dfa_nodes) = nfa.compute_dfa()

134 dfa = Dfa(start, dfa_nodes)

135 self.__automata[rule_name] = (nfa, dfa)

136

137 # Lexes strings with the help of DFAs procuded by the grammar. For sanity

138 # checking the automata.

139 def lex(self, string):

140 (nfa, dfa) = self.__automata['default']

141 return dfa.lex(string)

142

143 def generate_code(self):

144 (nfa, dfa) = self.__automata['default']

145 return CodeGenerator.dfa_to_code(dfa)

146	87

147 if __name__ == '__main__':	88 if __name__ == '__main__':

148	89

149 parser = argparse.ArgumentParser()	90 parser = argparse.ArgumentParser()

150 parser.add_argument('--html')	91 parser.add_argument('--html')

151 parser.add_argument('--re', default='src/lexer/lexer_py.re')	92 parser.add_argument('--re', default='src/lexer/lexer_py.re')

152 parser.add_argument('--input')	93 parser.add_argument('--input')

153 parser.add_argument('--code')	94 parser.add_argument('--code')

154 args = parser.parse_args()	95 args = parser.parse_args()

155	96

156 re_file = args.re	97 re_file = args.re

157 parser_state = RuleParserState()

158 print "parsing %s" % re_file	98 print "parsing %s" % re_file

159 with open(re_file, 'r') as f:	99 with open(re_file, 'r') as f:

160 generator = Generator(f.read())	100 rule_processor = RuleProcessor.parse(f.read())

161	101

162 html_file = args.html	102 html_file = args.html

163 if html_file:	103 if html_file:

164 html = generator.generate_html()	104 html = generate_html(rule_processor)

165 with open(args.html, 'w') as f:	105 with open(args.html, 'w') as f:

166 f.write(html)	106 f.write(html)

167 print "wrote html to %s" % html_file	107 print "wrote html to %s" % html_file

168	108

169 code_file = args.code	109 code_file = args.code

170 if code_file:	110 if code_file:

171 code = generator.generate_code()	111 code = generate_code(rule_processor)

172 with open(code_file, 'w') as f:	112 with open(code_file, 'w') as f:

173 f.write(code)	113 f.write(code)

174 print "wrote code to %s" % code_file	114 print "wrote code to %s" % code_file

175	115

176 input_file = args.input	116 input_file = args.input

177 if input_file:	117 if input_file:

178 with open(input_file, 'r') as f:	118 with open(input_file, 'r') as f:

179 input_text = f.read() + '\0'	119 lex(rule_processor, f.read())

180 for t in generator.lex(input_text):

181 print t

OLD	NEW

« no previous file with comments | « no previous file | tools/lexer_generator/lexer_test.py » ('j') | no next file with comments »