Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(46)

Side by Side Diff: tools/lexer_generator/generator.py

Issue 68683007: Parser generator: more layering refactoring (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/lexer_generator/lexer_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 11 matching lines...) Expand all
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 import argparse 28 import argparse
29 from nfa import Nfa 29 from nfa import Nfa
30 from nfa_builder import NfaBuilder 30 from nfa_builder import NfaBuilder
31 from dfa import Dfa 31 from dfa import Dfa
32 from rule_parser import RuleParser, RuleParserState 32 from rule_parser import RuleParser, RuleParserState, RuleProcessor
33 from code_generator import CodeGenerator 33 from code_generator import CodeGenerator
34 34
35 file_template = ''' 35 file_template = '''
36 <html> 36 <html>
37 <head> 37 <head>
38 <script src="viz.js"></script> 38 <script src="viz.js"></script>
39 <script> 39 <script>
40 function draw(name, id) { 40 function draw(name, id) {
41 code = document.getElementById(id).innerHTML 41 code = document.getElementById(id).innerHTML
42 document.body.innerHTML += "<h1>" + name + "</h1>"; 42 document.body.innerHTML += "<h1>" + name + "</h1>";
(...skipping 14 matching lines...) Expand all
57 %s 57 %s
58 </script> 58 </script>
59 ''' 59 '''
60 60
61 load_template = ''' draw('%s', '%s');''' 61 load_template = ''' draw('%s', '%s');'''
62 62
63 load_outer_template = ''' <script> 63 load_outer_template = ''' <script>
64 %s 64 %s
65 </script>''' 65 </script>'''
66 66
67 class Generator(object): 67 def generate_html(rule_processor):
68 scripts = []
69 loads = []
70 for i, (name, (nfa, dfa)) in enumerate(list(rule_processor.automata_iter())):
71 if name == 'Normal': continue
72 (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
73 scripts.append(script_template % (nfa_i, nfa.to_dot()))
74 scripts.append(script_template % (dfa_i, dfa.to_dot()))
75 loads.append(load_template % ("nfa [%s]" % name, nfa_i))
76 loads.append(load_template % ("dfa [%s]" % name, dfa_i))
77 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
78 return file_template % body
68 79
69 def __init__(self, rules): 80 def generate_code(rule_processor):
70 parser_state = RuleParserState() 81 (nfa, dfa) = rule_processor.default_automata()
71 RuleParser.parse(rules, parser_state) 82 return CodeGenerator.dfa_to_code(dfa)
72 self.__automata = {}
73 self.process_rules(parser_state)
74 83
75 def generate_html(self): 84 def lex(rule_processor, string):
76 scripts = [] 85 for t in rule_processor.lex(string + '\0'):
77 loads = [] 86 print t
78 for i, name in enumerate(self.__automata):
79 (nfa, dfa) = self.__automata[name]
80 if name == 'Normal': continue
81 (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
82 scripts.append(script_template % (nfa_i, nfa.to_dot()))
83 scripts.append(script_template % (dfa_i, dfa.to_dot()))
84 loads.append(load_template % ("nfa [%s]" % name, nfa_i))
85 loads.append(load_template % ("dfa [%s]" % name, dfa_i))
86 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
87 return file_template % body
88
89 def process_rules(self, parser_state):
90 rule_map = {}
91 builder = NfaBuilder()
92 builder.set_character_classes(parser_state.character_classes)
93 assert 'default' in parser_state.rules
94 def process(k, v):
95 graphs = []
96 continues = 0
97 for (graph, (precedence, code, transition)) in v['regex']:
98 default_code = v['default_action']
99 action = code if code else default_code
100 if action:
101 graph = NfaBuilder.add_action(graph, (precedence, action))
102 if not transition or transition == 'break':
103 pass
104 elif transition == 'continue':
105 assert not k == 'default'
106 continues += 1
107 graph = NfaBuilder.add_continue(graph)
108 elif (transition == 'terminate' or
109 transition == 'terminate_illegal'):
110 assert not code
111 graph = NfaBuilder.add_action(graph, (-1, transition))
112 else:
113 assert k == 'default'
114 subgraph_modifier = '*' if code else None
115 graph = NfaBuilder.join_subgraph(
116 graph, transition, rule_map[transition], subgraph_modifier)
117 graphs.append(graph)
118 if continues == len(graphs):
119 graphs.append(NfaBuilder.epsilon())
120 if v['catch_all']:
121 assert v['catch_all'] == 'continue'
122 graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all()))
123 graph = NfaBuilder.or_graphs(graphs)
124 rule_map[k] = graph
125 # process first the subgraphs, then the default graph
126 for k, v in parser_state.rules.items():
127 if k == 'default': continue
128 process(k, v)
129 process('default', parser_state.rules['default'])
130 # build the automata
131 for rule_name, graph in rule_map.items():
132 nfa = builder.nfa(graph)
133 (start, dfa_nodes) = nfa.compute_dfa()
134 dfa = Dfa(start, dfa_nodes)
135 self.__automata[rule_name] = (nfa, dfa)
136
137 # Lexes strings with the help of DFAs procuded by the grammar. For sanity
138 # checking the automata.
139 def lex(self, string):
140 (nfa, dfa) = self.__automata['default']
141 return dfa.lex(string)
142
143 def generate_code(self):
144 (nfa, dfa) = self.__automata['default']
145 return CodeGenerator.dfa_to_code(dfa)
146 87
147 if __name__ == '__main__': 88 if __name__ == '__main__':
148 89
149 parser = argparse.ArgumentParser() 90 parser = argparse.ArgumentParser()
150 parser.add_argument('--html') 91 parser.add_argument('--html')
151 parser.add_argument('--re', default='src/lexer/lexer_py.re') 92 parser.add_argument('--re', default='src/lexer/lexer_py.re')
152 parser.add_argument('--input') 93 parser.add_argument('--input')
153 parser.add_argument('--code') 94 parser.add_argument('--code')
154 args = parser.parse_args() 95 args = parser.parse_args()
155 96
156 re_file = args.re 97 re_file = args.re
157 parser_state = RuleParserState()
158 print "parsing %s" % re_file 98 print "parsing %s" % re_file
159 with open(re_file, 'r') as f: 99 with open(re_file, 'r') as f:
160 generator = Generator(f.read()) 100 rule_processor = RuleProcessor.parse(f.read())
161 101
162 html_file = args.html 102 html_file = args.html
163 if html_file: 103 if html_file:
164 html = generator.generate_html() 104 html = generate_html(rule_processor)
165 with open(args.html, 'w') as f: 105 with open(args.html, 'w') as f:
166 f.write(html) 106 f.write(html)
167 print "wrote html to %s" % html_file 107 print "wrote html to %s" % html_file
168 108
169 code_file = args.code 109 code_file = args.code
170 if code_file: 110 if code_file:
171 code = generator.generate_code() 111 code = generate_code(rule_processor)
172 with open(code_file, 'w') as f: 112 with open(code_file, 'w') as f:
173 f.write(code) 113 f.write(code)
174 print "wrote code to %s" % code_file 114 print "wrote code to %s" % code_file
175 115
176 input_file = args.input 116 input_file = args.input
177 if input_file: 117 if input_file:
178 with open(input_file, 'r') as f: 118 with open(input_file, 'r') as f:
179 input_text = f.read() + '\0' 119 lex(rule_processor, f.read())
180 for t in generator.lex(input_text):
181 print t
OLDNEW
« no previous file with comments | « no previous file | tools/lexer_generator/lexer_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698