Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(438)

Side by Side Diff: tools/lexer_generator/generator.py

Issue 66613002: Experimental lexer generator: Refactoring, merge Lexer and Generator. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/lexer_generator/lexer.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
55 %s 55 %s
56 </script> 56 </script>
57 ''' 57 '''
58 58
# JavaScript snippet that renders a single graph: filled with
# (graph title, script id) by generate_html().
load_template = ''' draw('%s', '%s');'''

# <script> wrapper around the concatenated draw() calls.
load_outer_template = ''' <script>
%s
</script>'''
64 64
class Generator(object):
  """Compiles lexer rule source into (NFA, DFA) automata.

  The automata can be rendered to an HTML visualization (generate_html) or
  used directly to lex strings (lex) as a sanity check.
  """

  def __init__(self, rules):
    # rules: the raw rule-file text; parsed here via RuleParser.
    parser_state = RuleParserState()
    RuleParser.parse(rules, parser_state)
    # Maps rule name -> (Nfa, Dfa); filled in by process_rules().
    self.__automata = {}
    self.process_rules(parser_state)

  def generate_html(self):
    """Returns an HTML page visualizing every automaton except 'Normal'.

    Emits one <script> of dot source per NFA and per DFA, plus draw() calls
    wiring each graph to its script id.
    """
    scripts = []
    loads = []
    for i, name in enumerate(self.__automata):
      (nfa, dfa) = self.__automata[name]
      # 'Normal' is skipped after i is assigned, so ids stay unique anyway.
      if name == 'Normal': continue
      (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
      scripts.append(script_template % (nfa_i, nfa.to_dot()))
      scripts.append(script_template % (dfa_i, dfa.to_dot()))
      loads.append(load_template % ("nfa [%s]" % name, nfa_i))
      loads.append(load_template % ("dfa [%s]" % name, dfa_i))
    body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
    return file_template % body

  def process_rules(self, parser_state):
    """Builds one (Nfa, Dfa) pair per rule set and stores it in __automata.

    Each rule's regex graphs are or-combined into a single NFA graph after
    merging per-regex actions and the rule's default action; the 'default'
    rule is processed last because other rules' graphs may be joined into it
    as subgraphs via their 'transition' targets.
    """
    rule_map = {}  # rule name -> combined NFA graph
    builder = NfaBuilder()
    builder.set_character_classes(parser_state.character_classes)
    assert 'default' in parser_state.rules
    def process(k, v):
      # k: rule name; v: parsed rule with 'regex' list and 'default' action.
      assert 'default' in v
      graphs = []
      for (graph, action) in v['regex']:
        (precedence, code, transition) = action
        if code:
          graph = NfaBuilder.add_action(graph, (precedence, code, None))
        if transition == 'continue':
          # Only add an explicit continue edge when the rule's default
          # action isn't already a continue.
          if not v['default'][1][2] == 'continue':
            graph = NfaBuilder.add_continue(graph)
          else:
            pass # TODO null key
        elif (transition == 'break' or
              transition == 'terminate' or
              transition == 'terminate_illegal'):
          # 10000 acts as a lowest-priority precedence for these
          # control-flow pseudo-actions.
          graph = NfaBuilder.add_action(graph, (10000, transition, None))
        else:
          # A named transition: splice the target rule's graph in as a
          # subgraph. Only the 'default' rule may do this (and rule_map
          # must already contain the target — all other rules are
          # processed first).
          assert k == 'default'
          graph = NfaBuilder.join_subgraph(graph, transition, rule_map[transition])
        graphs.append(graph)
      graph = NfaBuilder.or_graphs(graphs)
      # merge default action
      (precedence, code, transition) = v['default'][1]
      assert transition == 'continue' or transition == 'break'
      if transition == 'continue':
        assert k != 'default'
        graph = NfaBuilder.add_incoming_action(graph, (10000, k, None))
      if code:
        graph = NfaBuilder.add_incoming_action(graph, (precedence, code, None))
      rule_map[k] = graph
    for k, v in parser_state.rules.items():
      if k == 'default': continue
      process(k, v)
    process('default', parser_state.rules['default'])
    for rule_name, graph in rule_map.items():
      nfa = builder.nfa(graph)
      (start, dfa_nodes) = nfa.compute_dfa()
      dfa = Dfa(start, dfa_nodes)
      self.__automata[rule_name] = (nfa, dfa)

  # Lexes strings with the help of DFAs produced by the grammar. For sanity
  # checking the automata.
  def lex(self, string):
    """Returns a list of (code, transition, start, end, text) tuples.

    Repeatedly runs the 'default' DFA over the remaining input, restarting
    after each pass, until a 'terminate' action is seen or the input is
    consumed.
    """
    (nfa, dfa) = self.__automata['default'] # FIXME
    action_stream = []
    terminate_seen = False
    offset = 0  # absolute position of the current string slice's start
    while not terminate_seen and string:
      result = list(dfa.lex(string))
      last_position = 0
      for (action, position) in result:
        # NOTE(review): token text is string[last_position:position + 1]
        # but last_position is then set to position (not position + 1), so
        # consecutive tokens share one character — looks off by one;
        # confirm against dfa.lex's position semantics before fixing.
        action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)]))
        last_position = position
        if action[2] == 'terminate':
          terminate_seen = True
      string = string[(last_position + 1):]
      # NOTE(review): the slice above drops last_position + 1 characters but
      # offset only advances by last_position — presumably should be
      # last_position + 1; verify.
      offset += last_position
    return action_stream
125 if __name__ == '__main__': 152 if __name__ == '__main__':
126 153
127 parser = argparse.ArgumentParser() 154 parser = argparse.ArgumentParser()
128 parser.add_argument('--html') 155 parser.add_argument('--html')
129 parser.add_argument('--re', default='src/lexer/lexer_py.re') 156 parser.add_argument('--re', default='src/lexer/lexer_py.re')
157 parser.add_argument('--input')
130 args = parser.parse_args() 158 args = parser.parse_args()
131 159
132 re_file = args.re 160 re_file = args.re
133 parser_state = RuleParserState() 161 parser_state = RuleParserState()
134 print "parsing %s" % re_file 162 print "parsing %s" % re_file
135 with open(re_file, 'r') as f: 163 with open(re_file, 'r') as f:
136 RuleParser.parse(f.read(), parser_state) 164 generator = Generator(f.read())
137 html_data = process_rules(parser_state)
138 165
139 html_file = args.html 166 html_file = args.html
140 if html_file: 167 if html_file:
141 html = generate_html(html_data) 168 html = generator.generate_html()
142 with open(args.html, 'w') as f: 169 with open(args.html, 'w') as f:
143 f.write(html) 170 f.write(html)
144 print "wrote html to %s" % html_file 171 print "wrote html to %s" % html_file
172
173 input_file = args.input
174 if input_file:
175 with open(input_file, 'r') as f:
176 input_text = f.read() + '\0'
177 for t in generator.lex(input_text):
178 print t
OLDNEW
« no previous file with comments | « no previous file | tools/lexer_generator/lexer.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698