Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(246)

Side by Side Diff: tools/lexer_generator/lexer.py

Issue 60663007: Experimental lexer generator: First draft of a Python lexer (based on the automata). (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: . Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/lexer_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 12 matching lines...) Expand all
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 import argparse 28 import argparse
29 from nfa import Nfa, NfaBuilder 29 from nfa import Nfa, NfaBuilder
30 from dfa import Dfa 30 from dfa import Dfa
31 from rule_parser import RuleParser, RuleParserState 31 from rule_parser import RuleParser, RuleParserState
32 32
33 file_template = ''' 33 # FIXME: We need to move this to a common place!
34 <html>
35 <head>
36 <script src="viz.js"></script>
37 <script>
38 function draw(name, id) {
39 code = document.getElementById(id).innerHTML
40 document.body.innerHTML += "<h1>" + name + "</h1>";
41 try {
42 document.body.innerHTML += Viz(code, 'svg');
43 } catch(e) {
44 document.body.innerHTML += "<h3>error</h3>";
45 }
46 }
47 </script>
48 </head>
49 <body>
50 %s
51 </body>
52 </html>'''
53
54 script_template = ''' <script type="text/vnd.graphviz" id="%s">
55 %s
56 </script>
57 '''
58
59 load_template = ''' draw('%s', '%s');'''
60
61 load_outer_template = ''' <script>
62 %s
63 </script>'''
64
65 def generate_html(data):
66 scripts = []
67 loads = []
68 for i, (name, nfa, dfa) in enumerate(data):
69 if name == 'Normal': continue
70 (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
71 scripts.append(script_template % (nfa_i, nfa.to_dot()))
72 scripts.append(script_template % (dfa_i, dfa.to_dot()))
73 loads.append(load_template % ("nfa [%s]" % name, nfa_i))
74 loads.append(load_template % ("dfa [%s]" % name, dfa_i))
75 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
76 return file_template % body
77
78 def process_rules(parser_state): 34 def process_rules(parser_state):
79 rule_map = {} 35 dfas = {}
80 builder = NfaBuilder() 36 builder = NfaBuilder()
81 builder.set_character_classes(parser_state.character_classes) 37 builder.set_character_classes(parser_state.character_classes)
82 assert 'default' in parser_state.rules
83 for k, v in parser_state.rules.items(): 38 for k, v in parser_state.rules.items():
84 assert 'default' in v
85 graphs = [] 39 graphs = []
86 for (graph, action) in v['regex']: 40 for (graph, action) in v['regex']:
87 graphs.append(NfaBuilder.add_action(graph, action)) 41 graphs.append(NfaBuilder.add_action(graph, action))
88 rule_map[k] = NfaBuilder.or_graphs(graphs) 42 nfa = builder.nfa(NfaBuilder.or_graphs(graphs))
89 html_data = [] 43 (start_name, dfa_nodes) = nfa.compute_dfa()
90 for rule_name, graph in rule_map.items(): 44 dfas[k] = Dfa(start_name, dfa_nodes)
91 nfa = builder.nfa(graph) 45 return dfas
92 (start, dfa_nodes) = nfa.compute_dfa() 46
93 dfa = Dfa(start, dfa_nodes) 47 # Lexes strings with the help of DFAs produced by the grammar. For sanity
94 html_data.append((rule_name, nfa, dfa)) 48 # checking the automata.
95 return html_data 49 class Lexer(object):
50
51 def __init__(self, rules):
52 parser_state = RuleParserState()
53 RuleParser.parse(rules, parser_state)
54 self.dfas = process_rules(parser_state)
55
56 def lex(self, string):
57 dfa = self.dfas['default'] # FIXME
58
59 action_stream = []
60 terminate_seen = False
61 offset = 0
62 while not terminate_seen and string:
63 result = list(dfa.lex(string))
64 last_position = 0
65 for (action, position) in result:
66 action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)]))
67 last_position = position
68 if action[2] == 'terminate':
69 terminate_seen = True
70 string = string[(last_position + 1):]
71 offset += last_position
72 return action_stream
96 73
97 if __name__ == '__main__': 74 if __name__ == '__main__':
98 75
99 parser = argparse.ArgumentParser() 76 parser = argparse.ArgumentParser()
100 parser.add_argument('--html') 77 parser.add_argument('--rules')
78 parser.add_argument('--input')
101 args = parser.parse_args() 79 args = parser.parse_args()
102 80
103 re_file = 'src/lexer/lexer_py.re' 81 re_file = args.rules
82 input_file = args.input
104 83
105 parser_state = RuleParserState()
106 with open(re_file, 'r') as f: 84 with open(re_file, 'r') as f:
107 RuleParser.parse(f.read(), parser_state) 85 rules = f.read()
108 html_data = process_rules(parser_state) 86 with open(input_file, 'r') as f:
87 input_text = f.read() + '\0'
109 88
110 html_file = args.html 89 lexer = Lexer(rules)
111 if html_file: 90 for t in lexer.lex(input_text):
112 html = generate_html(html_data) 91 print t
113 with open(args.html, 'w') as f:
114 f.write(html)
115 print "wrote html to %s" % html_file
OLDNEW
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/lexer_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698