Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1515)

Unified Diff: tools/lexer_generator/lexer.py

Issue 60663007: Experimental lexer generator: First draft of a Python lexer (based on the automata). (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: . Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/lexer_test.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/lexer.py
diff --git a/tools/lexer_generator/generator.py b/tools/lexer_generator/lexer.py
similarity index 52%
copy from tools/lexer_generator/generator.py
copy to tools/lexer_generator/lexer.py
index 035b5a746294691dae525be36db6f8fa753c0477..1d60847f002567e9b10eec34aa4846f0e7cf681b 100644
--- a/tools/lexer_generator/generator.py
+++ b/tools/lexer_generator/lexer.py
@@ -30,86 +30,62 @@ from nfa import Nfa, NfaBuilder
from dfa import Dfa
from rule_parser import RuleParser, RuleParserState
-file_template = '''
-<html>
- <head>
- <script src="viz.js"></script>
- <script>
- function draw(name, id) {
- code = document.getElementById(id).innerHTML
- document.body.innerHTML += "<h1>" + name + "</h1>";
- try {
- document.body.innerHTML += Viz(code, 'svg');
- } catch(e) {
- document.body.innerHTML += "<h3>error</h3>";
- }
- }
- </script>
- </head>
- <body>
-%s
- </body>
-</html>'''
-
-script_template = ''' <script type="text/vnd.graphviz" id="%s">
-%s
- </script>
-'''
-
-load_template = ''' draw('%s', '%s');'''
-
-load_outer_template = ''' <script>
-%s
- </script>'''
-
-def generate_html(data):
- scripts = []
- loads = []
- for i, (name, nfa, dfa) in enumerate(data):
- if name == 'Normal': continue
- (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
- scripts.append(script_template % (nfa_i, nfa.to_dot()))
- scripts.append(script_template % (dfa_i, dfa.to_dot()))
- loads.append(load_template % ("nfa [%s]" % name, nfa_i))
- loads.append(load_template % ("dfa [%s]" % name, dfa_i))
- body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
- return file_template % body
-
+# FIXME: We need to move this to a common place!
def process_rules(parser_state):
- rule_map = {}
+ dfas = {}
builder = NfaBuilder()
builder.set_character_classes(parser_state.character_classes)
- assert 'default' in parser_state.rules
for k, v in parser_state.rules.items():
- assert 'default' in v
graphs = []
for (graph, action) in v['regex']:
graphs.append(NfaBuilder.add_action(graph, action))
- rule_map[k] = NfaBuilder.or_graphs(graphs)
- html_data = []
- for rule_name, graph in rule_map.items():
- nfa = builder.nfa(graph)
- (start, dfa_nodes) = nfa.compute_dfa()
- dfa = Dfa(start, dfa_nodes)
- html_data.append((rule_name, nfa, dfa))
- return html_data
+ nfa = builder.nfa(NfaBuilder.or_graphs(graphs))
+ (start_name, dfa_nodes) = nfa.compute_dfa()
+ dfas[k] = Dfa(start_name, dfa_nodes)
+ return dfas
+
+# Lexes strings with the help of DFAs produced by the grammar. For sanity
+# checking the automata.
+class Lexer(object):
+
+ def __init__(self, rules):
+ parser_state = RuleParserState()
+ RuleParser.parse(rules, parser_state)
+ self.dfas = process_rules(parser_state)
+
+ def lex(self, string):
+ dfa = self.dfas['default'] # FIXME
+
+ action_stream = []
+ terminate_seen = False
+ offset = 0
+ while not terminate_seen and string:
+ result = list(dfa.lex(string))
+ last_position = 0
+ for (action, position) in result:
+ action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)]))
+ last_position = position
+ if action[2] == 'terminate':
+ terminate_seen = True
+ string = string[(last_position + 1):]
+ offset += last_position
+ return action_stream
if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument('--html')
+ parser.add_argument('--rules')
+ parser.add_argument('--input')
args = parser.parse_args()
- re_file = 'src/lexer/lexer_py.re'
+ re_file = args.rules
+ input_file = args.input
- parser_state = RuleParserState()
with open(re_file, 'r') as f:
- RuleParser.parse(f.read(), parser_state)
- html_data = process_rules(parser_state)
+ rules = f.read()
+ with open(input_file, 'r') as f:
+ input_text = f.read() + '\0'
- html_file = args.html
- if html_file:
- html = generate_html(html_data)
- with open(args.html, 'w') as f:
- f.write(html)
- print "wrote html to %s" % html_file
+ lexer = Lexer(rules)
+ for t in lexer.lex(input_text):
+ print t
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/lexer_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698