| Index: tools/lexer_generator/lexer.py
|
| diff --git a/tools/lexer_generator/generator.py b/tools/lexer_generator/lexer.py
|
| similarity index 52%
|
| copy from tools/lexer_generator/generator.py
|
| copy to tools/lexer_generator/lexer.py
|
| index 035b5a746294691dae525be36db6f8fa753c0477..1d60847f002567e9b10eec34aa4846f0e7cf681b 100644
|
| --- a/tools/lexer_generator/generator.py
|
| +++ b/tools/lexer_generator/lexer.py
|
| @@ -30,86 +30,62 @@ from nfa import Nfa, NfaBuilder
|
| from dfa import Dfa
|
| from rule_parser import RuleParser, RuleParserState
|
|
|
| -file_template = '''
|
| -<html>
|
| - <head>
|
| - <script src="viz.js"></script>
|
| - <script>
|
| - function draw(name, id) {
|
| - code = document.getElementById(id).innerHTML
|
| - document.body.innerHTML += "<h1>" + name + "</h1>";
|
| - try {
|
| - document.body.innerHTML += Viz(code, 'svg');
|
| - } catch(e) {
|
| - document.body.innerHTML += "<h3>error</h3>";
|
| - }
|
| - }
|
| - </script>
|
| - </head>
|
| - <body>
|
| -%s
|
| - </body>
|
| -</html>'''
|
| -
|
| -script_template = ''' <script type="text/vnd.graphviz" id="%s">
|
| -%s
|
| - </script>
|
| -'''
|
| -
|
| -load_template = ''' draw('%s', '%s');'''
|
| -
|
| -load_outer_template = ''' <script>
|
| -%s
|
| - </script>'''
|
| -
|
| -def generate_html(data):
|
| - scripts = []
|
| - loads = []
|
| - for i, (name, nfa, dfa) in enumerate(data):
|
| - if name == 'Normal': continue
|
| - (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
|
| - scripts.append(script_template % (nfa_i, nfa.to_dot()))
|
| - scripts.append(script_template % (dfa_i, dfa.to_dot()))
|
| - loads.append(load_template % ("nfa [%s]" % name, nfa_i))
|
| - loads.append(load_template % ("dfa [%s]" % name, dfa_i))
|
| - body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
|
| - return file_template % body
|
| -
|
| +# FIXME: We need to move this to a common place!
|
| def process_rules(parser_state):
|
| - rule_map = {}
|
| + dfas = {}
|
| builder = NfaBuilder()
|
| builder.set_character_classes(parser_state.character_classes)
|
| - assert 'default' in parser_state.rules
|
| for k, v in parser_state.rules.items():
|
| - assert 'default' in v
|
| graphs = []
|
| for (graph, action) in v['regex']:
|
| graphs.append(NfaBuilder.add_action(graph, action))
|
| - rule_map[k] = NfaBuilder.or_graphs(graphs)
|
| - html_data = []
|
| - for rule_name, graph in rule_map.items():
|
| - nfa = builder.nfa(graph)
|
| - (start, dfa_nodes) = nfa.compute_dfa()
|
| - dfa = Dfa(start, dfa_nodes)
|
| - html_data.append((rule_name, nfa, dfa))
|
| - return html_data
|
| + nfa = builder.nfa(NfaBuilder.or_graphs(graphs))
|
| + (start_name, dfa_nodes) = nfa.compute_dfa()
|
| + dfas[k] = Dfa(start_name, dfa_nodes)
|
| + return dfas
|
| +
|
| +# Lexes strings with the help of DFAs produced by the grammar. For sanity
|
| +# checking the automata.
|
| +class Lexer(object):
|
| +
|
| + def __init__(self, rules):
|
| + parser_state = RuleParserState()
|
| + RuleParser.parse(rules, parser_state)
|
| + self.dfas = process_rules(parser_state)
|
| +
|
| + def lex(self, string):
|
| + dfa = self.dfas['default'] # FIXME
|
| +
|
| + action_stream = []
|
| + terminate_seen = False
|
| + offset = 0
|
| + while not terminate_seen and string:
|
| + result = list(dfa.lex(string))
|
| + last_position = 0
|
| + for (action, position) in result:
|
| + action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)]))
|
| + last_position = position
|
| + if action[2] == 'terminate':
|
| + terminate_seen = True
|
| + string = string[(last_position + 1):]
|
| + offset += last_position
|
| + return action_stream
|
|
|
| if __name__ == '__main__':
|
|
|
| parser = argparse.ArgumentParser()
|
| - parser.add_argument('--html')
|
| + parser.add_argument('--rules')
|
| + parser.add_argument('--input')
|
| args = parser.parse_args()
|
|
|
| - re_file = 'src/lexer/lexer_py.re'
|
| + re_file = args.rules
|
| + input_file = args.input
|
|
|
| - parser_state = RuleParserState()
|
| with open(re_file, 'r') as f:
|
| - RuleParser.parse(f.read(), parser_state)
|
| - html_data = process_rules(parser_state)
|
| + rules = f.read()
|
| + with open(input_file, 'r') as f:
|
| + input_text = f.read() + '\0'
|
|
|
| - html_file = args.html
|
| - if html_file:
|
| - html = generate_html(html_data)
|
| - with open(args.html, 'w') as f:
|
| - f.write(html)
|
| - print "wrote html to %s" % html_file
|
| + lexer = Lexer(rules)
|
| + for t in lexer.lex(input_text):
|
| + print t
|
|
|