| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 55 %s | 55 %s |
| 56 </script> | 56 </script> |
| 57 ''' | 57 ''' |
| 58 | 58 |
# One JS call that renders a named automaton into the page; filled with
# (display label, canvas/script id). NOTE(review): interior whitespace
# reconstructed from a diff view — confirm against the original file.
load_template = ''' draw('%s', '%s');'''

# Wraps the concatenated draw() calls in a <script> element for the page body.
load_outer_template = ''' <script>
%s
</script>'''
| 64 | 64 |
class Generator(object):
  """Builds NFA/DFA automata from lexer rule definitions.

  The rule text is parsed in __init__; process_rules() then converts each
  parsed rule set into an (nfa, dfa) pair stored in self.__automata, keyed
  by rule-set name. generate_html() renders the automata for inspection and
  lex() runs the default DFA over a string as a sanity check.
  """

  def __init__(self, rules):
    # 'rules' is the raw rule-file text; RuleParser fills parser_state.
    parser_state = RuleParserState()
    RuleParser.parse(rules, parser_state)
    self.__automata = {}
    self.process_rules(parser_state)

  def generate_html(self):
    """Returns an HTML page (string) that draws every automaton except the
    'Normal' one, as dot graphs rendered client-side."""
    scripts = []
    loads = []
    for i, name in enumerate(self.__automata):
      (nfa, dfa) = self.__automata[name]
      if name == 'Normal': continue
      # Unique element ids for this rule set's nfa/dfa scripts.
      (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
      scripts.append(script_template % (nfa_i, nfa.to_dot()))
      scripts.append(script_template % (dfa_i, dfa.to_dot()))
      loads.append(load_template % ("nfa [%s]" % name, nfa_i))
      loads.append(load_template % ("dfa [%s]" % name, dfa_i))
    body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
    return file_template % body

  def process_rules(self, parser_state):
    """Converts each parsed rule set into an (nfa, dfa) pair and stores it
    in self.__automata under the rule-set's name."""
    rule_map = {}  # rule-set name -> combined NFA graph
    builder = NfaBuilder()
    builder.set_character_classes(parser_state.character_classes)
    assert 'default' in parser_state.rules
    def process(k, v):
      # Builds the combined graph for rule set 'k' from its parsed form 'v'.
      assert 'default' in v
      graphs = []
      for (graph, action) in v['regex']:
        (precedence, code, transition) = action
        if code:
          graph = NfaBuilder.add_action(graph, (precedence, code, None))
        if transition == 'continue':
          if not v['default'][1][2] == 'continue':
            graph = NfaBuilder.add_continue(graph)
          else:
            pass # TODO null key
        elif (transition == 'break' or
              transition == 'terminate' or
              transition == 'terminate_illegal'):
          # Terminal transitions carry a fixed low-priority (10000) action.
          graph = NfaBuilder.add_action(graph, (10000, transition, None))
        else:
          # Any other transition names another rule set; only the default
          # rule set may do this, splicing in the already-built subgraph.
          assert k == 'default'
          graph = NfaBuilder.join_subgraph(graph, transition, rule_map[transition])
        graphs.append(graph)
      graph = NfaBuilder.or_graphs(graphs)
      # merge default action
      (precedence, code, transition) = v['default'][1]
      assert transition == 'continue' or transition == 'break'
      if transition == 'continue':
        assert k != 'default'
        graph = NfaBuilder.add_incoming_action(graph, (10000, k, None))
      if code:
        graph = NfaBuilder.add_incoming_action(graph, (precedence, code, None))
      rule_map[k] = graph
    # Process 'default' last so rule_map already holds every subgraph it
    # may join via named transitions above.
    for k, v in parser_state.rules.items():
      if k == 'default': continue
      process(k, v)
    process('default', parser_state.rules['default'])
    for rule_name, graph in rule_map.items():
      nfa = builder.nfa(graph)
      (start, dfa_nodes) = nfa.compute_dfa()
      dfa = Dfa(start, dfa_nodes)
      self.__automata[rule_name] = (nfa, dfa)

  # Lexes strings with the help of DFAs produced by the grammar. For sanity
  # checking the automata.
  def lex(self, string):
    (nfa, dfa) = self.__automata['default'] # FIXME

    # Each emitted tuple is (code, transition, start, end, text) with
    # start/end expressed as absolute offsets into the original string.
    action_stream = []
    terminate_seen = False
    offset = 0  # absolute offset of the current 'string' slice
    while not terminate_seen and string:
      result = list(dfa.lex(string))
      last_position = 0
      for (action, position) in result:
        action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)]))
        last_position = position
        if action[2] == 'terminate':
          terminate_seen = True
      # Restart the DFA after the last match; NOTE(review): offset grows by
      # last_position while the slice drops last_position + 1 chars — looks
      # like a potential off-by-one, confirm against dfa.lex() semantics.
      string = string[(last_position + 1):]
      offset += last_position
    return action_stream
| 125 if __name__ == '__main__': | 152 if __name__ == '__main__': |
| 126 | 153 |
| 127 parser = argparse.ArgumentParser() | 154 parser = argparse.ArgumentParser() |
| 128 parser.add_argument('--html') | 155 parser.add_argument('--html') |
| 129 parser.add_argument('--re', default='src/lexer/lexer_py.re') | 156 parser.add_argument('--re', default='src/lexer/lexer_py.re') |
| 157 parser.add_argument('--input') |
| 130 args = parser.parse_args() | 158 args = parser.parse_args() |
| 131 | 159 |
| 132 re_file = args.re | 160 re_file = args.re |
| 133 parser_state = RuleParserState() | 161 parser_state = RuleParserState() |
| 134 print "parsing %s" % re_file | 162 print "parsing %s" % re_file |
| 135 with open(re_file, 'r') as f: | 163 with open(re_file, 'r') as f: |
| 136 RuleParser.parse(f.read(), parser_state) | 164 generator = Generator(f.read()) |
| 137 html_data = process_rules(parser_state) | |
| 138 | 165 |
| 139 html_file = args.html | 166 html_file = args.html |
| 140 if html_file: | 167 if html_file: |
| 141 html = generate_html(html_data) | 168 html = generator.generate_html() |
| 142 with open(args.html, 'w') as f: | 169 with open(args.html, 'w') as f: |
| 143 f.write(html) | 170 f.write(html) |
| 144 print "wrote html to %s" % html_file | 171 print "wrote html to %s" % html_file |
| 172 |
| 173 input_file = args.input |
| 174 if input_file: |
| 175 with open(input_file, 'r') as f: |
| 176 input_text = f.read() + '\0' |
| 177 for t in generator.lex(input_text): |
| 178 print t |
| OLD | NEW |