| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 83 loads.append(load_template % ("dfa [%s]" % name, dfa_i)) | 83 loads.append(load_template % ("dfa [%s]" % name, dfa_i)) |
| 84 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads)) | 84 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads)) |
| 85 return file_template % body | 85 return file_template % body |
| 86 | 86 |
| 87 def process_rules(self, parser_state): | 87 def process_rules(self, parser_state): |
| 88 rule_map = {} | 88 rule_map = {} |
| 89 builder = NfaBuilder() | 89 builder = NfaBuilder() |
| 90 builder.set_character_classes(parser_state.character_classes) | 90 builder.set_character_classes(parser_state.character_classes) |
| 91 assert 'default' in parser_state.rules | 91 assert 'default' in parser_state.rules |
| 92 def process(k, v): | 92 def process(k, v): |
| 93 assert 'default' in v | |
| 94 graphs = [] | 93 graphs = [] |
| 95 for (graph, action) in v['regex']: | 94 for (graph, (precedence, code, transition)) in v['regex']: |
| 96 (precedence, code, transition) = action | 95 default_code = v['default_action'] |
| 97 if code: | 96 action = code if code else default_code |
| 98 graph = NfaBuilder.add_action(graph, (precedence, code, None)) | 97 if action: |
| 98 graph = NfaBuilder.add_action(graph, (precedence, action)) |
| 99 if transition == 'continue': | 99 if transition == 'continue': |
| 100 if not v['default'][1][2] == 'continue': | 100 assert not k == 'default' |
| 101 graph = NfaBuilder.add_continue(graph) | 101 graph = NfaBuilder.add_continue(graph) |
| 102 else: | 102 elif transition == 'break': |
| 103 pass # TODO null key | 103 pass |
| 104 elif (transition == 'break' or | 104 elif (transition == 'terminate' or |
| 105 transition == 'terminate' or | |
| 106 transition == 'terminate_illegal'): | 105 transition == 'terminate_illegal'): |
| 107 graph = NfaBuilder.add_action(graph, (10000, transition, None)) | 106 assert not code |
| 107 graph = NfaBuilder.add_action(graph, (-1, transition)) |
| 108 else: | 108 else: |
| 109 assert k == 'default' | 109 assert k == 'default' |
| 110 graph = NfaBuilder.join_subgraph(graph, transition, rule_map[transition]) | 110 subgraph_modifier = '*' if code else None |
| 111 graph = NfaBuilder.join_subgraph( |
| 112 graph, transition, rule_map[transition], subgraph_modifier) |
| 111 graphs.append(graph) | 113 graphs.append(graph) |
| 112 graph = NfaBuilder.or_graphs(graphs) | 114 graph = NfaBuilder.or_graphs(graphs) |
| 113 # merge default action | |
| 114 (precedence, code, transition) = v['default'][1] | |
| 115 assert transition == 'continue' or transition == 'break' | |
| 116 if transition == 'continue': | |
| 117 assert k != 'default' | |
| 118 graph = NfaBuilder.add_incoming_action(graph, (10000, k, None)) | |
| 119 if code: | |
| 120 graph = NfaBuilder.add_incoming_action(graph, (precedence, code, None)) | |
| 121 rule_map[k] = graph | 115 rule_map[k] = graph |
| 116 # process first the subgraphs, then the default graph |
| 122 for k, v in parser_state.rules.items(): | 117 for k, v in parser_state.rules.items(): |
| 123 if k == 'default': continue | 118 if k == 'default': continue |
| 124 process(k, v) | 119 process(k, v) |
| 125 process('default', parser_state.rules['default']) | 120 process('default', parser_state.rules['default']) |
| 121 # build the automata |
| 126 for rule_name, graph in rule_map.items(): | 122 for rule_name, graph in rule_map.items(): |
| 127 nfa = builder.nfa(graph) | 123 nfa = builder.nfa(graph) |
| 128 (start, dfa_nodes) = nfa.compute_dfa() | 124 (start, dfa_nodes) = nfa.compute_dfa() |
| 129 dfa = Dfa(start, dfa_nodes) | 125 dfa = Dfa(start, dfa_nodes) |
| 130 self.__automata[rule_name] = (nfa, dfa) | 126 self.__automata[rule_name] = (nfa, dfa) |
| 131 | 127 |
| 132 # Lexes strings with the help of DFAs produced by the grammar. For sanity | 128 # Lexes strings with the help of DFAs produced by the grammar. For sanity |
| 133 # checking the automata. | 129 # checking the automata. |
| 134 def lex(self, string): | 130 def lex(self, string): |
| 135 (nfa, dfa) = self.__automata['default'] # FIXME | 131 (nfa, dfa) = self.__automata['default'] |
| 136 | 132 return dfa.lex(string) |
| 137 action_stream = [] | |
| 138 terminate_seen = False | |
| 139 offset = 0 | |
| 140 while not terminate_seen and string: | |
| 141 result = list(dfa.lex(string)) | |
| 142 last_position = 0 | |
| 143 for (action, position) in result: | |
| 144 action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)])) | |
| 145 last_position = position | |
| 146 if action[2] == 'terminate': | |
| 147 terminate_seen = True | |
| 148 string = string[(last_position + 1):] | |
| 149 offset += last_position | |
| 150 return action_stream | |
| 151 | 133 |
| 152 if __name__ == '__main__': | 134 if __name__ == '__main__': |
| 153 | 135 |
| 154 parser = argparse.ArgumentParser() | 136 parser = argparse.ArgumentParser() |
| 155 parser.add_argument('--html') | 137 parser.add_argument('--html') |
| 156 parser.add_argument('--re', default='src/lexer/lexer_py.re') | 138 parser.add_argument('--re', default='src/lexer/lexer_py.re') |
| 157 parser.add_argument('--input') | 139 parser.add_argument('--input') |
| 158 args = parser.parse_args() | 140 args = parser.parse_args() |
| 159 | 141 |
| 160 re_file = args.re | 142 re_file = args.re |
| 161 parser_state = RuleParserState() | 143 parser_state = RuleParserState() |
| 162 print "parsing %s" % re_file | 144 print "parsing %s" % re_file |
| 163 with open(re_file, 'r') as f: | 145 with open(re_file, 'r') as f: |
| 164 generator = Generator(f.read()) | 146 generator = Generator(f.read()) |
| 165 | 147 |
| 166 html_file = args.html | 148 html_file = args.html |
| 167 if html_file: | 149 if html_file: |
| 168 html = generator.generate_html() | 150 html = generator.generate_html() |
| 169 with open(args.html, 'w') as f: | 151 with open(args.html, 'w') as f: |
| 170 f.write(html) | 152 f.write(html) |
| 171 print "wrote html to %s" % html_file | 153 print "wrote html to %s" % html_file |
| 172 | 154 |
| 173 input_file = args.input | 155 input_file = args.input |
| 174 if input_file: | 156 if input_file: |
| 175 with open(input_file, 'r') as f: | 157 with open(input_file, 'r') as f: |
| 176 input_text = f.read() + '\0' | 158 input_text = f.read() + '\0' |
| 177 for t in generator.lex(input_text): | 159 for t in generator.lex(input_text): |
| 178 print t | 160 print t |
| OLD | NEW |