| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 11 matching lines...) Expand all Loading... |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import argparse | 28 import argparse |
| 29 from nfa import Nfa | 29 from nfa import Nfa |
| 30 from nfa_builder import NfaBuilder | 30 from nfa_builder import NfaBuilder |
| 31 from dfa import Dfa | 31 from dfa import Dfa |
| 32 from rule_parser import RuleParser, RuleParserState | 32 from rule_parser import RuleParser, RuleParserState, RuleProcessor |
| 33 from code_generator import CodeGenerator | 33 from code_generator import CodeGenerator |
| 34 | 34 |
| 35 file_template = ''' | 35 file_template = ''' |
| 36 <html> | 36 <html> |
| 37 <head> | 37 <head> |
| 38 <script src="viz.js"></script> | 38 <script src="viz.js"></script> |
| 39 <script> | 39 <script> |
| 40 function draw(name, id) { | 40 function draw(name, id) { |
| 41 code = document.getElementById(id).innerHTML | 41 code = document.getElementById(id).innerHTML |
| 42 document.body.innerHTML += "<h1>" + name + "</h1>"; | 42 document.body.innerHTML += "<h1>" + name + "</h1>"; |
| (...skipping 14 matching lines...) Expand all Loading... |
| 57 %s | 57 %s |
| 58 </script> | 58 </script> |
| 59 ''' | 59 ''' |
| 60 | 60 |
| 61 load_template = ''' draw('%s', '%s');''' | 61 load_template = ''' draw('%s', '%s');''' |
| 62 | 62 |
| 63 load_outer_template = ''' <script> | 63 load_outer_template = ''' <script> |
| 64 %s | 64 %s |
| 65 </script>''' | 65 </script>''' |
| 66 | 66 |
| 67 class Generator(object): | 67 def generate_html(rule_processor): |
| 68 scripts = [] |
| 69 loads = [] |
| 70 for i, (name, (nfa, dfa)) in enumerate(list(rule_processor.automata_iter())): |
| 71 if name == 'Normal': continue |
| 72 (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i) |
| 73 scripts.append(script_template % (nfa_i, nfa.to_dot())) |
| 74 scripts.append(script_template % (dfa_i, dfa.to_dot())) |
| 75 loads.append(load_template % ("nfa [%s]" % name, nfa_i)) |
| 76 loads.append(load_template % ("dfa [%s]" % name, dfa_i)) |
| 77 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads)) |
| 78 return file_template % body |
| 68 | 79 |
| 69 def __init__(self, rules): | 80 def generate_code(rule_processor): |
| 70 parser_state = RuleParserState() | 81 (nfa, dfa) = rule_processor.default_automata() |
| 71 RuleParser.parse(rules, parser_state) | 82 return CodeGenerator.dfa_to_code(dfa) |
| 72 self.__automata = {} | |
| 73 self.process_rules(parser_state) | |
| 74 | 83 |
| 75 def generate_html(self): | 84 def lex(rule_processor, string): |
| 76 scripts = [] | 85 for t in rule_processor.lex(string + '\0'): |
| 77 loads = [] | 86 print t |
| 78 for i, name in enumerate(self.__automata): | |
| 79 (nfa, dfa) = self.__automata[name] | |
| 80 if name == 'Normal': continue | |
| 81 (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i) | |
| 82 scripts.append(script_template % (nfa_i, nfa.to_dot())) | |
| 83 scripts.append(script_template % (dfa_i, dfa.to_dot())) | |
| 84 loads.append(load_template % ("nfa [%s]" % name, nfa_i)) | |
| 85 loads.append(load_template % ("dfa [%s]" % name, dfa_i)) | |
| 86 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads)) | |
| 87 return file_template % body | |
| 88 | |
| 89 def process_rules(self, parser_state): | |
| 90 rule_map = {} | |
| 91 builder = NfaBuilder() | |
| 92 builder.set_character_classes(parser_state.character_classes) | |
| 93 assert 'default' in parser_state.rules | |
| 94 def process(k, v): | |
| 95 graphs = [] | |
| 96 continues = 0 | |
| 97 for (graph, (precedence, code, transition)) in v['regex']: | |
| 98 default_code = v['default_action'] | |
| 99 action = code if code else default_code | |
| 100 if action: | |
| 101 graph = NfaBuilder.add_action(graph, (precedence, action)) | |
| 102 if not transition or transition == 'break': | |
| 103 pass | |
| 104 elif transition == 'continue': | |
| 105 assert not k == 'default' | |
| 106 continues += 1 | |
| 107 graph = NfaBuilder.add_continue(graph) | |
| 108 elif (transition == 'terminate' or | |
| 109 transition == 'terminate_illegal'): | |
| 110 assert not code | |
| 111 graph = NfaBuilder.add_action(graph, (-1, transition)) | |
| 112 else: | |
| 113 assert k == 'default' | |
| 114 subgraph_modifier = '*' if code else None | |
| 115 graph = NfaBuilder.join_subgraph( | |
| 116 graph, transition, rule_map[transition], subgraph_modifier) | |
| 117 graphs.append(graph) | |
| 118 if continues == len(graphs): | |
| 119 graphs.append(NfaBuilder.epsilon()) | |
| 120 if v['catch_all']: | |
| 121 assert v['catch_all'] == 'continue' | |
| 122 graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all())) | |
| 123 graph = NfaBuilder.or_graphs(graphs) | |
| 124 rule_map[k] = graph | |
| 125 # process first the subgraphs, then the default graph | |
| 126 for k, v in parser_state.rules.items(): | |
| 127 if k == 'default': continue | |
| 128 process(k, v) | |
| 129 process('default', parser_state.rules['default']) | |
| 130 # build the automata | |
| 131 for rule_name, graph in rule_map.items(): | |
| 132 nfa = builder.nfa(graph) | |
| 133 (start, dfa_nodes) = nfa.compute_dfa() | |
| 134 dfa = Dfa(start, dfa_nodes) | |
| 135 self.__automata[rule_name] = (nfa, dfa) | |
| 136 | |
| 137 # Lexes strings with the help of DFAs procuded by the grammar. For sanity | |
| 138 # checking the automata. | |
| 139 def lex(self, string): | |
| 140 (nfa, dfa) = self.__automata['default'] | |
| 141 return dfa.lex(string) | |
| 142 | |
| 143 def generate_code(self): | |
| 144 (nfa, dfa) = self.__automata['default'] | |
| 145 return CodeGenerator.dfa_to_code(dfa) | |
| 146 | 87 |
| 147 if __name__ == '__main__': | 88 if __name__ == '__main__': |
| 148 | 89 |
| 149 parser = argparse.ArgumentParser() | 90 parser = argparse.ArgumentParser() |
| 150 parser.add_argument('--html') | 91 parser.add_argument('--html') |
| 151 parser.add_argument('--re', default='src/lexer/lexer_py.re') | 92 parser.add_argument('--re', default='src/lexer/lexer_py.re') |
| 152 parser.add_argument('--input') | 93 parser.add_argument('--input') |
| 153 parser.add_argument('--code') | 94 parser.add_argument('--code') |
| 154 args = parser.parse_args() | 95 args = parser.parse_args() |
| 155 | 96 |
| 156 re_file = args.re | 97 re_file = args.re |
| 157 parser_state = RuleParserState() | |
| 158 print "parsing %s" % re_file | 98 print "parsing %s" % re_file |
| 159 with open(re_file, 'r') as f: | 99 with open(re_file, 'r') as f: |
| 160 generator = Generator(f.read()) | 100 rule_processor = RuleProcessor.parse(f.read()) |
| 161 | 101 |
| 162 html_file = args.html | 102 html_file = args.html |
| 163 if html_file: | 103 if html_file: |
| 164 html = generator.generate_html() | 104 html = generate_html(rule_processor) |
| 165 with open(args.html, 'w') as f: | 105 with open(args.html, 'w') as f: |
| 166 f.write(html) | 106 f.write(html) |
| 167 print "wrote html to %s" % html_file | 107 print "wrote html to %s" % html_file |
| 168 | 108 |
| 169 code_file = args.code | 109 code_file = args.code |
| 170 if code_file: | 110 if code_file: |
| 171 code = generator.generate_code() | 111 code = generate_code(rule_processor) |
| 172 with open(code_file, 'w') as f: | 112 with open(code_file, 'w') as f: |
| 173 f.write(code) | 113 f.write(code) |
| 174 print "wrote code to %s" % code_file | 114 print "wrote code to %s" % code_file |
| 175 | 115 |
| 176 input_file = args.input | 116 input_file = args.input |
| 177 if input_file: | 117 if input_file: |
| 178 with open(input_file, 'r') as f: | 118 with open(input_file, 'r') as f: |
| 179 input_text = f.read() + '\0' | 119 lex(rule_processor, f.read()) |
| 180 for t in generator.lex(input_text): | |
| 181 print t | |
| OLD | NEW |