| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 12 matching lines...) Expand all Loading... |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import argparse | 28 import argparse |
| 29 from nfa import Nfa, NfaBuilder | 29 from nfa import Nfa, NfaBuilder |
| 30 from dfa import Dfa | 30 from dfa import Dfa |
| 31 from rule_parser import RuleParser, RuleParserState | 31 from rule_parser import RuleParser, RuleParserState |
| 32 | 32 |
| 33 file_template = ''' | 33 # FIXME: We need to move this to a common place! |
| 34 <html> | |
| 35 <head> | |
| 36 <script src="viz.js"></script> | |
| 37 <script> | |
| 38 function draw(name, id) { | |
| 39 code = document.getElementById(id).innerHTML | |
| 40 document.body.innerHTML += "<h1>" + name + "</h1>"; | |
| 41 try { | |
| 42 document.body.innerHTML += Viz(code, 'svg'); | |
| 43 } catch(e) { | |
| 44 document.body.innerHTML += "<h3>error</h3>"; | |
| 45 } | |
| 46 } | |
| 47 </script> | |
| 48 </head> | |
| 49 <body> | |
| 50 %s | |
| 51 </body> | |
| 52 </html>''' | |
| 53 | |
| 54 script_template = ''' <script type="text/vnd.graphviz" id="%s"> | |
| 55 %s | |
| 56 </script> | |
| 57 ''' | |
| 58 | |
| 59 load_template = ''' draw('%s', '%s');''' | |
| 60 | |
| 61 load_outer_template = ''' <script> | |
| 62 %s | |
| 63 </script>''' | |
| 64 | |
| 65 def generate_html(data): | |
| 66 scripts = [] | |
| 67 loads = [] | |
| 68 for i, (name, nfa, dfa) in enumerate(data): | |
| 69 if name == 'Normal': continue | |
| 70 (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i) | |
| 71 scripts.append(script_template % (nfa_i, nfa.to_dot())) | |
| 72 scripts.append(script_template % (dfa_i, dfa.to_dot())) | |
| 73 loads.append(load_template % ("nfa [%s]" % name, nfa_i)) | |
| 74 loads.append(load_template % ("dfa [%s]" % name, dfa_i)) | |
| 75 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads)) | |
| 76 return file_template % body | |
| 77 | |
| 78 def process_rules(parser_state): | 34 def process_rules(parser_state): |
| 79 rule_map = {} | 35 dfas = {} |
| 80 builder = NfaBuilder() | 36 builder = NfaBuilder() |
| 81 builder.set_character_classes(parser_state.character_classes) | 37 builder.set_character_classes(parser_state.character_classes) |
| 82 assert 'default' in parser_state.rules | |
| 83 for k, v in parser_state.rules.items(): | 38 for k, v in parser_state.rules.items(): |
| 84 assert 'default' in v | |
| 85 graphs = [] | 39 graphs = [] |
| 86 for (graph, action) in v['regex']: | 40 for (graph, action) in v['regex']: |
| 87 graphs.append(NfaBuilder.add_action(graph, action)) | 41 graphs.append(NfaBuilder.add_action(graph, action)) |
| 88 rule_map[k] = NfaBuilder.or_graphs(graphs) | 42 nfa = builder.nfa(NfaBuilder.or_graphs(graphs)) |
| 89 html_data = [] | 43 (start_name, dfa_nodes) = nfa.compute_dfa() |
| 90 for rule_name, graph in rule_map.items(): | 44 dfas[k] = Dfa(start_name, dfa_nodes) |
| 91 nfa = builder.nfa(graph) | 45 return dfas |
| 92 (start, dfa_nodes) = nfa.compute_dfa() | 46 |
| 93 dfa = Dfa(start, dfa_nodes) | 47 # Lexes strings with the help of DFAs produced by the grammar. For sanity |
| 94 html_data.append((rule_name, nfa, dfa)) | 48 # checking the automata. |
| 95 return html_data | 49 class Lexer(object): |
| 50 |
| 51 def __init__(self, rules): |
| 52 parser_state = RuleParserState() |
| 53 RuleParser.parse(rules, parser_state) |
| 54 self.dfas = process_rules(parser_state) |
| 55 |
| 56 def lex(self, string): |
| 57 dfa = self.dfas['default'] # FIXME |
| 58 |
| 59 action_stream = [] |
| 60 terminate_seen = False |
| 61 offset = 0 |
| 62 while not terminate_seen and string: |
| 63 result = list(dfa.lex(string)) |
| 64 last_position = 0 |
| 65 for (action, position) in result: |
| 66 action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)])) |
| 67 last_position = position |
| 68 if action[2] == 'terminate': |
| 69 terminate_seen = True |
| 70 string = string[(last_position + 1):] |
| 71 offset += last_position |
| 72 return action_stream |
| 96 | 73 |
| 97 if __name__ == '__main__': | 74 if __name__ == '__main__': |
| 98 | 75 |
| 99 parser = argparse.ArgumentParser() | 76 parser = argparse.ArgumentParser() |
| 100 parser.add_argument('--html') | 77 parser.add_argument('--rules') |
| 78 parser.add_argument('--input') |
| 101 args = parser.parse_args() | 79 args = parser.parse_args() |
| 102 | 80 |
| 103 re_file = 'src/lexer/lexer_py.re' | 81 re_file = args.rules |
| 82 input_file = args.input |
| 104 | 83 |
| 105 parser_state = RuleParserState() | |
| 106 with open(re_file, 'r') as f: | 84 with open(re_file, 'r') as f: |
| 107 RuleParser.parse(f.read(), parser_state) | 85 rules = f.read() |
| 108 html_data = process_rules(parser_state) | 86 with open(input_file, 'r') as f: |
| 87 input_text = f.read() + '\0' |
| 109 | 88 |
| 110 html_file = args.html | 89 lexer = Lexer(rules) |
| 111 if html_file: | 90 for t in lexer.lex(input_text): |
| 112 html = generate_html(html_data) | 91 print t |
| 113 with open(args.html, 'w') as f: | |
| 114 f.write(html) | |
| 115 print "wrote html to %s" % html_file | |
| OLD | NEW |