| OLD | NEW |
| (Empty) |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | |
| 2 # Redistribution and use in source and binary forms, with or without | |
| 3 # modification, are permitted provided that the following conditions are | |
| 4 # met: | |
| 5 # | |
| 6 # * Redistributions of source code must retain the above copyright | |
| 7 # notice, this list of conditions and the following disclaimer. | |
| 8 # * Redistributions in binary form must reproduce the above | |
| 9 # copyright notice, this list of conditions and the following | |
| 10 # disclaimer in the documentation and/or other materials provided | |
| 11 # with the distribution. | |
| 12 # * Neither the name of Google Inc. nor the names of its | |
| 13 # contributors may be used to endorse or promote products derived | |
| 14 # from this software without specific prior written permission. | |
| 15 # | |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 27 | |
| 28 import argparse | |
| 29 from nfa import Nfa, NfaBuilder | |
| 30 from dfa import Dfa | |
| 31 from rule_parser import RuleParser, RuleParserState | |
| 32 | |
| 33 # FIXME: We need to move this to a common place! | |
def process_rules(parser_state):
    """Build one DFA per rule set found in the parsed grammar.

    Args:
        parser_state: parser state exposing ``character_classes`` and
            ``rules``. Each value in ``rules`` is indexed with ``'regex'``
            to obtain an iterable of ``(graph, action)`` pairs.

    Returns:
        A dict mapping each key of ``parser_state.rules`` to the ``Dfa``
        built from that rule set.
    """
    nfa_builder = NfaBuilder()
    nfa_builder.set_character_classes(parser_state.character_classes)

    dfa_by_rule = {}
    for rule_name, rule in parser_state.rules.items():
        # Attach each action to its regex graph, then OR all alternatives
        # together so a single automaton recognises the whole rule set.
        alternatives = [NfaBuilder.add_action(graph, action)
                        for (graph, action) in rule['regex']]
        rule_nfa = nfa_builder.nfa(NfaBuilder.or_graphs(alternatives))
        start_name, dfa_nodes = rule_nfa.compute_dfa()
        dfa_by_rule[rule_name] = Dfa(start_name, dfa_nodes)
    return dfa_by_rule
| 46 | |
| 47 # Lexes strings with the help of DFAs produced by the grammar. For sanity | |
| 48 # checking the automata. | |
class Lexer(object):
    """Runs input strings through the DFAs generated from a rule file."""

    def __init__(self, rules):
        """Parse the rule text and build the DFAs for every rule set.

        Args:
            rules: the grammar text handed to ``RuleParser.parse``.
        """
        state = RuleParserState()
        RuleParser.parse(rules, state)
        self.dfas = process_rules(state)

    def lex(self, string):
        """Lex ``string`` with the 'default' DFA and collect its actions.

        The DFA is run repeatedly on the remaining input until an action
        whose third element is 'terminate' is seen or the input is
        exhausted.

        Args:
            string: the text to lex.

        Returns:
            A list of tuples ``(action[1], action[2], start, end, text)``,
            one per ``(action, position)`` pair yielded by the DFA, where
            ``text`` is the slice of the current input from the previous
            position up to and including ``position``.
        """
        automaton = self.dfas['default']  # FIXME

        collected = []
        base = 0
        done = False
        while string and not done:
            start = 0
            # Materialise the DFA output before consuming it.
            for action, end in list(automaton.lex(string)):
                collected.append((
                    action[1],
                    action[2],
                    start + base,
                    end + 1 + base,
                    string[start:(end + 1)],
                ))
                start = end
                if action[2] == 'terminate':
                    done = True
            # NOTE(review): the input advances by start + 1 characters but
            # the running offset only by start; this looks inconsistent --
            # confirm against the positions Dfa.lex reports before changing.
            string = string[(start + 1):]
            base += start
        return collected
| 73 | |
if __name__ == '__main__':
    # Command-line entry point: lex the --input file with the grammar in
    # the --rules file and print every resulting action tuple.
    parser = argparse.ArgumentParser()
    # Both paths are mandatory; without them open() would fail on None
    # with an unhelpful TypeError.
    parser.add_argument('--rules', required=True,
                        help='path to the lexer rule (grammar) file')
    parser.add_argument('--input', required=True,
                        help='path to the text file to lex')
    args = parser.parse_args()

    re_file = args.rules
    input_file = args.input

    with open(re_file, 'r') as f:
        rules = f.read()
    with open(input_file, 'r') as f:
        # A NUL character is appended to the input; presumably the grammar
        # uses it as the end-of-input sentinel -- confirm against the rules.
        input_text = f.read() + '\0'

    lexer = Lexer(rules)
    for t in lexer.lex(input_text):
        # Parenthesised single-argument form prints the same text under
        # Python 2 and is valid syntax under Python 3.
        print(t)
| OLD | NEW |