Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(92)

Side by Side Diff: tools/lexer_generator/lexer.py

Issue 66613002: Experimental lexer generator: Refactoring, merge Lexer and Generator. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/generator.py ('k') | tools/lexer_generator/lexer_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are
4 # met:
5 #
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission.
15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 import argparse
29 from nfa import Nfa, NfaBuilder
30 from dfa import Dfa
31 from rule_parser import RuleParser, RuleParserState
32
33 # FIXME: We need to move this to a common place!
def process_rules(parser_state):
    """Build one Dfa per entry of ``parser_state.rules``.

    For every rule entry, each (graph, action) pair found under its
    'regex' key is tagged with its action via ``NfaBuilder.add_action``;
    the tagged graphs are or-ed together, turned into an NFA and then
    converted with ``compute_dfa``.

    Args:
        parser_state: object exposing ``character_classes`` and a ``rules``
            mapping whose values hold a 'regex' list of (graph, action)
            pairs.

    Returns:
        A dict mapping each key of ``parser_state.rules`` to its ``Dfa``.
    """
    nfa_builder = NfaBuilder()
    nfa_builder.set_character_classes(parser_state.character_classes)

    dfa_by_name = {}
    for name, rule in parser_state.rules.items():
        tagged_graphs = [
            NfaBuilder.add_action(graph, action)
            for (graph, action) in rule['regex']
        ]
        nfa = nfa_builder.nfa(NfaBuilder.or_graphs(tagged_graphs))
        start_name, dfa_nodes = nfa.compute_dfa()
        dfa_by_name[name] = Dfa(start_name, dfa_nodes)
    return dfa_by_name
46
47 # Lexes strings with the help of DFAs produced by the grammar. For sanity
48 # checking the automata.
class Lexer(object):
    """Runs input strings through the DFAs built from a rule description.

    Meant as a sanity check of the generated automata.
    """

    def __init__(self, rules):
        """Parse ``rules`` and build the DFAs used by ``lex``.

        Args:
            rules: rule description handed to ``RuleParser.parse``.
        """
        state = RuleParserState()
        RuleParser.parse(rules, state)
        self.dfas = process_rules(state)

    def lex(self, string):
        """Lex ``string`` with the 'default' DFA and collect its actions.

        The DFA's ``lex`` is run repeatedly on the not-yet-consumed rest of
        the input until an action whose third element is 'terminate' has
        been seen or the input is used up.

        Args:
            string: the text to lex.

        Returns:
            A list of tuples
            ``(action[1], action[2], start, end, text)`` where ``start`` and
            ``end`` are the positions reported by the DFA shifted by the
            running offset, and ``text`` is the matching slice of the input
            chunk being lexed.
        """
        default_dfa = self.dfas['default']  # FIXME

        tokens = []
        remaining = string
        base = 0
        done = False
        while remaining and not done:
            previous = 0
            # Materialize the whole pass before recording any of it.
            for action, position in list(default_dfa.lex(remaining)):
                tokens.append((
                    action[1],
                    action[2],
                    previous + base,
                    position + 1 + base,
                    remaining[previous:position + 1],
                ))
                previous = position
                if action[2] == 'terminate':
                    # Remaining actions of this pass are still recorded;
                    # only the outer loop stops.
                    done = True
            # NOTE(review): the input is trimmed by previous + 1 characters
            # while the offset only advances by previous - confirm this is
            # what Dfa.lex positions require.
            remaining = remaining[previous + 1:]
            base += previous
        return tokens
73
if __name__ == '__main__':
    # Command-line driver: read a rules file and an input file, lex the
    # input with the rules and print every resulting action tuple.
    parser = argparse.ArgumentParser()
    # Both paths are mandatory; without them open() would be handed None
    # and fail with an unhelpful error instead of a usage message.
    parser.add_argument('--rules', required=True)
    parser.add_argument('--input', required=True)
    args = parser.parse_args()

    with open(args.rules, 'r') as f:
        rules = f.read()
    with open(args.input, 'r') as f:
        # A NUL character is appended to the text before it is lexed.
        input_text = f.read() + '\0'

    lexer = Lexer(rules)
    for t in lexer.lex(input_text):
        # Parenthesized form prints the same tuple under Python 2 and is
        # also valid Python 3 syntax.
        print(t)
OLDNEW
« no previous file with comments | « tools/lexer_generator/generator.py ('k') | tools/lexer_generator/lexer_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698