Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(339)

Side by Side Diff: tools/lexer_generator/regex_parser.py

Issue 137883006: Experimental parser: use Terms instead of tuples (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/nfa_builder.py ('k') | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 import ply.yacc as yacc 28 import ply.yacc as yacc
29 from types import ListType, TupleType
29 from regex_lexer import RegexLexer 30 from regex_lexer import RegexLexer
30 from types import ListType, TupleType 31 from action import Term
31 32
32 class RegexParser: 33 class RegexParser:
33 34
34 tokens = RegexLexer.tokens 35 tokens = RegexLexer.tokens
35 36
36 token_map = { 37 token_map = {
37 '+': 'ONE_OR_MORE', 38 '+': 'ONE_OR_MORE',
38 '?': 'ZERO_OR_ONE', 39 '?': 'ZERO_OR_ONE',
39 '*': 'ZERO_OR_MORE', 40 '*': 'ZERO_OR_MORE',
40 '|': 'OR', 41 '|': 'OR',
41 '.': 'ANY', 42 '.': 'ANY',
42 } 43 }
43 44
44 def p_start(self, p): 45 def p_start(self, p):
45 '''start : fragments OR fragments 46 '''start : fragments OR fragments
46 | fragments''' 47 | fragments'''
47 if len(p) == 2: 48 if len(p) == 2:
48 p[0] = p[1] 49 p[0] = p[1]
49 else: 50 else:
50 p[0] = (self.token_map[p[2]], p[1], p[3]) 51 p[0] = Term(self.token_map[p[2]], p[1], p[3])
51 52
52 def p_fragments(self, p): 53 def p_fragments(self, p):
53 '''fragments : fragment 54 '''fragments : fragment
54 | fragment fragments''' 55 | fragment fragments'''
55 if len(p) == 2: 56 if len(p) == 2:
56 p[0] = p[1] 57 p[0] = p[1]
57 else: 58 else:
58 p[0] = self.__cat(p[1], p[2]) 59 p[0] = self.__cat(p[1], p[2])
59 60
60 def p_fragment(self, p): 61 def p_fragment(self, p):
61 '''fragment : literal maybe_modifier 62 '''fragment : literal maybe_modifier
62 | class maybe_modifier 63 | class maybe_modifier
63 | group maybe_modifier 64 | group maybe_modifier
64 | any maybe_modifier 65 | any maybe_modifier
65 ''' 66 '''
66 if p[2] != None: 67 if p[2] != None:
67 if isinstance(p[2], tuple) and p[2][0] == 'REPEAT': 68 if isinstance(p[2], tuple) and p[2][0] == 'REPEAT':
68 p[0] = (p[2][0], p[2][1], p[2][2], p[1]) 69 p[0] = Term(p[2][0], p[2][1], p[2][2], p[1])
69 else: 70 else:
70 p[0] = (p[2], p[1]) 71 p[0] = Term(p[2], p[1])
71 else: 72 else:
72 p[0] = p[1] 73 p[0] = p[1]
73 74
74 def p_maybe_modifier(self, p): 75 def p_maybe_modifier(self, p):
75 '''maybe_modifier : ONE_OR_MORE 76 '''maybe_modifier : ONE_OR_MORE
76 | ZERO_OR_ONE 77 | ZERO_OR_ONE
77 | ZERO_OR_MORE 78 | ZERO_OR_MORE
78 | repetition 79 | repetition
79 | empty''' 80 | empty'''
80 p[0] = p[1] 81 p[0] = p[1]
81 if p[1] in self.token_map: 82 if p[1] in self.token_map:
82 p[0] = self.token_map[p[1]] 83 p[0] = self.token_map[p[1]]
83 84
84 def p_repetition(self, p): 85 def p_repetition(self, p):
85 '''repetition : REPEAT_BEGIN NUMBER REPEAT_END 86 '''repetition : REPEAT_BEGIN NUMBER REPEAT_END
86 | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END''' 87 | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END'''
87 if len(p) == 4: 88 if len(p) == 4:
88 p[0] = ("REPEAT", p[2], p[2]) 89 p[0] = ("REPEAT", p[2], p[2])
89 else: 90 else:
90 p[0] = ("REPEAT", p[2], p[4]) 91 p[0] = ("REPEAT", p[2], p[4])
91 92
92 def p_literal(self, p): 93 def p_literal(self, p):
93 '''literal : LITERAL''' 94 '''literal : LITERAL'''
94 p[0] = ('LITERAL', p[1]) 95 p[0] = Term('LITERAL', p[1])
95 96
96 def p_any(self, p): 97 def p_any(self, p):
97 '''any : ANY''' 98 '''any : ANY'''
98 p[0] = (self.token_map[p[1]],) 99 p[0] = Term(self.token_map[p[1]])
99 100
100 def p_class(self, p): 101 def p_class(self, p):
101 '''class : CLASS_BEGIN class_content CLASS_END 102 '''class : CLASS_BEGIN class_content CLASS_END
102 | CLASS_BEGIN NOT class_content CLASS_END''' 103 | CLASS_BEGIN NOT class_content CLASS_END'''
103 if len(p) == 4: 104 if len(p) == 4:
104 p[0] = ("CLASS", p[2]) 105 p[0] = Term("CLASS", p[2])
105 else: 106 else:
106 p[0] = ("NOT_CLASS", p[3]) 107 p[0] = Term("NOT_CLASS", p[3])
107 108
108 def p_group(self, p): 109 def p_group(self, p):
109 '''group : GROUP_BEGIN start GROUP_END''' 110 '''group : GROUP_BEGIN start GROUP_END'''
110 p[0] = p[2] 111 p[0] = p[2]
111 112
112 def p_class_content(self, p): 113 def p_class_content(self, p):
113 '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content 114 '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content
114 | CLASS_LITERAL maybe_class_content 115 | CLASS_LITERAL maybe_class_content
115 | CHARACTER_CLASS maybe_class_content 116 | CHARACTER_CLASS maybe_class_content
116 | CLASS_LITERAL_AS_OCTAL maybe_class_content 117 | CLASS_LITERAL_AS_OCTAL maybe_class_content
117 ''' 118 '''
118 if len(p) == 5: 119 if len(p) == 5:
119 left = ("RANGE", p[1], p[3]) 120 left = Term("RANGE", p[1], p[3])
120 else: 121 else:
121 if len(p[1]) == 1: 122 if len(p[1]) == 1:
122 left = ('LITERAL', p[1]) 123 left = Term('LITERAL', p[1])
123 elif p[1][0] == '\\': 124 elif p[1][0] == '\\':
124 left = ('LITERAL', chr(int(p[1][1:], 8))) 125 left = Term('LITERAL', chr(int(p[1][1:], 8)))
125 else: 126 else:
126 left = ('CHARACTER_CLASS', p[1][1:-1]) 127 left = Term('CHARACTER_CLASS', p[1][1:-1])
127 p[0] = self.__cat(left, p[len(p)-1]) 128 p[0] = self.__cat(left, p[len(p)-1])
128 129
129 def p_maybe_class_content(self, p): 130 def p_maybe_class_content(self, p):
130 '''maybe_class_content : class_content 131 '''maybe_class_content : class_content
131 | empty''' 132 | empty'''
132 p[0] = p[1] 133 p[0] = p[1]
133 134
134 def p_empty(self, p): 135 def p_empty(self, p):
135 'empty :' 136 'empty :'
136 137
137 def p_error(self, p): 138 def p_error(self, p):
138 raise Exception("Syntax error in input '%s'" % str(p)) 139 raise Exception("Syntax error in input '%s'" % str(p))
139 140
140 @staticmethod 141 @staticmethod
141 def __cat(left, right): 142 def __cat(left, right):
142 if right == None: 143 assert left
143 return left 144 return left if not right else Term('CAT', left, right)
144 return ('CAT', left, right)
145 145
146 def build(self, **kwargs): 146 def build(self, **kwargs):
147 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs) 147 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)
148 self.lexer = RegexLexer() 148 self.lexer = RegexLexer()
149 self.lexer.build(**kwargs) 149 self.lexer.build(**kwargs)
150 150
151 __static_instance = None 151 __static_instance = None
152 @staticmethod 152 @staticmethod
153 def parse(data): 153 def parse(data):
154 parser = RegexParser.__static_instance 154 parser = RegexParser.__static_instance
155 if not parser: 155 if not parser:
156 parser = RegexParser() 156 parser = RegexParser()
157 parser.build() 157 parser.build()
158 RegexParser.__static_instance = parser 158 RegexParser.__static_instance = parser
159 try: 159 try:
160 return parser.parser.parse(data, lexer=parser.lexer.lexer) 160 return parser.parser.parse(data, lexer=parser.lexer.lexer)
161 except Exception: 161 except Exception:
162 RegexParser.__static_instance = None 162 RegexParser.__static_instance = None
163 raise 163 raise
OLD | NEW
« no previous file with comments | « tools/lexer_generator/nfa_builder.py ('k') | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698