Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(532)

Side by Side Diff: tools/lexer_generator/regex_parser.py

Issue 149113010: Experimental parser: unify parser construction (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 13 matching lines...) Expand all
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 import ply.lex as lex 28 import ply.lex as lex
29 import ply.yacc as yacc 29 import ply.yacc as yacc
30 from types import ListType, TupleType 30 from types import ListType, TupleType
31 from regex_lexer import RegexLexer 31 from regex_lexer import RegexLexer
32 from action import Term 32 from action import Term
33 33
class ParserBuilder:
  """Caches (lexer, parser) pairs by name and drives ply parsing runs.

  Building a ply lexer/parser is expensive, so built instances are memoized
  in __static_instances keyed by `name` and rebuilt only after a parse that
  raised (a failed parse may leave ply state unusable).
  """

  class Logger(object):
    """Silent logger handed to ply; only hard errors abort."""

    def debug(self, msg, *args, **kwargs):
      pass

    def info(self, msg, *args, **kwargs):
      pass

    def warning(self, msg, *args, **kwargs):
      pass

    def error(self, msg, *args, **kwargs):
      # Raise explicitly rather than `assert False` so the failure survives
      # `python -O` (asserts are stripped under optimization). AssertionError
      # keeps the exception type the original assert produced.
      raise AssertionError("error: " + (msg % args) + "\n")

  # name -> (lexer_instance, parser_instance), shared across all callers.
  __static_instances = {}

  @staticmethod
  def parse(
      string, name, new_lexer, new_parser, preparse = None, postparse = None):
    """Parse `string` with the cached parser registered under `name`.

    new_lexer/new_parser are zero-argument factories used on first call (or
    after a failed parse) to build fresh lexer/parser objects for ply.
    preparse/postparse, when given, are called with the parser instance
    before and after the run. Returns whatever the grammar's start rule
    produces; re-raises any parse exception after evicting the cache entry.
    """
    if name not in ParserBuilder.__static_instances:
      logger = ParserBuilder.Logger()
      lexer_instance = new_lexer()
      lexer_instance.lex = lex.lex(module=lexer_instance)
      instance = new_parser()
      instance.yacc = yacc.yacc(
          module=instance, debug=True, write_tables=0,
          debuglog=logger, errorlog=logger)
      ParserBuilder.__static_instances[name] = (lexer_instance, instance)
    (lexer_instance, instance) = ParserBuilder.__static_instances[name]
    if preparse:
      preparse(instance)
    try:
      return_value = instance.yacc.parse(string, lexer=lexer_instance.lex)
    except Exception:
      # Evict the cached pair so the next call rebuilds from scratch.
      del ParserBuilder.__static_instances[name]
      raise
    if postparse:
      postparse(instance)
    return return_value
def build_escape_map(chars):
  """Return a dict mapping two-character escape strings to their characters.

  Seeds the map with the standard whitespace escapes ('\\t', '\\r', '\\n',
  '\\v', '\\f'), then adds an identity escape '\\' + c -> c for every c in
  `chars`. A plain loop replaces the original `reduce` with a mutating
  accumulator: clearer, and independent of the Py2-only `reduce` builtin.
  """
  escapes = {'\\t': '\t', '\\r': '\r', '\\n': '\n', '\\v': '\v', '\\f': '\f'}
  for char in chars:
    escapes['\\' + char] = char
  return escapes
40 81
41 class RegexLexer: 82 class RegexLexer:
42 83
43 tokens = ( 84 tokens = (
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
88 t_ONE_OR_MORE = r'\+' 129 t_ONE_OR_MORE = r'\+'
89 t_ZERO_OR_MORE = r'\*' 130 t_ZERO_OR_MORE = r'\*'
90 t_ZERO_OR_ONE = r'\?' 131 t_ZERO_OR_ONE = r'\?'
91 132
92 t_ANY = r'\.' 133 t_ANY = r'\.'
93 134
94 t_LITERAL = r'.' 135 t_LITERAL = r'.'
95 136
  def t_CLASS_BEGIN(self, t):
    # The raw string below is ply's token regex, not documentation:
    # a literal '[' opens a character class.
    r'\['
    # Enter the exclusive 'class' lexer state so the t_class_* rules apply
    # until the matching ']' pops us back out.
    self.lex.push_state('class')
    return t
100 141
  def t_class_CLASS_END(self, t):
    # ply token regex: a literal ']' ends the character class.
    r'\]'
    # Restore the previous lexer state (the one active before '[').
    self.lex.pop_state()
    return t
105 146
106 t_class_RANGE = '-' 147 t_class_RANGE = '-'
107 t_class_NOT = '\^' 148 t_class_NOT = '\^'
108 t_class_CHARACTER_CLASS = r':\w+:' 149 t_class_CHARACTER_CLASS = r':\w+:'
109 150
  def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
    # ply token regex: backslash followed by digits, e.g. '\101'.
    r'\\\d+'
    # NOTE(review): the value is passed through raw here; presumably the
    # parser decodes the octal escape — not visible in this file chunk.
    return t
113 154
114 __escaped_class_literals = build_escape_map("^[]-:\\") 155 __escaped_class_literals = build_escape_map("^[]-:\\")
115 156
  def t_class_ESCAPED_CLASS_LITERAL(self, t):
    # ply token regex: backslash followed by any single character.
    r'\\.'
    # Re-tag the token as a plain CLASS_LITERAL with the escape resolved
    # through the class-level escape map (unknown escapes raise KeyError).
    t.type = 'CLASS_LITERAL'
    t.value = RegexLexer.__escaped_class_literals[t.value]
    return t
121 162
122 t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]' 163 t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'
123 164
  def t_REPEAT_BEGIN(self, t):
    # ply token regex: '{' opens a repetition count such as {2,5}.
    r'\{'
    # Enter the 'repeat' lexer state where NUMBER and COMMA rules apply.
    self.lex.push_state('repeat')
    return t
128 169
  def t_repeat_REPEAT_END(self, t):
    # ply token regex: '}' closes the repetition count.
    r'\}'
    # Leave the 'repeat' state and resume normal tokenizing.
    self.lex.pop_state()
    return t
133 174
134 t_repeat_NUMBER = r'[0-9]+' 175 t_repeat_NUMBER = r'[0-9]+'
135 t_repeat_COMMA = r',' 176 t_repeat_COMMA = r','
136 177
137 t_ANY_ignore = '\n' 178 t_ANY_ignore = '\n'
138 179
  def t_ANY_error(self, t):
    # ply error hook, active in every lexer state ('ANY'): abort the whole
    # lex run on the first character no rule matches.
    raise Exception("Illegal character '%s'" % t.value[0])
141 182
  def build(self, **kwargs):
    # Construct the ply lexer from this object's t_* rules.
    # (Present only in the old column of this review; the new revision
    # moves lexer construction into ParserBuilder.)
    self.lexer = lex.lex(module=self, **kwargs)
145 class RegexParser: 183 class RegexParser:
146 184
147 tokens = RegexLexer.tokens 185 tokens = RegexLexer.tokens
148 186
149 token_map = { 187 token_map = {
150 '+': 'ONE_OR_MORE', 188 '+': 'ONE_OR_MORE',
151 '?': 'ZERO_OR_ONE', 189 '?': 'ZERO_OR_ONE',
152 '*': 'ZERO_OR_MORE', 190 '*': 'ZERO_OR_MORE',
153 '|': 'OR', 191 '|': 'OR',
154 '.': 'ANY', 192 '.': 'ANY',
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
261 'empty :' 299 'empty :'
262 300
  def p_error(self, p):
    # ply syntax-error hook: fail hard instead of attempting recovery;
    # `p` is the offending token, or None at unexpected end of input.
    raise Exception("Syntax error in input '%s'" % str(p))
265 303
266 @staticmethod 304 @staticmethod
267 def __cat(left, right): 305 def __cat(left, right):
268 assert left 306 assert left
269 return left if not right else Term('CAT', left, right) 307 return left if not right else Term('CAT', left, right)
270 308
271 def build(self, **kwargs):
272 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)
273 self.lexer = RegexLexer()
274 self.lexer.build(**kwargs)
275
276 __static_instance = None
277 @staticmethod 309 @staticmethod
278 def parse(data): 310 def parse(string):
279 parser = RegexParser.__static_instance 311 new_lexer = lambda: RegexLexer()
280 if not parser: 312 new_parser = lambda: RegexParser()
281 parser = RegexParser() 313 return ParserBuilder.parse(string, "RegexParser", new_lexer, new_parser)
282 parser.build()
283 RegexParser.__static_instance = parser
284 try:
285 return parser.parser.parse(data, lexer=parser.lexer.lexer)
286 except Exception:
287 RegexParser.__static_instance = None
288 raise
OLDNEW
« no previous file with comments | « no previous file | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698