| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 13 matching lines...) Expand all Loading... |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import ply.lex as lex | 28 import ply.lex as lex |
| 29 import ply.yacc as yacc | 29 import ply.yacc as yacc |
| 30 from types import ListType, TupleType | 30 from types import ListType, TupleType |
| 31 from regex_lexer import RegexLexer | 31 from regex_lexer import RegexLexer |
| 32 from action import Term | 32 from action import Term |
| 33 | 33 |
class ParserBuilder:
  """Builds and caches ply lexer/parser pairs.

  parse() constructs the lexer and parser for a given name once, stores the
  pair in a class-level cache, and reuses the pair on subsequent calls.
  """

  class Logger(object):
    """ply logging adapter that silences everything below error level."""

    def debug(self, msg, *args, **kwargs):
      pass

    def info(self, msg, *args, **kwargs):
      pass

    def warning(self, msg, *args, **kwargs):
      pass

    def error(self, msg, *args, **kwargs):
      # Raise explicitly rather than 'assert False': asserts are stripped
      # when Python runs with -O, which would silently swallow ply errors.
      raise AssertionError("error: " + (msg % args) + "\n")

  # name -> (lexer instance, parser instance), shared by all callers.
  __static_instances = {}

  @staticmethod
  def parse(
      string, name, new_lexer, new_parser, preparse=None, postparse=None):
    """Parses *string* with the lexer/parser pair cached under *name*.

    new_lexer/new_parser are zero-argument factories, invoked only on the
    first call for a given name.  preparse/postparse, when given, are called
    with the parser instance immediately before/after parsing.  Returns
    whatever the grammar's start rule produces.
    """
    if name not in ParserBuilder.__static_instances:
      logger = ParserBuilder.Logger()
      lexer_instance = new_lexer()
      lexer_instance.lex = lex.lex(module=lexer_instance)
      instance = new_parser()
      instance.yacc = yacc.yacc(
          module=instance, debug=True, write_tables=0,
          debuglog=logger, errorlog=logger)
      ParserBuilder.__static_instances[name] = (lexer_instance, instance)
    (lexer_instance, instance) = ParserBuilder.__static_instances[name]
    if preparse:
      preparse(instance)
    try:
      return_value = instance.yacc.parse(string, lexer=lexer_instance.lex)
    except Exception:
      # Evict the cached pair so a failed parse does not leave a parser in
      # an inconsistent state for the next caller.
      del ParserBuilder.__static_instances[name]
      raise
    if postparse:
      postparse(instance)
    return return_value
| 74 |
def build_escape_map(chars):
  """Returns a dict mapping backslash-escaped characters to their values.

  Starts from the standard whitespace escapes (\\t, \\r, \\n, \\v, \\f) and
  adds an entry '\\' + c -> c for every character c in *chars*.
  """
  # Explicit loop instead of reduce(): the reduce builtin was removed from
  # the global namespace in Python 3 (it now lives in functools).
  escape_map = {
      '\\t': '\t', '\\r': '\r', '\\n': '\n', '\\v': '\v', '\\f': '\f'}
  for char in chars:
    escape_map['\\' + char] = char
  return escape_map
| 40 | 81 |
| 41 class RegexLexer: | 82 class RegexLexer: |
| 42 | 83 |
| 43 tokens = ( | 84 tokens = ( |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
  # Postfix repetition operators.
  t_ONE_OR_MORE = r'\+'
  t_ZERO_OR_MORE = r'\*'
  t_ZERO_OR_ONE = r'\?'

  # '.' in the input regex: match-any-character.
  t_ANY = r'\.'

  # Fallback: any single character not claimed by a more specific rule
  # (ply tries function rules and longer string patterns first).
  t_LITERAL = r'.'
| 95 | 136 |
  def t_CLASS_BEGIN(self, t):
    r'\['
    # '[' opens a character class; switch into the 'class' lexer state so
    # the t_class_* rules apply until the matching ']'.  self.lex is the
    # ply lexer attached externally by ParserBuilder.parse.
    self.lex.push_state('class')
    return t
| 100 | 141 |
  def t_class_CLASS_END(self, t):
    r'\]'
    # ']' closes the character class; return to the previous lexer state.
    self.lex.pop_state()
    return t
| 105 | 146 |
  # Tokens recognized only inside a character class ('class' lexer state).
  t_class_RANGE = '-'
  t_class_NOT = '\^'
  t_class_CHARACTER_CLASS = r':\w+:'  # named class, e.g. :alpha: or :digit:
| 109 | 150 |
  def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
    r'\\\d+'
    # Backslash followed by digits (e.g. \101); the token value is passed
    # through unconverted — any numeric decoding happens downstream.
    return t
| 113 | 154 |
  # Maps the escaped forms ('\^', '\[', ...) of class metacharacters to
  # their literal values, plus the standard whitespace escapes.
  __escaped_class_literals = build_escape_map("^[]-:\\")

  def t_class_ESCAPED_CLASS_LITERAL(self, t):
    r'\\.'
    # Unescape the character and retag the token as a plain CLASS_LITERAL.
    # NOTE(review): an escape outside the map raises KeyError here.
    t.type = 'CLASS_LITERAL'
    t.value = RegexLexer.__escaped_class_literals[t.value]
    return t

  # Unescaped literal characters permitted inside a class.
  t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'
| 123 | 164 |
  def t_REPEAT_BEGIN(self, t):
    r'\{'
    # '{' opens a {m,n} repetition count; switch into the 'repeat' state.
    self.lex.push_state('repeat')
    return t
| 128 | 169 |
  def t_repeat_REPEAT_END(self, t):
    r'\}'
    # '}' closes the repetition count; return to the previous lexer state.
    self.lex.pop_state()
    return t
| 133 | 174 |
  # Tokens recognized inside a {m,n} repetition ('repeat' lexer state).
  t_repeat_NUMBER = r'[0-9]+'
  t_repeat_COMMA = r','

  # ply's ANY_ prefix: applies in every lexer state — newlines are ignored.
  t_ANY_ignore = '\n'

  def t_ANY_error(self, t):
    # Any character unmatched in any state is a hard lexing error.
    raise Exception("Illegal character '%s'" % t.value[0])
| 141 | 182 |
| 142 def build(self, **kwargs): | |
| 143 self.lexer = lex.lex(module=self, **kwargs) | |
| 144 | |
| 145 class RegexParser: | 183 class RegexParser: |
| 146 | 184 |
| 147 tokens = RegexLexer.tokens | 185 tokens = RegexLexer.tokens |
| 148 | 186 |
| 149 token_map = { | 187 token_map = { |
| 150 '+': 'ONE_OR_MORE', | 188 '+': 'ONE_OR_MORE', |
| 151 '?': 'ZERO_OR_ONE', | 189 '?': 'ZERO_OR_ONE', |
| 152 '*': 'ZERO_OR_MORE', | 190 '*': 'ZERO_OR_MORE', |
| 153 '|': 'OR', | 191 '|': 'OR', |
| 154 '.': 'ANY', | 192 '.': 'ANY', |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 261 'empty :' | 299 'empty :' |
| 262 | 300 |
  def p_error(self, p):
    # ply error hook: fail loudly on any syntax error (p is the offending
    # token, or None at end of input).
    raise Exception("Syntax error in input '%s'" % str(p))
| 265 | 303 |
| 266 @staticmethod | 304 @staticmethod |
| 267 def __cat(left, right): | 305 def __cat(left, right): |
| 268 assert left | 306 assert left |
| 269 return left if not right else Term('CAT', left, right) | 307 return left if not right else Term('CAT', left, right) |
| 270 | 308 |
| 271 def build(self, **kwargs): | |
| 272 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs) | |
| 273 self.lexer = RegexLexer() | |
| 274 self.lexer.build(**kwargs) | |
| 275 | |
| 276 __static_instance = None | |
| 277 @staticmethod | 309 @staticmethod |
| 278 def parse(data): | 310 def parse(string): |
| 279 parser = RegexParser.__static_instance | 311 new_lexer = lambda: RegexLexer() |
| 280 if not parser: | 312 new_parser = lambda: RegexParser() |
| 281 parser = RegexParser() | 313 return ParserBuilder.parse(string, "RegexParser", new_lexer, new_parser) |
| 282 parser.build() | |
| 283 RegexParser.__static_instance = parser | |
| 284 try: | |
| 285 return parser.parser.parse(data, lexer=parser.lexer.lexer) | |
| 286 except Exception: | |
| 287 RegexParser.__static_instance = None | |
| 288 raise | |
| OLD | NEW |