tools/lexer_generator/regex_parser.py - Issue 137883006: Experimental parser: use Terms instead of tuples

Side by Side Diff: tools/lexer_generator/regex_parser.py

Issue 137883006: Experimental parser: use Terms instead of tuples (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

11 # with the distribution.	11 # with the distribution.

12 # * Neither the name of Google Inc. nor the names of its	12 # * Neither the name of Google Inc. nor the names of its

13 # contributors may be used to endorse or promote products derived	13 # contributors may be used to endorse or promote products derived

14 # from this software without specific prior written permission.	14 # from this software without specific prior written permission.

15 #	15 #

16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS	16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT	17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR	18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT	19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,	20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 import ply.yacc as yacc	28 import ply.yacc as yacc

	29 from types import ListType, TupleType

29 from regex_lexer import RegexLexer	30 from regex_lexer import RegexLexer

30 from types import ListType, TupleType	31 from action import Term

31	32

32 class RegexParser:	33 class RegexParser:

33	34

34 tokens = RegexLexer.tokens	35 tokens = RegexLexer.tokens

35	36

36 token_map = {	37 token_map = {

37 '+': 'ONE_OR_MORE',	38 '+': 'ONE_OR_MORE',

38 '?': 'ZERO_OR_ONE',	39 '?': 'ZERO_OR_ONE',

39 '*': 'ZERO_OR_MORE',	40 '*': 'ZERO_OR_MORE',

40 '|': 'OR',	41 '|': 'OR',

41 '.': 'ANY',	42 '.': 'ANY',

42 }	43 }

43	44

44 def p_start(self, p):	45 def p_start(self, p):

45 '''start : fragments OR fragments	46 '''start : fragments OR fragments

46 | fragments'''	47 | fragments'''

47 if len(p) == 2:	48 if len(p) == 2:

48 p[0] = p[1]	49 p[0] = p[1]

49 else:	50 else:

50 p[0] = (self.token_map[p[2]], p[1], p[3])	51 p[0] = Term(self.token_map[p[2]], p[1], p[3])

51	52

52 def p_fragments(self, p):	53 def p_fragments(self, p):

53 '''fragments : fragment	54 '''fragments : fragment

54 | fragment fragments'''	55 | fragment fragments'''

55 if len(p) == 2:	56 if len(p) == 2:

56 p[0] = p[1]	57 p[0] = p[1]

57 else:	58 else:

58 p[0] = self.__cat(p[1], p[2])	59 p[0] = self.__cat(p[1], p[2])

59	60

60 def p_fragment(self, p):	61 def p_fragment(self, p):

61 '''fragment : literal maybe_modifier	62 '''fragment : literal maybe_modifier

62 | class maybe_modifier	63 | class maybe_modifier

63 | group maybe_modifier	64 | group maybe_modifier

64 | any maybe_modifier	65 | any maybe_modifier

65 '''	66 '''

66 if p[2] != None:	67 if p[2] != None:

67 if isinstance(p[2], tuple) and p[2][0] == 'REPEAT':	68 if isinstance(p[2], tuple) and p[2][0] == 'REPEAT':

68 p[0] = (p[2][0], p[2][1], p[2][2], p[1])	69 p[0] = Term(p[2][0], p[2][1], p[2][2], p[1])

69 else:	70 else:

70 p[0] = (p[2], p[1])	71 p[0] = Term(p[2], p[1])

71 else:	72 else:

72 p[0] = p[1]	73 p[0] = p[1]

73	74

74 def p_maybe_modifier(self, p):	75 def p_maybe_modifier(self, p):

75 '''maybe_modifier : ONE_OR_MORE	76 '''maybe_modifier : ONE_OR_MORE

76 | ZERO_OR_ONE	77 | ZERO_OR_ONE

77 | ZERO_OR_MORE	78 | ZERO_OR_MORE

78 | repetition	79 | repetition

79 | empty'''	80 | empty'''

80 p[0] = p[1]	81 p[0] = p[1]

81 if p[1] in self.token_map:	82 if p[1] in self.token_map:

82 p[0] = self.token_map[p[1]]	83 p[0] = self.token_map[p[1]]

83	84

84 def p_repetition(self, p):	85 def p_repetition(self, p):

85 '''repetition : REPEAT_BEGIN NUMBER REPEAT_END	86 '''repetition : REPEAT_BEGIN NUMBER REPEAT_END

86 | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END'''	87 | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END'''

87 if len(p) == 4:	88 if len(p) == 4:

88 p[0] = ("REPEAT", p[2], p[2])	89 p[0] = ("REPEAT", p[2], p[2])

89 else:	90 else:

90 p[0] = ("REPEAT", p[2], p[4])	91 p[0] = ("REPEAT", p[2], p[4])

91	92

92 def p_literal(self, p):	93 def p_literal(self, p):

93 '''literal : LITERAL'''	94 '''literal : LITERAL'''

94 p[0] = ('LITERAL', p[1])	95 p[0] = Term('LITERAL', p[1])

95	96

96 def p_any(self, p):	97 def p_any(self, p):

97 '''any : ANY'''	98 '''any : ANY'''

98 p[0] = (self.token_map[p[1]],)	99 p[0] = Term(self.token_map[p[1]])

99	100

100 def p_class(self, p):	101 def p_class(self, p):

101 '''class : CLASS_BEGIN class_content CLASS_END	102 '''class : CLASS_BEGIN class_content CLASS_END

102 | CLASS_BEGIN NOT class_content CLASS_END'''	103 | CLASS_BEGIN NOT class_content CLASS_END'''

103 if len(p) == 4:	104 if len(p) == 4:

104 p[0] = ("CLASS", p[2])	105 p[0] = Term("CLASS", p[2])

105 else:	106 else:

106 p[0] = ("NOT_CLASS", p[3])	107 p[0] = Term("NOT_CLASS", p[3])

107	108

108 def p_group(self, p):	109 def p_group(self, p):

109 '''group : GROUP_BEGIN start GROUP_END'''	110 '''group : GROUP_BEGIN start GROUP_END'''

110 p[0] = p[2]	111 p[0] = p[2]

111	112

112 def p_class_content(self, p):	113 def p_class_content(self, p):

113 '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content	114 '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content

114 | CLASS_LITERAL maybe_class_content	115 | CLASS_LITERAL maybe_class_content

115 | CHARACTER_CLASS maybe_class_content	116 | CHARACTER_CLASS maybe_class_content

116 | CLASS_LITERAL_AS_OCTAL maybe_class_content	117 | CLASS_LITERAL_AS_OCTAL maybe_class_content

117 '''	118 '''

118 if len(p) == 5:	119 if len(p) == 5:

119 left = ("RANGE", p[1], p[3])	120 left = Term("RANGE", p[1], p[3])

120 else:	121 else:

121 if len(p[1]) == 1:	122 if len(p[1]) == 1:

122 left = ('LITERAL', p[1])	123 left = Term('LITERAL', p[1])

123 elif p[1][0] == '\\':	124 elif p[1][0] == '\\':

124 left = ('LITERAL', chr(int(p[1][1:], 8)))	125 left = Term('LITERAL', chr(int(p[1][1:], 8)))

125 else:	126 else:

126 left = ('CHARACTER_CLASS', p[1][1:-1])	127 left = Term('CHARACTER_CLASS', p[1][1:-1])

127 p[0] = self.__cat(left, p[len(p)-1])	128 p[0] = self.__cat(left, p[len(p)-1])

128	129

129 def p_maybe_class_content(self, p):	130 def p_maybe_class_content(self, p):

130 '''maybe_class_content : class_content	131 '''maybe_class_content : class_content

131 | empty'''	132 | empty'''

132 p[0] = p[1]	133 p[0] = p[1]

133	134

134 def p_empty(self, p):	135 def p_empty(self, p):

135 'empty :'	136 'empty :'

136	137

137 def p_error(self, p):	138 def p_error(self, p):

138 raise Exception("Syntax error in input '%s'" % str(p))	139 raise Exception("Syntax error in input '%s'" % str(p))

139	140

140 @staticmethod	141 @staticmethod

141 def __cat(left, right):	142 def __cat(left, right):

142 if right == None:	143 assert left

143 return left	144 return left if not right else Term('CAT', left, right)

144 return ('CAT', left, right)

145	145

146 def build(self, **kwargs):	146 def build(self, **kwargs):

147 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)	147 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)

148 self.lexer = RegexLexer()	148 self.lexer = RegexLexer()

149 self.lexer.build(**kwargs)	149 self.lexer.build(**kwargs)

150	150

151 __static_instance = None	151 __static_instance = None

152 @staticmethod	152 @staticmethod

153 def parse(data):	153 def parse(data):

154 parser = RegexParser.__static_instance	154 parser = RegexParser.__static_instance

155 if not parser:	155 if not parser:

156 parser = RegexParser()	156 parser = RegexParser()

157 parser.build()	157 parser.build()

158 RegexParser.__static_instance = parser	158 RegexParser.__static_instance = parser

159 try:	159 try:

160 return parser.parser.parse(data, lexer=parser.lexer.lexer)	160 return parser.parser.parse(data, lexer=parser.lexer.lexer)

161 except Exception:	161 except Exception:

162 RegexParser.__static_instance = None	162 RegexParser.__static_instance = None

163 raise	163 raise

OLD	NEW

« no previous file with comments | « tools/lexer_generator/nfa_builder.py ('k') | tools/lexer_generator/rule_parser.py » ('j') | no next file with comments »