tools/lexer_generator/rule_parser.py - Issue 59033005: Experimental parser: build regex parse trees for all rules

Side by Side Diff: tools/lexer_generator/rule_parser.py

Issue 59033005: Experimental parser: build regex parse trees for all rules (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 10 matching lines...) Expand all Loading...
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 import ply.yacc as yacc	28 import ply.yacc as yacc

29 from rule_lexer import RuleLexer	29 from rule_lexer import RuleLexer

30 from regex_parser import RegexParser	30 from regex_parser import RegexParser

	31 from nfa import NfaBuilder

31	32

32 class RuleParser:	33 class RuleParser:

33	34

34 tokens = RuleLexer.tokens	35 tokens = RuleLexer.tokens

35	36

36 def __init__(self):	37 def __init__(self):

37 self.aliases = {	38 self.aliases = {

38 'eof' : "eof rule",	39 'eof' : RegexParser.parse("eof"), #RegexParser.parse("[\0]"),

39 'any' : "any rule",	40 'any' : RegexParser.parse("."),

40 }	41 }

41 self.current_transition = None	42 self.current_transition = None

42 self.rules = {}	43 self.rules = {}

43	44

44 def p_statements(self, p):	45 def p_statements(self, p):

45 'statements : statement maybe_statements'	46 'statements : statement maybe_statements'

46	47

47 def p_maybe_statement(self, p):	48 def p_maybe_statement(self, p):

48 '''maybe_statements : statements	49 '''maybe_statements : statements

49 | empty'''	50 | empty'''

(...skipping 29 matching lines...) Expand all Loading...
79 if p[1]:	80 if p[1]:

80 self.current_transition = p[2]	81 self.current_transition = p[2]

81 assert self.current_transition	82 assert self.current_transition

82 if not self.current_transition in self.rules:	83 if not self.current_transition in self.rules:

83 self.rules[self.current_transition] = []	84 self.rules[self.current_transition] = []

84 p[0] = self.current_transition	85 p[0] = self.current_transition

85	86

86 def p_composite_regex(self, p):	87 def p_composite_regex(self, p):

87 '''composite_regex : regex_parts OR regex_parts	88 '''composite_regex : regex_parts OR regex_parts

88 | regex_parts'''	89 | regex_parts'''

89 if p[len(p)-1]:	90 if len(p) == 2:

90 p[0] = p[1:]	91 p[0] = p[1]

91 else:	92 else:

92 p[0] = p[1:-1]	93 p[0] = NfaBuilder.or_graphs([p[1], p[3]])

	94 # NfaBuilder().nfa(p[0])

93	95

94 def p_regex_parts(self, p):	96 def p_regex_parts(self, p):

95 '''regex_parts : regex_part	97 '''regex_parts : regex_part

96 | regex_part regex_parts'''	98 | regex_part regex_parts'''

97 p[0] = p[1:]	99 p[0] = NfaBuilder.cat_graphs(p[1:])

98	100

99 def p_regex_part(self, p):	101 def p_regex_part(self, p):

100 '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier	102 '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier

101 | regex_string_literal modifier	103 | regex_string_literal modifier

102 | regex_class modifier	104 | regex_class modifier

103 | regex modifier	105 | regex modifier

104 | regex_alias modifier'''	106 | regex_alias modifier'''

105 if p[len(p)-1]:	107 modifier = p[len(p)-1]

106 p[0] = p[1:]	108 graph = p[2] if len(p) == 5 else p[1]

	109 if modifier:

	110 p[0] = NfaBuilder.apply_modifier(modifier, graph)

107 else:	111 else:

108 p[0] = p[1:-1]	112 p[0] = graph

109	113

110 def p_regex_string_literal(self, p):	114 def p_regex_string_literal(self, p):

111 'regex_string_literal : STRING'	115 'regex_string_literal : STRING'

112 string = p[1][1:-1]	116 string = p[1][1:-1]

113 for c in "\+?|*[]()":	117 for c in "\+?*|.[](){}":

114 string = string.replace(c, "\\" + c)	118 string = string.replace(c, "\\" + c)

115 p[0] = RegexParser.parse(string)	119 p[0] = RegexParser.parse(string)

116	120

117 def p_regex(self, p):	121 def p_regex(self, p):

118 'regex : REGEX'	122 'regex : REGEX'

119 p[0] = RegexParser.parse(p[1][1:-1])	123 p[0] = RegexParser.parse(p[1][1:-1])

120	124

121 def p_regex_class(self, p):	125 def p_regex_class(self, p):

122 'regex_class : CHARACTER_CLASS_REGEX'	126 'regex_class : CHARACTER_CLASS_REGEX'

123 p[0] = RegexParser.parse(p[1])	127 p[0] = RegexParser.parse(p[1])

(...skipping 26 matching lines...) Expand all Loading...
150 def p_error(self, p):	154 def p_error(self, p):

151 raise Exception("Syntax error in input '%s'" % p)	155 raise Exception("Syntax error in input '%s'" % p)

152	156

153 def build(self, **kwargs):	157 def build(self, **kwargs):

154 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)	158 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)

155 self.lexer = RuleLexer()	159 self.lexer = RuleLexer()

156 self.lexer.build(**kwargs)	160 self.lexer.build(**kwargs)

157	161

158 def parse(self, data):	162 def parse(self, data):

159 return self.parser.parse(data, lexer=self.lexer.lexer)	163 return self.parser.parse(data, lexer=self.lexer.lexer)

OLD	NEW

« no previous file with comments | « tools/lexer_generator/regex_parser.py ('k') | tools/lexer_generator/transition_keys.py » ('j') | no next file with comments »