tools/lexer_generator/rule_parser.py - Issue 138973007: Experimental parser: support subgraph inlining

Side by Side Diff: tools/lexer_generator/rule_parser.py

Issue 138973007: Experimental parser: support subgraph inlining (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

11 # with the distribution.	11 # with the distribution.

12 # * Neither the name of Google Inc. nor the names of its	12 # * Neither the name of Google Inc. nor the names of its

13 # contributors may be used to endorse or promote products derived	13 # contributors may be used to endorse or promote products derived

14 # from this software without specific prior written permission.	14 # from this software without specific prior written permission.

15 #	15 #

16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS	16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT	17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR	18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT	19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,	20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

	28 import ply.lex as lex

28 import ply.yacc as yacc	29 import ply.yacc as yacc

29 from action import Term, Action	30 from action import Term, Action

30 from rule_lexer import RuleLexer

31 from regex_parser import RegexParser	31 from regex_parser import RegexParser

32 from nfa_builder import NfaBuilder	32 from nfa_builder import NfaBuilder

33 from dfa import Dfa	33 from dfa import Dfa

34 from dfa_optimizer import DfaOptimizer	34 from dfa_optimizer import DfaOptimizer

35 from transition_keys import TransitionKey, KeyEncoding	35 from transition_keys import TransitionKey, KeyEncoding

36	36

	37 class RuleLexer:

	38

	39 tokens = (

	40 'DEFAULT_ACTION',

	41 'EPSILON',

	42 'EOS',

	43 'CATCH_ALL',

	44

	45 'IDENTIFIER',

	46 'STRING',

	47 'REGEX',

	48 'CHARACTER_CLASS_REGEX',

	49

	50 'PLUS',

	51 'QUESTION_MARK',

	52 'EQUALS',

	53 'OR',

	54 'STAR',

	55 'LEFT_PARENTHESIS',

	56 'RIGHT_PARENTHESIS',

	57 'GRAPH_OPEN',

	58 'GRAPH_CLOSE',

	59 'SEMICOLON',

	60 'ACTION_OPEN',

	61 'ACTION_CLOSE',

	62

	63 'COMMA',

	64 )

	65

	66 t_ignore = " \t\n\r"

	67

	68 def t_COMMENT(self, t):

	69 r'\#.*[\n\r]+'

	70 pass

	71

	72 __special_identifiers = set(

	73 ['default_action', 'epsilon', 'catch_all', 'eos'])

	74

	75 def t_IDENTIFIER(self, t):

	76 r'[a-zA-Z0-9_]+'

	77 if t.value in self.__special_identifiers:

	78 t.type = t.value.upper()

	79 return t

	80

	81 t_STRING = r'"((\\("\|\w\|\\))\|[^\\"])+"'

	82 t_REGEX = r'/(\\/\|[^/])+/'

	83 t_CHARACTER_CLASS_REGEX = r'\[([^\]]\|\\\])+\]'

	84

	85 t_PLUS = r'\+'

	86 t_QUESTION_MARK = r'\?'

	87 t_STAR = r'\*'

	88 t_OR = r'\\|'

	89 t_EQUALS = '='

	90 t_LEFT_PARENTHESIS = r'\('

	91 t_RIGHT_PARENTHESIS = r'\)'

	92 t_GRAPH_OPEN = '<<'

	93 t_GRAPH_CLOSE = '>>'

	94 t_SEMICOLON = ';'

	95 t_ACTION_OPEN = '<'

	96 t_ACTION_CLOSE = '>'

	97 t_COMMA = ','

	98

	99 def t_LEFT_BRACKET(self, t):

	100 r'{'

	101 self.lexer.push_state('code')

	102 self.nesting = 1

	103 return t

	104

	105 def t_ANY_error(self, t):

	106 raise Exception("Illegal character '%s'" % t.value[0])

	107

	108 def build(self, **kwargs):

	109 self.lexer = lex.lex(module=self, **kwargs)

	110

37 class RuleParserState:	111 class RuleParserState:

38	112

39 def __init__(self, encoding):	113 def __init__(self, encoding):

40 self.aliases = {}	114 self.aliases = {}

41 self.character_classes = {}	115 self.character_classes = {}

42 self.current_state = None	116 self.current_state = None

43 self.rules = {}	117 self.rules = {}

44 self.transitions = set()	118 self.transitions = set()

45 self.encoding = encoding	119 self.encoding = encoding

46	120

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
81	155

82 def p_state_change(self, p):	156 def p_state_change(self, p):

83 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE'	157 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE'

84 state = self.__state	158 state = self.__state

85 state.current_state = p[2]	159 state.current_state = p[2]

86 assert state.current_state	160 assert state.current_state

87 if not state.current_state in state.rules:	161 if not state.current_state in state.rules:

88 state.rules[state.current_state] = {	162 state.rules[state.current_state] = {

89 'default_action': Term.empty_term(),	163 'default_action': Term.empty_term(),

90 'uniques' : {},	164 'uniques' : {},

91 'regex' : []	165 'trees' : []

92 }	166 }

93 p[0] = state.current_state	167 p[0] = state.current_state

94	168

95 def p_transition_rules(self, p):	169 def p_transition_rules(self, p):

96 '''transition_rules : transition_rule transition_rules	170 '''transition_rules : transition_rule transition_rules

97 \| empty'''	171 \| empty'''

98	172

99 def p_transition_rule(self, p):	173 def p_transition_rule(self, p):

100 '''transition_rule : composite_regex action	174 '''transition_rule : composite_regex action

101 \| DEFAULT_ACTION default_action	175 \| DEFAULT_ACTION default_action

102 \| EOS eos	176 \| EOS eos

	177 \| EPSILON epsilon

103 \| CATCH_ALL action'''	178 \| CATCH_ALL action'''

104 precedence = RuleParser.__rule_precedence_counter	179 precedence = RuleParser.__rule_precedence_counter

105 RuleParser.__rule_precedence_counter += 1	180 RuleParser.__rule_precedence_counter += 1

106 action = p[2]	181 action = p[2]

107 (entry_action, match_action, transition) = action	182 (entry_action, match_action, transition) = action

108 if transition and not transition in self.__keyword_transitions:	183 if transition and not transition.name() in self.__keyword_transitions:

109 assert not transition == 'default', "can't append default graph"	184 assert not transition.name() == 'default', "can't append default graph"

110 self.__state.transitions.add(transition)	185 self.__state.transitions.add(transition.name())

111 rules = self.__state.rules[self.__state.current_state]	186 rules = self.__state.rules[self.__state.current_state]

	187 # process default action

112 if p[1] == 'default_action':	188 if p[1] == 'default_action':

113 assert self.__state.current_state == 'default'	189 assert self.__state.current_state == 'default'

114 assert not rules['default_action']	190 assert not rules['default_action']

115 assert not entry_action	191 assert not entry_action

116 rules['default_action'] = match_action	192 rules['default_action'] = match_action

117 elif p[1] == 'eos' or p[1] == 'catch_all':	193 return

	194 # process tree

	195 if p[1] == 'eos' or p[1] == 'catch_all':

118 assert p[1] not in rules['uniques']	196 assert p[1] not in rules['uniques']

119 rules['uniques'][p[1]] = True	197 rules['uniques'][p[1]] = True

120 rules['regex'].append((NfaBuilder.unique_key(p[1]), precedence, action))	198 tree = NfaBuilder.unique_key(p[1])

	199 elif p[1] == 'epsilon':

	200 tree = Term.empty_term()

121 else:	201 else:

122 regex = p[1]	202 tree = p[1] # regex case

123 rules['regex'].append((regex, precedence, action))	203 # handle actions

	204 if entry_action or match_action:

	205 tree = NfaBuilder.add_action(

	206 tree, Action(entry_action, match_action, precedence))

	207 # handle transitions

	208 if not transition:

	209 pass

	210 elif transition.name() == 'continue':

	211 tree = NfaBuilder.add_continue(tree, transition.args()[0])

	212 else:

	213 tree = NfaBuilder.join_subtree(tree, transition.name())

	214 # store tree

	215 rules['trees'].append(tree)

124	216

125 def p_action(self, p):	217 def p_action(self, p):

126 '''action : ACTION_OPEN maybe_identifier_action OR maybe_identifier_action O R maybe_transition ACTION_CLOSE'''	218 '''action : ACTION_OPEN maybe_identifier_action OR maybe_identifier_action O R maybe_transition ACTION_CLOSE'''

127 p[0] = (p[2], p[4], p[6])	219 p[0] = (p[2], p[4], p[6])

128	220

129 def p_default_action(self, p):	221 def p_default_action(self, p):

130 'default_action : ACTION_OPEN identifier_action ACTION_CLOSE'	222 'default_action : ACTION_OPEN identifier_action ACTION_CLOSE'

131 p[0] = (Term.empty_term(), p[2], None)	223 p[0] = (Term.empty_term(), p[2], Term.empty_term())

132	224

133 def p_eos(self, p):	225 def p_eos(self, p):

134 'eos : ACTION_OPEN identifier_action ACTION_CLOSE'	226 'eos : ACTION_OPEN identifier_action ACTION_CLOSE'

135 p[0] = (Term.empty_term(), p[2], None)	227 p[0] = (Term.empty_term(), p[2], Term.empty_term())

	228

	229 def p_epsilon(self, p):

	230 'epsilon : ACTION_OPEN maybe_transition ACTION_CLOSE'

	231 assert p[2], 'cannot have an empty epsilon transition'

	232 p[0] = (Term.empty_term(), Term.empty_term(), p[2])

136	233

137 def p_maybe_identifier_action(self, p):	234 def p_maybe_identifier_action(self, p):

138 '''maybe_identifier_action : identifier_action	235 '''maybe_identifier_action : identifier_action

139 \| empty'''	236 \| empty'''

140 p[0] = p[1] if p[1] else Term.empty_term()	237 p[0] = p[1] if p[1] else Term.empty_term()

141	238

142 def p_maybe_transition(self, p):	239 def p_maybe_transition(self, p):

143 '''maybe_transition : IDENTIFIER	240 '''maybe_transition : IDENTIFIER

	241 \| IDENTIFIER LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHES IS

144 \| empty'''	242 \| empty'''

145 p[0] = p[1]	243 if len(p) == 5:

	244 assert p[1] == 'continue', 'only continue can take arguments'

	245 p[0] = Term(p[1], p[3])

	246 else:

	247 assert len(p) == 2

	248 p[0] = Term(p[1], '0') if p[1] else Term.empty_term()

146	249

147 def p_identifier_action(self, p):	250 def p_identifier_action(self, p):

148 '''identifier_action : IDENTIFIER	251 '''identifier_action : IDENTIFIER

149 \| IDENTIFIER LEFT_PARENTHESIS RIGHT_PARENTHESIS	252 \| IDENTIFIER LEFT_PARENTHESIS RIGHT_PARENTHESIS

150 \| IDENTIFIER LEFT_PARENTHESIS action_params RIGHT_PAREN THESIS'''	253 \| IDENTIFIER LEFT_PARENTHESIS action_params RIGHT_PAREN THESIS'''

151 if len(p) == 2 or len(p) == 4:	254 if len(p) == 2 or len(p) == 4:

152 p[0] = Term(p[1])	255 p[0] = Term(p[1])

153 elif len(p) == 5:	256 elif len(p) == 5:

154 p[0] = Term(p[1], *p[3])	257 p[0] = Term(p[1], *p[3])

155 else:	258 else:

(...skipping 149 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
305 self.__minimial_dfa = None	408 self.__minimial_dfa = None

306	409

307 def minimal_dfa(self):	410 def minimal_dfa(self):

308 if not self.__minimial_dfa:	411 if not self.__minimial_dfa:

309 self.__minimial_dfa = self.dfa().minimize()	412 self.__minimial_dfa = self.dfa().minimize()

310 return self.__minimial_dfa	413 return self.__minimial_dfa

311	414

312 def __process_parser_state(self, parser_state):	415 def __process_parser_state(self, parser_state):

313 assert 'default' in parser_state.rules	416 assert 'default' in parser_state.rules

314 rule_map = {}	417 rule_map = {}

315 def process(tree_name, v):

316 trees = []

317 for tree, precedence, action in v['regex']:

318 (entry_action, match_action, transition) = action

319 if entry_action or match_action:

320 tree = NfaBuilder.add_action(

321 tree, Action(entry_action, match_action, precedence))

322 if not transition:

323 pass

324 elif transition == 'continue':

325 tree = NfaBuilder.add_continue(tree)

326 else:

327 tree = NfaBuilder.join_subtree(tree, transition)

328 trees.append(tree)

329 rule_map[tree_name] = NfaBuilder.or_terms(trees)

330 # process all subgraphs	418 # process all subgraphs

331 for k, v in parser_state.rules.items():	419 for tree_name, v in parser_state.rules.items():

332 process(k, v)	420 if not v['trees']:

	421 continue

	422 rule_map[tree_name] = NfaBuilder.or_terms(v['trees'])

333 # build the automata	423 # build the automata

334 for name, tree in rule_map.items():	424 for name, tree in rule_map.items():

335 self.__automata[name] = RuleProcessor.Automata(	425 self.__automata[name] = RuleProcessor.Automata(

336 parser_state.encoding, parser_state.character_classes, rule_map, name)	426 parser_state.encoding, parser_state.character_classes, rule_map, name)

337 # process default_action	427 # process default_action

338 default_action = parser_state.rules['default']['default_action']	428 default_action = parser_state.rules['default']['default_action']

339 self.__default_action = Action(Term.empty_term(), default_action)	429 self.__default_action = Action(Term.empty_term(), default_action)

OLD	NEW

« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | no next file » | no next file with comments »