Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(193)

Side by Side Diff: tools/lexer_generator/rule_parser.py

Issue 138973007: Experimental parser: support subgraph inlining (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
11 # with the distribution. 11 # with the distribution.
12 # * Neither the name of Google Inc. nor the names of its 12 # * Neither the name of Google Inc. nor the names of its
13 # contributors may be used to endorse or promote products derived 13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission. 14 # from this software without specific prior written permission.
15 # 15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 import ply.lex as lex
28 import ply.yacc as yacc 29 import ply.yacc as yacc
29 from action import Term, Action 30 from action import Term, Action
30 from rule_lexer import RuleLexer
31 from regex_parser import RegexParser 31 from regex_parser import RegexParser
32 from nfa_builder import NfaBuilder 32 from nfa_builder import NfaBuilder
33 from dfa import Dfa 33 from dfa import Dfa
34 from dfa_optimizer import DfaOptimizer 34 from dfa_optimizer import DfaOptimizer
35 from transition_keys import TransitionKey, KeyEncoding 35 from transition_keys import TransitionKey, KeyEncoding
36 36
class RuleLexer:
  """ply.lex tokenizer for lexer-generator rule files.

  ply builds the scanner reflectively from this class: the `tokens`
  tuple declares token names, `t_<NAME>` string attributes are simple
  token regexes, and `t_<NAME>` methods (whose docstrings are the
  regexes) run code when they match. Method rules fire in definition
  order and string rules are tried longest-regex-first, so do not
  reorder these definitions without checking ply's matching semantics.
  """

  tokens = (
    'DEFAULT_ACTION',
    'EPSILON',
    'EOS',
    'CATCH_ALL',

    'IDENTIFIER',
    'STRING',
    'REGEX',
    'CHARACTER_CLASS_REGEX',

    'PLUS',
    'QUESTION_MARK',
    'EQUALS',
    'OR',
    'STAR',
    'LEFT_PARENTHESIS',
    'RIGHT_PARENTHESIS',
    'GRAPH_OPEN',
    'GRAPH_CLOSE',
    'SEMICOLON',
    'ACTION_OPEN',
    'ACTION_CLOSE',

    'COMMA',
  )

  # Whitespace between tokens is skipped.
  t_ignore = " \t\n\r"

  def t_COMMENT(self, t):
    r'\#.*[\n\r]+'
    # '#' comments run to the end of the line; returning nothing
    # discards the matched text.
    pass

  # Identifiers that are retagged as their own token types below.
  __special_identifiers = set(
    ['default_action', 'epsilon', 'catch_all', 'eos'])

  def t_IDENTIFIER(self, t):
    r'[a-zA-Z0-9_]+'
    # Keywords share identifier syntax; promote them to their own type
    # (e.g. 'eos' -> EOS) so the grammar can refer to them directly.
    if t.value in self.__special_identifiers:
      t.type = t.value.upper()
    return t

  # Double-quoted string allowing \" , \<word-char> and \\ escapes.
  t_STRING = r'"((\\("|\w|\\))|[^\\"])+"'
  # /regex/ literal; \/ escapes the delimiter.
  t_REGEX = r'/(\\/|[^/])+/'
  # [...] character class; \] escapes the closing bracket.
  t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]'

  t_PLUS = r'\+'
  t_QUESTION_MARK = r'\?'
  t_STAR = r'\*'
  t_OR = r'\|'
  t_EQUALS = '='
  t_LEFT_PARENTHESIS = r'\('
  t_RIGHT_PARENTHESIS = r'\)'
  t_GRAPH_OPEN = '<<'
  t_GRAPH_CLOSE = '>>'
  t_SEMICOLON = ';'
  t_ACTION_OPEN = '<'
  t_ACTION_CLOSE = '>'
  t_COMMA = ','

  def t_LEFT_BRACKET(self, t):
    r'{'
    # '{' opens an embedded code block: switch the lexer into the
    # 'code' state and start brace-nesting bookkeeping.
    # NOTE(review): 'LEFT_BRACKET' is not listed in `tokens` and no
    # `states` attribute declaring 'code' is visible in this class —
    # confirm ply accepts this (they may be defined elsewhere).
    self.lexer.push_state('code')
    self.nesting = 1
    return t

  def t_ANY_error(self, t):
    # Error rule shared by all lexer states ('ANY' prefix).
    raise Exception("Illegal character '%s'" % t.value[0])

  def build(self, **kwargs):
    # Construct the ply lexer from this module's reflective rules.
    self.lexer = lex.lex(module=self, **kwargs)
class RuleParserState:
  """Mutable state accumulated over a single rule-file parse.

  Holds the alias table, named character classes, the per-state rule
  buckets, and the set of subgraph names referenced by transitions.
  """

  def __init__(self, encoding):
    # Transition-key encoding used for this rule set.
    self.encoding = encoding
    # Alias definitions, keyed by name (populated by parser rules
    # outside this view — presumably name -> term; verify against
    # the alias productions).
    self.aliases = {}
    # Named character classes, keyed by name.
    self.character_classes = {}
    # Name of the lexer state currently being populated; None until the
    # first <<state>> header is seen.
    self.current_state = None
    # state name -> rule bucket dict.
    self.rules = {}
    # Subgraph names referenced from rule transitions.
    self.transitions = set()
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
81 155
82 def p_state_change(self, p): 156 def p_state_change(self, p):
83 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE' 157 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE'
84 state = self.__state 158 state = self.__state
85 state.current_state = p[2] 159 state.current_state = p[2]
86 assert state.current_state 160 assert state.current_state
87 if not state.current_state in state.rules: 161 if not state.current_state in state.rules:
88 state.rules[state.current_state] = { 162 state.rules[state.current_state] = {
89 'default_action': Term.empty_term(), 163 'default_action': Term.empty_term(),
90 'uniques' : {}, 164 'uniques' : {},
91 'regex' : [] 165 'trees' : []
92 } 166 }
93 p[0] = state.current_state 167 p[0] = state.current_state
94 168
  def p_transition_rules(self, p):
    '''transition_rules : transition_rule transition_rules
                        | empty'''
    # Right-recursive list rule. Each transition_rule registers itself
    # via side effects on the parser state, so no value is propagated.
98 172
  def p_transition_rule(self, p):
    '''transition_rule : composite_regex action
                       | DEFAULT_ACTION default_action
                       | EOS eos
                       | EPSILON epsilon
                       | CATCH_ALL action'''
    # Each rule gets a globally increasing precedence so that
    # earlier-declared rules win ties later during automaton building.
    precedence = RuleParser.__rule_precedence_counter
    RuleParser.__rule_precedence_counter += 1
    # The second production element is always an
    # (entry_action, match_action, transition) triple built by
    # p_action / p_default_action / p_eos / p_epsilon.
    action = p[2]
    (entry_action, match_action, transition) = action
    # Record non-keyword transition targets as referenced subgraphs.
    if transition and not transition.name() in self.__keyword_transitions:
      assert not transition.name() == 'default', "can't append default graph"
      self.__state.transitions.add(transition.name())
    rules = self.__state.rules[self.__state.current_state]
    # process default action
    if p[1] == 'default_action':
      # Only the 'default' state may carry a default action, and only one.
      assert self.__state.current_state == 'default'
      assert not rules['default_action']
      assert not entry_action
      rules['default_action'] = match_action
      return
    # process tree
    if p[1] == 'eos' or p[1] == 'catch_all':
      # eos / catch_all may appear at most once per state.
      assert p[1] not in rules['uniques']
      rules['uniques'][p[1]] = True
      tree = NfaBuilder.unique_key(p[1])
    elif p[1] == 'epsilon':
      tree = Term.empty_term()
    else:
      tree = p[1] # regex case
    # handle actions
    if entry_action or match_action:
      tree = NfaBuilder.add_action(
        tree, Action(entry_action, match_action, precedence))
    # handle transitions
    if not transition:
      pass
    elif transition.name() == 'continue':
      # 'continue' transitions take one argument (see p_maybe_transition).
      tree = NfaBuilder.add_continue(tree, transition.args()[0])
    else:
      # Any other transition inlines the named subgraph after this tree.
      tree = NfaBuilder.join_subtree(tree, transition.name())
    # store tree
    rules['trees'].append(tree)
124 216
125 def p_action(self, p): 217 def p_action(self, p):
126 '''action : ACTION_OPEN maybe_identifier_action OR maybe_identifier_action O R maybe_transition ACTION_CLOSE''' 218 '''action : ACTION_OPEN maybe_identifier_action OR maybe_identifier_action O R maybe_transition ACTION_CLOSE'''
127 p[0] = (p[2], p[4], p[6]) 219 p[0] = (p[2], p[4], p[6])
128 220
  def p_default_action(self, p):
    'default_action : ACTION_OPEN identifier_action ACTION_CLOSE'
    # Same triple shape as p_action: (entry_action, match_action,
    # transition). A default action carries only a match action.
    p[0] = (Term.empty_term(), p[2], Term.empty_term())
132 224
  def p_eos(self, p):
    'eos : ACTION_OPEN identifier_action ACTION_CLOSE'
    # End-of-stream rule: match action only, no entry action or
    # transition (triple shape matches p_action).
    p[0] = (Term.empty_term(), p[2], Term.empty_term())
228
  def p_epsilon(self, p):
    'epsilon : ACTION_OPEN maybe_transition ACTION_CLOSE'
    # An epsilon rule exists only to take a transition; without one it
    # would be meaningless.
    assert p[2], 'cannot have an empty epsilon transition'
    p[0] = (Term.empty_term(), Term.empty_term(), p[2])
136 233
  def p_maybe_identifier_action(self, p):
    '''maybe_identifier_action : identifier_action
                               | empty'''
    # Normalize the absent case to an empty Term so callers need not
    # special-case None.
    p[0] = p[1] if p[1] else Term.empty_term()
141 238
  def p_maybe_transition(self, p):
    '''maybe_transition : IDENTIFIER
                        | IDENTIFIER LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHESIS
                        | empty'''
    if len(p) == 5:
      # Parenthesized argument form, e.g. continue(state); only the
      # 'continue' transition accepts an argument.
      assert p[1] == 'continue', 'only continue can take arguments'
      p[0] = Term(p[1], p[3])
    else:
      assert len(p) == 2
      # Bare transitions get a placeholder '0' argument; an absent
      # transition becomes the empty term.
      p[0] = Term(p[1], '0') if p[1] else Term.empty_term()
146 249
147 def p_identifier_action(self, p): 250 def p_identifier_action(self, p):
148 '''identifier_action : IDENTIFIER 251 '''identifier_action : IDENTIFIER
149 | IDENTIFIER LEFT_PARENTHESIS RIGHT_PARENTHESIS 252 | IDENTIFIER LEFT_PARENTHESIS RIGHT_PARENTHESIS
150 | IDENTIFIER LEFT_PARENTHESIS action_params RIGHT_PAREN THESIS''' 253 | IDENTIFIER LEFT_PARENTHESIS action_params RIGHT_PAREN THESIS'''
151 if len(p) == 2 or len(p) == 4: 254 if len(p) == 2 or len(p) == 4:
152 p[0] = Term(p[1]) 255 p[0] = Term(p[1])
153 elif len(p) == 5: 256 elif len(p) == 5:
154 p[0] = Term(p[1], *p[3]) 257 p[0] = Term(p[1], *p[3])
155 else: 258 else:
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after
305 self.__minimial_dfa = None 408 self.__minimial_dfa = None
306 409
307 def minimal_dfa(self): 410 def minimal_dfa(self):
308 if not self.__minimial_dfa: 411 if not self.__minimial_dfa:
309 self.__minimial_dfa = self.dfa().minimize() 412 self.__minimial_dfa = self.dfa().minimize()
310 return self.__minimial_dfa 413 return self.__minimial_dfa
311 414
312 def __process_parser_state(self, parser_state): 415 def __process_parser_state(self, parser_state):
313 assert 'default' in parser_state.rules 416 assert 'default' in parser_state.rules
314 rule_map = {} 417 rule_map = {}
315 def process(tree_name, v):
316 trees = []
317 for tree, precedence, action in v['regex']:
318 (entry_action, match_action, transition) = action
319 if entry_action or match_action:
320 tree = NfaBuilder.add_action(
321 tree, Action(entry_action, match_action, precedence))
322 if not transition:
323 pass
324 elif transition == 'continue':
325 tree = NfaBuilder.add_continue(tree)
326 else:
327 tree = NfaBuilder.join_subtree(tree, transition)
328 trees.append(tree)
329 rule_map[tree_name] = NfaBuilder.or_terms(trees)
330 # process all subgraphs 418 # process all subgraphs
331 for k, v in parser_state.rules.items(): 419 for tree_name, v in parser_state.rules.items():
332 process(k, v) 420 if not v['trees']:
421 continue
422 rule_map[tree_name] = NfaBuilder.or_terms(v['trees'])
333 # build the automata 423 # build the automata
334 for name, tree in rule_map.items(): 424 for name, tree in rule_map.items():
335 self.__automata[name] = RuleProcessor.Automata( 425 self.__automata[name] = RuleProcessor.Automata(
336 parser_state.encoding, parser_state.character_classes, rule_map, name) 426 parser_state.encoding, parser_state.character_classes, rule_map, name)
337 # process default_action 427 # process default_action
338 default_action = parser_state.rules['default']['default_action'] 428 default_action = parser_state.rules['default']['default_action']
339 self.__default_action = Action(Term.empty_term(), default_action) 429 self.__default_action = Action(Term.empty_term(), default_action)
OLDNEW
« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698