| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import ply.lex as lex |
| 28 import ply.yacc as yacc | 29 import ply.yacc as yacc |
| 29 from action import Term, Action | 30 from action import Term, Action |
| 30 from rule_lexer import RuleLexer | |
| 31 from regex_parser import RegexParser | 31 from regex_parser import RegexParser |
| 32 from nfa_builder import NfaBuilder | 32 from nfa_builder import NfaBuilder |
| 33 from dfa import Dfa | 33 from dfa import Dfa |
| 34 from dfa_optimizer import DfaOptimizer | 34 from dfa_optimizer import DfaOptimizer |
| 35 from transition_keys import TransitionKey, KeyEncoding | 35 from transition_keys import TransitionKey, KeyEncoding |
| 36 | 36 |
| 37 class RuleLexer: |
| 38 |
| 39 tokens = ( |
| 40 'DEFAULT_ACTION', |
| 41 'EPSILON', |
| 42 'EOS', |
| 43 'CATCH_ALL', |
| 44 |
| 45 'IDENTIFIER', |
| 46 'STRING', |
| 47 'REGEX', |
| 48 'CHARACTER_CLASS_REGEX', |
| 49 |
| 50 'PLUS', |
| 51 'QUESTION_MARK', |
| 52 'EQUALS', |
| 53 'OR', |
| 54 'STAR', |
| 55 'LEFT_PARENTHESIS', |
| 56 'RIGHT_PARENTHESIS', |
| 57 'GRAPH_OPEN', |
| 58 'GRAPH_CLOSE', |
| 59 'SEMICOLON', |
| 60 'ACTION_OPEN', |
| 61 'ACTION_CLOSE', |
| 62 |
| 63 'COMMA', |
| 64 ) |
| 65 |
| 66 t_ignore = " \t\n\r" |
| 67 |
| 68 def t_COMMENT(self, t): |
| 69 r'\#.*[\n\r]+' |
| 70 pass |
| 71 |
| 72 __special_identifiers = set( |
| 73 ['default_action', 'epsilon', 'catch_all', 'eos']) |
| 74 |
| 75 def t_IDENTIFIER(self, t): |
| 76 r'[a-zA-Z0-9_]+' |
| 77 if t.value in self.__special_identifiers: |
| 78 t.type = t.value.upper() |
| 79 return t |
| 80 |
| 81 t_STRING = r'"((\\("|\w|\\))|[^\\"])+"' |
| 82 t_REGEX = r'/(\\/|[^/])+/' |
| 83 t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]' |
| 84 |
| 85 t_PLUS = r'\+' |
| 86 t_QUESTION_MARK = r'\?' |
| 87 t_STAR = r'\*' |
| 88 t_OR = r'\|' |
| 89 t_EQUALS = '=' |
| 90 t_LEFT_PARENTHESIS = r'\(' |
| 91 t_RIGHT_PARENTHESIS = r'\)' |
| 92 t_GRAPH_OPEN = '<<' |
| 93 t_GRAPH_CLOSE = '>>' |
| 94 t_SEMICOLON = ';' |
| 95 t_ACTION_OPEN = '<' |
| 96 t_ACTION_CLOSE = '>' |
| 97 t_COMMA = ',' |
| 98 |
| 99 def t_LEFT_BRACKET(self, t): |
| 100 r'{' |
| 101 self.lexer.push_state('code') |
| 102 self.nesting = 1 |
| 103 return t |
| 104 |
| 105 def t_ANY_error(self, t): |
| 106 raise Exception("Illegal character '%s'" % t.value[0]) |
| 107 |
| 108 def build(self, **kwargs): |
| 109 self.lexer = lex.lex(module=self, **kwargs) |
| 110 |
| 37 class RuleParserState: | 111 class RuleParserState: |
| 38 | 112 |
| 39 def __init__(self, encoding): | 113 def __init__(self, encoding): |
| 40 self.aliases = {} | 114 self.aliases = {} |
| 41 self.character_classes = {} | 115 self.character_classes = {} |
| 42 self.current_state = None | 116 self.current_state = None |
| 43 self.rules = {} | 117 self.rules = {} |
| 44 self.transitions = set() | 118 self.transitions = set() |
| 45 self.encoding = encoding | 119 self.encoding = encoding |
| 46 | 120 |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 81 | 155 |
| 82 def p_state_change(self, p): | 156 def p_state_change(self, p): |
| 83 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE' | 157 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE' |
| 84 state = self.__state | 158 state = self.__state |
| 85 state.current_state = p[2] | 159 state.current_state = p[2] |
| 86 assert state.current_state | 160 assert state.current_state |
| 87 if not state.current_state in state.rules: | 161 if not state.current_state in state.rules: |
| 88 state.rules[state.current_state] = { | 162 state.rules[state.current_state] = { |
| 89 'default_action': Term.empty_term(), | 163 'default_action': Term.empty_term(), |
| 90 'uniques' : {}, | 164 'uniques' : {}, |
| 91 'regex' : [] | 165 'trees' : [] |
| 92 } | 166 } |
| 93 p[0] = state.current_state | 167 p[0] = state.current_state |
| 94 | 168 |
| 95 def p_transition_rules(self, p): | 169 def p_transition_rules(self, p): |
| 96 '''transition_rules : transition_rule transition_rules | 170 '''transition_rules : transition_rule transition_rules |
| 97 | empty''' | 171 | empty''' |
| 98 | 172 |
| 99 def p_transition_rule(self, p): | 173 def p_transition_rule(self, p): |
| 100 '''transition_rule : composite_regex action | 174 '''transition_rule : composite_regex action |
| 101 | DEFAULT_ACTION default_action | 175 | DEFAULT_ACTION default_action |
| 102 | EOS eos | 176 | EOS eos |
| 177 | EPSILON epsilon |
| 103 | CATCH_ALL action''' | 178 | CATCH_ALL action''' |
| 104 precedence = RuleParser.__rule_precedence_counter | 179 precedence = RuleParser.__rule_precedence_counter |
| 105 RuleParser.__rule_precedence_counter += 1 | 180 RuleParser.__rule_precedence_counter += 1 |
| 106 action = p[2] | 181 action = p[2] |
| 107 (entry_action, match_action, transition) = action | 182 (entry_action, match_action, transition) = action |
| 108 if transition and not transition in self.__keyword_transitions: | 183 if transition and not transition.name() in self.__keyword_transitions: |
| 109 assert not transition == 'default', "can't append default graph" | 184 assert not transition.name() == 'default', "can't append default graph" |
| 110 self.__state.transitions.add(transition) | 185 self.__state.transitions.add(transition.name()) |
| 111 rules = self.__state.rules[self.__state.current_state] | 186 rules = self.__state.rules[self.__state.current_state] |
| 187 # process default action |
| 112 if p[1] == 'default_action': | 188 if p[1] == 'default_action': |
| 113 assert self.__state.current_state == 'default' | 189 assert self.__state.current_state == 'default' |
| 114 assert not rules['default_action'] | 190 assert not rules['default_action'] |
| 115 assert not entry_action | 191 assert not entry_action |
| 116 rules['default_action'] = match_action | 192 rules['default_action'] = match_action |
| 117 elif p[1] == 'eos' or p[1] == 'catch_all': | 193 return |
| 194 # process tree |
| 195 if p[1] == 'eos' or p[1] == 'catch_all': |
| 118 assert p[1] not in rules['uniques'] | 196 assert p[1] not in rules['uniques'] |
| 119 rules['uniques'][p[1]] = True | 197 rules['uniques'][p[1]] = True |
| 120 rules['regex'].append((NfaBuilder.unique_key(p[1]), precedence, action)) | 198 tree = NfaBuilder.unique_key(p[1]) |
| 199 elif p[1] == 'epsilon': |
| 200 tree = Term.empty_term() |
| 121 else: | 201 else: |
| 122 regex = p[1] | 202 tree = p[1] # regex case |
| 123 rules['regex'].append((regex, precedence, action)) | 203 # handle actions |
| 204 if entry_action or match_action: |
| 205 tree = NfaBuilder.add_action( |
| 206 tree, Action(entry_action, match_action, precedence)) |
| 207 # handle transitions |
| 208 if not transition: |
| 209 pass |
| 210 elif transition.name() == 'continue': |
| 211 tree = NfaBuilder.add_continue(tree, transition.args()[0]) |
| 212 else: |
| 213 tree = NfaBuilder.join_subtree(tree, transition.name()) |
| 214 # store tree |
| 215 rules['trees'].append(tree) |
| 124 | 216 |
| 125 def p_action(self, p): | 217 def p_action(self, p): |
| 126 '''action : ACTION_OPEN maybe_identifier_action OR maybe_identifier_action O
R maybe_transition ACTION_CLOSE''' | 218 '''action : ACTION_OPEN maybe_identifier_action OR maybe_identifier_action O
R maybe_transition ACTION_CLOSE''' |
| 127 p[0] = (p[2], p[4], p[6]) | 219 p[0] = (p[2], p[4], p[6]) |
| 128 | 220 |
| 129 def p_default_action(self, p): | 221 def p_default_action(self, p): |
| 130 'default_action : ACTION_OPEN identifier_action ACTION_CLOSE' | 222 'default_action : ACTION_OPEN identifier_action ACTION_CLOSE' |
| 131 p[0] = (Term.empty_term(), p[2], None) | 223 p[0] = (Term.empty_term(), p[2], Term.empty_term()) |
| 132 | 224 |
| 133 def p_eos(self, p): | 225 def p_eos(self, p): |
| 134 'eos : ACTION_OPEN identifier_action ACTION_CLOSE' | 226 'eos : ACTION_OPEN identifier_action ACTION_CLOSE' |
| 135 p[0] = (Term.empty_term(), p[2], None) | 227 p[0] = (Term.empty_term(), p[2], Term.empty_term()) |
| 228 |
| 229 def p_epsilon(self, p): |
| 230 'epsilon : ACTION_OPEN maybe_transition ACTION_CLOSE' |
| 231 assert p[2], 'cannot have an empty epsilon transition' |
| 232 p[0] = (Term.empty_term(), Term.empty_term(), p[2]) |
| 136 | 233 |
| 137 def p_maybe_identifier_action(self, p): | 234 def p_maybe_identifier_action(self, p): |
| 138 '''maybe_identifier_action : identifier_action | 235 '''maybe_identifier_action : identifier_action |
| 139 | empty''' | 236 | empty''' |
| 140 p[0] = p[1] if p[1] else Term.empty_term() | 237 p[0] = p[1] if p[1] else Term.empty_term() |
| 141 | 238 |
| 142 def p_maybe_transition(self, p): | 239 def p_maybe_transition(self, p): |
| 143 '''maybe_transition : IDENTIFIER | 240 '''maybe_transition : IDENTIFIER |
| 241 | IDENTIFIER LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHES
IS |
| 144 | empty''' | 242 | empty''' |
| 145 p[0] = p[1] | 243 if len(p) == 5: |
| 244 assert p[1] == 'continue', 'only continue can take arguments' |
| 245 p[0] = Term(p[1], p[3]) |
| 246 else: |
| 247 assert len(p) == 2 |
| 248 p[0] = Term(p[1], '0') if p[1] else Term.empty_term() |
| 146 | 249 |
| 147 def p_identifier_action(self, p): | 250 def p_identifier_action(self, p): |
| 148 '''identifier_action : IDENTIFIER | 251 '''identifier_action : IDENTIFIER |
| 149 | IDENTIFIER LEFT_PARENTHESIS RIGHT_PARENTHESIS | 252 | IDENTIFIER LEFT_PARENTHESIS RIGHT_PARENTHESIS |
| 150 | IDENTIFIER LEFT_PARENTHESIS action_params RIGHT_PAREN
THESIS''' | 253 | IDENTIFIER LEFT_PARENTHESIS action_params RIGHT_PAREN
THESIS''' |
| 151 if len(p) == 2 or len(p) == 4: | 254 if len(p) == 2 or len(p) == 4: |
| 152 p[0] = Term(p[1]) | 255 p[0] = Term(p[1]) |
| 153 elif len(p) == 5: | 256 elif len(p) == 5: |
| 154 p[0] = Term(p[1], *p[3]) | 257 p[0] = Term(p[1], *p[3]) |
| 155 else: | 258 else: |
| (...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 305 self.__minimial_dfa = None | 408 self.__minimial_dfa = None |
| 306 | 409 |
| 307 def minimal_dfa(self): | 410 def minimal_dfa(self): |
| 308 if not self.__minimial_dfa: | 411 if not self.__minimial_dfa: |
| 309 self.__minimial_dfa = self.dfa().minimize() | 412 self.__minimial_dfa = self.dfa().minimize() |
| 310 return self.__minimial_dfa | 413 return self.__minimial_dfa |
| 311 | 414 |
| 312 def __process_parser_state(self, parser_state): | 415 def __process_parser_state(self, parser_state): |
| 313 assert 'default' in parser_state.rules | 416 assert 'default' in parser_state.rules |
| 314 rule_map = {} | 417 rule_map = {} |
| 315 def process(tree_name, v): | |
| 316 trees = [] | |
| 317 for tree, precedence, action in v['regex']: | |
| 318 (entry_action, match_action, transition) = action | |
| 319 if entry_action or match_action: | |
| 320 tree = NfaBuilder.add_action( | |
| 321 tree, Action(entry_action, match_action, precedence)) | |
| 322 if not transition: | |
| 323 pass | |
| 324 elif transition == 'continue': | |
| 325 tree = NfaBuilder.add_continue(tree) | |
| 326 else: | |
| 327 tree = NfaBuilder.join_subtree(tree, transition) | |
| 328 trees.append(tree) | |
| 329 rule_map[tree_name] = NfaBuilder.or_terms(trees) | |
| 330 # process all subgraphs | 418 # process all subgraphs |
| 331 for k, v in parser_state.rules.items(): | 419 for tree_name, v in parser_state.rules.items(): |
| 332 process(k, v) | 420 if not v['trees']: |
| 421 continue |
| 422 rule_map[tree_name] = NfaBuilder.or_terms(v['trees']) |
| 333 # build the automata | 423 # build the automata |
| 334 for name, tree in rule_map.items(): | 424 for name, tree in rule_map.items(): |
| 335 self.__automata[name] = RuleProcessor.Automata( | 425 self.__automata[name] = RuleProcessor.Automata( |
| 336 parser_state.encoding, parser_state.character_classes, rule_map, name) | 426 parser_state.encoding, parser_state.character_classes, rule_map, name) |
| 337 # process default_action | 427 # process default_action |
| 338 default_action = parser_state.rules['default']['default_action'] | 428 default_action = parser_state.rules['default']['default_action'] |
| 339 self.__default_action = Action(Term.empty_term(), default_action) | 429 self.__default_action = Action(Term.empty_term(), default_action) |
| OLD | NEW |