| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 10 matching lines...) |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import ply.lex as lex | 28 import ply.lex as lex |
| 29 import ply.yacc as yacc | 29 import ply.yacc as yacc |
| 30 from action import Term, Action | 30 from action import Term, Action |
| 31 from regex_parser import RegexParser | 31 from regex_parser import RegexParser, ParserBuilder |
| 32 from nfa_builder import NfaBuilder | 32 from nfa_builder import NfaBuilder |
| 33 from dfa import Dfa | 33 from dfa import Dfa |
| 34 from dfa_optimizer import DfaOptimizer | 34 from dfa_optimizer import DfaOptimizer |
| 35 from transition_keys import TransitionKey, KeyEncoding | 35 from transition_keys import TransitionKey, KeyEncoding |
| 36 | 36 |
| 37 class RuleLexer: | 37 class RuleLexer: |
| 38 | 38 |
| 39 tokens = ( | 39 tokens = ( |
| 40 'DEFAULT_ACTION', | 40 'DEFAULT_ACTION', |
| 41 'EPSILON', | 41 'EPSILON', |
| (...skipping 47 matching lines...) |
| 89 t_EQUALS = '=' | 89 t_EQUALS = '=' |
| 90 t_LEFT_PARENTHESIS = r'\(' | 90 t_LEFT_PARENTHESIS = r'\(' |
| 91 t_RIGHT_PARENTHESIS = r'\)' | 91 t_RIGHT_PARENTHESIS = r'\)' |
| 92 t_GRAPH_OPEN = '<<' | 92 t_GRAPH_OPEN = '<<' |
| 93 t_GRAPH_CLOSE = '>>' | 93 t_GRAPH_CLOSE = '>>' |
| 94 t_SEMICOLON = ';' | 94 t_SEMICOLON = ';' |
| 95 t_ACTION_OPEN = '<' | 95 t_ACTION_OPEN = '<' |
| 96 t_ACTION_CLOSE = '>' | 96 t_ACTION_CLOSE = '>' |
| 97 t_COMMA = ',' | 97 t_COMMA = ',' |
| 98 | 98 |
| 99 def t_LEFT_BRACKET(self, t): | |
| 100 r'{' | |
| 101 self.lexer.push_state('code') | |
| 102 self.nesting = 1 | |
| 103 return t | |
| 104 | |
| 105 def t_ANY_error(self, t): | 99 def t_ANY_error(self, t): |
| 106 raise Exception("Illegal character '%s'" % t.value[0]) | 100 raise Exception("Illegal character '%s'" % t.value[0]) |
| 107 | 101 |
| 108 def build(self, **kwargs): | |
| 109 self.lexer = lex.lex(module=self, **kwargs) | |
| 110 | |
| 111 class RuleParserState: | 102 class RuleParserState: |
| 112 | 103 |
| 113 def __init__(self, encoding): | 104 def __init__(self, encoding): |
| 114 self.aliases = {} | 105 self.aliases = {} |
| 115 self.character_classes = {} | 106 self.character_classes = {} |
| 116 self.current_state = None | 107 self.current_state = None |
| 117 self.rules = {} | 108 self.rules = {} |
| 118 self.transitions = set() | 109 self.transitions = set() |
| 119 self.encoding = encoding | 110 self.encoding = encoding |
| 120 | 111 |
| (...skipping 169 matching lines...) |
| 290 p[0] = NfaBuilder.apply_modifier(modifier, term) | 281 p[0] = NfaBuilder.apply_modifier(modifier, term) |
| 291 else: | 282 else: |
| 292 p[0] = term | 283 p[0] = term |
| 293 | 284 |
| 294 def p_regex_string_literal(self, p): | 285 def p_regex_string_literal(self, p): |
| 295 'regex_string_literal : STRING' | 286 'regex_string_literal : STRING' |
| 296 string = p[1][1:-1] | 287 string = p[1][1:-1] |
| 297 escape_char = lambda string, char: string.replace(char, "\\" + char) | 288 escape_char = lambda string, char: string.replace(char, "\\" + char) |
| 298 string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"") | 289 string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"") |
| 299 p[0] = RegexParser.parse(string) | 290 p[0] = RegexParser.parse(string) |
| 291 |
| 300 def p_regex(self, p): | 292 def p_regex(self, p): |
| 301 'regex : REGEX' | 293 'regex : REGEX' |
| 302 string = p[1][1:-1].replace("\\/", "/") | 294 string = p[1][1:-1].replace("\\/", "/") |
| 303 p[0] = RegexParser.parse(string) | 295 p[0] = RegexParser.parse(string) |
| 304 | 296 |
| 305 def p_regex_class(self, p): | 297 def p_regex_class(self, p): |
| 306 'regex_class : CHARACTER_CLASS_REGEX' | 298 'regex_class : CHARACTER_CLASS_REGEX' |
| 307 p[0] = RegexParser.parse(p[1]) | 299 p[0] = RegexParser.parse(p[1]) |
| 308 | 300 |
| 309 def p_regex_alias(self, p): | 301 def p_regex_alias(self, p): |
| 310 'regex_alias : IDENTIFIER' | 302 'regex_alias : IDENTIFIER' |
| 311 p[0] = self.__state.aliases[p[1]] | 303 p[0] = self.__state.aliases[p[1]] |
| 312 | 304 |
| 313 def p_modifier(self, p): | 305 def p_modifier(self, p): |
| 314 '''modifier : PLUS | 306 '''modifier : PLUS |
| 315 | QUESTION_MARK | 307 | QUESTION_MARK |
| 316 | STAR | 308 | STAR |
| 317 | empty''' | 309 | empty''' |
| 318 p[0] = p[1] | 310 p[0] = p[1] |
| 319 | 311 |
| 320 def p_empty(self, p): | 312 def p_empty(self, p): |
| 321 'empty :' | 313 'empty :' |
| 322 | 314 |
| 323 def p_error(self, p): | 315 def p_error(self, p): |
| 324 raise Exception("Syntax error in input '%s'" % str(p)) | 316 raise Exception("Syntax error in input '%s'" % str(p)) |
| 325 | 317 |
| 326 def build(self, **kwargs): | |
| 327 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs) | |
| 328 self.lexer = RuleLexer() | |
| 329 self.lexer.build(**kwargs) | |
| 330 | |
| 331 __static_instance = None | |
| 332 @staticmethod | 318 @staticmethod |
| 333 def parse(data, parser_state): | 319 def parse(string, parser_state): |
| 334 parser = RuleParser.__static_instance | 320 new_lexer = lambda: RuleLexer() |
| 335 if not parser: | 321 new_parser = lambda: RuleParser() |
| 336 parser = RuleParser() | 322 def preparse(parser): |
| 337 parser.build() | 323 parser.__state = parser_state |
| 338 RuleParser.__static_instance = parser | 324 def postparse(parser): |
| 339 parser.__state = parser_state | 325 parser.__state = None |
| 340 try: | 326 return ParserBuilder.parse( |
| 341 parser.parser.parse(data, lexer=parser.lexer.lexer) | 327 string, "RuleParser", new_lexer, new_parser, preparse, postparse) |
| 342 except Exception: | |
| 343 RuleParser.__static_instance = None | |
| 344 raise | |
| 345 parser.__state = None | |
| 346 | 328 |
| 347 class RuleProcessor(object): | 329 class RuleProcessor(object): |
| 348 | 330 |
| 349 def __init__(self, string, encoding_name): | 331 def __init__(self, string, encoding_name): |
| 350 self.__automata = {} | 332 self.__automata = {} |
| 351 self.__default_action = None | 333 self.__default_action = None |
| 352 self.__parser_state = RuleParserState(KeyEncoding.get(encoding_name)) | 334 self.__parser_state = RuleParserState(KeyEncoding.get(encoding_name)) |
| 353 RuleParser.parse(string, self.__parser_state) | 335 RuleParser.parse(string, self.__parser_state) |
| 354 self.__process_parser_state() | 336 self.__process_parser_state() |
| 355 | 337 |
| (...skipping 58 matching lines...) |
| 414 for tree_name, v in parser_state.rules.items(): | 396 for tree_name, v in parser_state.rules.items(): |
| 415 assert v['trees'], "lexer state %s is empty" % tree_name | 397 assert v['trees'], "lexer state %s is empty" % tree_name |
| 416 rule_map[tree_name] = NfaBuilder.or_terms(v['trees']) | 398 rule_map[tree_name] = NfaBuilder.or_terms(v['trees']) |
| 417 # build the automata | 399 # build the automata |
| 418 for name, tree in rule_map.items(): | 400 for name, tree in rule_map.items(): |
| 419 self.__automata[name] = RuleProcessor.Automata( | 401 self.__automata[name] = RuleProcessor.Automata( |
| 420 parser_state.encoding, parser_state.character_classes, rule_map, name) | 402 parser_state.encoding, parser_state.character_classes, rule_map, name) |
| 421 # process default_action | 403 # process default_action |
| 422 default_action = parser_state.rules['default']['default_action'] | 404 default_action = parser_state.rules['default']['default_action'] |
| 423 self.__default_action = Action(Term.empty_term(), default_action) | 405 self.__default_action = Action(Term.empty_term(), default_action) |
| OLD | NEW |