| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import ply.yacc as yacc | 28 import ply.yacc as yacc |
| 29 from types import ListType, TupleType |
| 29 from regex_lexer import RegexLexer | 30 from regex_lexer import RegexLexer |
| 30 from types import ListType, TupleType | 31 from action import Term |
| 31 | 32 |
| 32 class RegexParser: | 33 class RegexParser: |
| 33 | 34 |
| 34 tokens = RegexLexer.tokens | 35 tokens = RegexLexer.tokens |
| 35 | 36 |
| 36 token_map = { | 37 token_map = { |
| 37 '+': 'ONE_OR_MORE', | 38 '+': 'ONE_OR_MORE', |
| 38 '?': 'ZERO_OR_ONE', | 39 '?': 'ZERO_OR_ONE', |
| 39 '*': 'ZERO_OR_MORE', | 40 '*': 'ZERO_OR_MORE', |
| 40 '|': 'OR', | 41 '|': 'OR', |
| 41 '.': 'ANY', | 42 '.': 'ANY', |
| 42 } | 43 } |
| 43 | 44 |
| 44 def p_start(self, p): | 45 def p_start(self, p): |
| 45 '''start : fragments OR fragments | 46 '''start : fragments OR fragments |
| 46 | fragments''' | 47 | fragments''' |
| 47 if len(p) == 2: | 48 if len(p) == 2: |
| 48 p[0] = p[1] | 49 p[0] = p[1] |
| 49 else: | 50 else: |
| 50 p[0] = (self.token_map[p[2]], p[1], p[3]) | 51 p[0] = Term(self.token_map[p[2]], p[1], p[3]) |
| 51 | 52 |
| 52 def p_fragments(self, p): | 53 def p_fragments(self, p): |
| 53 '''fragments : fragment | 54 '''fragments : fragment |
| 54 | fragment fragments''' | 55 | fragment fragments''' |
| 55 if len(p) == 2: | 56 if len(p) == 2: |
| 56 p[0] = p[1] | 57 p[0] = p[1] |
| 57 else: | 58 else: |
| 58 p[0] = self.__cat(p[1], p[2]) | 59 p[0] = self.__cat(p[1], p[2]) |
| 59 | 60 |
| 60 def p_fragment(self, p): | 61 def p_fragment(self, p): |
| 61 '''fragment : literal maybe_modifier | 62 '''fragment : literal maybe_modifier |
| 62 | class maybe_modifier | 63 | class maybe_modifier |
| 63 | group maybe_modifier | 64 | group maybe_modifier |
| 64 | any maybe_modifier | 65 | any maybe_modifier |
| 65 ''' | 66 ''' |
| 66 if p[2] != None: | 67 if p[2] != None: |
| 67 if isinstance(p[2], tuple) and p[2][0] == 'REPEAT': | 68 if isinstance(p[2], tuple) and p[2][0] == 'REPEAT': |
| 68 p[0] = (p[2][0], p[2][1], p[2][2], p[1]) | 69 p[0] = Term(p[2][0], p[2][1], p[2][2], p[1]) |
| 69 else: | 70 else: |
| 70 p[0] = (p[2], p[1]) | 71 p[0] = Term(p[2], p[1]) |
| 71 else: | 72 else: |
| 72 p[0] = p[1] | 73 p[0] = p[1] |
| 73 | 74 |
| 74 def p_maybe_modifier(self, p): | 75 def p_maybe_modifier(self, p): |
| 75 '''maybe_modifier : ONE_OR_MORE | 76 '''maybe_modifier : ONE_OR_MORE |
| 76 | ZERO_OR_ONE | 77 | ZERO_OR_ONE |
| 77 | ZERO_OR_MORE | 78 | ZERO_OR_MORE |
| 78 | repetition | 79 | repetition |
| 79 | empty''' | 80 | empty''' |
| 80 p[0] = p[1] | 81 p[0] = p[1] |
| 81 if p[1] in self.token_map: | 82 if p[1] in self.token_map: |
| 82 p[0] = self.token_map[p[1]] | 83 p[0] = self.token_map[p[1]] |
| 83 | 84 |
| 84 def p_repetition(self, p): | 85 def p_repetition(self, p): |
| 85 '''repetition : REPEAT_BEGIN NUMBER REPEAT_END | 86 '''repetition : REPEAT_BEGIN NUMBER REPEAT_END |
| 86 | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END''' | 87 | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END''' |
| 87 if len(p) == 4: | 88 if len(p) == 4: |
| 88 p[0] = ("REPEAT", p[2], p[2]) | 89 p[0] = ("REPEAT", p[2], p[2]) |
| 89 else: | 90 else: |
| 90 p[0] = ("REPEAT", p[2], p[4]) | 91 p[0] = ("REPEAT", p[2], p[4]) |
| 91 | 92 |
| 92 def p_literal(self, p): | 93 def p_literal(self, p): |
| 93 '''literal : LITERAL''' | 94 '''literal : LITERAL''' |
| 94 p[0] = ('LITERAL', p[1]) | 95 p[0] = Term('LITERAL', p[1]) |
| 95 | 96 |
| 96 def p_any(self, p): | 97 def p_any(self, p): |
| 97 '''any : ANY''' | 98 '''any : ANY''' |
| 98 p[0] = (self.token_map[p[1]],) | 99 p[0] = Term(self.token_map[p[1]]) |
| 99 | 100 |
| 100 def p_class(self, p): | 101 def p_class(self, p): |
| 101 '''class : CLASS_BEGIN class_content CLASS_END | 102 '''class : CLASS_BEGIN class_content CLASS_END |
| 102 | CLASS_BEGIN NOT class_content CLASS_END''' | 103 | CLASS_BEGIN NOT class_content CLASS_END''' |
| 103 if len(p) == 4: | 104 if len(p) == 4: |
| 104 p[0] = ("CLASS", p[2]) | 105 p[0] = Term("CLASS", p[2]) |
| 105 else: | 106 else: |
| 106 p[0] = ("NOT_CLASS", p[3]) | 107 p[0] = Term("NOT_CLASS", p[3]) |
| 107 | 108 |
| 108 def p_group(self, p): | 109 def p_group(self, p): |
| 109 '''group : GROUP_BEGIN start GROUP_END''' | 110 '''group : GROUP_BEGIN start GROUP_END''' |
| 110 p[0] = p[2] | 111 p[0] = p[2] |
| 111 | 112 |
| 112 def p_class_content(self, p): | 113 def p_class_content(self, p): |
| 113 '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content | 114 '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content |
| 114 | CLASS_LITERAL maybe_class_content | 115 | CLASS_LITERAL maybe_class_content |
| 115 | CHARACTER_CLASS maybe_class_content | 116 | CHARACTER_CLASS maybe_class_content |
| 116 | CLASS_LITERAL_AS_OCTAL maybe_class_content | 117 | CLASS_LITERAL_AS_OCTAL maybe_class_content |
| 117 ''' | 118 ''' |
| 118 if len(p) == 5: | 119 if len(p) == 5: |
| 119 left = ("RANGE", p[1], p[3]) | 120 left = Term("RANGE", p[1], p[3]) |
| 120 else: | 121 else: |
| 121 if len(p[1]) == 1: | 122 if len(p[1]) == 1: |
| 122 left = ('LITERAL', p[1]) | 123 left = Term('LITERAL', p[1]) |
| 123 elif p[1][0] == '\\': | 124 elif p[1][0] == '\\': |
| 124 left = ('LITERAL', chr(int(p[1][1:], 8))) | 125 left = Term('LITERAL', chr(int(p[1][1:], 8))) |
| 125 else: | 126 else: |
| 126 left = ('CHARACTER_CLASS', p[1][1:-1]) | 127 left = Term('CHARACTER_CLASS', p[1][1:-1]) |
| 127 p[0] = self.__cat(left, p[len(p)-1]) | 128 p[0] = self.__cat(left, p[len(p)-1]) |
| 128 | 129 |
| 129 def p_maybe_class_content(self, p): | 130 def p_maybe_class_content(self, p): |
| 130 '''maybe_class_content : class_content | 131 '''maybe_class_content : class_content |
| 131 | empty''' | 132 | empty''' |
| 132 p[0] = p[1] | 133 p[0] = p[1] |
| 133 | 134 |
| 134 def p_empty(self, p): | 135 def p_empty(self, p): |
| 135 'empty :' | 136 'empty :' |
| 136 | 137 |
| 137 def p_error(self, p): | 138 def p_error(self, p): |
| 138 raise Exception("Syntax error in input '%s'" % str(p)) | 139 raise Exception("Syntax error in input '%s'" % str(p)) |
| 139 | 140 |
| 140 @staticmethod | 141 @staticmethod |
| 141 def __cat(left, right): | 142 def __cat(left, right): |
| 142 if right == None: | 143 assert left |
| 143 return left | 144 return left if not right else Term('CAT', left, right) |
| 144 return ('CAT', left, right) | |
| 145 | 145 |
| 146 def build(self, **kwargs): | 146 def build(self, **kwargs): |
| 147 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs) | 147 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs) |
| 148 self.lexer = RegexLexer() | 148 self.lexer = RegexLexer() |
| 149 self.lexer.build(**kwargs) | 149 self.lexer.build(**kwargs) |
| 150 | 150 |
| 151 __static_instance = None | 151 __static_instance = None |
| 152 @staticmethod | 152 @staticmethod |
| 153 def parse(data): | 153 def parse(data): |
| 154 parser = RegexParser.__static_instance | 154 parser = RegexParser.__static_instance |
| 155 if not parser: | 155 if not parser: |
| 156 parser = RegexParser() | 156 parser = RegexParser() |
| 157 parser.build() | 157 parser.build() |
| 158 RegexParser.__static_instance = parser | 158 RegexParser.__static_instance = parser |
| 159 try: | 159 try: |
| 160 return parser.parser.parse(data, lexer=parser.lexer.lexer) | 160 return parser.parser.parse(data, lexer=parser.lexer.lexer) |
| 161 except Exception: | 161 except Exception: |
| 162 RegexParser.__static_instance = None | 162 RegexParser.__static_instance = None |
| 163 raise | 163 raise |
| OLD | NEW |