| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import ply.lex as lex |
| 28 import ply.yacc as yacc | 29 import ply.yacc as yacc |
| 29 from types import ListType, TupleType | 30 from types import ListType, TupleType |
| 30 from regex_lexer import RegexLexer | 31 from regex_lexer import RegexLexer |
| 31 from action import Term | 32 from action import Term |
| 32 | 33 |
def build_escape_map(chars):
  """Return a dict mapping escape sequences to the characters they denote.

  The map always starts with the five whitespace escapes (backslash
  followed by t, r, n, v or f, mapped to the corresponding control
  character). For every character c in `chars` an entry mapping
  backslash + c to c itself is then added.

  Args:
    chars: iterable of single-character strings that may be escaped with a
      backslash to obtain the literal character.

  Returns:
    A new dict on every call; callers may mutate it freely.
  """
  escape_map = {
    '\\t': '\t',
    '\\r': '\r',
    '\\n': '\n',
    '\\v': '\v',
    '\\f': '\f',
  }
  # Entries from `chars` are applied after the defaults, so a character such
  # as 't' in `chars` overrides the built-in whitespace escape for it.
  for char in chars:
    escape_map['\\' + char] = char
  return escape_map
| 40 |
class RegexLexer:
  """PLY lexer rules for the regular-expression syntax used by this module.

  The class declares the token names, two additional lexer states ('class'
  and 'repeat', both declared exclusive) and the t_* rules for each state.
  Call build() to create the underlying PLY lexer, which is then available
  as self.lexer.

  NOTE: PLY takes the regex of a function rule from that function's
  docstring, so the t_* methods below must not be given ordinary
  docstrings; they are documented with comments instead. The relative order
  of the function rules is also significant to PLY and is kept as-is.
  """

  tokens = (

    'GROUP_BEGIN',
    'GROUP_END',

    'CLASS_BEGIN',
    'CLASS_END',

    'OR',
    'ONE_OR_MORE',
    'ZERO_OR_MORE',
    'ZERO_OR_ONE',
    'ANY',

    'REPEAT_BEGIN',
    'REPEAT_END',

    'NUMBER',
    'COMMA',
    'LITERAL',

    'RANGE',
    'NOT',
    'CLASS_LITERAL',
    'CLASS_LITERAL_AS_OCTAL',
    'CHARACTER_CLASS',
  )

  states = (
    ('class','exclusive'),
    ('repeat','exclusive'),
  )

  # Escape sequences accepted outside of [...] and {...}.
  __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")

  # A backslash followed by any character becomes a LITERAL token whose
  # value is looked up in the escape table. An escape that is not in the
  # table raises KeyError from the lookup.
  def t_ESCAPED_LITERAL(self, t):
    r'\\.'
    t.type = 'LITERAL'
    t.value = RegexLexer.__escaped_literals[t.value]
    return t

  t_GROUP_BEGIN = r'\('
  t_GROUP_END = r'\)'

  t_OR = r'\|'
  t_ONE_OR_MORE = r'\+'
  t_ZERO_OR_MORE = r'\*'
  t_ZERO_OR_ONE = r'\?'

  t_ANY = r'\.'

  t_LITERAL = r'.'

  # '[' opens a character class: emit CLASS_BEGIN and enter the 'class'
  # state.
  def t_CLASS_BEGIN(self, t):
    r'\['
    self.lexer.push_state('class')
    return t

  # ']' inside a character class: emit CLASS_END and return to the state
  # that was active before the class was opened.
  def t_class_CLASS_END(self, t):
    r'\]'
    self.lexer.pop_state()
    return t

  t_class_RANGE = '-'
  # Raw string: '\^' in a normal literal is an unrecognized escape sequence.
  t_class_NOT = r'\^'
  t_class_CHARACTER_CLASS = r':\w+:'

  # Backslash followed by digits inside a class; the token value is passed
  # through unchanged (backslash included). Defined before the generic
  # escaped-literal rule below.
  def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
    r'\\\d+'
    return t

  # Escape sequences accepted inside [...].
  __escaped_class_literals = build_escape_map("^[]-:\\")

  # A backslash followed by any character inside a class becomes a
  # CLASS_LITERAL token whose value is looked up in the class escape table.
  # An escape that is not in the table raises KeyError from the lookup.
  def t_class_ESCAPED_CLASS_LITERAL(self, t):
    r'\\.'
    t.type = 'CLASS_LITERAL'
    t.value = RegexLexer.__escaped_class_literals[t.value]
    return t

  t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'

  # '{' opens a repeat specification: emit REPEAT_BEGIN and enter the
  # 'repeat' state.
  def t_REPEAT_BEGIN(self, t):
    r'\{'
    self.lexer.push_state('repeat')
    return t

  # '}' closes the repeat specification and restores the previous state.
  def t_repeat_REPEAT_END(self, t):
    r'\}'
    self.lexer.pop_state()
    return t

  t_repeat_NUMBER = r'[0-9]+'
  t_repeat_COMMA = r','

  t_ANY_ignore = '\n'

  # Error rule shared by all states: abort on the first character that no
  # rule matches.
  def t_ANY_error(self, t):
    raise Exception("Illegal character '%s'" % t.value[0])

  def build(self, **kwargs):
    """Create the PLY lexer from this instance's rules.

    Args:
      **kwargs: forwarded unchanged to lex.lex().

    The resulting lexer is stored in self.lexer; the rule methods that
    switch state rely on that attribute being set.
    """
    self.lexer = lex.lex(module=self, **kwargs)
| 144 |
| 33 class RegexParser: | 145 class RegexParser: |
| 34 | 146 |
| 35 tokens = RegexLexer.tokens | 147 tokens = RegexLexer.tokens |
| 36 | 148 |
| 37 token_map = { | 149 token_map = { |
| 38 '+': 'ONE_OR_MORE', | 150 '+': 'ONE_OR_MORE', |
| 39 '?': 'ZERO_OR_ONE', | 151 '?': 'ZERO_OR_ONE', |
| 40 '*': 'ZERO_OR_MORE', | 152 '*': 'ZERO_OR_MORE', |
| 41 '|': 'OR', | 153 '|': 'OR', |
| 42 '.': 'ANY', | 154 '.': 'ANY', |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 154 parser = RegexParser.__static_instance | 266 parser = RegexParser.__static_instance |
| 155 if not parser: | 267 if not parser: |
| 156 parser = RegexParser() | 268 parser = RegexParser() |
| 157 parser.build() | 269 parser.build() |
| 158 RegexParser.__static_instance = parser | 270 RegexParser.__static_instance = parser |
| 159 try: | 271 try: |
| 160 return parser.parser.parse(data, lexer=parser.lexer.lexer) | 272 return parser.parser.parse(data, lexer=parser.lexer.lexer) |
| 161 except Exception: | 273 except Exception: |
| 162 RegexParser.__static_instance = None | 274 RegexParser.__static_instance = None |
| 163 raise | 275 raise |
| OLD | NEW |