| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import ply.lex as lex | 28 import ply.lex as lex |
| 29 | 29 |
def build_escape_map(chars):
    """Return a dict mapping two-character escape sequences to their values.

    The result always holds the five whitespace escapes: a backslash followed
    by t, r, n, v or f, each mapped to the matching control character.
    Every character in `chars` then adds an entry mapping backslash + that
    character to the character itself. Such an entry replaces a whitespace
    escape that has the same key.

    Args:
      chars: iterable of single-character strings to make escapable.

    Returns:
      A freshly built dict; callers may mutate it without affecting others.
    """
    escape_map = {
        '\\t': '\t',
        '\\r': '\r',
        '\\n': '\n',
        '\\v': '\v',
        '\\f': '\f',
    }
    # A plain loop instead of reduce(): reduce is not a builtin on Python 3,
    # and the loop yields the same dict on both major versions.
    for char in chars:
        escape_map['\\' + char] = char
    return escape_map
| 36 |
| 30 class RegexLexer: | 37 class RegexLexer: |
| 31 | 38 |
| 32 tokens = ( | 39 tokens = ( |
| 33 | 40 |
| 34 'GROUP_BEGIN', | 41 'GROUP_BEGIN', |
| 35 'GROUP_END', | 42 'GROUP_END', |
| 36 | 43 |
| 37 'CLASS_BEGIN', | 44 'CLASS_BEGIN', |
| 38 'CLASS_END', | 45 'CLASS_END', |
| 39 | 46 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 55 'CLASS_LITERAL', | 62 'CLASS_LITERAL', |
| 56 'CLASS_LITERAL_AS_OCTAL', | 63 'CLASS_LITERAL_AS_OCTAL', |
| 57 'CHARACTER_CLASS', | 64 'CHARACTER_CLASS', |
| 58 ) | 65 ) |
| 59 | 66 |
# Additional lexer states declared to PLY; both are exclusive. The rules
# whose names start with t_class_ and t_repeat_ belong to these states.
states = (
    ('class', 'exclusive'),
    ('repeat', 'exclusive'),
)

# Escape table used outside character classes: the whitespace escapes that
# build_escape_map always provides, plus each regex metacharacter listed here.
__escaped_literals = build_escape_map("(){}[]?+.*|\\")
| 73 |
def t_ESCAPED_LITERAL(self, t):
    r'\\.'
    # The docstring above is this rule's regex: a backslash followed by any
    # single character other than a newline.
    # The token is re-tagged as a plain LITERAL whose value is the character
    # the escape stands for. An escape sequence that is missing from the
    # table raises KeyError.
    escape_sequence = t.value
    t.type = 'LITERAL'
    t.value = RegexLexer.__escaped_literals[escape_sequence]
    return t
| 70 | 79 |
# Grouping parentheses.
t_GROUP_BEGIN = r'\('
t_GROUP_END = r'\)'

# Alternation and the single-character repetition operators.
t_OR = r'\|'
t_ONE_OR_MORE = r'\+'
t_ZERO_OR_MORE = r'\*'
t_ZERO_OR_ONE = r'\?'
| 78 | 87 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 91 return t | 100 return t |
| 92 | 101 |
# String rules for the 'class' state (the "class_" part of each name).
t_class_RANGE = '-'
# Raw string: '\^' in a plain literal is an invalid escape sequence that
# newer Python versions warn about. The regex text itself is unchanged.
t_class_NOT = r'\^'
# A name wrapped in colons, e.g. ":name:".
t_class_CHARACTER_CLASS = r':\w+:'
| 96 | 105 |
def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
    r'\\\d+'
    # The docstring above is this rule's regex: a backslash followed by one
    # or more decimal digits. The token is returned with its matched text
    # untouched; no conversion or digit-range check happens here.
    # NOTE(review): PLY tries function rules in definition order, so this
    # rule is presumably meant to win over the generic escaped-literal rule
    # defined after it -- confirm before reordering.
    return t
| 100 | 109 |
# Escape table used inside character classes: the whitespace escapes that
# build_escape_map always provides, plus escaped '^', '[', ']', '-' and ':'.
__escaped_class_literals = build_escape_map("^[]-:")
| 105 | 111 |
def t_class_ESCAPED_CLASS_LITERAL(self, t):
    r'\\.'
    # The docstring above is this rule's regex: a backslash followed by any
    # single character other than a newline.
    # The token becomes a CLASS_LITERAL whose value is the character the
    # escape stands for. An escape sequence that is missing from the
    # class escape table raises KeyError.
    escape_sequence = t.value
    t.type = 'CLASS_LITERAL'
    t.value = RegexLexer.__escaped_class_literals[escape_sequence]
    return t
| 111 | 117 |
# An unescaped class member: one word character, space, '$', '_' or '+'.
t_class_CLASS_LITERAL = r'[\w $_+]'
| 113 | 119 |
def t_REPEAT_BEGIN(self, t):
    r'\{'
    # The docstring above is this rule's regex: an opening brace.
    # Seeing it pushes the 'repeat' state onto the lexer's state stack, and
    # the brace token itself is still returned.
    lexer = self.lexer
    lexer.push_state('repeat')
    return t
| 118 | 124 |
def t_repeat_REPEAT_END(self, t):
    r'\}'
    # The docstring above is this rule's regex: a closing brace, recognized
    # in the 'repeat' state.
    # Seeing it pops the lexer's state stack, and the brace token itself is
    # still returned.
    lexer = self.lexer
    lexer.pop_state()
    return t
| 123 | 129 |
# String rules for the 'repeat' state: runs of decimal digits and commas.
t_repeat_NUMBER = r'[0-9]+'
t_repeat_COMMA = r','

# Characters ignored by the lexer; the ANY prefix applies this to all states.
t_ANY_ignore = '\n'
| 128 | 134 |
def t_ANY_error(self, t):
    # Error handler shared by all states (ANY prefix): abort lexing with an
    # exception that names the first character of the unmatched input.
    offending_char = t.value[0]
    raise Exception("Illegal character '%s'" % offending_char)
| 131 | 137 |
def build(self, **kwargs):
    """Create the PLY lexer for this object and store it on self.lexer.

    Args:
      **kwargs: forwarded unchanged to ply.lex.lex().
    """
    lexer = lex.lex(module=self, **kwargs)
    self.lexer = lexer
| OLD | NEW |