| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import ply.lex as lex | 28 import ply.yacc as yacc |
| 29 from rule_lexer import RuleLexer |
| 29 | 30 |
| 30 class RegexLexer: | 31 class RuleParser: |
| 31 | 32 |
| 32 tokens = ( | 33 tokens = RuleLexer.tokens |
| 33 | 34 |
| 34 'GROUP_BEGIN', | 35 aliases = dict() |
| 35 'GROUP_END', | 36 transitions = dict() |
| 36 | 37 |
| 37 'CLASS_BEGIN', | 38 def p_statement_alias(self, p): |
| 38 'CLASS_END', | 39 'statement : ALIAS' |
| 40 name = self.lexer.lexer.lexmatch.group('name') |
| 41 regex = self.lexer.lexer.lexmatch.group('regex') |
| 42 self.aliases[name] = regex |
| 39 | 43 |
| 40 'OR', | 44 def p_statement_condition_transition(self, p): |
| 41 'ONE_OR_MORE', | 45 'statement : CONDITION_TRANSITION' |
| 42 'ZERO_OR_MORE', | 46 old_condition = self.lexer.lexer.lexmatch.group('old') |
| 43 'ZERO_OR_ONE', | 47 regex = self.lexer.lexer.lexmatch.group('regex') |
| 44 'ANY', | 48 new_condition = self.lexer.lexer.lexmatch.group('new') |
| 49 if old_condition not in self.transitions: |
| 50 self.transitions[old_condition] = [] |
| 51 self.transitions[old_condition].append((regex, new_condition)) |
| 45 | 52 |
| 46 'LITERAL', | 53 def p_statement_condition(self, p): |
| 54 'statement : CONDITION' |
| 55 old_condition = self.lexer.lexer.lexmatch.group('old') |
| 56 regex = self.lexer.lexer.lexmatch.group('regex') |
| 57 body = self.lexer.lexer.lexmatch.group('body') |
| 58 if old_condition not in self.transitions: |
| 59 self.transitions[old_condition] = [] |
| 60 self.transitions[old_condition].append((regex, body)) |
| 47 | 61 |
| 48 'RANGE', | 62 def p_empty(self, p): |
| 49 'NOT', | 63 'empty :' |
| 50 'CLASS_LITERAL', | |
| 51 ) | |
| 52 | 64 |
| 53 states = ( | 65 def p_error(self, p): |
| 54 ('class','exclusive'), | 66 raise Exception("Syntax error in input '%s'" % p) |
| 55 ) | |
| 56 | |
| 57 def t_ESCAPED_LITERAL(self, t): | |
| 58 r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\' | |
| 59 t.type = 'LITERAL' | |
| 60 t.value = t.value[1:] | |
| 61 return t | |
| 62 | |
| 63 t_GROUP_BEGIN = r'\(' | |
| 64 t_GROUP_END = r'\)' | |
| 65 | |
| 66 t_OR = r'\|' | |
| 67 t_ONE_OR_MORE = r'\+' | |
| 68 t_ZERO_OR_MORE = r'\*' | |
| 69 t_ZERO_OR_ONE = r'\?' | |
| 70 | |
| 71 t_ANY = r'\.' | |
| 72 | |
| 73 t_LITERAL = r'.' | |
| 74 | |
| 75 def t_CLASS_BEGIN(self, t): | |
| 76 r'\[' | |
| 77 self.lexer.push_state('class') | |
| 78 return t | |
| 79 | |
| 80 def t_class_CLASS_END(self, t): | |
| 81 r'\]' | |
| 82 self.lexer.pop_state() | |
| 83 return t | |
| 84 | |
| 85 t_class_RANGE = '-' | |
| 86 t_class_NOT = '\^' | |
| 87 | |
| 88 def t_class_ESCAPED_CLASS_LITERAL(self, t): | |
| 89 r'\\\^|\\-' | |
| 90 t.type = 'CLASS_LITERAL' | |
| 91 t.value = t.value[1:] | |
| 92 return t | |
| 93 | |
| 94 t_class_CLASS_LITERAL = r'[a-zA-Z]' # fix this | |
| 95 | |
| 96 t_ANY_ignore = '\n' | |
| 97 | |
| 98 def t_ANY_error(self, t): | |
| 99 raise Exception("Illegal character '%s'" % t.value[0]) | |
| 100 | 67 |
| 101 def build(self, **kwargs): | 68 def build(self, **kwargs): |
| 102 self.lexer = lex.lex(module=self, **kwargs) | 69 self.parser = yacc.yacc(module=self, **kwargs) |
| 70 self.lexer = RuleLexer() |
| 71 self.lexer.build(**kwargs) |
| 72 |
| 73 def parse(self, data): |
| 74 return self.parser.parse(data, lexer=self.lexer.lexer) |
| OLD | NEW |