| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 13 matching lines...) Expand all Loading... |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import ply.lex as lex | 28 import ply.lex as lex |
| 29 import ply.yacc as yacc | 29 import ply.yacc as yacc |
| 30 from types import ListType, TupleType | 30 from types import ListType, TupleType |
| 31 from regex_lexer import RegexLexer | 31 from regex_lexer import RegexLexer |
| 32 from action import Term | 32 from action import Term |
| 33 | 33 |
class ParserBuilder:
  """Builds and caches ply lexer/parser pairs.

  parse() constructs the lexer and parser for a given name once, stores the
  pair in a class-level cache, and reuses the pair on subsequent calls.
  """

  class Logger(object):
    """ply logging adapter that silences everything below error level."""

    def debug(self, msg, *args, **kwargs):
      pass

    def info(self, msg, *args, **kwargs):
      pass

    def warning(self, msg, *args, **kwargs):
      pass

    def error(self, msg, *args, **kwargs):
      # Raise explicitly rather than 'assert False': asserts are stripped
      # when Python runs with -O, which would silently swallow ply errors.
      raise AssertionError("error: " + (msg % args) + "\n")

  # name -> (lexer instance, parser instance), shared by all callers.
  __static_instances = {}

  @staticmethod
  def parse(
      string, name, new_lexer, new_parser, preparse=None, postparse=None):
    """Parses *string* with the lexer/parser pair cached under *name*.

    new_lexer/new_parser are zero-argument factories, invoked only on the
    first call for a given name.  preparse/postparse, when given, are called
    with the parser instance immediately before/after parsing.  Returns
    whatever the grammar's start rule produces.
    """
    if name not in ParserBuilder.__static_instances:
      logger = ParserBuilder.Logger()
      lexer_instance = new_lexer()
      lexer_instance.lex = lex.lex(module=lexer_instance)
      instance = new_parser()
      instance.yacc = yacc.yacc(
          module=instance, debug=True, write_tables=0,
          debuglog=logger, errorlog=logger)
      ParserBuilder.__static_instances[name] = (lexer_instance, instance)
    (lexer_instance, instance) = ParserBuilder.__static_instances[name]
    if preparse:
      preparse(instance)
    try:
      return_value = instance.yacc.parse(string, lexer=lexer_instance.lex)
    except Exception:
      # Evict the cached pair so a failed parse does not leave a parser in
      # an inconsistent state for the next caller.
      del ParserBuilder.__static_instances[name]
      raise
    if postparse:
      postparse(instance)
    return return_value
| 74 |
def build_escape_map(chars):
  """Returns a dict mapping backslash-escaped characters to their values.

  Starts from the standard whitespace escapes (\\t, \\r, \\n, \\v, \\f) and
  adds an entry '\\' + c -> c for every character c in *chars*.
  """
  # Explicit loop instead of reduce(): the reduce builtin was removed from
  # the global namespace in Python 3 (it now lives in functools).
  escape_map = {
      '\\t': '\t', '\\r': '\r', '\\n': '\n', '\\v': '\v', '\\f': '\f'}
  for char in chars:
    escape_map['\\' + char] = char
  return escape_map
| 40 | 81 |
| 41 class RegexLexer: | 82 class RegexLexer: |
| 42 | 83 |
| 43 tokens = ( | 84 tokens = ( |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
  # Postfix repetition operators.
  t_ONE_OR_MORE = r'\+'
  t_ZERO_OR_MORE = r'\*'
  t_ZERO_OR_ONE = r'\?'

  # '.' in the input regex: match-any-character.
  t_ANY = r'\.'

  # Fallback: any single character not claimed by a more specific rule
  # (ply tries function rules and longer string patterns first).
  t_LITERAL = r'.'
| 95 | 136 |
  def t_CLASS_BEGIN(self, t):
    r'\['
    # '[' opens a character class; switch into the 'class' lexer state so
    # the t_class_* rules apply until the matching ']'.  self.lex is the
    # ply lexer attached externally by ParserBuilder.parse.
    self.lex.push_state('class')
    return t
| 100 | 141 |
  def t_class_CLASS_END(self, t):
    r'\]'
    # ']' closes the character class; return to the previous lexer state.
    self.lex.pop_state()
    return t
| 105 | 146 |
  # Tokens recognized only inside a character class ('class' lexer state).
  t_class_RANGE = '-'
  t_class_NOT = '\^'
  t_class_CHARACTER_CLASS = r':\w+:'  # named class, e.g. :alpha: or :digit:
| 109 | 150 |
  def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
    r'\\\d+'
    # Backslash followed by digits (e.g. \101); the token value is passed
    # through unconverted — any numeric decoding happens downstream.
    return t
| 113 | 154 |
  # Maps the escaped forms ('\^', '\[', ...) of class metacharacters to
  # their literal values, plus the standard whitespace escapes.
  __escaped_class_literals = build_escape_map("^[]-:\\")

  def t_class_ESCAPED_CLASS_LITERAL(self, t):
    r'\\.'
    # Unescape the character and retag the token as a plain CLASS_LITERAL.
    # NOTE(review): an escape outside the map raises KeyError here.
    t.type = 'CLASS_LITERAL'
    t.value = RegexLexer.__escaped_class_literals[t.value]
    return t

  # Unescaped literal characters permitted inside a class.
  t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'
| 123 | 164 |
  def t_REPEAT_BEGIN(self, t):
    r'\{'
    # '{' opens a {m,n} repetition count; switch into the 'repeat' state.
    self.lex.push_state('repeat')
    return t
| 128 | 169 |
  def t_repeat_REPEAT_END(self, t):
    r'\}'
    # '}' closes the repetition count; return to the previous lexer state.
    self.lex.pop_state()
    return t
| 133 | 174 |
  # Tokens recognized inside a {m,n} repetition ('repeat' lexer state).
  t_repeat_NUMBER = r'[0-9]+'
  t_repeat_COMMA = r','

  # ply's ANY_ prefix: applies in every lexer state — newlines are ignored.
  t_ANY_ignore = '\n'

  def t_ANY_error(self, t):
    # Any character unmatched in any state is a hard lexing error.
    raise Exception("Illegal character '%s'" % t.value[0])
| 141 | 182 |
| 142 def build(self, **kwargs): | |
| 143 self.lexer = lex.lex(module=self, **kwargs) | |
| 144 | |
| 145 class RegexParser: | 183 class RegexParser: |
| 146 | 184 |
| 147 tokens = RegexLexer.tokens | 185 tokens = RegexLexer.tokens |
| 148 | 186 |
| 149 token_map = { | 187 token_map = { |
| 150 '+': 'ONE_OR_MORE', | 188 '+': 'ONE_OR_MORE', |
| 151 '?': 'ZERO_OR_ONE', | 189 '?': 'ZERO_OR_ONE', |
| 152 '*': 'ZERO_OR_MORE', | 190 '*': 'ZERO_OR_MORE', |
| 153 '|': 'OR', | 191 '|': 'OR', |
| 154 '.': 'ANY', | 192 '.': 'ANY', |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 261 'empty :' | 299 'empty :' |
| 262 | 300 |
  def p_error(self, p):
    # ply error hook: fail loudly on any syntax error (p is the offending
    # token, or None at end of input).
    raise Exception("Syntax error in input '%s'" % str(p))
| 265 | 303 |
| 266 @staticmethod | 304 @staticmethod |
| 267 def __cat(left, right): | 305 def __cat(left, right): |
| 268 assert left | 306 assert left |
| 269 return left if not right else Term('CAT', left, right) | 307 return left if not right else Term('CAT', left, right) |
| 270 | 308 |
| 271 def build(self, **kwargs): | |
| 272 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs) | |
| 273 self.lexer = RegexLexer() | |
| 274 self.lexer.build(**kwargs) | |
| 275 | |
| 276 __static_instance = None | |
| 277 @staticmethod | 309 @staticmethod |
| 278 def parse(data): | 310 def parse(string): |
| 279 parser = RegexParser.__static_instance | 311 new_lexer = lambda: RegexLexer() |
| 280 if not parser: | 312 new_parser = lambda: RegexParser() |
| 281 parser = RegexParser() | 313 return ParserBuilder.parse(string, "RegexParser", new_lexer, new_parser) |
| 282 parser.build() | |
| 283 RegexParser.__static_instance = parser | |
| 284 try: | |
| 285 return parser.parser.parse(data, lexer=parser.lexer.lexer) | |
| 286 except Exception: | |
| 287 RegexParser.__static_instance = None | |
| 288 raise | |
| OLD | NEW |