| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import ply.lex as lex | 28 import ply.lex as lex |
| 29 import ply.yacc as yacc | 29 import ply.yacc as yacc |
| 30 from types import ListType, TupleType | |
| 31 from regex_lexer import RegexLexer | |
| 32 from action import Term | 30 from action import Term |
| | 31 from nfa_builder import NfaBuilder |
| 33 | 32 |
| 34 class ParserBuilder: | 33 class ParserBuilder: |
| 35 | 34 |
| 36 class Logger(object): | 35 class Logger(object): |
| 37 def debug(self,msg,*args,**kwargs): | 36 def debug(self,msg,*args,**kwargs): |
| 38 pass | 37 pass |
| 39 | 38 |
| 40 def info(self,msg,*args,**kwargs): | 39 def info(self,msg,*args,**kwargs): |
| 41 pass | 40 pass |
| 42 | 41 |
| 43 def warning(self,msg,*args,**kwargs): | 42 def warning(self,msg,*args,**kwargs): |
| 44 pass | 43 raise Exception("warning: "+ (msg % args) + "\n") |
| 45 # assert False, "warning: "+ (msg % args) + "\n" | |
| 46 | 44 |
| 47 def error(self,msg,*args,**kwargs): | 45 def error(self,msg,*args,**kwargs): |
| 48 assert False, "error: "+ (msg % args) + "\n" | 46 raise Exception("error: "+ (msg % args) + "\n") |
| 49 | 47 |
| 50 __static_instances = {} | 48 __static_instances = {} |
| 51 @staticmethod | 49 @staticmethod |
| 52 def parse( | 50 def parse( |
| 53 string, name, new_lexer, new_parser, preparse = None, postparse = None): | 51 string, name, new_lexer, new_parser, preparse = None, postparse = None): |
| 54 if not name in ParserBuilder.__static_instances: | 52 if not name in ParserBuilder.__static_instances: |
| 55 logger = ParserBuilder.Logger() | 53 logger = ParserBuilder.Logger() |
| 56 lexer_instance = new_lexer() | 54 lexer_instance = new_lexer() |
| 57 lexer_instance.lex = lex.lex(module=lexer_instance) | 55 lexer_instance.lex = lex.lex(module=lexer_instance) |
| 58 instance = new_parser() | 56 instance = new_parser() |
| (...skipping 39 matching lines...) |
| 98 'REPEAT_BEGIN', | 96 'REPEAT_BEGIN', |
| 99 'REPEAT_END', | 97 'REPEAT_END', |
| 100 | 98 |
| 101 'NUMBER', | 99 'NUMBER', |
| 102 'COMMA', | 100 'COMMA', |
| 103 'LITERAL', | 101 'LITERAL', |
| 104 | 102 |
| 105 'RANGE', | 103 'RANGE', |
| 106 'NOT', | 104 'NOT', |
| 107 'CLASS_LITERAL', | 105 'CLASS_LITERAL', |
| 108 'CLASS_LITERAL_AS_OCTAL', | |
| 109 'CHARACTER_CLASS', | 106 'CHARACTER_CLASS', |
| 110 ) | 107 ) |
| 111 | 108 |
| 112 states = ( | 109 states = ( |
| 113 ('class','exclusive'), | 110 ('class','exclusive'), |
| 114 ('repeat','exclusive'), | 111 ('repeat','exclusive'), |
| 115 ) | 112 ) |
| 116 | 113 |
| 117 __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\") | 114 __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\") |
| 118 | 115 |
| (...skipping 24 matching lines...) |
| 143 r'\]' | 140 r'\]' |
| 144 self.lex.pop_state() | 141 self.lex.pop_state() |
| 145 return t | 142 return t |
| 146 | 143 |
| 147 t_class_RANGE = '-' | 144 t_class_RANGE = '-' |
| 148 t_class_NOT = '\^' | 145 t_class_NOT = '\^' |
| 149 t_class_CHARACTER_CLASS = r':\w+:' | 146 t_class_CHARACTER_CLASS = r':\w+:' |
| 150 | 147 |
| 151 def t_class_CLASS_LITERAL_AS_OCTAL(self, t): | 148 def t_class_CLASS_LITERAL_AS_OCTAL(self, t): |
| 152 r'\\\d+' | 149 r'\\\d+' |
| | 150 t.type = 'CLASS_LITERAL' |
| | 151 t.value = chr(int(t.value[1:], 8)) |
| 153 return t | 152 return t |
| 154 | 153 |
| 155 __escaped_class_literals = build_escape_map("^[]-:\\") | 154 __escaped_class_literals = build_escape_map("^[]-:\\") |
| 156 | 155 |
| 157 def t_class_ESCAPED_CLASS_LITERAL(self, t): | 156 def t_class_ESCAPED_CLASS_LITERAL(self, t): |
| 158 r'\\.' | 157 r'\\.' |
| 159 t.type = 'CLASS_LITERAL' | 158 t.type = 'CLASS_LITERAL' |
| 160 t.value = RegexLexer.__escaped_class_literals[t.value] | 159 t.value = RegexLexer.__escaped_class_literals[t.value] |
| 161 return t | 160 return t |
| 162 | 161 |
| (...skipping 18 matching lines...) |
| 181 raise Exception("Illegal character '%s'" % t.value[0]) | 180 raise Exception("Illegal character '%s'" % t.value[0]) |
| 182 | 181 |
| 183 class RegexParser: | 182 class RegexParser: |
| 184 | 183 |
| 185 tokens = RegexLexer.tokens | 184 tokens = RegexLexer.tokens |
| 186 | 185 |
| 187 token_map = { | 186 token_map = { |
| 188 '+': 'ONE_OR_MORE', | 187 '+': 'ONE_OR_MORE', |
| 189 '?': 'ZERO_OR_ONE', | 188 '?': 'ZERO_OR_ONE', |
| 190 '*': 'ZERO_OR_MORE', | 189 '*': 'ZERO_OR_MORE', |
| 191 '|': 'OR', | |
| 192 '.': 'ANY', | 190 '.': 'ANY', |
| 193 } | 191 } |
| 194 | 192 |
| 195 def p_start(self, p): | 193 def p_start(self, p): |
| 196 '''start : fragments OR fragments | 194 '''start : fragments OR fragments |
| 197 | fragments''' | 195 | fragments''' |
| 198 if len(p) == 2: | 196 if len(p) == 2: |
| 199 p[0] = p[1] | 197 p[0] = p[1] |
| 200 else: | 198 else: |
| 201 p[0] = Term(self.token_map[p[2]], p[1], p[3]) | 199 p[0] = NfaBuilder.or_terms([p[1], p[3]]) |
| 202 | 200 |
| 203 def p_fragments(self, p): | 201 def p_fragments(self, p): |
| 204 '''fragments : fragment | 202 '''fragments : fragment |
| 205 | fragment fragments''' | 203 | fragment fragments''' |
| 206 if len(p) == 2: | 204 if len(p) == 2: |
| 207 p[0] = p[1] | 205 p[0] = p[1] |
| 208 else: | 206 else: |
| 209 p[0] = self.__cat(p[1], p[2]) | 207 p[0] = self.__cat(p[1], p[2]) |
| 210 | 208 |
| 211 def p_fragment(self, p): | 209 def p_fragment(self, p): |
| 212 '''fragment : literal_array maybe_modifier | 210 '''fragment : literal maybe_modifier |
| 213 | class maybe_modifier | 211 | class maybe_modifier |
| 214 | group maybe_modifier | 212 | group maybe_modifier |
| 215 | any maybe_modifier | 213 | any maybe_modifier |
| 216 ''' | 214 ''' |
| 217 if p[2] != None: | 215 if p[2]: |
| 218 if isinstance(p[2], tuple) and p[2][0] == 'REPEAT': | 216 if p[2][0] == 'REPEAT': |
| 219 p[0] = Term(p[2][0], p[2][1], p[2][2], p[1]) | 217 p[0] = Term(p[2][0], p[2][1], p[2][2], p[1]) |
| 220 else: | 218 else: |
| 221 p[0] = Term(p[2], p[1]) | 219 p[0] = Term(p[2][0], p[1]) |
| 222 else: | 220 else: |
| 223 p[0] = p[1] | 221 p[0] = p[1] |
| 224 | 222 |
| 225 def p_maybe_modifier(self, p): | 223 def p_maybe_modifier(self, p): |
| 226 '''maybe_modifier : ONE_OR_MORE | 224 '''maybe_modifier : ONE_OR_MORE |
| 227 | ZERO_OR_ONE | 225 | ZERO_OR_ONE |
| 228 | ZERO_OR_MORE | 226 | ZERO_OR_MORE |
| 229 | repetition | 227 | REPEAT_BEGIN NUMBER REPEAT_END |
| | 228 | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END |
| 230 | empty''' | 229 | empty''' |
| 231 p[0] = p[1] | |
| 232 if p[1] in self.token_map: | |
| 233 p[0] = self.token_map[p[1]] | |
| 234 | |
| 235 def p_repetition(self, p): | |
| 236 '''repetition : REPEAT_BEGIN NUMBER REPEAT_END | |
| 237 | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END''' | |
| 238 if len(p) == 4: | 230 if len(p) == 4: |
| 239 p[0] = ("REPEAT", p[2], p[2]) | 231 p[0] = ("REPEAT", p[2], p[2]) |
| | 232 elif len(p) == 6: |
| | 233 p[0] = ("REPEAT", p[2], p[4]) |
| | 234 elif p[1]: |
| | 235 p[0] = (self.token_map[p[1]],) |
| 240 else: | 236 else: |
| 241 p[0] = ("REPEAT", p[2], p[4]) | 237 p[0] = None |
| 242 | 238 |
| 243 def p_literal_array(self, p): | 239 def p_literal(self, p): |
| 244 '''literal_array : literals''' | 240 '''literal : LITERAL''' |
| 245 p[0] = Term('LITERAL', ''.join(reversed(p[1]))) | 241 p[0] = Term('LITERAL', p[1]) |
| 246 | |
| 247 def p_literals(self, p): | |
| 248 '''literals : LITERAL maybe_literals''' | |
| 249 if not p[2]: | |
| 250 p[0] = [p[1]] | |
| 251 else: | |
| 252 p[2].append(p[1]) | |
| 253 p[0] = p[2] | |
| 254 | |
| 255 def p_maybe_literals(self, p): | |
| 256 '''maybe_literals : literals | |
| 257 | empty''' | |
| 258 p[0] = p[1] | |
| 259 | 242 |
| 260 def p_any(self, p): | 243 def p_any(self, p): |
| 261 '''any : ANY''' | 244 '''any : ANY''' |
| 262 p[0] = Term(self.token_map[p[1]]) | 245 p[0] = Term(self.token_map[p[1]]) |
| 263 | 246 |
| 264 def p_class(self, p): | 247 def p_class(self, p): |
| 265 '''class : CLASS_BEGIN class_content CLASS_END | 248 '''class : CLASS_BEGIN class_content CLASS_END |
| 266 | CLASS_BEGIN NOT class_content CLASS_END''' | 249 | CLASS_BEGIN NOT class_content CLASS_END''' |
| 267 if len(p) == 4: | 250 if len(p) == 4: |
| 268 p[0] = Term("CLASS", p[2]) | 251 p[0] = Term("CLASS", p[2]) |
| 269 else: | 252 else: |
| 270 p[0] = Term("NOT_CLASS", p[3]) | 253 p[0] = Term("NOT_CLASS", p[3]) |
| 271 | 254 |
| 272 def p_group(self, p): | 255 def p_group(self, p): |
| 273 '''group : GROUP_BEGIN start GROUP_END''' | 256 '''group : GROUP_BEGIN start GROUP_END''' |
| 274 p[0] = p[2] | 257 p[0] = p[2] |
| 275 | 258 |
| 276 def p_class_content(self, p): | 259 def p_class_content(self, p): |
| 277 '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content | 260 '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content |
| 278 | CLASS_LITERAL maybe_class_content | 261 | CLASS_LITERAL maybe_class_content |
| 279 | CHARACTER_CLASS maybe_class_content | 262 | CHARACTER_CLASS maybe_class_content |
| 280 | CLASS_LITERAL_AS_OCTAL maybe_class_content | |
| 281 ''' | 263 ''' |
| 282 if len(p) == 5: | 264 if len(p) == 5: |
| 283 left = Term("RANGE", p[1], p[3]) | 265 left = Term("RANGE", p[1], p[3]) |
| 284 else: | 266 else: |
| 285 if len(p[1]) == 1: | 267 if len(p[1]) == 1: |
| 286 left = Term('LITERAL', p[1]) | 268 left = Term('LITERAL', p[1]) |
| 287 elif p[1][0] == '\\': | |
| 288 left = Term('LITERAL', chr(int(p[1][1:], 8))) | |
| 289 else: | 269 else: |
| 290 left = Term('CHARACTER_CLASS', p[1][1:-1]) | 270 left = Term('CHARACTER_CLASS', p[1][1:-1]) |
| 291 p[0] = self.__cat(left, p[len(p)-1]) | 271 p[0] = self.__cat(left, p[len(p)-1]) |
| 292 | 272 |
| 293 def p_maybe_class_content(self, p): | 273 def p_maybe_class_content(self, p): |
| 294 '''maybe_class_content : class_content | 274 '''maybe_class_content : class_content |
| 295 | empty''' | 275 | empty''' |
| 296 p[0] = p[1] | 276 p[0] = p[1] |
| 297 | 277 |
| 298 def p_empty(self, p): | 278 def p_empty(self, p): |
| 299 'empty :' | 279 'empty :' |
| 300 | 280 |
| 301 def p_error(self, p): | 281 def p_error(self, p): |
| 302 raise Exception("Syntax error in input '%s'" % str(p)) | 282 raise Exception("Syntax error in input '%s'" % str(p)) |
| 303 | 283 |
| 304 @staticmethod | 284 @staticmethod |
| 305 def __cat(left, right): | 285 def __cat(left, right): |
| 306 assert left | 286 assert left |
| 307 return left if not right else Term('CAT', left, right) | 287 return NfaBuilder.cat_terms([left] if not right else [left, right]) |
| 308 | 288 |
| 309 @staticmethod | 289 @staticmethod |
| 310 def parse(string): | 290 def parse(string): |
| 311 new_lexer = lambda: RegexLexer() | 291 new_lexer = lambda: RegexLexer() |
| 312 new_parser = lambda: RegexParser() | 292 new_parser = lambda: RegexParser() |
| 313 return ParserBuilder.parse(string, "RegexParser", new_lexer, new_parser) | 293 return ParserBuilder.parse(string, "RegexParser", new_lexer, new_parser) |
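
Below is a minimal, self-contained sketch of the shapes the revised grammar is expected to produce. It deliberately does not import the patch's real `action.Term` or `nfa_builder.NfaBuilder`; the `Term`, `cat_terms`, and `or_terms` stand-ins here are assumptions that only mirror how this change combines fragments (per-character LITERAL terms joined through `cat_terms`, alternation routed through `or_terms`, `{n,m}` folded into a `("REPEAT", min, max)` tuple, and octal class escapes decoded at lex time).

```python
# Sketch only: assumed stand-ins for the patch's Term / NfaBuilder API.
from collections import namedtuple

Term = namedtuple('Term', ['name', 'args'])   # stand-in for action.Term

def cat_terms(terms):
    # Concatenation as the new RegexParser.__cat uses it: a single fragment
    # passes through unchanged, two fragments become one CAT node.
    return terms[0] if len(terms) == 1 else Term('CAT', tuple(terms))

def or_terms(terms):
    # Alternation as p_start now builds it via NfaBuilder.or_terms.
    return Term('OR', tuple(terms))

# 'ab': each character is its own LITERAL fragment; the grammar CATs them.
a, b = Term('LITERAL', ('a',)), Term('LITERAL', ('b',))
print(cat_terms([a, b]))      # CAT of two LITERAL terms

# 'a|b': the OR entry is gone from token_map; alternation goes through or_terms.
print(or_terms([a, b]))

# 'a{2,4}': p_maybe_modifier now yields a ("REPEAT", min, max) tuple directly.
print(("REPEAT", 2, 4))

# '[\101]': the lexer decodes octal escapes into plain CLASS_LITERALs,
# so CLASS_LITERAL_AS_OCTAL never reaches the parser.
print(chr(int('101', 8)))     # -> 'A'
```

With the real classes on the path, the entry point remains `RegexParser.parse(string)`, which goes through `ParserBuilder.parse` and appears to cache one lexer/parser pair per parser name in `__static_instances`.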