| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 17 matching lines...) |
| 28 import ply.lex as lex | 28 import ply.lex as lex |
| 29 import ply.yacc as yacc | 29 import ply.yacc as yacc |
| 30 from term import Term | 30 from term import Term |
| 31 from regex_parser import RegexParser, ParserBuilder | 31 from regex_parser import RegexParser, ParserBuilder |
| 32 from automaton import Action | 32 from automaton import Action |
| 33 from nfa_builder import NfaBuilder | 33 from nfa_builder import NfaBuilder |
| 34 from dfa import Dfa | 34 from dfa import Dfa |
| 35 from dfa_optimizer import DfaOptimizer | 35 from dfa_optimizer import DfaOptimizer |
| 36 from dfa_minimizer import DfaMinimizer | 36 from dfa_minimizer import DfaMinimizer |
| 37 from transition_key import TransitionKey, KeyEncoding | 37 from transition_key import TransitionKey, KeyEncoding |
| 38 from backtracking_generator import BacktrackingGenerator |
| 38 | 39 |
| 39 class RuleLexer: | 40 class RuleLexer: |
| 40 | 41 |
| 41 tokens = ( | 42 tokens = ( |
| 42 'DEFAULT_ACTION', | 43 'DEFAULT_ACTION', |
| 43 'EPSILON', | 44 'EPSILON', |
| 44 'EOS', | 45 'EOS', |
| 45 'CATCH_ALL', | 46 'CATCH_ALL', |
| 46 | 47 |
| 47 'IDENTIFIER', | 48 'IDENTIFIER', |
| (...skipping 299 matching lines...) |
| 347 | 348 |
| 348 def default_automata(self): | 349 def default_automata(self): |
| 349 return self.__automata['default'] | 350 return self.__automata['default'] |
| 350 | 351 |
| 351 def default_action(self): | 352 def default_action(self): |
| 352 return self.__default_action | 353 return self.__default_action |
| 353 | 354 |
| 354 class Automata(object): | 355 class Automata(object): |
| 355 'a container for the resulting automata, which are lazily built' | 356 'a container for the resulting automata, which are lazily built' |
| 356 | 357 |
| 357 def __init__(self, encoding, character_classes, rule_map, name): | 358 def __init__(self, rule_processor, character_classes, rule_map, name): |
| 358 self.__encoding = encoding | 359 self.__rule_processor = rule_processor |
| 359 self.__character_classes = character_classes | 360 self.__character_classes = character_classes |
| 360 self.__rule_map = rule_map | 361 self.__rule_map = rule_map |
| 361 self.__name = name | 362 self.__name = name |
| 362 self.__nfa = None | 363 self.__nfa = None |
| 363 self.__dfa = None | 364 self.__dfa = None |
| 364 self.__minimal_dfa = None | 365 self.__minimal_dfa = None |
| 365 | 366 |
| 367 def encoding(self): |
| 368 return self.__rule_processor.encoding() |
| 369 |
| 366 def name(self): | 370 def name(self): |
| 367 return self.__name | 371 return self.__name |
| 368 | 372 |
| 369 def rule_term(self): | 373 def rule_term(self): |
| 370 return self.__rule_map[self.__name] | 374 return self.__rule_map[self.__name] |
| 371 | 375 |
| 372 def nfa(self): | 376 def nfa(self): |
| 373 if not self.__nfa: | 377 if not self.__nfa: |
| 374 self.__nfa = NfaBuilder.nfa( | 378 self.__nfa = NfaBuilder.nfa( |
| 375 self.__encoding, self.__character_classes, | 379 self.encoding(), self.__character_classes, |
| 376 self.__rule_map, self.__name) | 380 self.__rule_map, self.__name) |
| 377 return self.__nfa | 381 return self.__nfa |
| 378 | 382 |
| 379 def dfa(self): | 383 def dfa(self): |
| 380 if not self.__dfa: | 384 if not self.__dfa: |
| 381 (start, dfa_nodes) = self.nfa().compute_dfa() | 385 (start, dfa_nodes) = self.nfa().compute_dfa() |
| 382 self.__dfa = Dfa(self.nfa().encoding(), start, dfa_nodes) | 386 dfa = Dfa(self.encoding(), start, dfa_nodes) |
| 387 if self.name() == 'default': |
| 388 dfa = BacktrackingGenerator.generate( |
| 389 dfa, self.__rule_processor.default_action()) |
| 390 self.__dfa = dfa |
| 383 return self.__dfa | 391 return self.__dfa |
| 384 | 392 |
| 385 def optimize_dfa(self, log = False): | 393 def optimize_dfa(self, log = False): |
| 386 assert not self.__dfa | 394 assert not self.__dfa |
| 387 assert not self.__minimal_dfa | 395 assert not self.__minimal_dfa |
| 388 self.__dfa = DfaOptimizer.optimize(self.minimal_dfa(), log) | 396 self.__dfa = DfaOptimizer.optimize(self.minimal_dfa(), log) |
| 389 self.__minimal_dfa = None | 397 self.__minimal_dfa = None |
| 390 | 398 |
| 391 def minimal_dfa(self): | 399 def minimal_dfa(self): |
| 392 if not self.__minimal_dfa: | 400 if not self.__minimal_dfa: |
| 393 self.__minimal_dfa = DfaMinimizer(self.dfa()).minimize() | 401 self.__minimal_dfa = DfaMinimizer(self.dfa()).minimize() |
| 394 return self.__minimal_dfa | 402 return self.__minimal_dfa |
| 395 | 403 |
| 396 def __process_parser_state(self): | 404 def __process_parser_state(self): |
| 397 parser_state = self.__parser_state | 405 parser_state = self.__parser_state |
| 398 assert 'default' in parser_state.rules, "default lexer state required" | 406 assert 'default' in parser_state.rules, "default lexer state required" |
| 399 # Check that we don't transition to a nonexistent state. | 407 # Check that we don't transition to a nonexistent state. |
| 400 assert parser_state.transitions <= set(parser_state.rules.keys()) | 408 assert parser_state.transitions <= set(parser_state.rules.keys()) |
| 401 rule_map = {} | 409 rule_map = {} |
| 402 # process all subgraphs | 410 # process all subgraphs |
| 403 for tree_name, v in parser_state.rules.items(): | 411 for tree_name, v in parser_state.rules.items(): |
| 404 assert v['trees'], "lexer state %s is empty" % tree_name | 412 assert v['trees'], "lexer state %s is empty" % tree_name |
| 405 rule_map[tree_name] = NfaBuilder.or_terms(v['trees']) | 413 rule_map[tree_name] = NfaBuilder.or_terms(v['trees']) |
| 406 # build the automata | 414 # build the automata |
| 407 for name, tree in rule_map.items(): | 415 for name, tree in rule_map.items(): |
| 408 self.__automata[name] = RuleProcessor.Automata( | 416 self.__automata[name] = RuleProcessor.Automata( |
| 409 parser_state.encoding, parser_state.character_classes, rule_map, name) | 417 self, parser_state.character_classes, rule_map, name) |
| 410 # process default_action | 418 # process default_action |
| 411 default_action = parser_state.rules['default']['default_action'] | 419 default_action = parser_state.rules['default']['default_action'] |
| 412 self.__default_action = Action(default_action, 0) | 420 self.__default_action = Action(default_action, 0) |
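
The substance of this patch is the new plumbing in `Automata`: instead of capturing a bare `encoding` at construction time, each automaton now keeps a back-reference to its owning `RuleProcessor`, so the lazy `dfa()` build can reach both the encoding and the default action, and so the `'default'` automaton's DFA can additionally be run through `BacktrackingGenerator.generate`. Below is a minimal, self-contained sketch of that lazy-build-and-cache pattern; `LazyAutomaton`, `FakeProcessor`, `build_dfa`, and `add_backtracking` are hypothetical stand-ins for illustration, not names from the patch:

```python
class LazyAutomaton(object):
  """Sketch of the lazy-build pattern in Automata.dfa() above: build
  the DFA on first access, cache it, and post-process only the
  'default' automaton."""

  def __init__(self, owner, name):
    # 'owner' stands in for the RuleProcessor back-reference that the
    # patched constructor takes instead of a bare encoding.
    self.__owner = owner
    self.__name = name
    self.__dfa = None  # built lazily, at most once

  def dfa(self):
    if self.__dfa is None:
      dfa = self.__owner.build_dfa(self.__name)  # hypothetical builder
      if self.__name == 'default':
        # Only the default automaton gets the backtracking pass,
        # mirroring the BacktrackingGenerator.generate(...) call.
        dfa = self.__owner.add_backtracking(dfa)  # hypothetical
      self.__dfa = dfa
    return self.__dfa


class FakeProcessor(object):
  'hypothetical owner, standing in for RuleProcessor'

  def build_dfa(self, name):
    return ('dfa', name)

  def add_backtracking(self, dfa):
    return ('backtracking',) + dfa


automaton = LazyAutomaton(FakeProcessor(), 'default')
assert automaton.dfa() == ('backtracking', 'dfa', 'default')
assert automaton.dfa() is automaton.dfa()  # cached: built only once
```

Passing `self` rather than individual fields keeps the `Automata` constructor signature stable as more processor-owned data (here, the default action) becomes needed during the lazy build; the trade-off is a reference cycle between `RuleProcessor` and its `Automata` instances, which Python's cycle collector handles.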