| Index: tools/lexer_generator/automata_test.py
|
| diff --git a/tools/lexer_generator/automata_test.py b/tools/lexer_generator/automata_test.py
|
| index a81adb23a6e75b3f50bfb6b6c84fe389f3a0d017..158a602543206cc3665370340c63e34a52644b44 100644
|
| --- a/tools/lexer_generator/automata_test.py
|
| +++ b/tools/lexer_generator/automata_test.py
|
| @@ -28,70 +28,78 @@
|
| import unittest
|
| from automaton import Action
|
| from regex_parser import RegexParser
|
| -from transition_keys import TransitionKey
|
| +from transition_keys import TransitionKey, KeyEncoding
|
| from nfa_builder import NfaBuilder
|
| from dfa import Dfa
|
|
|
| class AutomataTestCase(unittest.TestCase):
|
|
|
| + __encoding = KeyEncoding.get('latin1')
|
| +
|
| @staticmethod
|
| def __build_automata(string):
|
| - nfa = NfaBuilder().nfa(RegexParser.parse(string))
|
| + encoding = AutomataTestCase.__encoding
|
| + trees = {'main' : RegexParser.parse(string)}
|
| + nfa = NfaBuilder.nfa(encoding, {}, trees, 'main')
|
| (start_name, dfa_nodes) = nfa.compute_dfa()
|
| - dfa = Dfa(start_name, dfa_nodes)
|
| + dfa = Dfa(encoding, start_name, dfa_nodes)
|
| return (nfa, dfa, dfa.minimize())
|
|
|
| - # (pattern, should match, should not match)
|
| - __test_data = [
|
| - ("a", ["a"], ["b", ""]),
|
| - ("ab", ["ab"], ["bb", ""]),
|
| - ("a+b", ["ab", "aab", "aaab"], ["a", "b", ""]),
|
| - ("a?b", ["ab", "b"], ["a", "c", ""]),
|
| - ("a*b", ["ab", "aaab", "b"], ["a", "c", ""]),
|
| - ("a|b", ["a", "b"], ["ab", "c", ""]),
|
| - (".", ["a", "b"], ["", "aa"]),
|
| - (".*", ["", "a", "abcaabbcc"], []),
|
| - ("a.b", ["aab", "abb", "acb"], ["ab", ""]),
|
| - ("a.?b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
|
| - ("a.+b", ["aab", "abb", "acb"], ["aaac", "ab", ""]),
|
| - (".|.", ["a", "b"], ["aa", ""]),
|
| - ("//.", ["//a"], ["aa", ""]),
|
| - ("[ab]{2}", ["aa", "ab", "ba", "bb"], ["", "a", "b", "aaa", "bbb"]),
|
| - ("[ab]{2,3}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb"],
|
| - ["", "a", "b", "aaaa", "bbba"]),
|
| - ("[ab]{2,4}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb", "abab"],
|
| - ["", "a", "b", "aaaba", "bbbaa"]),
|
| - ("[\\101]", ["A"], ["B"])
|
| - ]
|
| + # (pattern, should match, should not match)
|
| + __test_data = [
|
| + ("a", ["a"], ["b", ""]),
|
| + ("ab", ["ab"], ["bb", ""]),
|
| + ("a+b", ["ab", "aab", "aaab"], ["a", "b", ""]),
|
| + ("a?b", ["ab", "b"], ["a", "c", ""]),
|
| + ("a*b", ["ab", "aaab", "b"], ["a", "c", ""]),
|
| + ("a|b", ["a", "b"], ["ab", "c", ""]),
|
| + (".", ["a", "b"], ["", "aa"]),
|
| + (".*", ["", "a", "abcaabbcc"], []),
|
| + ("a.b", ["aab", "abb", "acb"], ["ab", ""]),
|
| + ("a.?b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
|
| + ("a.+b", ["aab", "abb", "acb"], ["aaac", "ab", ""]),
|
| + (".|.", ["a", "b"], ["aa", ""]),
|
| + ("//.", ["//a"], ["aa", ""]),
|
| + ("[ab]{2}", ["aa", "ab", "ba", "bb"], ["", "a", "b", "aaa", "bbb"]),
|
| + ("[ab]{2,3}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb"],
|
| + ["", "a", "b", "aaaa", "bbba"]),
|
| + ("[ab]{2,4}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb", "abab"],
|
| + ["", "a", "b", "aaaba", "bbbaa"]),
|
| + ("[\\101]", ["A"], ["B"])
|
| + ]
|
|
|
| - def test_matches(self):
|
| - for (regex, matches, not_matches) in self.__test_data:
|
| - automata = self.__build_automata(regex)
|
| - for string in matches:
|
| - for automaton in automata:
|
| - self.assertTrue(automaton.matches(string))
|
| - for string in not_matches:
|
| - for automaton in automata:
|
| - self.assertFalse(automaton.matches(string))
|
| + def test_matches(self):
|
| + for (regex, matches, not_matches) in self.__test_data:
|
| + automata = self.__build_automata(regex)
|
| + for string in matches:
|
| + for automaton in automata:
|
| + self.assertTrue(automaton.matches(string))
|
| + for string in not_matches:
|
| + for automaton in automata:
|
| + self.assertFalse(automaton.matches(string))
|
|
|
| - def test_can_construct_dot(self):
|
| - for (regex, matches, not_matches) in self.__test_data:
|
| - for automaton in self.__build_automata(regex):
|
| - automaton.to_dot()
|
| + def test_can_construct_dot(self):
|
| + for (regex, matches, not_matches) in self.__test_data:
|
| + for automaton in self.__build_automata(regex):
|
| + automaton.to_dot()
|
|
|
| - def test_minimization(self):
|
| - def empty_node():
|
| - return { 'transitions' : {}, 'terminal' : False, 'action' : None }
|
| - mapping = { k : empty_node() for k in ['S_0', 'S_1', 'S_2', 'S_3'] }
|
| - key_a = TransitionKey.single_char('a')
|
| - key_b = TransitionKey.single_char('b')
|
| - key_c = TransitionKey.single_char('c')
|
| + def test_minimization(self):
|
| + encoding = self.__encoding
|
| + def empty_node():
|
| + return {
|
| + 'transitions' : {},
|
| + 'terminal' : False,
|
| + 'action' : Action.empty_action() }
|
| + mapping = { k : empty_node() for k in ['S_0', 'S_1', 'S_2', 'S_3'] }
|
| + key_a = TransitionKey.single_char(encoding, 'a')
|
| + key_b = TransitionKey.single_char(encoding, 'b')
|
| + key_c = TransitionKey.single_char(encoding, 'c')
|
|
|
| - mapping['S_0']['transitions'][key_a] = 'S_1'
|
| - mapping['S_0']['transitions'][key_b] = 'S_2'
|
| - mapping['S_1']['transitions'][key_c] = 'S_3'
|
| - mapping['S_2']['transitions'][key_c] = 'S_3'
|
| - mapping['S_3']['terminal'] = True
|
| + mapping['S_0']['transitions'][key_a] = 'S_1'
|
| + mapping['S_0']['transitions'][key_b] = 'S_2'
|
| + mapping['S_1']['transitions'][key_c] = 'S_3'
|
| + mapping['S_2']['transitions'][key_c] = 'S_3'
|
| + mapping['S_3']['terminal'] = True
|
|
|
| - mdfa = Dfa('S_0', mapping).minimize()
|
| - self.assertEqual(3, mdfa.node_count())
|
| + mdfa = Dfa(encoding, 'S_0', mapping).minimize()
|
| + self.assertEqual(3, mdfa.node_count())
|
|
|