tools/lexer_generator/automata_test.py - Issue 158823002: Experimental parser: refactor TransitionKey to use Term

Unified Diff: tools/lexer_generator/automata_test.py

Issue 158823002: Experimental parser: refactor TransitionKey to use Term (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are visible only to you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/lexer_generator/automata_test.py

diff --git a/tools/lexer_generator/automata_test.py b/tools/lexer_generator/automata_test.py

index a81adb23a6e75b3f50bfb6b6c84fe389f3a0d017..158a602543206cc3665370340c63e34a52644b44 100644

--- a/tools/lexer_generator/automata_test.py

+++ b/tools/lexer_generator/automata_test.py

@@ -28,70 +28,78 @@

import unittest

from automaton import Action

from regex_parser import RegexParser

-from transition_keys import TransitionKey

+from transition_keys import TransitionKey, KeyEncoding

from nfa_builder import NfaBuilder

from dfa import Dfa

class AutomataTestCase(unittest.TestCase):

+ __encoding = KeyEncoding.get('latin1')

@staticmethod

def __build_automata(string):

- nfa = NfaBuilder().nfa(RegexParser.parse(string))

+ encoding = AutomataTestCase.__encoding

+ trees = {'main' : RegexParser.parse(string)}

+ nfa = NfaBuilder.nfa(encoding, {}, trees, 'main')

(start_name, dfa_nodes) = nfa.compute_dfa()

- dfa = Dfa(start_name, dfa_nodes)

+ dfa = Dfa(encoding, start_name, dfa_nodes)

return (nfa, dfa, dfa.minimize())

- # (pattern, should match, should not match)

- __test_data = [

- ("a", ["a"], ["b", ""]),

- ("ab", ["ab"], ["bb", ""]),

- ("a+b", ["ab", "aab", "aaab"], ["a", "b", ""]),

- ("a?b", ["ab", "b"], ["a", "c", ""]),

- ("a*b", ["ab", "aaab", "b"], ["a", "c", ""]),

- ("a|b", ["a", "b"], ["ab", "c", ""]),

- (".", ["a", "b"], ["", "aa"]),

- (".*", ["", "a", "abcaabbcc"], []),

- ("a.b", ["aab", "abb", "acb"], ["ab", ""]),

- ("a.?b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),

- ("a.+b", ["aab", "abb", "acb"], ["aaac", "ab", ""]),

- (".|.", ["a", "b"], ["aa", ""]),

- ("//.", ["//a"], ["aa", ""]),

- ("[ab]{2}", ["aa", "ab", "ba", "bb"], ["", "a", "b", "aaa", "bbb"]),

- ("[ab]{2,3}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb"],

- ["", "a", "b", "aaaa", "bbba"]),

- ("[ab]{2,4}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb", "abab"],

- ["", "a", "b", "aaaba", "bbbaa"]),

- ("[\\101]", ["A"], ["B"])

- ]

+ # (pattern, should match, should not match)

+ __test_data = [

+ ("a", ["a"], ["b", ""]),

+ ("ab", ["ab"], ["bb", ""]),

+ ("a+b", ["ab", "aab", "aaab"], ["a", "b", ""]),

+ ("a?b", ["ab", "b"], ["a", "c", ""]),

+ ("a*b", ["ab", "aaab", "b"], ["a", "c", ""]),

+ ("a|b", ["a", "b"], ["ab", "c", ""]),

+ (".", ["a", "b"], ["", "aa"]),

+ (".*", ["", "a", "abcaabbcc"], []),

+ ("a.b", ["aab", "abb", "acb"], ["ab", ""]),

+ ("a.?b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),

+ ("a.+b", ["aab", "abb", "acb"], ["aaac", "ab", ""]),

+ (".|.", ["a", "b"], ["aa", ""]),

+ ("//.", ["//a"], ["aa", ""]),

+ ("[ab]{2}", ["aa", "ab", "ba", "bb"], ["", "a", "b", "aaa", "bbb"]),

+ ("[ab]{2,3}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb"],

+ ["", "a", "b", "aaaa", "bbba"]),

+ ("[ab]{2,4}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb", "abab"],

+ ["", "a", "b", "aaaba", "bbbaa"]),

+ ("[\\101]", ["A"], ["B"])

+ ]

- def test_matches(self):

- for (regex, matches, not_matches) in self.__test_data:

- automata = self.__build_automata(regex)

- for string in matches:

- for automaton in automata:

- self.assertTrue(automaton.matches(string))

- for string in not_matches:

- for automaton in automata:

- self.assertFalse(automaton.matches(string))

+ def test_matches(self):

+ for (regex, matches, not_matches) in self.__test_data:

+ automata = self.__build_automata(regex)

+ for string in matches:

+ for automaton in automata:

+ self.assertTrue(automaton.matches(string))

+ for string in not_matches:

+ for automaton in automata:

+ self.assertFalse(automaton.matches(string))

- def test_can_construct_dot(self):

- for (regex, matches, not_matches) in self.__test_data:

- for automaton in self.__build_automata(regex):

- automaton.to_dot()

+ def test_can_construct_dot(self):

+ for (regex, matches, not_matches) in self.__test_data:

+ for automaton in self.__build_automata(regex):

+ automaton.to_dot()

- def test_minimization(self):

- def empty_node():

- return { 'transitions' : {}, 'terminal' : False, 'action' : None }

- mapping = { k : empty_node() for k in ['S_0', 'S_1', 'S_2', 'S_3'] }

- key_a = TransitionKey.single_char('a')

- key_b = TransitionKey.single_char('b')

- key_c = TransitionKey.single_char('c')

+ def test_minimization(self):

+ encoding = self.__encoding

+ def empty_node():

+ return {

+ 'transitions' : {},

+ 'terminal' : False,

+ 'action' : Action.empty_action() }

+ mapping = { k : empty_node() for k in ['S_0', 'S_1', 'S_2', 'S_3'] }

+ key_a = TransitionKey.single_char(encoding, 'a')

+ key_b = TransitionKey.single_char(encoding, 'b')

+ key_c = TransitionKey.single_char(encoding, 'c')

- mapping['S_0']['transitions'][key_a] = 'S_1'

- mapping['S_0']['transitions'][key_b] = 'S_2'

- mapping['S_1']['transitions'][key_c] = 'S_3'

- mapping['S_2']['transitions'][key_c] = 'S_3'

- mapping['S_3']['terminal'] = True

+ mapping['S_0']['transitions'][key_a] = 'S_1'

+ mapping['S_0']['transitions'][key_b] = 'S_2'

+ mapping['S_1']['transitions'][key_c] = 'S_3'

+ mapping['S_2']['transitions'][key_c] = 'S_3'

+ mapping['S_3']['terminal'] = True

- mdfa = Dfa('S_0', mapping).minimize()

- self.assertEqual(3, mdfa.node_count())

+ mdfa = Dfa(encoding, 'S_0', mapping).minimize()

+ self.assertEqual(3, mdfa.node_count())

« no previous file with comments | « tools/lexer_generator/action.py ('k') | tools/lexer_generator/code_generator.py » ('j') | no next file with comments »