| Index: tools/lexer_generator/nfa.py
|
| diff --git a/tools/lexer_generator/nfa.py b/tools/lexer_generator/nfa.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..586920974b0b2943d59cecb5854825c5fb1777f1
|
| --- /dev/null
|
| +++ b/tools/lexer_generator/nfa.py
|
| @@ -0,0 +1,335 @@
|
| +# Copyright 2013 the V8 project authors. All rights reserved.
|
| +# Redistribution and use in source and binary forms, with or without
|
| +# modification, are permitted provided that the following conditions are
|
| +# met:
|
| +#
|
| +# * Redistributions of source code must retain the above copyright
|
| +# notice, this list of conditions and the following disclaimer.
|
| +# * Redistributions in binary form must reproduce the above
|
| +# copyright notice, this list of conditions and the following
|
| +# disclaimer in the documentation and/or other materials provided
|
| +# with the distribution.
|
| +# * Neither the name of Google Inc. nor the names of its
|
| +# contributors may be used to endorse or promote products derived
|
| +# from this software without specific prior written permission.
|
| +#
|
| +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| +
|
| +from types import TupleType
|
| +from inspect import getmembers
|
| +
|
| +
|
| +class NfaState:
|
| +
|
| + __epsilon_key = "epsilon"
|
| + __any_key = "any"
|
| +
|
| + @staticmethod
|
| + def epsilon_key():
|
| + return NfaState.__epsilon_key
|
| +
|
| + @staticmethod
|
| + def any_key():
|
| + return NfaState.__any_key
|
| +
|
| + def __init__(self, node_number):
|
| + self.__transitions = {}
|
| + self.__unclosed = set()
|
| + self.__node_number = node_number
|
| + self.__epsilon_closure = None
|
| +
|
| + def node_number(self):
|
| + return self.__node_number
|
| +
|
| + def epsilon_closure(self):
|
| + return self.__epsilon_closure
|
| +
|
| + def set_epsilon_closure(self, closure):
|
| + assert self.is_closed()
|
| + assert self.__epsilon_closure == None
|
| + self.__epsilon_closure = frozenset(closure)
|
| +
|
| + def transitions(self):
|
| + assert self.is_closed()
|
| + return self.__transitions
|
| +
|
| + def get_transition_set(self, key):
|
| + if not self.__transitions.has_key(key): return frozenset()
|
| + return frozenset(self.__transitions[key])
|
| +
|
| + def __add_transition(self, key, value):
|
| + if value == None:
|
| + assert not self.is_closed(), "already closed"
|
| + self.__unclosed.add(key)
|
| + return
|
| + if not self.__transitions.has_key(key):
|
| + self.__transitions[key] = set()
|
| + self.__transitions[key].add(value)
|
| +
|
| + def add_character_transition(self, char):
|
| + self.__add_transition(char, None)
|
| +
|
| + def add_any_transition(self):
|
| + self.__add_transition(self.__any_key, None)
|
| +
|
| + def add_class_transition(self, class_data, inverted):
|
| + self.__add_transition('class_data', None)
|
| +
|
| + def add_epsilon_transition(self, state):
|
| + assert state != None
|
| + self.__add_transition(self.__epsilon_key, state)
|
| +
|
| + def is_closed(self):
|
| + return self.__unclosed == None
|
| +
|
| + def close(self, end):
|
| + assert not self.is_closed()
|
| + if end == None:
|
| + assert not self.__unclosed
|
| + else:
|
| + for key in self.__unclosed:
|
| + self.__add_transition(key, end)
|
| + if not self.__unclosed:
|
| + self.add_epsilon_transition(end)
|
| + self.__unclosed = None
|
| +
|
| + def matches(self, char):
|
| + matches = self.get_transition_set(char)
|
| + matches = matches.union(self.get_transition_set(self.__any_key))
|
| + # TODO class matches
|
| + return matches
|
| +
|
| + @staticmethod
|
| + def gather_transition_keys(state_set):
|
| + keys = set()
|
| + for state in state_set:
|
| + for key, values in state.transitions().items():
|
| + if key == NfaState.__epsilon_key: continue
|
| + keys.add(key)
|
| + return keys
|
| +
|
| +class NfaBuilder:
|
| +
|
| + def __init__(self):
|
| + self.__operation_map = {}
|
| + self.__members = getmembers(self)
|
| +
|
| + def __new_state(self):
|
| + node_number = self.node_number
|
| + self.node_number = self.node_number + 1
|
| + return NfaState(node_number)
|
| +
|
| + def __or(self, graph):
|
| + start = self.__new_state()
|
| + ends = []
|
| + for x in [self.__process(graph[1]), self.__process(graph[2])]:
|
| + start.add_epsilon_transition(x[0])
|
| + ends += x[1]
|
| + start.close(None)
|
| + return (start, ends)
|
| +
|
| + def __one_or_more(self, graph):
|
| + (start, ends) = self.__process(graph[1])
|
| + end = self.__new_state()
|
| + end.add_epsilon_transition(start)
|
| + self.__patch_ends(ends, end)
|
| + return (start, [end])
|
| +
|
| + def __zero_or_more(self, graph):
|
| + (node, ends) = self.__process(graph[1])
|
| + start = self.__new_state()
|
| + start.add_epsilon_transition(node)
|
| + self.__patch_ends(ends, start)
|
| + return (start, [start])
|
| +
|
| + def __zero_or_one(self, graph):
|
| + (node, ends) = self.__process(graph[1])
|
| + start = self.__new_state()
|
| + start.add_epsilon_transition(node)
|
| + return (start, ends + [start])
|
| +
|
| + def __cat(self, graph):
|
| + (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
|
| + self.__patch_ends(left[1], right[0])
|
| + return (left[0], right[1])
|
| +
|
| + def __literal(self, graph):
|
| + contents = graph[1]
|
| + state = self.__new_state()
|
| + state.add_character_transition(contents)
|
| + return (state, [state])
|
| +
|
| + def __class(self, graph):
|
| + state = self.__new_state()
|
| + state.add_class_transition(graph[1], False)
|
| + return (state, [state])
|
| +
|
| + def __not_class(self, graph):
|
| + state = self.__new_state()
|
| + state.add_class_transition(graph[1], True)
|
| + return (state, [state])
|
| +
|
| + def __any(self, graph):
|
| + state = self.__new_state()
|
| + state.add_any_transition()
|
| + return (state, [state])
|
| +
|
| + def __process(self, graph):
|
| + assert type(graph) == TupleType
|
| + method = "_NfaBuilder__" + graph[0].lower()
|
| + if (not self.__operation_map.has_key(method)):
|
| + matches = [func for (name, func) in self.__members if name == method]
|
| + assert len(matches) == 1
|
| + self.__operation_map[method] = matches[0]
|
| + return self.__operation_map[method](graph)
|
| +
|
| + def __patch_ends(self, ends, new_end):
|
| + for end in ends:
|
| + end.close(new_end)
|
| +
|
| + def nfa(self, graph):
|
| + self.node_number = 0
|
| + (start, ends) = self.__process(graph)
|
| + end = self.__new_state()
|
| + self.__patch_ends(ends, end)
|
| + end.close(None)
|
| + return Nfa(start, end, self.node_number)
|
| +
|
| +
|
| +class Nfa:
|
| +
|
| + def __init__(self, start, end, nodes_created):
|
| + self.__start = start
|
| + self.__end = end
|
| + self.__epsilon_closure_computed = False
|
| + self.__verify(nodes_created)
|
| +
|
| + @staticmethod
|
| + def __visit_edges(start, include_start, just_epsilon, function, state):
|
| +
|
| + def compute_next_edge(node, next_edge):
|
| + if just_epsilon:
|
| + return next_edge.union(node.get_transition_set(node.epsilon_key()))
|
| + else:
|
| + for key, values in node.transitions().items():
|
| + next_edge = next_edge.union(values)
|
| + return next_edge
|
| +
|
| + edge = set()
|
| + if include_start:
|
| + edge.add(start)
|
| + else:
|
| + edge = compute_next_edge(start, edge)
|
| +
|
| + visited = set()
|
| + while edge:
|
| + next_edge = set()
|
| + for node in edge:
|
| + next_edge = compute_next_edge(node, next_edge)
|
| + state = function(node, state)
|
| + visited = visited.union(edge)
|
| + edge = next_edge.difference(visited)
|
| + return state
|
| +
|
| + def __visit_all_edges(self, function, state):
|
| + return Nfa.__visit_edges(self.__start, True, False, function, state)
|
| +
|
| + def __verify(self, nodes_created):
|
| + def f(node, node_list):
|
| + assert node.is_closed()
|
| + node_list.append(node)
|
| + return node_list
|
| + node_list = self.__visit_all_edges(f, [])
|
| + assert len(node_list) == nodes_created
|
| +
|
| + def __compute_epsilon_closures(self):
|
| + if self.__epsilon_closure_computed:
|
| + return
|
| + self.__epsilon_closure_computed = True
|
| + def outer(node, state):
|
| + def inner(node, closure):
|
| + closure.add(node)
|
| + return closure
|
| + closure = Nfa.__visit_edges(node, False, True, inner, set())
|
| + node.set_epsilon_closure(closure)
|
| + self.__visit_all_edges(outer, None)
|
| +
|
| + @staticmethod
|
| + def __close(states):
|
| + for node in states:
|
| + states = states.union(node.epsilon_closure())
|
| + return states
|
| +
|
| + def matches(self, string):
|
| + self.__compute_epsilon_closures()
|
| + valid_states = Nfa.__close(set([self.__start]))
|
| + for c in string:
|
| + next_valid_states = set()
|
| + for valid_state in valid_states:
|
| + next_valid_states = next_valid_states.union(valid_state.matches(c))
|
| + valid_states = Nfa.__close(next_valid_states)
|
| + if not valid_states:
|
| + return False
|
| + return self.__end in valid_states
|
| +
|
| + @staticmethod
|
| + def __to_dfa(nfa_state_set, dfa_builder):
|
| + nfa_state_set = Nfa.__close(nfa_state_set)
|
| + name = str([x.node_number() for x in nfa_state_set])
|
| + (dfa_nodes, end_nodes, end_node) = dfa_builder
|
| + if dfa_nodes.has_key(name):
|
| + return name
|
| + dfa_node = {}
|
| + dfa_nodes[name] = dfa_node
|
| + for key in NfaState.gather_transition_keys(nfa_state_set):
|
| + reachable_set = set()
|
| + for nfa_state in nfa_state_set:
|
| + reachable_set = reachable_set | nfa_state.matches(key)
|
| + dfa_node[key] = Nfa.__to_dfa(reachable_set, dfa_builder)
|
| + if end_node in nfa_state_set:
|
| + end_nodes.add(name)
|
| + return name
|
| +
|
| + def compute_dfa(self):
|
| + self.__compute_epsilon_closures()
|
| + dfa_nodes = {}
|
| + end_nodes = set()
|
| + dfa_builder = (dfa_nodes, end_nodes, self.__end)
|
| + start_name = self.__to_dfa(set([self.__start]), dfa_builder)
|
| + return (start_name, dfa_nodes, end_nodes)
|
| +
|
| + def to_dot(self):
|
| +
|
| + def f(node, node_content):
|
| + for key, values in node.transitions().items():
|
| + if key == node.epsilon_key(): key = "ε"
|
| + if key != node.epsilon_key() and len(key) == 1:
|
| + key = "'%s'" % key
|
| + for value in values:
|
| + node_content.append(
|
| + " S_%d -> S_%d [ label = \"%s\" ];" %
|
| + (node.node_number(), value.node_number(), key))
|
| + return node_content
|
| +
|
| + node_content = self.__visit_all_edges(f, [])
|
| +
|
| + return '''
|
| +digraph finite_state_machine {
|
| + rankdir=LR;
|
| + node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s
|
| + node [shape = doublecircle, style=unfilled]; S_%s
|
| + node [shape = circle];
|
| +%s
|
| +}
|
| + ''' % (self.__start.node_number(), self.__end.node_number(), "\n".join(node_content))
|
| +
|
|
|