Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1245)

Unified Diff: tools/lexer_generator/nfa.py

Issue 50873003: Experimental Parser: add lexer generator (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/regex_lexer.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/nfa.py
diff --git a/tools/lexer_generator/nfa.py b/tools/lexer_generator/nfa.py
new file mode 100644
index 0000000000000000000000000000000000000000..586920974b0b2943d59cecb5854825c5fb1777f1
--- /dev/null
+++ b/tools/lexer_generator/nfa.py
@@ -0,0 +1,335 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from types import TupleType
+from inspect import getmembers
+
+
+class NfaState:
+
+ __epsilon_key = "epsilon"
+ __any_key = "any"
+
+ @staticmethod
+ def epsilon_key():
+ return NfaState.__epsilon_key
+
+ @staticmethod
+ def any_key():
+ return NfaState.__any_key
+
+ def __init__(self, node_number):
+ self.__transitions = {}
+ self.__unclosed = set()
+ self.__node_number = node_number
+ self.__epsilon_closure = None
+
+ def node_number(self):
+ return self.__node_number
+
+ def epsilon_closure(self):
+ return self.__epsilon_closure
+
+ def set_epsilon_closure(self, closure):
+ assert self.is_closed()
+ assert self.__epsilon_closure == None
+ self.__epsilon_closure = frozenset(closure)
+
+ def transitions(self):
+ assert self.is_closed()
+ return self.__transitions
+
+ def get_transition_set(self, key):
+ if not self.__transitions.has_key(key): return frozenset()
+ return frozenset(self.__transitions[key])
+
+ def __add_transition(self, key, value):
+ if value == None:
+ assert not self.is_closed(), "already closed"
+ self.__unclosed.add(key)
+ return
+ if not self.__transitions.has_key(key):
+ self.__transitions[key] = set()
+ self.__transitions[key].add(value)
+
+ def add_character_transition(self, char):
+ self.__add_transition(char, None)
+
+ def add_any_transition(self):
+ self.__add_transition(self.__any_key, None)
+
+ def add_class_transition(self, class_data, inverted):
+ self.__add_transition('class_data', None)
+
+ def add_epsilon_transition(self, state):
+ assert state != None
+ self.__add_transition(self.__epsilon_key, state)
+
+ def is_closed(self):
+ return self.__unclosed == None
+
+ def close(self, end):
+ assert not self.is_closed()
+ if end == None:
+ assert not self.__unclosed
+ else:
+ for key in self.__unclosed:
+ self.__add_transition(key, end)
+ if not self.__unclosed:
+ self.add_epsilon_transition(end)
+ self.__unclosed = None
+
+ def matches(self, char):
+ matches = self.get_transition_set(char)
+ matches = matches.union(self.get_transition_set(self.__any_key))
+ # TODO class matches
+ return matches
+
+ @staticmethod
+ def gather_transition_keys(state_set):
+ keys = set()
+ for state in state_set:
+ for key, values in state.transitions().items():
+ if key == NfaState.__epsilon_key: continue
+ keys.add(key)
+ return keys
+
+class NfaBuilder:
+
+ def __init__(self):
+ self.__operation_map = {}
+ self.__members = getmembers(self)
+
+ def __new_state(self):
+ node_number = self.node_number
+ self.node_number = self.node_number + 1
+ return NfaState(node_number)
+
+ def __or(self, graph):
+ start = self.__new_state()
+ ends = []
+ for x in [self.__process(graph[1]), self.__process(graph[2])]:
+ start.add_epsilon_transition(x[0])
+ ends += x[1]
+ start.close(None)
+ return (start, ends)
+
+ def __one_or_more(self, graph):
+ (start, ends) = self.__process(graph[1])
+ end = self.__new_state()
+ end.add_epsilon_transition(start)
+ self.__patch_ends(ends, end)
+ return (start, [end])
+
+ def __zero_or_more(self, graph):
+ (node, ends) = self.__process(graph[1])
+ start = self.__new_state()
+ start.add_epsilon_transition(node)
+ self.__patch_ends(ends, start)
+ return (start, [start])
+
+ def __zero_or_one(self, graph):
+ (node, ends) = self.__process(graph[1])
+ start = self.__new_state()
+ start.add_epsilon_transition(node)
+ return (start, ends + [start])
+
+ def __cat(self, graph):
+ (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
+ self.__patch_ends(left[1], right[0])
+ return (left[0], right[1])
+
+ def __literal(self, graph):
+ contents = graph[1]
+ state = self.__new_state()
+ state.add_character_transition(contents)
+ return (state, [state])
+
+ def __class(self, graph):
+ state = self.__new_state()
+ state.add_class_transition(graph[1], False)
+ return (state, [state])
+
+ def __not_class(self, graph):
+ state = self.__new_state()
+ state.add_class_transition(graph[1], True)
+ return (state, [state])
+
+ def __any(self, graph):
+ state = self.__new_state()
+ state.add_any_transition()
+ return (state, [state])
+
+ def __process(self, graph):
+ assert type(graph) == TupleType
+ method = "_NfaBuilder__" + graph[0].lower()
+ if (not self.__operation_map.has_key(method)):
+ matches = [func for (name, func) in self.__members if name == method]
+ assert len(matches) == 1
+ self.__operation_map[method] = matches[0]
+ return self.__operation_map[method](graph)
+
+ def __patch_ends(self, ends, new_end):
+ for end in ends:
+ end.close(new_end)
+
+ def nfa(self, graph):
+ self.node_number = 0
+ (start, ends) = self.__process(graph)
+ end = self.__new_state()
+ self.__patch_ends(ends, end)
+ end.close(None)
+ return Nfa(start, end, self.node_number)
+
+
+class Nfa:
+
+ def __init__(self, start, end, nodes_created):
+ self.__start = start
+ self.__end = end
+ self.__epsilon_closure_computed = False
+ self.__verify(nodes_created)
+
+ @staticmethod
+ def __visit_edges(start, include_start, just_epsilon, function, state):
+
+ def compute_next_edge(node, next_edge):
+ if just_epsilon:
+ return next_edge.union(node.get_transition_set(node.epsilon_key()))
+ else:
+ for key, values in node.transitions().items():
+ next_edge = next_edge.union(values)
+ return next_edge
+
+ edge = set()
+ if include_start:
+ edge.add(start)
+ else:
+ edge = compute_next_edge(start, edge)
+
+ visited = set()
+ while edge:
+ next_edge = set()
+ for node in edge:
+ next_edge = compute_next_edge(node, next_edge)
+ state = function(node, state)
+ visited = visited.union(edge)
+ edge = next_edge.difference(visited)
+ return state
+
+ def __visit_all_edges(self, function, state):
+ return Nfa.__visit_edges(self.__start, True, False, function, state)
+
+ def __verify(self, nodes_created):
+ def f(node, node_list):
+ assert node.is_closed()
+ node_list.append(node)
+ return node_list
+ node_list = self.__visit_all_edges(f, [])
+ assert len(node_list) == nodes_created
+
+ def __compute_epsilon_closures(self):
+ if self.__epsilon_closure_computed:
+ return
+ self.__epsilon_closure_computed = True
+ def outer(node, state):
+ def inner(node, closure):
+ closure.add(node)
+ return closure
+ closure = Nfa.__visit_edges(node, False, True, inner, set())
+ node.set_epsilon_closure(closure)
+ self.__visit_all_edges(outer, None)
+
+ @staticmethod
+ def __close(states):
+ for node in states:
+ states = states.union(node.epsilon_closure())
+ return states
+
+ def matches(self, string):
+ self.__compute_epsilon_closures()
+ valid_states = Nfa.__close(set([self.__start]))
+ for c in string:
+ next_valid_states = set()
+ for valid_state in valid_states:
+ next_valid_states = next_valid_states.union(valid_state.matches(c))
+ valid_states = Nfa.__close(next_valid_states)
+ if not valid_states:
+ return False
+ return self.__end in valid_states
+
+ @staticmethod
+ def __to_dfa(nfa_state_set, dfa_builder):
+ nfa_state_set = Nfa.__close(nfa_state_set)
+ name = str([x.node_number() for x in nfa_state_set])
+ (dfa_nodes, end_nodes, end_node) = dfa_builder
+ if dfa_nodes.has_key(name):
+ return name
+ dfa_node = {}
+ dfa_nodes[name] = dfa_node
+ for key in NfaState.gather_transition_keys(nfa_state_set):
+ reachable_set = set()
+ for nfa_state in nfa_state_set:
+ reachable_set = reachable_set | nfa_state.matches(key)
+ dfa_node[key] = Nfa.__to_dfa(reachable_set, dfa_builder)
+ if end_node in nfa_state_set:
+ end_nodes.add(name)
+ return name
+
+ def compute_dfa(self):
+ self.__compute_epsilon_closures()
+ dfa_nodes = {}
+ end_nodes = set()
+ dfa_builder = (dfa_nodes, end_nodes, self.__end)
+ start_name = self.__to_dfa(set([self.__start]), dfa_builder)
+ return (start_name, dfa_nodes, end_nodes)
+
+ def to_dot(self):
+
+ def f(node, node_content):
+ for key, values in node.transitions().items():
+ if key == node.epsilon_key(): key = "ε"
+ if key != node.epsilon_key() and len(key) == 1:
+ key = "'%s'" % key
+ for value in values:
+ node_content.append(
+ " S_%d -> S_%d [ label = \"%s\" ];" %
+ (node.node_number(), value.node_number(), key))
+ return node_content
+
+ node_content = self.__visit_all_edges(f, [])
+
+ return '''
+digraph finite_state_machine {
+ rankdir=LR;
+ node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s
+ node [shape = doublecircle, style=unfilled]; S_%s
+ node [shape = circle];
+%s
+}
+ ''' % (self.__start.node_number(), self.__end.node_number(), "\n".join(node_content))
+
« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/regex_lexer.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698