Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1550)

Unified Diff: tools/lexer_generator/nfa_builder.py

Issue 160443002: Experimental parser: remove match actions (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/lexer_generator/nfa.py ('k') | tools/lexer_generator/regex_parser.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/nfa_builder.py
diff --git a/tools/lexer_generator/nfa_builder.py b/tools/lexer_generator/nfa_builder.py
index dfb57504a63745348e0fb5672354de21b9aa8b23..ba26749bdde9ffa9b8bb1884c7484ede200ad3d2 100644
--- a/tools/lexer_generator/nfa_builder.py
+++ b/tools/lexer_generator/nfa_builder.py
@@ -46,7 +46,7 @@ class NfaBuilder(object):
self.__encoding = encoding
self.__character_classes = character_classes
self.__states = []
- self.__global_end_node = None
+ self.__global_end = self.__new_state()
self.__operation_map = None
self.__subtree_map = subtree_map
@@ -54,11 +54,6 @@ class NfaBuilder(object):
self.__node_number += 1
return NfaState()
- def __global_end(self):
- if not self.__global_end_node:
- self.__global_end_node = self.__new_state()
- return self.__global_end_node
-
def __or(self, *trees):
start = self.__new_state()
ends = []
@@ -89,7 +84,7 @@ class NfaBuilder(object):
start.add_epsilon_transition(node)
return (start, ends + [start])
- def __repeat(self, param_min, param_max, subtree):
+ def __repeat(self, subtree, param_min, param_max):
'process regex of form subtree{param_min, param_max}'
(param_min, param_max) = (int(param_min), int(param_max))
assert param_min > 1 and param_min <= param_max
@@ -147,17 +142,27 @@ class NfaBuilder(object):
def __any(self):
return self.__key_state(TransitionKey.any(self.__encoding))
- def __action(self, subtree, action_term):
+ def __unique_key(self, name):
+ return self.__key_state(TransitionKey.unique(name))
+
+ def __entry_action(self, action, precedence, subtree):
(start, ends) = self.__process(subtree)
- action = Action.from_term(action_term)
- end = self.__new_state()
- self.__patch_ends(ends, end)
- end.set_action(action)
+ node = self.__new_state()
+ node.set_action(Action(action, precedence))
+ self.__patch_ends(ends, node)
+ return (start, [node])
+
+ def __match_action(self, action, precedence, subtree):
+ (start, ends) = self.__process(subtree)
+ node = self.__new_state()
+ node.set_action(Action(action, precedence))
# Force all match actions to be terminal.
- if action.match_action():
- global_end = self.__global_end()
- end.add_epsilon_transition(global_end)
- return (start, [end])
+ node.close(self.__global_end)
+ # patch via a match edge into existing graph
+ match_node = self.__new_state()
+ match_node.add_transition(TransitionKey.omega(), node)
+ self.__patch_ends(ends, match_node)
+ return (start, [match_node])
def __continue(self, subtree, depth):
'add an epsilon transitions to the start node of the current subtree'
@@ -169,20 +174,11 @@ class NfaBuilder(object):
self.__patch_ends(ends, state['start_node'])
return (start, [])
- def __unique_key(self, name):
- return self.__key_state(TransitionKey.unique(name))
-
def __join(self, tree, name):
- (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(name)
- if tree:
- (start, ends) = self.__process(tree)
- self.__patch_ends(ends, subtree_start)
- else:
- start = subtree_start
- if subtree_end:
- return (start, [subtree_end])
- else:
- return (start, [])
+ (start, ends) = self.__subgraph(name)
+ (new_start, new_ends) = self.__process(tree)
+ self.__patch_ends(new_ends, start)
+ return (new_start, ends)
def __get_method(self, name):
'lazily build map of all private bound methods and return the one requested'
@@ -209,79 +205,24 @@ class NfaBuilder(object):
'name' : name
})
- def __pop_state(self):
- return self.__states.pop()
-
- def __nfa(self, name):
+ def __subgraph(self, name):
tree = self.__subtree_map[name]
- start_node_number = self.__node_number
self.__push_state(name)
(start, ends) = self.__process(tree)
- state = self.__pop_state()
+ state = self.__states.pop()
# move saved start node into start
if state['start_node']:
state['start_node'].close(start)
start = state['start_node']
- # Don't create an end node for the subgraph if it would be unused (ends can
- # be an empty list e.g., in the case when everything inside the subgraph is
- # "continue").
- end = None
- if ends:
- end = self.__new_state()
- self.__patch_ends(ends, end)
- return (start, end, self.__node_number - start_node_number)
-
- @staticmethod
- def __compute_epsilon_closures(start_state):
- def outer(node, state):
- def inner(node, closure):
- closure.add(node)
- return closure
- is_epsilon = lambda k: k == TransitionKey.epsilon()
- state_iter = lambda node : node.state_iter(key_filter = is_epsilon)
- edge = set(state_iter(node))
- closure = Automaton.visit_states(
- edge, inner, state_iter=state_iter, visit_state=set())
- node.set_epsilon_closure(closure)
- Automaton.visit_states(set([start_state]), outer)
-
- @staticmethod
- def __replace_catch_all(encoding, state):
- catch_all = TransitionKey.unique('catch_all')
- states = list(state.state_iter_for_key(catch_all))
- if not states:
- return
- f = lambda acc, state: acc | set(state.epsilon_closure_iter())
- reachable_states = reduce(f, states, set())
- f = lambda acc, state: acc | set(state.key_iter())
- keys = reduce(f, reachable_states, set())
- keys.discard(TransitionKey.epsilon())
- keys.discard(catch_all)
- keys.discard(TransitionKey.unique('eos'))
- inverse_key = TransitionKey.inverse_key(encoding, keys)
- if not inverse_key:
- inverse_key = TransitionKey.unique('no_match')
- state.swap_key(catch_all, inverse_key)
-
- @staticmethod
- def nfa(encoding, character_classes, subtree_map, name):
- self = NfaBuilder(encoding, character_classes, subtree_map)
- (start, end, nodes_created) = self.__nfa(name)
- # close all ends
- global_end_to_return = self.__global_end_node or end
- assert global_end_to_return, "no terminal nodes, default tree continues"
- if end and self.__global_end_node:
- end.close(self.__global_end_node)
- global_end_to_return.close(None)
- # Prepare the nfa
- self.__compute_epsilon_closures(start)
- f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
- Automaton.visit_states(set([start]), f)
- return Nfa(self.__encoding, start, global_end_to_return, nodes_created)
+ return (start, ends)
@staticmethod
- def add_action(term, action):
- return Term('ACTION', term, action.to_term())
+ def add_action(tree, entry_action, match_action, precedence):
+ if entry_action:
+ tree = Term('ENTRY_ACTION', entry_action, precedence, tree)
+ if match_action:
+ tree = Term('MATCH_ACTION', match_action, precedence, tree)
+ return tree
@staticmethod
def add_continue(tree, depth):
@@ -293,6 +234,8 @@ class NfaBuilder(object):
@staticmethod
def join_subtree(tree, subtree_name):
+ if not tree:
+ return Term('SUBGRAPH', subtree_name)
return Term('JOIN', tree, subtree_name)
__modifer_map = {
@@ -345,3 +288,56 @@ class NfaBuilder(object):
terms = list(NfaBuilder.__flatten_literals(terms))
assert terms
return terms[0] if len(terms) == 1 else Term('CAT', *terms)
+
+ @staticmethod
+ def nfa(encoding, character_classes, subtree_map, name):
+ self = NfaBuilder(encoding, character_classes, subtree_map)
+ (start, ends) = self.__subgraph(name)
+ # close all ends
+ end = self.__global_end
+ end.close(None)
+ # TODO(dcarney): patch in default action
+ self.__patch_ends(ends, end)
+ # Prepare the nfa
+ self.__compute_epsilon_closures(start)
+ f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
+ Automaton.visit_states(set([start]), f)
+ return Nfa(self.__encoding, start, end, self.__node_number)
+
+ @staticmethod
+ def __compute_epsilon_closures(start_state):
+ def outer(node, state):
+ def inner(node, closure):
+ closure.add(node)
+ return closure
+ is_epsilon = lambda k: k == TransitionKey.epsilon()
+ state_iter = lambda node : node.state_iter(key_filter = is_epsilon)
+ edge = set(state_iter(node))
+ closure = Automaton.visit_states(
+ edge, inner, state_iter=state_iter, visit_state=set())
+ node.set_epsilon_closure(closure)
+ Automaton.visit_states(set([start_state]), outer)
+
+ @staticmethod
+ def __replace_catch_all(encoding, state):
+ catch_all = TransitionKey.unique('catch_all')
+ states = list(state.state_iter_for_key(catch_all))
+ if not states:
+ return
+ f = lambda acc, state: acc | set(state.epsilon_closure_iter())
+ reachable_states = reduce(f, states, set())
+ f = lambda acc, state: acc | set(state.key_iter())
+ keys = reduce(f, reachable_states, set())
+ keys.discard(TransitionKey.epsilon())
+ keys.discard(catch_all)
+ keys.discard(TransitionKey.unique('eos'))
+ inverse_key = TransitionKey.inverse_key(encoding, keys)
+ if not inverse_key:
+ inverse_key = TransitionKey.unique('no_match')
+ state.swap_key(catch_all, inverse_key)
+
+ @staticmethod
+ def __install_omega_transitions(start_state, default_action):
+ '''install a match transition, a backtrack transition,
+ or a default transition into all nodes'''
+ pass
« no previous file with comments | « tools/lexer_generator/nfa.py ('k') | tools/lexer_generator/regex_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698