Chromium Code Reviews

Side by Side Diff: tools/lexer_generator/automaton.py

Issue 77153005: Experimental parser: abstract lexing (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
# Copyright 2013 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
(...skipping 13 matching lines...)
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from types import TupleType, ListType
from itertools import chain
from transition_keys import TransitionKey

class Action(object):

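  # Select the action that wins for a set of states: among all states that
  # carry an action, the one with the lowest precedence value is kept;
  # states whose actions have equal precedence must carry the same action.
  # Returns None if no state in the set has an action.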
  @staticmethod
  def dominant_action(state_set):
    action = None
    for state in state_set:
      if not state.action():
        continue
      if not action:
        action = state.action()
        continue
      if state.action().precedence() == action.precedence():
        assert state.action() == action
      elif state.action().precedence() < action.precedence():
        action = state.action()
    return action

  def __init__(self, entry_action, match_action, precedence = -1):
    for action in [entry_action, match_action]:
      if action == None:
        continue
      assert type(action) == TupleType and len(action)
      assert action[0] != None
    assert entry_action or match_action
    self.__entry_action = entry_action
    self.__match_action = match_action
    self.__precedence = precedence
(...skipping 82 matching lines...)
      next_edge_iters = []
      def f(visit_state, node):
        next_edge_iters.append(state_iter(node))
        return visitor(node, visit_state)
      visit_state = reduce(f, edge, visit_state)
      next_edge = set(chain(*next_edge_iters))
      visited |= edge
      edge = next_edge - visited
    return visit_state

  def start_set(self):
    return set([self.start_state()])

  def visit_all_states(self, visitor, visit_state = None, state_iter = None):
    return self.visit_states(self.start_set(), visitor, visit_state, state_iter)

  # TODO use iters
  @staticmethod
  def epsilon_closure(states):
    f = lambda acc, node: acc | node.epsilon_closure()
    return reduce(f, states, set(iter(states)))

  @staticmethod
  def __transition_states_for_char(valid_states, c):
    f = lambda state : state.transition_state_iter_for_char(c)
    return set(chain(*map(f, Automaton.epsilon_closure(valid_states))))

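  # Full-match test: feed every character through the automaton and report
  # whether a terminal state is reachable once the input is exhausted.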
  def matches(self, string):
    valid_states = self.start_set()
    for c in string:
      valid_states = Automaton.__transition_states_for_char(valid_states, c)
      if not valid_states:
        return False
    valid_states = self.epsilon_closure(valid_states)
    return len(self.terminal_set().intersection(valid_states)) > 0

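  # Greedy tokenizer: keep consuming characters while some transition is
  # possible; when the automaton gets stuck, yield (action, start, end) for
  # the dominant action of the states reached so far and restart from the
  # start set on the current character.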
  def lex(self, string):
    last_position = 0
    valid_states = self.start_set()
    for pos, c in enumerate(string):
      transitions = Automaton.__transition_states_for_char(valid_states, c)
      if transitions:
        valid_states = transitions
        continue
      action = Action.dominant_action(valid_states)
      assert action # must invoke default action here
      yield (action, last_position, pos)
      last_position = pos
      # lex next token
      valid_states = Automaton.__transition_states_for_char(self.start_set(), c)
      assert valid_states
      valid_states = self.epsilon_closure(valid_states)
    action = Action.dominant_action(valid_states)
    assert action # must invoke default action here
    yield (action, last_position, len(string))

  def to_dot(self):

    def escape(v):
      v = str(v)
      v = v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
      v = v.replace('\\', '\\\\').replace('\"', '\\\"')
      return v

    def f(node, (node_content, edge_content)):
      if node.action():
(...skipping 29 matching lines...)
    node [shape = doublecircle, style=unfilled]; %s
    node [shape = circle];
    %s
    %s
}
''' % (start_shape,
       start_number,
       " ".join(terminals),
       "\n".join(edge_content),
       "\n".join(node_content))