tools/lexer_generator/nfa.py - Issue 61893023: Experimental parser: split out NfaBuilder

Side by Side Diff: tools/lexer_generator/nfa.py

Issue 61893023: Experimental parser: split out NfaBuilder (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

11 # with the distribution.	11 # with the distribution.

12 # * Neither the name of Google Inc. nor the names of its	12 # * Neither the name of Google Inc. nor the names of its

13 # contributors may be used to endorse or promote products derived	13 # contributors may be used to endorse or promote products derived

14 # from this software without specific prior written permission.	14 # from this software without specific prior written permission.

15 #	15 #

16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS	16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT	17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR	18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT	19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,	20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 from types import TupleType

29 from transition_keys import TransitionKey	28 from transition_keys import TransitionKey

30 from automaton import *	29 from automaton import *

31 from inspect import getmembers

32	30

33 class NfaState(AutomatonState):	31 class NfaState(AutomatonState):

34	32

35 def __init__(self, node_number):	33 def __init__(self, node_number):

36 super(NfaState, self).__init__(node_number)	34 super(NfaState, self).__init__(node_number)

37 self.__transitions = {}	35 self.__transitions = {}

38 self.__unclosed = set()	36 self.__unclosed = set()

39 self.__epsilon_closure = None	37 self.__epsilon_closure = None

40 self.__action = None	38 self.__action = None

41	39

(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
121 self.__transitions[inverse_key] = self.__transitions[catch_all]	119 self.__transitions[inverse_key] = self.__transitions[catch_all]

122 del self.__transitions[catch_all]	120 del self.__transitions[catch_all]

123	121

124 @staticmethod	122 @staticmethod

125 def gather_transition_keys(state_set):	123 def gather_transition_keys(state_set):

126 f = lambda acc, state: acc | set(state.__transitions.keys())	124 f = lambda acc, state: acc | set(state.__transitions.keys())

127 keys = reduce(f, state_set, set())	125 keys = reduce(f, state_set, set())

128 keys.discard(TransitionKey.epsilon())	126 keys.discard(TransitionKey.epsilon())

129 return TransitionKey.disjoint_keys(keys)	127 return TransitionKey.disjoint_keys(keys)

130	128

131 class NfaBuilder:

132

133 def __init__(self):

134 self.__node_number = 0

135 self.__operation_map = {}

136 self.__members = getmembers(self)

137 self.__character_classes = {}

138 self.__states = []

139

140 def set_character_classes(self, classes):

141 self.__character_classes = classes

142

143 def __new_state(self):

144 self.__node_number += 1

145 return NfaState(self.__node_number - 1)

146

147 def __or(self, graph):

148 start = self.__new_state()

149 ends = []

150 for x in [self.__process(graph[1]), self.__process(graph[2])]:

151 start.add_epsilon_transition(x[0])

152 ends += x[1]

153 start.close(None)

154 return (start, ends)

155

156 def __one_or_more(self, graph):

157 (start, ends) = self.__process(graph[1])

158 end = self.__new_state()

159 end.add_epsilon_transition(start)

160 self.__patch_ends(ends, end)

161 return (start, [end])

162

163 def __zero_or_more(self, graph):

164 (node, ends) = self.__process(graph[1])

165 start = self.__new_state()

166 start.add_epsilon_transition(node)

167 self.__patch_ends(ends, start)

168 return (start, [start])

169

170 def __zero_or_one(self, graph):

171 (node, ends) = self.__process(graph[1])

172 start = self.__new_state()

173 start.add_epsilon_transition(node)

174 return (start, ends + [start])

175

176 def __repeat(self, graph):

177 param_min = int(graph[1])

178 param_max = int(graph[2])

179 subgraph = graph[3]

180 (start, ends) = self.__process(subgraph)

181 for i in xrange(1, param_min):

182 (start2, ends2) = self.__process(subgraph)

183 self.__patch_ends(ends, start2)

184 ends = ends2

185 if param_min == param_max:

186 return (start, ends)

187

188 midpoints = []

189 for i in xrange(param_min, param_max):

190 midpoint = self.__new_state()

191 self.__patch_ends(ends, midpoint)

192 (start2, ends) = self.__process(subgraph)

193 midpoint.add_epsilon_transition(start2)

194 midpoints.append(midpoint)

195

196 return (start, ends + midpoints)

197

198 def __cat(self, graph):

199 (left, right) = (self.__process(graph[1]), self.__process(graph[2]))

200 self.__patch_ends(left[1], right[0])

201 return (left[0], right[1])

202

203 def __key_state(self, key):

204 state = self.__new_state()

205 state.add_unclosed_transition(key)

206 return (state, [state])

207

208 def __literal(self, graph):

209 return self.__key_state(TransitionKey.single_char(graph[1]))

210

211 def __class(self, graph):

212 return self.__key_state(

213 TransitionKey.character_class(graph, self.__character_classes))

214

215 def __not_class(self, graph):

216 return self.__key_state(

217 TransitionKey.character_class(graph, self.__character_classes))

218

219 def __any(self, graph):

220 return self.__key_state(TransitionKey.any())

221

222 def __epsilon(self, graph):

223 start = self.__new_state()

224 end = self.__new_state()

225 start.close(end)

226 return (start, [end])

227

228 def __action(self, graph):

229 (start, ends) = self.__process(graph[1])

230 action = graph[2]

231 end = self.__new_state()

232 self.__patch_ends(ends, end)

233 end.set_action(action)

234 return (start, [end])

235

236 def __continue(self, graph):

237 (start, ends) = self.__process(graph[1])

238 state = self.__peek_state()

239 if not state['start_node']:

240 state['start_node'] = self.__new_state()

241 self.__patch_ends(ends, state['start_node'])

242 return (start, [])

243

244 def __catch_all(self, graph):

245 return self.__key_state(TransitionKey.unique('catch_all'))

246

247 def __join(self, graph):

248 (graph, name, subgraph, modifier) = graph[1:]

249 subgraphs = self.__peek_state()['subgraphs']

250 if not name in subgraphs:

251 subgraphs[name] = self.__nfa(subgraph)

252 (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name]

253 (start, ends) = self.__process(graph)

254 if modifier:

255 assert modifier == 'ZERO_OR_MORE'

256 for end in ends:

257 end.add_epsilon_transition(subgraph_end)

258 self.__patch_ends(ends, subgraph_start)

259 end = self.__new_state()

260 subgraph_end.add_epsilon_transition(end)

261 return (start, [end])

262

263 def __process(self, graph):

264 assert type(graph) == TupleType

265 method = "_NfaBuilder__" + graph[0].lower()

266 if not method in self.__operation_map:

267 matches = filter(lambda (name, func): name == method, self.__members)

268 assert len(matches) == 1

269 self.__operation_map[method] = matches[0][1]

270 return self.__operation_map[method](graph)

271

272 def __patch_ends(self, ends, new_end):

273 for end in ends:

274 end.close(new_end)

275

276 def __push_state(self):

277 self.__states.append({

278 'start_node' : None,

279 'subgraphs' : {},

280 'unpatched_ends' : [],

281 })

282

283 def __pop_state(self):

284 return self.__states.pop()

285

286 def __peek_state(self):

287 return self.__states[len(self.__states) - 1]

288

289 def __nfa(self, graph):

290 start_node_number = self.__node_number

291 self.__push_state()

292 (start, ends) = self.__process(graph)

293 state = self.__pop_state()

294 if state['start_node']:

295 state['start_node'].close(start)

296 start = state['start_node']

297 for k, subgraph in state['subgraphs'].items():

298 subgraph[1].close(None)

299 end = self.__new_state()

300 if self.__states:

301 self.__peek_state()['unpatched_ends'] += state['unpatched_ends']

302 else:

303 self.__patch_ends(state['unpatched_ends'], end)

304 self.__patch_ends(ends, end)

305 return (start, end, self.__node_number - start_node_number)

306

307 def nfa(self, graph):

308 (start, end, nodes_created) = self.__nfa(graph)

309 end.close(None)

310 return Nfa(start, end, nodes_created)

311

312 @staticmethod

313 def add_action(graph, action):

314 return ('ACTION', graph, action)

315

316 @staticmethod

317 def add_continue(graph):

318 return ('CONTINUE', graph)

319

320 @staticmethod

321 def catch_all():

322 return ('CATCH_ALL',)

323

324 @staticmethod

325 def epsilon():

326 return ('EPSILON',)

327

328 @staticmethod

329 def join_subgraph(graph, name, subgraph, modifier):

330 if modifier:

331 modifier = NfaBuilder.__modifer_map[modifier]

332 return ('JOIN', graph, name, subgraph, modifier)

333

334 @staticmethod

335 def or_graphs(graphs):

336 return reduce(lambda acc, g: ('OR', acc, g), graphs)

337

338 @staticmethod

339 def cat_graphs(graphs):

340 return reduce(lambda acc, g: ('CAT', acc, g), graphs)

341

342 __modifer_map = {

343 '+': 'ONE_OR_MORE',

344 '?': 'ZERO_OR_ONE',

345 '*': 'ZERO_OR_MORE',

346 }

347

348 @staticmethod

349 def apply_modifier(modifier, graph):

350 return (NfaBuilder.__modifer_map[modifier], graph)

351

352 class Nfa(Automaton):	129 class Nfa(Automaton):

353	130

354 def __init__(self, start, end, nodes_created):	131 def __init__(self, start, end, nodes_created):

355 super(Nfa, self).__init__()	132 super(Nfa, self).__init__()

356 self.__start = start	133 self.__start = start

357 self.__end = end	134 self.__end = end

358 self.__epsilon_closure_computed = False	135 self.__epsilon_closure_computed = False

359 self.__verify(nodes_created)	136 self.__verify(nodes_created)

360	137

361 def __visit_all_edges(self, visitor, state):	138 def __visit_all_edges(self, visitor, state):

(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
434 self.__compute_epsilon_closures()	211 self.__compute_epsilon_closures()

435 self.__visit_all_edges(lambda node, state: node.replace_catch_all(), None)	212 self.__visit_all_edges(lambda node, state: node.replace_catch_all(), None)

436 dfa_nodes = {}	213 dfa_nodes = {}

437 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)	214 start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)

438 return (start_name, dfa_nodes)	215 return (start_name, dfa_nodes)

439	216

440 def to_dot(self):	217 def to_dot(self):

441 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state)	218 iterator = lambda visitor, state: self.__visit_all_edges(visitor, state)

442 state_iterator = lambda x : x	219 state_iterator = lambda x : x

443 return self.generate_dot(self.__start, set([self.__end]), iterator, state_iterator)	220 return self.generate_dot(self.__start, set([self.__end]), iterator, state_iterator)

OLD	NEW

« no previous file with comments | « tools/lexer_generator/generator.py ('k') | tools/lexer_generator/nfa_builder.py » ('j') | no next file with comments »