tools/lexer_generator/dfa.py - Issue 169523003: Experimental parser: split and rename some files

Side by Side Diff: tools/lexer_generator/dfa.py

Issue 169523003: Experimental parser: split and rename some files (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

11 # with the distribution.	11 # with the distribution.

12 # * Neither the name of Google Inc. nor the names of its	12 # * Neither the name of Google Inc. nor the names of its

13 # contributors may be used to endorse or promote products derived	13 # contributors may be used to endorse or promote products derived

14 # from this software without specific prior written permission.	14 # from this software without specific prior written permission.

15 #	15 #

16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS	16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT	17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR	18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT	19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,	20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 from itertools import chain	28 from itertools import chain

29 from automaton import *	29 from automaton import *

30 from transition_keys import TransitionKey	30 from transition_key import TransitionKey

31	31

32 class DfaState(AutomatonState):	32 class DfaState(AutomatonState):

33	33

34 def __init__(self, name, action, transitions):	34 def __init__(self, name, action, transitions):

35 super(DfaState, self).__init__()	35 super(DfaState, self).__init__()

36 self.__name = name	36 self.__name = name

37 self.__transitions = transitions	37 self.__transitions = transitions

38 self.__action = action	38 self.__action = action

39 assert isinstance(action, Action)	39 assert isinstance(action, Action)

40	40

(...skipping 68 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
109 assert self.__node_count == state_count	109 assert self.__node_count == state_count

110	110

111 def node_count(self):	111 def node_count(self):

112 return self.__node_count	112 return self.__node_count

113	113

114 def start_state(self):	114 def start_state(self):

115 return self.__start	115 return self.__start

116	116

117 def terminal_set(self):	117 def terminal_set(self):

118 return set(self.__terminal_set)	118 return set(self.__terminal_set)

119

120 def minimize(self):

121 return DfaMinimizer(self).minimize()

122

123 class StatePartition(object):

124

125 def __init__(self, node_numbers):

126 self.__node_numbers = node_numbers

127 assert self.__node_numbers

128 self.__hash = None

129

130 def set(self):

131 return self.__node_numbers

132

133 def __hash__(self):

134 if not self.__hash:

135 self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0)

136 return self.__hash

137

138 def __eq__(self, other):

139 return (isinstance(other, self.__class__) and

140 self.__node_numbers == other.__node_numbers)

141

142 def __len__(self):

143 return len(self.__node_numbers)

144

145 def __iter__(self):

146 return self.__node_numbers.__iter__()

147

148 def __str__(self):

149 return str([x for x in self.__node_numbers])

150

151 class DfaMinimizer:

152

153 __verify = True

154

155 @staticmethod

156 def set_verify(verify):

157 DfaMinimizer.__verify = verify

158

159 def __init__(self, dfa):

160 self.__dfa = dfa

161 self.__id_to_state = None

162 self.__state_to_id = None

163 self.__alphabet = None

164

165 def __create_initial_partitions(self):

166 assert not self.__id_to_state

167 assert not self.__state_to_id

168 assert not self.__alphabet

169

170 # For the minimization, we associate each state with an ID. A set of states

171 # is represented as a set of state IDs.

172 id_to_state = {}

173

174 # First we partition the states into the following groups:

175 # - terminal states without action

176 # - nonterminal states without action

177 # - one group per action: terminal states which have the action

178 # - one group per action: nonterminal states which have the action

179 # These are the keys of initial_partitions. The values are lists of state

180 # IDs.

181 initial_partitions = {}

182 terminal_set = self.__dfa.terminal_set()

183 all_keys = [] # Will contain all TransitionKeys in the dfa.

184

185 # f fills in initial_partitions, id_to_state and all_keys.

186 def f(state, visitor_state):

187 state_id = len(id_to_state)

188 id_to_state[state_id] = state

189 action = state.action()

190 all_keys.append(state.key_iter())

191 if state in terminal_set:

192 key = ("terminal set", action)

193 else:

194 # Match actions are valid only if we have matched the whole token, not

195 # just some sub-part of it.

196 # TODO(dcarney): restore

197 # assert not action.is_match_action()

198 key = ("nonterminal set", action)

199 if not key in initial_partitions:

200 initial_partitions[key] = set()

201 initial_partitions[key].add(state_id)

202 self.__dfa.visit_all_states(f)

203 partitions = set()

204 working_set = set()

205

206 # Create StatePartition objects:

207 for k, p in initial_partitions.items():

208 p = StatePartition(p)

209 partitions.add(p)

210 working_set.add(p)

211 state_to_id = {v : k for k, v in id_to_state.items()}

212 assert len(id_to_state) == len(state_to_id)

213 self.__state_to_id = state_to_id

214 self.__id_to_state = id_to_state

215

216 # The alphabet defines the TransitionKeys we need to check when dedicing if

217 # two states of the dfa can be in the same partition. See the doc string of

218 # TransitionKey.disjoint_keys.

219

220 # For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet is

221 # {[a-b], [c-d], [e-h]}. If state S1 has a transition to S2 with

222 # TransitionKey [a-d], and state S3 has a transition to S4 with

223 # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This will

224 # become clear when we check the transition for TransitionKey [c-d] (S1 has

225 # a transition to S2, S3 has a transition to S4).

226 encoding = self.__dfa.encoding()

227 self.__alphabet = list(

228 TransitionKey.disjoint_keys(encoding, chain(*all_keys)))

229

230 # For each state and each TransitionKey in the alphabet, find out which

231 # transition we take from the state with the TransitionKey.

232 transitions = {}

233 for state_id, state in id_to_state.items():

234 def f((key_id, key)):

235 transition_state = state.transition_state_for_key(key)

236 if transition_state:

237 return state_to_id[transition_state]

238 return None

239 transitions[state_id] = map(f, enumerate(self.__alphabet))

240 self.__transitions = transitions

241 # verify created structures

242 if self.__verify:

243 for partition in partitions:

244 for state_id in partition:

245 transition_state_array = transitions[state_id]

246 state = id_to_state[state_id]

247 for key_id, key in enumerate(self.__alphabet):

248 transition_state_id = transition_state_array[key_id]

249 transition_state = state.transition_state_for_key(key)

250 if not transition_state:

251 assert transition_state_id == None

252 else:

253 assert transition_state_id != None

254 assert transition_state_id == state_to_id[transition_state]

255 return (working_set, partitions)

256

257 @staticmethod

258 def __partition_count(partitions):

259 return len(reduce(lambda acc, p: acc \| p.set(), partitions, set()))

260

261 def __create_states_from_partitions(self, partitions):

262 '''Creates a new set of states where each state corresponds to a

263 partition.'''

264 id_to_state = self.__id_to_state

265 state_to_id = self.__state_to_id

266 verify = self.__verify

267 partition_to_name = {}

268 state_id_to_partition = {}

269

270 # Fill in partition_to_name and state_id_to_partition.

271 for partition in partitions:

272 partition_to_name[partition] = str(partition)

273 for state_id in partition:

274 state_id_to_partition[state_id] = partition

275 transitions = self.__transitions

276

277 # Create nodes for partitions.

278 partition_name_to_node = {}

279 for partition in partitions:

280 state_ids = list(partition)

281 # state is a representative state for the partition, and state_id is it's

282 # ID. To check the transitions between partitions, it's enough to check

283 # transitions from the representative state. All other states will have

284 # equivalent transitions, that is, transitions which transition into the

285 # same partition.

286 state_id = state_ids[0]

287 state = id_to_state[state_id]

288 node = {

289 'transitions' : {},

290 'terminal' : state in self.__dfa.terminal_set(),

291 'action' : state.action(),

292 }

293 partition_name_to_node[str(partition)] = node

294 transition_key_to_state_id = transitions[state_id]

295 for key_id, key in enumerate(self.__alphabet):

296 transition_state_id = transition_key_to_state_id[key_id]

297 if transition_state_id == None:

298 if verify:

299 # There is no transition for that key from state; check that no

300 # other states in the partition have a transition with that key

301 # either.

302 assert not state.transition_state_for_key(key)

303 for s_id in state_ids:

304 assert not id_to_state[s_id].transition_state_for_key(key)

305 continue

306 transition_partition = state_id_to_partition[transition_state_id]

307 assert transition_partition

308 if verify:

309 # There is a transition for that key from state; check that all other

310 # states in the partition have an equivalent transition (transition

311 # into the same partition).

312 for s_id in state_ids:

313 transition = id_to_state[s_id].transition_state_for_key(key)

314 assert transition

315 test_partition = state_id_to_partition[state_to_id[transition]]

316 assert test_partition == transition_partition

317 # Record the transition between partitions.

318 node['transitions'][key] = partition_to_name[transition_partition]

319 start_id = state_to_id[self.__dfa.start_state()]

320 start_name = partition_to_name[state_id_to_partition[start_id]]

321 return (start_name, partition_name_to_node)

322

323 def minimize(self):

324 '''Minimize the dfa. For the general idea of minimizing automata, see

325 http://en.wikipedia.org/wiki/DFA_minimization. In addition, we need to take

326 account the actions associated to stages, i.e., we cannot merge two states

327 which have different actions.'''

328 (working_set, partitions) = self.__create_initial_partitions()

329 node_count = self.__dfa.node_count()

330 id_to_state = self.__id_to_state

331 state_to_id = self.__state_to_id

332 # transitions is a 2-dimensional array indexed by state_id and index of the

333 # TransitionKey in the alphabet.

334 # transitions[state_id][transition_key_ix] = transition_state_id

335 transitions = self.__transitions

336 key_range = range(0, len(self.__alphabet))

337 while working_set:

338 assert working_set <= partitions

339 assert self.__partition_count(partitions) == node_count

340 # We split other partitions according to test_partition: If a partition

341 # contains two states S1 and S2, such that S1 transitions to a state in

342 # test_partition with a TransitionKey K, and S2 transitions to a state

343 # not in test_partition with the same K, then S1 and S2 cannot be in the

344 # same partition.

345 test_partition = iter(working_set).next()

346 working_set.remove(test_partition)

347 state_in_test_partition = [False] * len(id_to_state)

348 for state_id in test_partition:

349 state_in_test_partition[state_id] = True

350 for key_index in key_range:

351 # states_transitioning_to_test_partition will contain the state_ids of

352 # the states (across all partitions) which transition into

353 # test_partition (any state within test_partition) with that key.

354 states_transitioning_to_test_partition = set()

355 for state_id, transition_key_to_state_id in transitions.items():

356 transition_state_id = transition_key_to_state_id[key_index]

357 if (transition_state_id and

358 state_in_test_partition[transition_state_id]):

359 states_transitioning_to_test_partition.add(state_id)

360 if not states_transitioning_to_test_partition:

361 continue

362 # For each partition, we need to split it in two: {states which

363 # transition into test_partition, states which don't}.

364 new_partitions = set()

365 old_partitions = set()

366 for p in partitions:

367 intersection = p.set().intersection(

368 states_transitioning_to_test_partition)

369 if not intersection:

370 continue

371 difference = p.set().difference(

372 states_transitioning_to_test_partition)

373 if not difference:

374 continue

375 intersection = StatePartition(intersection)

376 difference = StatePartition(difference)

377 old_partitions.add(p)

378 new_partitions.add(intersection)

379 new_partitions.add(difference)

380 if p in working_set:

381 working_set.remove(p)

382 working_set.add(intersection)

383 working_set.add(difference)

384 elif len(intersection) <= len(difference):

385 working_set.add(intersection)

386 else:

387 working_set.add(difference)

388 if old_partitions:

389 partitions -= old_partitions

390 if new_partitions:

391 partitions \|= new_partitions

392 (start_name, mapping) = self.__create_states_from_partitions(partitions)

393 return Dfa(self.__dfa.encoding(), start_name, mapping)

OLD	NEW

« no previous file with comments | « tools/lexer_generator/code_generator.py ('k') | tools/lexer_generator/dfa_minimizer.py » ('j') | no next file with comments »