OLD | NEW |
| (Empty) |
1 # ----------------------------------------------------------------------------- | |
2 # ply: yacc.py | |
3 # | |
4 # Copyright (C) 2001-2011, | |
5 # David M. Beazley (Dabeaz LLC) | |
6 # All rights reserved. | |
7 # | |
8 # Redistribution and use in source and binary forms, with or without | |
9 # modification, are permitted provided that the following conditions are | |
10 # met: | |
11 # | |
12 # * Redistributions of source code must retain the above copyright notice, | |
13 # this list of conditions and the following disclaimer. | |
14 # * Redistributions in binary form must reproduce the above copyright notice, | |
15 # this list of conditions and the following disclaimer in the documentation | |
16 # and/or other materials provided with the distribution. | |
17 # * Neither the name of the David Beazley or Dabeaz LLC may be used to | |
18 # endorse or promote products derived from this software without | |
19 # specific prior written permission. | |
20 # | |
21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
26 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
27 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
28 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
29 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 # ----------------------------------------------------------------------------- | |
33 # | |
34 # This implements an LR parser that is constructed from grammar rules defined | |
35 # as Python functions. The grammar is specified by supplying the BNF inside | |
36 # Python documentation strings. The inspiration for this technique was borrowed | |
37 # from John Aycock's Spark parsing system. PLY might be viewed as cross between | |
38 # Spark and the GNU bison utility. | |
39 # | |
40 # The current implementation is only somewhat object-oriented. The | |
41 # LR parser itself is defined in terms of an object (which allows multiple | |
42 # parsers to co-exist). However, most of the variables used during table | |
43 # construction are defined in terms of global variables. Users shouldn't | |
44 # notice unless they are trying to define multiple parsers at the same | |
45 # time using threads (in which case they should have their head examined). | |
46 # | |
47 # This implementation supports both SLR and LALR(1) parsing. LALR(1) | |
48 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), | |
49 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, | |
50 # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced | |
51 # by the more efficient DeRemer and Pennello algorithm. | |
52 # | |
53 # :::::::: WARNING ::::::: | |
54 # | |
55 # Construction of LR parsing tables is fairly complicated and expensive. | |
56 # To make this module run fast, a *LOT* of work has been put into | |
57 # optimization---often at the expense of readability and what might be | |
58 # considered to be good Python "coding style." Modify the code at your | |
59 # own risk! | |
60 # ---------------------------------------------------------------------------- | |
61 | |
__version__    = "3.4"           # PLY release this module belongs to
__tabversion__ = "3.2"           # Table version (format of generated parsetab files)

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1                  # Debugging mode.  If set, yacc generates a
                                 # 'parser.out' file in the current directory

debug_file  = 'parser.out'       # Default name of the debugging file
tab_module  = 'parsetab'         # Default name of the table module
default_lr  = 'LALR'             # Default LR table generation method

error_count = 3                  # Number of symbols that must be shifted to
                                 # leave error recovery mode

yaccdevel   = 0                  # Set to True if developing yacc.  This turns
                                 # off optimized implementations of certain
                                 # functions.

resultlimit = 40                 # Size limit of results when running in debug mode.

pickle_protocol = 0              # Protocol to use when writing pickle files

87 import re, types, sys, os.path | |
88 | |
# Compatibility function for python 2.6/3.0
if sys.version_info[0] < 3:
    def func_code(f):
        # Python 2: a function's code object lives on the func_code attribute.
        return f.func_code
else:
    def func_code(f):
        # Python 3: the attribute was renamed to __code__.
        return f.__code__

# Compatibility: largest "practical" integer for this interpreter.
try:
    MAXINT = sys.maxint          # Python 2
except AttributeError:
    MAXINT = sys.maxsize         # Python 3 (ints are unbounded; this is the platform limit)
102 | |
# Python 2.x/3.0 compatibility.
def load_ply_lex():
    """Import and return the lex module appropriate for this Python version."""
    if sys.version_info[0] >= 3:
        # Python 3 installs lex inside the ply package.
        import ply.lex as lex
    else:
        # Python 2 historically imported it as a top-level module.
        import lex
    return lex
110 | |
111 # This object is a stand-in for a logging object created by the | |
112 # logging module. PLY will use this by default to create things | |
113 # such as the parser.out file. If a user wants more detailed | |
114 # information, they can create their own logging object and pass | |
115 # it into PLY. | |
116 | |
class PlyLogger(object):
    """Minimal stand-in for a logging.Logger that writes to a file-like object.

    PLY uses this by default to produce output such as the parser.out file.
    Users wanting more detailed control can pass a real logging object instead.
    """
    def __init__(self, f):
        self.f = f               # destination stream (anything with .write)

    def _emit(self, prefix, msg, args):
        # Every level funnels through here: lazy %-format, prefix, newline.
        self.f.write(prefix + (msg % args) + "\n")

    def debug(self, msg, *args, **kwargs):
        self._emit("", msg, args)

    def warning(self, msg, *args, **kwargs):
        self._emit("WARNING: ", msg, args)

    def error(self, msg, *args, **kwargs):
        self._emit("ERROR: ", msg, args)

    # info and critical are plain aliases of debug (no prefix).
    info = debug
    critical = debug
131 | |
# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    """Sink object that silently absorbs any attribute access or call."""
    def __getattribute__(self, attr):
        # Any attribute lookup (debug, info, warning, ...) yields the sink itself...
        return self

    def __call__(self, *args, **kwargs):
        # ...so chained calls like log.debug(...) are no-ops that return the sink.
        return self
138 | |
# Exception raised for yacc-related errors
class YaccError(Exception):
    """Raised for errors detected by the yacc module."""
141 | |
# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    """Return a short '<type @ addr> (repr)' description of *r* for debug logs."""
    text = repr(r)
    if '\n' in text:
        # Keep the description on a single line by re-escaping embedded newlines.
        text = repr(text)
    if len(text) > resultlimit:
        # Truncate oversized reprs to the configured limit.
        text = text[:resultlimit] + " ..."
    return "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), text)
150 | |
151 | |
# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    """Return repr(r) if short, else a compact '<type @ addr>' placeholder."""
    text = repr(r)
    if '\n' in text:
        # Flatten multi-line reprs onto one line.
        text = repr(text)
    if len(text) >= 16:
        return "<%s @ 0x%x>" % (type(r).__name__, id(r))
    return text
160 | |
161 #----------------------------------------------------------------------------- | |
162 # === LR Parsing Engine === | |
163 # | |
164 # The following classes are used for the LR parser itself. These are not | |
165 # used during table construction and are independent of the actual LR | |
166 # table generation algorithm | |
167 #----------------------------------------------------------------------------- | |
168 | |
169 # This class is used to hold non-terminal grammar symbols during parsing. | |
170 # It normally has the following attributes set: | |
171 # .type = Grammar symbol type | |
172 # .value = Symbol value | |
173 # .lineno = Starting line number | |
174 # .endlineno = Ending line number (optional, set automatically) | |
175 # .lexpos = Starting lex position | |
176 # .endlexpos = Ending lex position (optional, set automatically) | |
177 | |
class YaccSymbol:
    """Non-terminal grammar symbol held on the parsing stack.

    The parser assigns attributes such as .type, .value, .lineno and
    .lexpos (plus optional .endlineno / .endlexpos); the symbol displays
    as its grammar type.
    """
    def __str__(self):
        return self.type

    def __repr__(self):
        return self.__str__()
181 | |
182 # This class is a wrapper around the objects actually passed to each | |
183 # grammar rule. Index lookup and assignment actually assign the | |
184 # .value attribute of the underlying YaccSymbol object. | |
185 # The lineno() method returns the line number of a given | |
186 # item (or 0 if not defined). The linespan() method returns | |
187 # a tuple of (startline,endline) representing the range of lines | |
188 # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) | |
189 # representing the range of positional information for a symbol. | |
190 | |
class YaccProduction:
    """Wrapper around the grammar symbols matched by a production.

    Index lookup and assignment read/write the .value attribute of the
    underlying YaccSymbol objects: p[n] for n >= 0 refers to the
    production's own symbols, while negative indices reach back into the
    parsing stack.  lineno()/linespan() and lexpos()/lexspan() expose
    positional information for a symbol (0 when not recorded).
    """
    def __init__(self, s, stack=None):
        self.slice = s           # Symbols matched by the production
        self.stack = stack       # Parser symbol stack (for negative indexing)
        self.lexer = None        # Lexer in use, filled in by the parser
        self.parser = None       # Parser instance, filled in by the parser

    def __getitem__(self, n):
        # Bug fix: Python 3 ignores __getslice__, so p[i:j] arrives here as a
        # slice object; handle it explicitly instead of failing on
        # self.slice[n].value.
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        if n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 only; Python 3 routes slicing through __getitem__ above.
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        """Return the starting line number of symbol n (0 if unknown)."""
        return getattr(self.slice[n], "lineno", 0)

    def set_lineno(self, n, lineno):
        """Set the starting line number of symbol n."""
        self.slice[n].lineno = lineno

    def linespan(self, n):
        """Return (startline, endline) covered by symbol n."""
        startline = getattr(self.slice[n], "lineno", 0)
        endline = getattr(self.slice[n], "endlineno", startline)
        return startline, endline

    def lexpos(self, n):
        """Return the starting lex position of symbol n (0 if unknown)."""
        return getattr(self.slice[n], "lexpos", 0)

    def lexspan(self, n):
        """Return (startpos, endpos) of positional info for symbol n."""
        startpos = getattr(self.slice[n], "lexpos", 0)
        endpos = getattr(self.slice[n], "endlexpos", startpos)
        return startpos, endpos

    def error(self):
        # Signal a syntax error from inside a grammar rule; the parsing
        # engine catches SyntaxError and enters error recovery.
        raise SyntaxError
231 | |
232 | |
233 # ----------------------------------------------------------------------------- | |
234 # == LRParser == | |
235 # | |
236 # The LR Parsing engine. | |
237 # ----------------------------------------------------------------------------- | |
238 | |
239 class LRParser: | |
    def __init__(self,lrtab,errorf):
        # lrtab : object holding the generated LR tables (lr_productions,
        #         lr_action, lr_goto); errorf : user p_error() callback or None.
        self.productions = lrtab.lr_productions   # Grammar production list
        self.action      = lrtab.lr_action        # Action table: state -> {token type: action}
        self.goto        = lrtab.lr_goto          # Goto table: state -> {nonterminal: state}
        self.errorfunc   = errorf                 # User-supplied error handler
245 | |
    def errok(self):
        # Clear the error flag checked by the parse loop; exposed to the
        # user's p_error() handler to signal that recovery has succeeded.
        self.errorok = 1
248 | |
249 def restart(self): | |
250 del self.statestack[:] | |
251 del self.symstack[:] | |
252 sym = YaccSymbol() | |
253 sym.type = '$end' | |
254 self.symstack.append(sym) | |
255 self.statestack.append(0) | |
256 | |
257 def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): | |
258 if debug or yaccdevel: | |
259 if isinstance(debug,int): | |
260 debug = PlyLogger(sys.stderr) | |
261 return self.parsedebug(input,lexer,debug,tracking,tokenfunc) | |
262 elif tracking: | |
263 return self.parseopt(input,lexer,debug,tracking,tokenfunc) | |
264 else: | |
265 return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) | |
266 | |
267 | |
268 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
269 # parsedebug(). | |
270 # | |
271 # This is the debugging enabled version of parse(). All changes made to the | |
272 # parsing engine should be made here. For the non-debugging version, | |
273 # copy this code to a method parseopt() and delete all of the sections | |
274 # enclosed in: | |
275 # | |
276 # #--! DEBUG | |
277 # statements | |
278 # #--! DEBUG | |
279 # | |
280 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
281 | |
    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
        """Debug-enabled LR parsing engine.

        This is the master copy of the parse loop: parseopt() and
        parseopt_notrack() are generated from it by deleting the
        #--! DEBUG (and #--! TRACKING) sections.  All changes to the
        parsing engine must be made here first.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug('')
            debug.debug('State : %s', state)
            # --! DEBUG

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = "$end"

            # --! DEBUG
            debug.debug('Stack : %s',
                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            # --! DEBUG

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    # --! DEBUG
                    if plen:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
                    else:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)

                    # --! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            # Propagate start position from the first matched
                            # symbol and end position from the last one.
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            # Empty production: position comes from the lexer.
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            if t == 0:
                # Accept: the start symbol's value is the parse result.
                n = symstack[-1]
                result = getattr(n,"value",None)
                # --! DEBUG
                debug.info("Done : Returning %s", format_result(result))
                debug.info("PLY: PARSE DEBUG END")
                # --! DEBUG
                return result

            if t == None:

                # --! DEBUG
                debug.error('Error : %s',
                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # --! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        # No user error handler: emit a default message.
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here.   Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
587 | |
588 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
589 # parseopt(). | |
590 # | |
591 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. | |
592 # Edit the debug version above, then copy any modifications to the method | |
593 # below while removing #--! DEBUG sections. | |
594 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
595 | |
596 | |
    def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Optimized (tracking-capable) LR parsing engine.

        Generated from parsedebug() by deleting the #--! DEBUG sections.
        DO NOT EDIT THIS CODE DIRECTLY; edit parsedebug() and copy the
        changes here.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            # Propagate start position from the first matched
                            # symbol and end position from the last one.
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            # Empty production: position comes from the lexer.
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            if t == 0:
                # Accept: the start symbol's value is the parse result.
                n = symstack[-1]
                return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        # No user error handler: emit a default message.
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here.   Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
860 | |
861 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
862 # parseopt_notrack(). | |
863 # | |
864 # Optimized version of parseopt() with line number tracking removed. | |
865 # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove | |
866 # code in the #--! TRACKING sections | |
867 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
868 | |
    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Optimized LR parsing engine with debugging and position
        tracking support stripped out.

        Machine-derived from the debugging parse() method; do not edit
        this body directly (see the banner comment above).  Parameters
        mirror parse():

        input     -- optional source text handed to the lexer
        lexer     -- lexer object; defaults to the module-level lexer
                     obtained via load_ply_lex()
        debug, tracking -- accepted only for signature compatibility;
                     never referenced in this variant
        tokenfunc -- optional callable used in place of lexer.token

        Returns the semantic value of the start symbol, or None on an
        unrecoverable syntax error.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = []              # Stack of pushed-back lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = []                 # Stack of parsing states
        self.statestack = statestack
        symstack   = []                 # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the start symbol is on top of the stack
                    n = symstack[-1]
                    return getattr(n,"value",None)

            # NOTE(review): '==' comparison with None retained verbatim;
            # 'is None' would be the idiomatic form.
            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            # NOTE(review): tests errtoken for a lineno but
                            # then reads it off lookahead -- at this point
                            # they appear to be the same object; confirm
                            # before changing.
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                # NOTE(review): unlike the branch above, this
                                # message carries no trailing newline.
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Wrap the offending token in a synthetic 'error'
                    # symbol and push the real token back for later.
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")
1115 | |
1116 # ----------------------------------------------------------------------------- | |
1117 # === Grammar Representation === | |
1118 # | |
1119 # The following functions, classes, and variables are used to represent and | |
1120 # manipulate the rules that make up a grammar. | |
1121 # ----------------------------------------------------------------------------- | |
1122 | |
import re

# Regular expression matching valid rule/symbol identifiers.
# NOTE(review): the character class also permits '-', which ordinary
# Python identifiers do not -- presumably intentional for symbol names.
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1127 | |
1128 # ----------------------------------------------------------------------------- | |
1129 # class Production: | |
1130 # | |
1131 # This class stores the raw information about a single production or grammar rul
e. | |
1132 # A grammar rule refers to a specification such as this: | |
1133 # | |
1134 # expr : expr PLUS term | |
1135 # | |
1136 # Here are the basic attributes defined on all productions | |
1137 # | |
1138 # name - Name of the production. For example 'expr' | |
1139 # prod - A list of symbols on the right side ['expr','PLUS','term'] | |
1140 # prec - Production precedence level | |
1141 # number - Production number. | |
1142 # func - Function that executes on reduce | |
1143 # file - File where production function is defined | |
1144 # lineno - Line number where production function is defined | |
1145 # | |
1146 # The following attributes are defined or optional. | |
1147 # | |
1148 # len - Length of the production (number of symbols on right hand si
de) | |
1149 # usyms - Set of unique symbols found in the production | |
1150 # ----------------------------------------------------------------------------- | |
1151 | |
class Production(object):
    """One grammar rule, e.g. the specification 'expr : expr PLUS term'.

    Attributes:
      name     -- name of the left-hand side, e.g. 'expr'
      prod     -- tuple of right-hand-side symbols, e.g. ('expr','PLUS','term')
      prec     -- production precedence as an (assoc, level) tuple
      number   -- production number
      func     -- name of the function that executes on reduce
      callable -- bound reduce function (resolved by bind())
      file     -- file where the production function is defined
      line     -- line number where the production function is defined
      len      -- number of symbols on the right-hand side
      usyms    -- unique symbols appearing in the production
      lr_items -- list of all LR items for the production
      reduced  -- class-level reduce counter
    """
    reduced = 0
    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Internal settings used during table construction

        self.len  = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = [ ]
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production("+str(self)+")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Py2: a Production must always be truthy -- even an epsilon
        # production with an empty right-hand side -- so that 'if not p'
        # only matches the None placeholder in Grammar.Productions[0].
        return 1

    def __bool__(self):
        # Bug fix: Python 3 consults __bool__, not __nonzero__.  Without
        # this, truthiness falls back to __len__ and an epsilon
        # production (len == 0) becomes falsy, silently breaking
        # 'if not p' checks under Python 3.
        return True

    def __getitem__(self,index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self,n):
        if n > len(self.prod): return None
        p = LRItem(self,n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable found in pdict
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]
1220 | |
# Minimal stand-in for Production objects used when table data is read
# back from a pre-generated parser file: it carries only the fields the
# LR engine actually consults, plus a little debugging information.
class MiniProduction(object):
    """Lightweight replacement for Production when loading cached tables."""

    def __init__(self,str,name,len,func,file,line):
        # NOTE: the 'str' and 'len' parameter names shadow builtins but
        # are kept for signature compatibility with the table reader.
        self.str      = str       # Preformatted "lhs -> rhs" text
        self.name     = name      # Left-hand-side name
        self.len      = len       # Number of right-hand-side symbols
        self.func     = func      # Name of the reduce function
        self.file     = file      # Defining file (debugging only)
        self.line     = line      # Defining line (debugging only)
        self.callable = None      # Resolved later by bind()

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Resolve self.func (a name) to an actual callable taken from pdict.
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]
1243 | |
1244 | |
1245 # ----------------------------------------------------------------------------- | |
1246 # class LRItem | |
1247 # | |
1248 # This class represents a specific stage of parsing a production rule. For | |
1249 # example: | |
1250 # | |
1251 # expr : expr . PLUS term | |
1252 # | |
1253 # In the above, the "." represents the current location of the parse. Here | |
1254 # basic attributes: | |
1255 # | |
1256 # name - Name of the production. For example 'expr' | |
1257 # prod - A list of symbols on the right side ['expr','.', 'PLUS','te
rm'] | |
1258 # number - Production number. | |
1259 # | |
1260 # lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term
' | |
1261 # then lr_next refers to 'expr -> expr PLUS . term' | |
1262 # lr_index - LR item index (location of the ".") in the prod list. | |
1263 # lookaheads - LALR lookahead symbols for this item | |
1264 # len - Length of the production (number of symbols on right hand s
ide) | |
1265 # lr_after - List of all productions that immediately follow | |
1266 # lr_before - Grammar symbol immediately before | |
1267 # ----------------------------------------------------------------------------- | |
1268 | |
class LRItem(object):
    """A production with a parse-position marker, e.g. 'expr -> expr . PLUS term'.

    Attributes:
      name       -- left-hand-side name of the underlying production
      prod       -- RHS symbols as a tuple, with '.' inserted at the marker
      number     -- number of the underlying production
      lr_index   -- index of the '.' within prod
      lookaheads -- LALR lookahead symbols for this item
      len        -- length of prod, marker included
      usyms      -- unique symbols of the underlying production
    """

    def __init__(self,p,n):
        symbols = list(p.prod)
        symbols.insert(n,".")          # drop the marker into position n
        self.name       = p.name
        self.number     = p.number
        self.lr_index   = n
        self.lookaheads = {}
        self.prod       = tuple(symbols)
        self.len        = len(self.prod)
        self.usyms      = p.usyms

    def __str__(self):
        if not self.prod:
            return "%s -> <empty>" % self.name
        return "%s -> %s" % (self.name," ".join(self.prod))

    def __repr__(self):
        return "LRItem(" + str(self) + ")"
1290 | |
1291 # ----------------------------------------------------------------------------- | |
1292 # rightmost_terminal() | |
1293 # | |
1294 # Return the rightmost terminal from a list of symbols. Used in add_production(
) | |
1295 # ----------------------------------------------------------------------------- | |
def rightmost_terminal(symbols, terminals):
    """Return the right-most member of *symbols* that is a terminal.

    Scans from the end of the list; returns None when no terminal is
    present.  Used by add_production() to derive a rule's default
    precedence.
    """
    for sym in reversed(symbols):
        if sym in terminals:
            return sym
    return None
1303 | |
1304 # ----------------------------------------------------------------------------- | |
1305 # === GRAMMAR CLASS === | |
1306 # | |
1307 # The following class represents the contents of the specified grammar along | |
1308 # with various computed properties such as first sets, follow sets, LR items, et
c. | |
1309 # This data is used for critical parts of the table generation process later. | |
1310 # ----------------------------------------------------------------------------- | |
1311 | |
# Raised for defects detected in the grammar specification itself
# (illegal rule names, duplicate rules, precedence problems, etc.).
class GrammarError(YaccError): pass
1313 | |
1314 class Grammar(object): | |
    def __init__(self,terminals):
        """Create an empty grammar over the terminal symbols *terminals*.

        The reserved terminal 'error' is always added.  All derived data
        (productions, FIRST/FOLLOW sets, precedence bookkeeping) starts
        empty and is populated by later method calls.
        """
        self.Productions  = [None]  # A list of all of the productions.  The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap      = { }     # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First        = { }     # A dictionary of precomputed FIRST(x) symbols

        self.Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = { }   # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar
1349 | |
1350 | |
    def __len__(self):
        """Number of entries in the production list (reserved slot 0 included)."""
        return len(self.Productions)
1353 | |
    def __getitem__(self,index):
        """Return production *index* (index 0 is the augmented S' rule)."""
        return self.Productions[index]
1356 | |
1357 # --------------------------------------------------------------------------
--- | |
1358 # set_precedence() | |
1359 # | |
1360 # Sets the precedence for a given terminal. assoc is the associativity such
as | |
1361 # 'left','right', or 'nonassoc'. level is a numeric level. | |
1362 # | |
1363 # --------------------------------------------------------------------------
--- | |
1364 | |
1365 def set_precedence(self,term,assoc,level): | |
1366 assert self.Productions == [None],"Must call set_precedence() before add
_production()" | |
1367 if term in self.Precedence: | |
1368 raise GrammarError("Precedence already specified for terminal '%s'"
% term) | |
1369 if assoc not in ['left','right','nonassoc']: | |
1370 raise GrammarError("Associativity must be one of 'left','right', or
'nonassoc'") | |
1371 self.Precedence[term] = (assoc,level) | |
1372 | |
1373 # --------------------------------------------------------------------------
--- | |
1374 # add_production() | |
1375 # | |
1376 # Given an action function, this function assembles a production rule and | |
1377 # computes its precedence level. | |
1378 # | |
1379 # The production rule is supplied as a list of symbols. For example, | |
1380 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and | |
1381 # symbols ['expr','PLUS','term']. | |
1382 # | |
1383 # Precedence is determined by the precedence of the right-most non-terminal | |
1384 # or the precedence of a terminal specified by %prec. | |
1385 # | |
1386 # A variety of error checks are performed to make sure production symbols | |
1387 # are valid and that %prec is used correctly. | |
1388 # --------------------------------------------------------------------------
--- | |
1389 | |
    def add_production(self,prodname,syms,func=None,file='',line=0):
        """Add one production rule to the grammar.

        prodname -- name of the rule's left-hand side, e.g. 'expr'
        syms     -- right-hand-side symbols, e.g. ['expr','PLUS','term'];
                    may end with '%prec' TERMINAL, which is consumed here.
                    NOTE: this list is modified in place (quoted literals
                    are normalized and any %prec suffix is deleted).
        func     -- action function executed on reduce
        file, line -- rule location, used only in error messages

        Precedence is taken from %prec if present, otherwise from the
        rightmost terminal (default ('right',0)).  Raises GrammarError
        for illegal names, bad literals, %prec misuse, or duplicate
        rules.  Returns 0 on success.
        """

        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))

        # Look for literal tokens such as 'x' or "+" and normalize them
        # to their single-character form.
        for n,s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    # NOTE(review): eval() of the quoted literal -- the
                    # grammar text is assumed to be trusted input here.
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
                    if not c in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname,None)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
            else:
                self.UsedPrecedence[precname] = 1
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms,self.Terminals)
            prodprec = self.Precedence.get(precname,('right',0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname,syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
                               "Previous definition at %s:%d" % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber  = len(self.Productions)
        if not prodname in self.Nonterminals:
            self.Nonterminals[prodname] = [ ]

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if not t in self.Nonterminals:
                    self.Nonterminals[t] = [ ]
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber,prodname,syms,prodprec,func,file,line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [ p ]
        return 0
1465 | |
1466 # --------------------------------------------------------------------------
--- | |
1467 # set_start() | |
1468 # | |
1469 # Sets the starting symbol and creates the augmented grammar. Production | |
1470 # rule 0 is S' -> start where start is the start symbol. | |
1471 # --------------------------------------------------------------------------
--- | |
1472 | |
    def set_start(self,start=None):
        """Set the grammar's start symbol and build the augmented rule.

        start defaults to the left-hand side of the first user rule.
        Production 0 becomes S' -> start, and the start symbol's usage
        list is updated accordingly.  Raises GrammarError if *start* is
        not a defined nonterminal.
        """
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError("start symbol %s undefined" % start)
        self.Productions[0] = Production(0,"S'",[start])
        self.Nonterminals[start].append(0)
        self.Start = start
1481 | |
1482 # --------------------------------------------------------------------------
--- | |
1483 # find_unreachable() | |
1484 # | |
1485 # Find all of the nonterminal symbols that can't be reached from the startin
g | |
1486 # symbol. Returns a list of nonterminals that can't be reached. | |
1487 # --------------------------------------------------------------------------
--- | |
1488 | |
1489 def find_unreachable(self): | |
1490 | |
1491 # Mark all symbols that are reachable from a symbol s | |
1492 def mark_reachable_from(s): | |
1493 if reachable[s]: | |
1494 # We've already reached symbol s. | |
1495 return | |
1496 reachable[s] = 1 | |
1497 for p in self.Prodnames.get(s,[]): | |
1498 for r in p.prod: | |
1499 mark_reachable_from(r) | |
1500 | |
1501 reachable = { } | |
1502 for s in list(self.Terminals) + list(self.Nonterminals): | |
1503 reachable[s] = 0 | |
1504 | |
1505 mark_reachable_from( self.Productions[0].prod[0] ) | |
1506 | |
1507 return [s for s in list(self.Nonterminals) | |
1508 if not reachable[s]] | |
1509 | |
1510 # --------------------------------------------------------------------------
--- | |
1511 # infinite_cycles() | |
1512 # | |
1513 # This function looks at the various parsing rules and tries to detect | |
1514 # infinite recursion cycles (grammar rules where there is no possible way | |
1515 # to derive a string of only terminals). | |
1516 # --------------------------------------------------------------------------
--- | |
1517 | |
    def infinite_cycles(self):
        """Detect grammar symbols that can never derive a terminal string.

        A nonterminal "terminates" if at least one of its productions
        consists entirely of terminating symbols; this is propagated to
        a fixed point.  Returns the list of non-terminating symbols.
        Symbols that are used-but-undefined are excluded, since they are
        reported separately by undefined_symbols().
        """
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = 1

        terminates['$end'] = 1

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = 0

        # Then propagate termination until no change:
        while 1:
            some_change = 0
            for (n,pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = 0
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = 1

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = 1
                            some_change = 1
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s,term) in terminates.items():
            if not term:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
1574 | |
1575 | |
1576 # --------------------------------------------------------------------------
--- | |
1577 # undefined_symbols() | |
1578 # | |
1579 # Find all symbols that were used the grammar, but not defined as tokens or | |
1580 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symb
ol | |
1581 # and prod is the production where the symbol was used. | |
1582 # --------------------------------------------------------------------------
--- | |
1583 def undefined_symbols(self): | |
1584 result = [] | |
1585 for p in self.Productions: | |
1586 if not p: continue | |
1587 | |
1588 for s in p.prod: | |
1589 if not s in self.Prodnames and not s in self.Terminals and s !=
'error': | |
1590 result.append((s,p)) | |
1591 return result | |
1592 | |
1593 # --------------------------------------------------------------------------
--- | |
1594 # unused_terminals() | |
1595 # | |
1596 # Find all terminals that were defined, but not used by the grammar. Return
s | |
1597 # a list of all symbols. | |
1598 # --------------------------------------------------------------------------
--- | |
1599 def unused_terminals(self): | |
1600 unused_tok = [] | |
1601 for s,v in self.Terminals.items(): | |
1602 if s != 'error' and not v: | |
1603 unused_tok.append(s) | |
1604 | |
1605 return unused_tok | |
1606 | |
1607 # --------------------------------------------------------------------------
---- | |
1608 # unused_rules() | |
1609 # | |
1610 # Find all grammar rules that were defined, but not used (maybe not reachab
le) | |
1611 # Returns a list of productions. | |
1612 # --------------------------------------------------------------------------
---- | |
1613 | |
1614 def unused_rules(self): | |
1615 unused_prod = [] | |
1616 for s,v in self.Nonterminals.items(): | |
1617 if not v: | |
1618 p = self.Prodnames[s][0] | |
1619 unused_prod.append(p) | |
1620 return unused_prod | |
1621 | |
1622 # --------------------------------------------------------------------------
--- | |
1623 # unused_precedence() | |
1624 # | |
1625 # Returns a list of tuples (term,precedence) corresponding to precedence | |
1626 # rules that were never used by the grammar. term is the name of the termin
al | |
1627 # on which precedence was applied and precedence is a string such as 'left'
or | |
1628 # 'right' corresponding to the type of precedence. | |
1629 # --------------------------------------------------------------------------
--- | |
1630 | |
1631 def unused_precedence(self): | |
1632 unused = [] | |
1633 for termname in self.Precedence: | |
1634 if not (termname in self.Terminals or termname in self.UsedPrecedenc
e): | |
1635 unused.append((termname,self.Precedence[termname][0])) | |
1636 | |
1637 return unused | |
1638 | |
1639 # ------------------------------------------------------------------------- | |
1640 # _first() | |
1641 # | |
1642 # Compute the value of FIRST1(beta) where beta is a tuple of symbols. | |
1643 # | |
1644 # During execution of compute_first1, the result may be incomplete. | |
1645 # Afterward (e.g., when called from compute_follow()), it will be complete. | |
1646 # ------------------------------------------------------------------------- | |
    def _first(self,beta):
        """Compute FIRST1(beta) for a sequence of symbols *beta*.

        Returns the list of terminals that can begin a string derived
        from beta, including '<empty>' when every symbol of beta can
        derive the empty string.  Relies on self.First: during
        compute_first() the result may be incomplete; afterwards (e.g.
        when called from compute_follow()) it is exact.
        """

        # We are computing First(x1,x2,x3,...,xn)
        result = [ ]
        for x in beta:
            x_produces_empty = 0

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = 1
                else:
                    if f not in result: result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result
1675 | |
1676 # ------------------------------------------------------------------------- | |
1677 # compute_first() | |
1678 # | |
1679 # Compute the value of FIRST1(X) for all symbols | |
1680 # ------------------------------------------------------------------------- | |
    def compute_first(self):
        """Compute FIRST1(X) for every grammar symbol X.

        Terminals map to themselves; nonterminal sets are grown by
        fixed-point iteration over all productions until stable.  The
        result is cached in self.First and returned (subsequent calls
        return the cache).
        """
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while 1:
            some_change = 0
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append( f )
                            some_change = 1
            if not some_change:
                break

        return self.First
1710 | |
1711 # --------------------------------------------------------------------- | |
1712 # compute_follow() | |
1713 # | |
1714 # Computes all of the follow sets for every non-terminal symbol. The | |
1715 # follow set is the set of all symbols that might follow a given | |
1716 # non-terminal. See the Dragon book, 2nd Ed. p. 189. | |
1717 # --------------------------------------------------------------------- | |
    def compute_follow(self,start=None):
        """Compute FOLLOW(B) for every nonterminal B.

        start names the start symbol (defaults to the LHS of rule 1);
        '$end' seeds its follow set.  Results are cached in self.Follow
        and returned.  Computes FIRST sets first if needed.  See the
        Dragon book, 2nd Ed. p. 189.
        """
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = [ ]

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = [ '$end' ]

        # Grow the follow sets by fixed-point iteration until stable.
        while 1:
            didadd = 0
            for p in self.Productions[1:]:
                # Here is the production set
                for i in range(len(p.prod)):
                    B = p.prod[i]
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production:
                        # everything in FIRST(tail after B) can follow B.
                        fst = self._first(p.prod[i+1:])
                        hasempty = 0
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = 1
                            if f == '<empty>':
                                hasempty = 1
                        if hasempty or i == (len(p.prod)-1):
                            # B can end the production: add FOLLOW(lhs) to FOLLOW(B)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = 1
            if not didadd: break
        return self.Follow
1760 | |
1761 | |
1762 # --------------------------------------------------------------------------
--- | |
1763 # build_lritems() | |
1764 # | |
1765 # This function walks the list of productions and builds a complete set of t
he | |
1766 # LR items. The LR items are stored in two ways: First, they are uniquely | |
1767 # numbered and placed in the list _lritems. Second, a linked list of LR ite
ms | |
1768 # is built for each production. For example: | |
1769 # | |
1770 # E -> E PLUS E | |
1771 # | |
1772 # Creates the list | |
1773 # | |
1774 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] | |
1775 # --------------------------------------------------------------------------
--- | |
1776 | |
    def build_lritems(self):
        """Walk self.Productions and build the complete set of LR items.

        For each production p this creates p.lr_items, the list of LRItem
        objects with the dot at every position, and chains them together
        via the lr_next attribute (the chain starts at the production
        object itself and is terminated by None).
        """
        for p in self.Productions:
            lastlri = p        # tail of the lr_next chain; starts at p itself
            i = 0              # current dot position
            lr_items = []
            while 1:
                # Dot moved past the end of the production: terminate chain.
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p,i)
                    # Precompute the list of productions immediately following
                    # the dot (empty when the next symbol is a terminal or the
                    # dot is at the end).
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError,KeyError):
                        lri.lr_after = []
                    # Symbol appearing immediately before the dot.
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri: break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
1803 | |
1804 # ----------------------------------------------------------------------------- | |
1805 # == Class LRTable == | |
1806 # | |
1807 # This basic class represents a basic table of LR parsing information. | |
1808 # Methods for generating the tables are not defined here. They are defined | |
1809 # in the derived class LRGeneratedTable. | |
1810 # ----------------------------------------------------------------------------- | |
1811 | |
# Raised when a previously generated table file was produced by an
# incompatible version of this module (its _tabversion does not match
# __tabversion__), forcing the tables to be regenerated.
class VersionError(YaccError): pass
1813 | |
class LRTable(object):
    """Basic container for LR parsing information.

    Holds the action table, goto table, production list and parsing
    method.  This class only knows how to *load* previously generated
    tables (from a module or a pickle); table construction lives in the
    derived class LRGeneratedTable.
    """
    def __init__(self):
        self.lr_action = None          # Action table
        self.lr_goto = None            # Goto table
        self.lr_productions = None     # List of MiniProduction objects
        self.lr_method = None          # Parsing method ('LALR' or 'SLR')

    def read_table(self,module):
        """Load tables from a parsetab module.

        module - either an already-imported module object or an importable
                 (possibly dotted) module name.

        Returns the table signature string.  Raises VersionError when the
        table file was generated by an incompatible version.
        """
        if isinstance(module,types.ModuleType):
            parsetab = module
        else:
            # Import by name; exec keeps this working for dotted names on
            # both Python 2 and Python 3.
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                env = { }
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self,filename):
        """Load tables from a pickle file previously written by
        pickle_table().  Returns the table signature string.  Raises
        VersionError when the pickle was written by an incompatible
        version.
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        in_f = open(filename,"rb")
        # try/finally guarantees the file handle is closed even when the
        # version check raises or a pickle.load fails (the previous code
        # leaked the handle on those paths).
        try:
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError("yacc table file version is out of date")
            self.lr_method = pickle.load(in_f)
            signature = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto = pickle.load(in_f)
            productions = pickle.load(in_f)
        finally:
            in_f.close()

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self,pdict):
        for p in self.lr_productions:
            p.bind(pdict)
1873 | |
1874 # ----------------------------------------------------------------------------- | |
1875 # === LR Generator === | |
1876 # | |
1877 # The following classes and functions are used to generate LR parsing tables on | |
1878 # a grammar. | |
1879 # ----------------------------------------------------------------------------- | |
1880 | |
1881 # ----------------------------------------------------------------------------- | |
1882 # digraph() | |
1883 # traverse() | |
1884 # | |
1885 # The following two functions are used to compute set valued functions | |
1886 # of the form: | |
1887 # | |
1888 # F(x) = F'(x) U U{F(y) | x R y} | |
1889 # | |
1890 # This is used to compute the values of Read() sets as well as FOLLOW sets | |
1891 # in LALR(1) generation. | |
1892 # | |
1893 # Inputs: X - An input set | |
1894 # R - A relation | |
1895 # FP - Set-valued function | |
1896 # ------------------------------------------------------------------------------ | |
1897 | |
def digraph(X,R,FP):
    """Compute the set-valued function F(x) = FP(x) U U{ F(y) | x R y }
    over the input set X using the digraph traversal algorithm.

    X  - input set
    R  - relation: callable returning the elements related to x
    FP - base set-valued function F'(x)

    Returns the dict F mapping each x in X to its computed set.
    """
    N = dict((node, 0) for node in X)   # traversal marks; 0 = unvisited
    stack = []
    F = {}
    for node in X:
        if N[node] == 0:
            traverse(node, N, stack, F, X, R, FP)
    return F
1907 | |
def traverse(x,N,stack,F,X,R,FP):
    # Depth-first traversal helper for digraph().  Nodes that form a
    # strongly connected component under R all end up sharing the same
    # F value (Tarjan-style lowlink bookkeeping via N and stack).
    stack.append(x)
    d = len(stack)              # depth at which x was pushed
    N[x] = d
    F[x] = FP(x)                # F(X) <- F'(x)

    rel = R(x)                  # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        # Merge F(y) into F(x)
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
        # x is the root of a strongly connected component: pop every
        # member of the component and give it x's final F value.
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
1929 | |
# Raised for internal errors during LALR/SLR table construction
# (unsupported method, unresolvable conflicts, shift/shift conflicts).
class LALRError(YaccError): pass
1931 | |
1932 # ----------------------------------------------------------------------------- | |
1933 # == LRGeneratedTable == | |
1934 # | |
1935 # This class implements the LR table generation algorithm. There are no | |
1936 # public methods except for write() | |
1937 # ----------------------------------------------------------------------------- | |
1938 | |
1939 class LRGeneratedTable(LRTable): | |
    def __init__(self,grammar,method='LALR',log=None):
        """Build the LR parsing tables for *grammar*.

        grammar - a Grammar object with productions already defined
        method  - table construction algorithm, 'SLR' or 'LALR'
        log     - optional logger for diagnostic output; a NullLogger is
                  used when omitted

        Raises LALRError for an unsupported method.  Table construction
        runs immediately; results land in lr_action / lr_goto.
        """
        if method not in ['SLR','LALR']:
            raise LALRError("Unsupported method %s" % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}                  # Action table
        self.lr_goto = {}                    # Goto table
        self.lr_productions = grammar.Productions  # Copy of grammar Production array
        self.lr_goto_cache = {}              # Cache of computed gotos
        self.lr0_cidhash = {}                # Cache of closures (id -> state number)

        self._add_count = 0                  # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []                  # List of conflicts

        self.sr_conflicts = []               # (state, token, resolution) tuples
        self.rr_conflicts = []               # (state, chosen rule, rejected rule) tuples

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()
1974 | |
1975 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. | |
1976 | |
    def lr0_closure(self,I):
        """Compute the LR(0) closure of the item set I.

        Returns a new list containing I plus every item B -> .G reachable
        through non-terminals appearing right after a dot.  The lr0_added
        stamp (compared against _add_count) prevents adding the same item
        twice within one closure computation.
        """
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = 1
        while didadd:
            didadd = 0
            # Note: J grows while being iterated; newly appended items are
            # picked up either later in this pass or on the next pass.
            for j in J:
                for x in j.lr_after:
                    if getattr(x,"lr0_added",0) == self._add_count: continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = 1

        return J
1994 | |
1995 # Compute the LR(0) goto function goto(I,X) where I is a set | |
1996 # of LR(0) items and X is a grammar symbol. This function is written | |
1997 # in a way that guarantees uniqueness of the generated goto sets | |
1998 # (i.e. the same goto set will never be returned as two different Python | |
1999 # objects). With uniqueness, we can later do fast set comparisons using | |
2000 # id(obj) instead of element-wise comparison. | |
2001 | |
    def lr0_goto(self,I,x):
        """Compute the LR(0) goto set goto(I,x) for item set I and symbol x.

        Written so that the same goto set is always returned as the same
        Python object, which lets later code compare sets cheaply with
        id(obj) instead of element-wise comparison.
        """
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I),x),None)
        if g: return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result.  lr_goto_cache[x] is the root of a trie keyed by
        # the id() of each advanced item; the '$end' key at a trie node
        # stores the finished goto set for that particular item sequence.

        s = self.lr_goto_cache.get(x,None)
        if not s:
            s = { }
            self.lr_goto_cache[x] = s

        gs = [ ]
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                # Descend one trie level per advanced item
                s1 = s.get(id(n),None)
                if not s1:
                    s1 = { }
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end',None)
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                # Empty goto: cache the empty list itself
                s['$end'] = gs
        self.lr_goto_cache[(id(I),x)] = g
        return g
2034 | |
2035 # Compute the LR(0) sets of item function | |
2036 def lr0_items(self): | |
2037 | |
2038 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] | |
2039 i = 0 | |
2040 for I in C: | |
2041 self.lr0_cidhash[id(I)] = i | |
2042 i += 1 | |
2043 | |
2044 # Loop over the items in C and each grammar symbols | |
2045 i = 0 | |
2046 while i < len(C): | |
2047 I = C[i] | |
2048 i += 1 | |
2049 | |
2050 # Collect all of the symbols that could possibly be in the goto(I,X)
sets | |
2051 asyms = { } | |
2052 for ii in I: | |
2053 for s in ii.usyms: | |
2054 asyms[s] = None | |
2055 | |
2056 for x in asyms: | |
2057 g = self.lr0_goto(I,x) | |
2058 if not g: continue | |
2059 if id(g) in self.lr0_cidhash: continue | |
2060 self.lr0_cidhash[id(g)] = len(C) | |
2061 C.append(g) | |
2062 | |
2063 return C | |
2064 | |
2065 # --------------------------------------------------------------------------
--- | |
2066 # ==== LALR(1) Parsing ==== | |
2067 # | |
2068 # LALR(1) parsing is almost exactly the same as SLR except that instead of | |
2069 # relying upon Follow() sets when performing reductions, a more selective | |
2070 # lookahead set that incorporates the state of the LR(0) machine is utilized
. | |
2071 # Thus, we mainly just have to focus on calculating the lookahead sets. | |
2072 # | |
2073 # The method used here is due to DeRemer and Pennelo (1982). | |
2074 # | |
2075 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) | |
2076 # Lookahead Sets", ACM Transactions on Programming Languages and Systems
, | |
2077 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 | |
2078 # | |
2079 # Further details can also be found in: | |
2080 # | |
2081 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing
", | |
2082 # McGraw-Hill Book Company, (1985). | |
2083 # | |
2084 # --------------------------------------------------------------------------
--- | |
2085 | |
2086 # --------------------------------------------------------------------------
--- | |
2087 # compute_nullable_nonterminals() | |
2088 # | |
2089 # Creates a dictionary containing all of the non-terminals that might produc
e | |
2090 # an empty production. | |
2091 # --------------------------------------------------------------------------
--- | |
2092 | |
2093 def compute_nullable_nonterminals(self): | |
2094 nullable = {} | |
2095 num_nullable = 0 | |
2096 while 1: | |
2097 for p in self.grammar.Productions[1:]: | |
2098 if p.len == 0: | |
2099 nullable[p.name] = 1 | |
2100 continue | |
2101 for t in p.prod: | |
2102 if not t in nullable: break | |
2103 else: | |
2104 nullable[p.name] = 1 | |
2105 if len(nullable) == num_nullable: break | |
2106 num_nullable = len(nullable) | |
2107 return nullable | |
2108 | |
    # -----------------------------------------------------------------------------
    # find_nonterminal_transitions(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions.    These are transitions in which a dot appears immediately before
    # a non-terminal.   Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
    #
    # The input C is the set of LR(0) items.
    # -----------------------------------------------------------------------------
2119 | |
2120 def find_nonterminal_transitions(self,C): | |
2121 trans = [] | |
2122 for state in range(len(C)): | |
2123 for p in C[state]: | |
2124 if p.lr_index < p.len - 1: | |
2125 t = (state,p.prod[p.lr_index+1]) | |
2126 if t[1] in self.grammar.Nonterminals: | |
2127 if t not in trans: trans.append(t) | |
2128 state = state + 1 | |
2129 return trans | |
2130 | |
2131 # --------------------------------------------------------------------------
--- | |
2132 # dr_relation() | |
2133 # | |
2134 # Computes the DR(p,A) relationships for non-terminal transitions. The inpu
t | |
2135 # is a tuple (state,N) where state is a number and N is a nonterminal symbol
. | |
2136 # | |
2137 # Returns a list of terminals. | |
2138 # --------------------------------------------------------------------------
--- | |
2139 | |
2140 def dr_relation(self,C,trans,nullable): | |
2141 dr_set = { } | |
2142 state,N = trans | |
2143 terms = [] | |
2144 | |
2145 g = self.lr0_goto(C[state],N) | |
2146 for p in g: | |
2147 if p.lr_index < p.len - 1: | |
2148 a = p.prod[p.lr_index+1] | |
2149 if a in self.grammar.Terminals: | |
2150 if a not in terms: terms.append(a) | |
2151 | |
2152 # This extra bit is to handle the start state | |
2153 if state == 0 and N == self.grammar.Productions[0].prod[0]: | |
2154 terms.append('$end') | |
2155 | |
2156 return terms | |
2157 | |
2158 # --------------------------------------------------------------------------
--- | |
2159 # reads_relation() | |
2160 # | |
2161 # Computes the READS() relation (p,A) READS (t,C). | |
2162 # --------------------------------------------------------------------------
--- | |
2163 | |
2164 def reads_relation(self,C, trans, empty): | |
2165 # Look for empty transitions | |
2166 rel = [] | |
2167 state, N = trans | |
2168 | |
2169 g = self.lr0_goto(C[state],N) | |
2170 j = self.lr0_cidhash.get(id(g),-1) | |
2171 for p in g: | |
2172 if p.lr_index < p.len - 1: | |
2173 a = p.prod[p.lr_index + 1] | |
2174 if a in empty: | |
2175 rel.append((j,a)) | |
2176 | |
2177 return rel | |
2178 | |
2179 # --------------------------------------------------------------------------
--- | |
2180 # compute_lookback_includes() | |
2181 # | |
2182 # Determines the lookback and includes relations | |
2183 # | |
2184 # LOOKBACK: | |
2185 # | |
2186 # This relation is determined by running the LR(0) state machine forward. | |
2187 # For example, starting with a production "N : . A B C", we run it forward | |
2188 # to obtain "N : A B C ." We then build a relationship between this final | |
2189 # state and the starting state. These relationships are stored in a dictio
nary | |
2190 # lookdict. | |
2191 # | |
2192 # INCLUDES: | |
2193 # | |
2194 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). | |
2195 # | |
2196 # This relation is used to determine non-terminal transitions that occur | |
2197 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) | |
2198 # if the following holds: | |
2199 # | |
2200 # B -> LAT, where T -> epsilon and p' -L-> p | |
2201 # | |
2202 # L is essentially a prefix (which may be empty), T is a suffix that must be | |
2203 # able to derive an empty string. State p' must lead to state p with the st
ring L. | |
2204 # | |
2205 # --------------------------------------------------------------------------
--- | |
2206 | |
    def compute_lookback_includes(self,C,trans,nullable):
        """Determine the LOOKBACK and INCLUDES relations.

        LOOKBACK: for each non-terminal transition (state, N), run the
        LR(0) machine forward from "N : . A B C" to "N : A B C ." and
        relate the final state back to the starting transition.

        INCLUDES: (p,A) INCLUDES (p',B) when B -> L A T with T derivable
        to empty and p' leading to p via the prefix L.

        Returns the pair (lookdict, includedict).
        """

        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions for O(1) membership
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state,N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N: continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j,t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals: break      # No forget it
                            if not p.prod[li] in nullable: break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j,t))

                    g = self.lr0_goto(C[j],t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g),-1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name: continue
                    if r.len != p.len: continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]: break
                        i = i + 1
                    else:
                        lookb.append((j,r))
            # Record the includes discovered for this transition, indexed
            # by the *included* transition.
            for i in includes:
                if not i in includedict: includedict[i] = []
                includedict[i].append((state,N))
            lookdict[(state,N)] = lookb

        return lookdict,includedict
2268 | |
2269 # --------------------------------------------------------------------------
--- | |
2270 # compute_read_sets() | |
2271 # | |
2272 # Given a set of LR(0) items, this function computes the read sets. | |
2273 # | |
2274 # Inputs: C = Set of LR(0) items | |
2275 # ntrans = Set of nonterminal transitions | |
2276 # nullable = Set of empty transitions | |
2277 # | |
2278 # Returns a set containing the read sets | |
2279 # --------------------------------------------------------------------------
--- | |
2280 | |
2281 def compute_read_sets(self,C, ntrans, nullable): | |
2282 FP = lambda x: self.dr_relation(C,x,nullable) | |
2283 R = lambda x: self.reads_relation(C,x,nullable) | |
2284 F = digraph(ntrans,R,FP) | |
2285 return F | |
2286 | |
2287 # --------------------------------------------------------------------------
--- | |
2288 # compute_follow_sets() | |
2289 # | |
2290 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, | |
2291 # and an include set, this function computes the follow sets | |
2292 # | |
2293 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} | |
2294 # | |
2295 # Inputs: | |
2296 # ntrans = Set of nonterminal transitions | |
2297 # readsets = Readset (previously computed) | |
2298 # inclsets = Include sets (previously computed) | |
2299 # | |
2300 # Returns a set containing the follow sets | |
2301 # --------------------------------------------------------------------------
--- | |
2302 | |
2303 def compute_follow_sets(self,ntrans,readsets,inclsets): | |
2304 FP = lambda x: readsets[x] | |
2305 R = lambda x: inclsets.get(x,[]) | |
2306 F = digraph(ntrans,R,FP) | |
2307 return F | |
2308 | |
2309 # --------------------------------------------------------------------------
--- | |
2310 # add_lookaheads() | |
2311 # | |
2312 # Attaches the lookahead symbols to grammar rules. | |
2313 # | |
2314 # Inputs: lookbacks - Set of lookback relations | |
2315 # followset - Computed follow set | |
2316 # | |
2317 # This function directly attaches the lookaheads to productions contained | |
2318 # in the lookbacks set | |
2319 # --------------------------------------------------------------------------
--- | |
2320 | |
2321 def add_lookaheads(self,lookbacks,followset): | |
2322 for trans,lb in lookbacks.items(): | |
2323 # Loop over productions in lookback | |
2324 for state,p in lb: | |
2325 if not state in p.lookaheads: | |
2326 p.lookaheads[state] = [] | |
2327 f = followset.get(trans,[]) | |
2328 for a in f: | |
2329 if a not in p.lookaheads[state]: p.lookaheads[state].appen
d(a) | |
2330 | |
2331 # --------------------------------------------------------------------------
--- | |
2332 # add_lalr_lookaheads() | |
2333 # | |
2334 # This function does all of the work of adding lookahead information for use | |
2335 # with LALR parsing | |
2336 # --------------------------------------------------------------------------
--- | |
2337 | |
2338 def add_lalr_lookaheads(self,C): | |
2339 # Determine all of the nullable nonterminals | |
2340 nullable = self.compute_nullable_nonterminals() | |
2341 | |
2342 # Find all non-terminal transitions | |
2343 trans = self.find_nonterminal_transitions(C) | |
2344 | |
2345 # Compute read sets | |
2346 readsets = self.compute_read_sets(C,trans,nullable) | |
2347 | |
2348 # Compute lookback/includes relations | |
2349 lookd, included = self.compute_lookback_includes(C,trans,nullable) | |
2350 | |
2351 # Compute LALR FOLLOW sets | |
2352 followsets = self.compute_follow_sets(trans,readsets,included) | |
2353 | |
2354 # Add all of the lookaheads | |
2355 self.add_lookaheads(lookd,followsets) | |
2356 | |
2357 # --------------------------------------------------------------------------
--- | |
2358 # lr_parse_table() | |
2359 # | |
2360 # This function constructs the parse tables for SLR or LALR | |
2361 # --------------------------------------------------------------------------
--- | |
    def lr_parse_table(self):
        """Construct the SLR or LALR parse tables.

        Fills in self.lr_action and self.lr_goto, one entry per LR(0)
        state.  Action encoding: positive = shift to that state,
        negative = reduce by that rule number, 0 = accept, None = error
        (used for nonassoc conflicts).  Shift/reduce and reduce/reduce
        conflicts are resolved here via precedence/associativity and
        recorded in self.sr_conflicts / self.rr_conflicts.
        """
        Productions = self.grammar.Productions
        Precedence = self.grammar.Precedence
        goto = self.lr_goto           # Goto array
        action = self.lr_action       # Action array
        log = self.log                # Logger for output

        actionp = { }                 # Action production array (temporary)

        log.info("Parsing method: %s", self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = [ ]              # List of actions
            st_action = { }
            st_actionp = { }
            st_goto = { }
            log.info("")
            log.info("state %d", st)
            log.info("")
            for p in I:
                log.info("    (%d) %s", p.number, str(p))
            log.info("")

            for p in I:
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action["$end"] = 0
                        st_actionp["$end"] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        # LALR uses the per-state lookaheads computed by
                        # add_lalr_lookaheads(); SLR falls back to FOLLOW.
                        if self.lr_method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = self.grammar.Follow[p.name]
                        for a in laheads:
                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Existing entry is a shift.  Need to decide on
                                    # shift or reduce here using precedence rules.
                                    # By default we favor shifting.
                                    sprec,slevel = Productions[st_actionp[a].number].prec
                                    rprec,rlevel = Precedence.get(a,('right',0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        # nonassoc at equal precedence: error entry
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict.   In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        chosenp,rejectp = pp,oldp
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp,rejectp = oldp,pp
                                    self.rr_conflicts.append((st,chosenp,rejectp))
                                    log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                # No conflict: plain reduce entry
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I,a)
                        j = self.lr0_cidhash.get(id(g),-1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a,p,"shift and go to state %d" % j))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        raise LALRError("Shift/shift conflict in state %d" % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   -  if precedence of reduce rule is higher, we reduce.
                                    #   -  if precedence of reduce is same and left assoc, we reduce.
                                    #   -  otherwise we shift
                                    rprec,rlevel = Productions[st_actionp[a].number].prec
                                    sprec,slevel = Precedence.get(a,('right',0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))

                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = { }
            for a,p,m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info("    %-15s %s",a,m)
                        _actprint[(a,m)] = 1
            log.info("")
            # Print the actions that were not used.   (debugging)
            not_used = 0
            for a,p,m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a,m) in _actprint:
                            log.debug("  ! %-15s [ %s ]",a,m)
                            not_used = 1
                            _actprint[(a,m)] = 1
            if not_used:
                log.debug("")

            # Construct the goto table for this state

            nkeys = { }
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I,n)
                j = self.lr0_cidhash.get(id(g),-1)
                if j >= 0:
                    st_goto[n] = j
                    log.info("    %-30s shift and go to state %d",n,j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
2537 | |
2538 | |
2539 # --------------------------------------------------------------------------
--- | |
2540 # write() | |
2541 # | |
2542 # This function writes the LR parsing tables to a file | |
2543 # --------------------------------------------------------------------------
--- | |
2544 | |
    def write_table(self,modulename,outputdir='',signature=""):
        """Write the LR parsing tables to a Python source file.

        modulename - table module name (e.g. 'parsetab'); only the last
                     dotted component is used for the file name
        outputdir  - directory in which the file is created
        signature  - grammar signature embedded in the file so later runs
                     can detect grammar changes

        An IOError while writing is reported on stderr and swallowed;
        table writing is best-effort.
        """
        basemodulename = modulename.split(".")[-1]
        filename = os.path.join(outputdir,basemodulename) + ".py"
        try:
            f = open(filename,"w")

            # Header carries the version, method, and signature that
            # LRTable.read_table() validates before using the tables.
            f.write("""
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    """ % (filename, __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try and make smaller.  Instead of one
            # {(state, symbol): value} literal, entries are grouped per
            # symbol with parallel state/value lists; the generated module
            # reassembles the full dictionary at import time.
            if smaller:
                items = { }

                for s,nd in self.lr_action.items():
                   for name,v in nd.items():
                      i = items.get(name)
                      if not i:
                         i = ([],[])
                         items[name] = i
                      i[0].append(s)
                      i[1].append(v)

                f.write("\n_lr_action_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
      if not _x in _lr_action:  _lr_action[_x] = { }
      _lr_action[_x][_k] = _y
del _lr_action_items
""")

            else:
                f.write("\n_lr_action = { ");
                for k,v in self.lr_action.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n");

            if smaller:
                # Factor out names to try and make smaller (same trick as
                # for the action table above, now for the goto table)
                items = { }

                for s,nd in self.lr_goto.items():
                   for name,v in nd.items():
                      i = items.get(name)
                      if not i:
                         i = ([],[])
                         items[name] = i
                      i[0].append(s)
                      i[1].append(v)

                f.write("\n_lr_goto_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       if not _x in _lr_goto: _lr_goto[_x] = { }
       _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
            else:
                f.write("\n_lr_goto = { ");
                for k,v in self.lr_goto.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n");

            # Write production table.  Each tuple supplies the arguments
            # that read_table() passes to MiniProduction(*p).
            f.write("_lr_productions = [\n")
            for p in self.lr_productions:
                if p.func:
                    f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
                else:
                    f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
            f.write("]\n")
            f.close()

        except IOError:
            # Report and swallow -- callers treat table caching as optional.
            e = sys.exc_info()[1]
            sys.stderr.write("Unable to create '%s'\n" % filename)
            sys.stderr.write(str(e)+"\n")
            return
2658 | |
2659 | |
2660 # --------------------------------------------------------------------------
--- | |
2661 # pickle_table() | |
2662 # | |
2663 # This function pickles the LR parsing tables to a supplied file object | |
2664 # --------------------------------------------------------------------------
--- | |
2665 | |
2666 def pickle_table(self,filename,signature=""): | |
2667 try: | |
2668 import cPickle as pickle | |
2669 except ImportError: | |
2670 import pickle | |
2671 outf = open(filename,"wb") | |
2672 pickle.dump(__tabversion__,outf,pickle_protocol) | |
2673 pickle.dump(self.lr_method,outf,pickle_protocol) | |
2674 pickle.dump(signature,outf,pickle_protocol) | |
2675 pickle.dump(self.lr_action,outf,pickle_protocol) | |
2676 pickle.dump(self.lr_goto,outf,pickle_protocol) | |
2677 | |
2678 outp = [] | |
2679 for p in self.lr_productions: | |
2680 if p.func: | |
2681 outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) | |
2682 else: | |
2683 outp.append((str(p),p.name,p.len,None,None,None)) | |
2684 pickle.dump(outp,outf,pickle_protocol) | |
2685 outf.close() | |
2686 | |
2687 # ----------------------------------------------------------------------------- | |
2688 # === INTROSPECTION === | |
2689 # | |
2690 # The following functions and classes are used to implement the PLY | |
2691 # introspection features followed by the yacc() function itself. | |
2692 # ----------------------------------------------------------------------------- | |
2693 | |
2694 # ----------------------------------------------------------------------------- | |
2695 # get_caller_module_dict() | |
2696 # | |
2697 # This function returns a dictionary containing all of the symbols defined withi
n | |
2698 # a caller further down the call stack. This is used to get the environment | |
2699 # associated with the yacc() call if none was provided. | |
2700 # ----------------------------------------------------------------------------- | |
2701 | |
def get_caller_module_dict(levels):
    """Return a dict of the symbols visible *levels* frames up the stack.

    The caller's globals are copied and then overlaid with its locals
    (when the two dictionaries differ), yielding the environment
    associated with a yacc() call when no explicit module was supplied.
    """
    try:
        raise RuntimeError
    except RuntimeError:
        # The traceback's frame is this function's own frame; walk back
        # up the stack the requested number of levels.
        frame = sys.exc_info()[2].tb_frame
        for _ in range(levels):
            frame = frame.f_back
        symbols = frame.f_globals.copy()
        if frame.f_globals != frame.f_locals:
            symbols.update(frame.f_locals)
        return symbols
2716 | |
2717 # ----------------------------------------------------------------------------- | |
2718 # parse_grammar() | |
2719 # | |
2720 # This takes a raw grammar rule string and parses it into production data | |
2721 # ----------------------------------------------------------------------------- | |
def parse_grammar(doc,file,line):
    """Parse a docstring of grammar rules into production tuples.

    Returns a list of (file, lineno, prodname, syms) tuples, one per
    production.  A leading '|' continues the previously named rule.
    Raises SyntaxError for a misplaced '|', a missing ':'/'::=', or any
    otherwise malformed rule line.
    """
    grammar = []
    lineno = line
    lastp = None
    for rawline in doc.splitlines():
        lineno += 1
        tokens = rawline.split()
        if not tokens:
            continue
        try:
            if tokens[0] == '|':
                # Continuation: reuse the name of the previous rule.
                if lastp is None:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,lineno))
                prodname = lastp
                syms = tokens[1:]
            else:
                prodname = tokens[0]
                lastp = prodname
                syms = tokens[2:]
                assign = tokens[1]
                if assign != ':' and assign != '::=':
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,lineno))

            grammar.append((file,lineno,prodname,syms))
        except SyntaxError:
            raise
        except Exception:
            # Anything else (e.g. a bare rule name with no separator)
            # is reported as a generic syntax error for this line.
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,lineno,rawline.strip()))

    return grammar
2754 | |
2755 # ----------------------------------------------------------------------------- | |
2756 # ParserReflect() | |
2757 # | |
2758 # This class represents information extracted for building a parser including | |
2759 # start symbol, error function, tokens, precedence list, action functions, | |
2760 # etc. | |
2761 # ----------------------------------------------------------------------------- | |
class ParserReflect(object):
    """Information extracted from a module for building a parser.

    Collects the start symbol, error function, token list, precedence
    table, and p_* action functions from a symbol dictionary (usually a
    module namespace), and validates all of it.  Problems are reported
    through self.log and flagged by setting self.error.
    """
    def __init__(self,pdict,log=None):
        self.pdict = pdict          # Symbol dictionary being inspected
        self.start = None           # Start symbol ('start' entry in pdict)
        self.error_func = None      # p_error() handler, if defined
        self.tokens = None          # Token list
        self.files = {}             # Files in which rules were defined (used as a set)
        self.grammar = []           # Parsed rules: (funcname, (file,line,prodname,syms))
        self.error = 0              # Set to 1 by any failed validation

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        """Extract start symbol, error handler, tokens, precedence, and rules."""
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        """Run every validation step; return the accumulated error flag."""
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_files()
        return self.error

    # Compute a signature over the grammar
    def signature(self):
        """Return an MD5 digest over the grammar definition.

        The digest covers the start symbol, precedence table, token list,
        and every rule docstring; it is compared against a stored
        signature to decide whether cached tables are stale.  Must be
        called after get_all() has populated the attributes it reads.
        """
        try:
            from hashlib import md5
        except ImportError:
            from md5 import md5      # pre-2.5 fallback
        try:
            sig = md5()
            if self.start:
                sig.update(self.start.encode('latin-1'))
            if self.prec:
                sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
            if self.tokens:
                sig.update(" ".join(self.tokens).encode('latin-1'))
            for f in self.pfuncs:
                if f[3]:
                    # f[3] is the rule function's docstring
                    sig.update(f[3].encode('latin-1'))
        except (TypeError,ValueError):
            # Malformed entries simply stop contributing to the digest.
            pass
        return sig.digest()

    # -----------------------------------------------------------------------------
    # validate_file()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_files(self):
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for filename in self.files.keys():
            base,ext = os.path.splitext(filename)
            # NOTE(review): returning here aborts the check for ALL remaining
            # files, not just this one -- a 'continue' may have been intended.
            # The return value (1) is ignored by validate_all().
            if ext != '.py': return 1          # No idea. Assume it's okay.

            try:
                f = open(filename)
                lines = f.readlines()
                f.close()
            except IOError:
                continue                       # Source unavailable; skip the check.

            # Map each p_* function name to the line where it first appeared.
            counthash = { }
            for linen,l in enumerate(lines):
                linen += 1                     # 1-based line numbers for messages
                m = fre.match(l)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start,str):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        """Check that p_error, if present, is a callable taking one argument."""
        if self.error_func:
            if isinstance(self.error_func,types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1          # bound methods have an extra 'self' arg
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = 1
                return

            eline = func_code(self.error_func).co_firstlineno
            efile = func_code(self.error_func).co_filename
            self.files[efile] = 1

            if (func_code(self.error_func).co_argcount != 1+ismethod):
                self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
                self.error = 1

    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get("tokens",None)
        if not tokens:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens,(list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = 1
            return

        # Warn (but do not fail) on duplicated token names.
        terminals = {}
        for n in self.tokens:
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get("precedence",None)

    # Validate and parse the precedence map
    def validate_precedence(self):
        """Flatten the precedence table into (term, assoc, level) triples.

        Each entry of self.prec must be (assoc, term, ..., term); levels
        are numbered from 1 in the order entries appear.
        """
        preclist = []
        if self.prec:
            if not isinstance(self.prec,(list,tuple)):
                self.log.error("precedence must be a list or tuple")
                self.error = 1
                return
            for level,p in enumerate(self.prec):
                if not isinstance(p,(list,tuple)):
                    self.log.error("Bad precedence table")
                    self.error = 1
                    return

                if len(p) < 2:
                    self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
                    self.error = 1
                    return
                assoc = p[0]
                if not isinstance(assoc,str):
                    self.log.error("precedence associativity must be a string")
                    self.error = 1
                    return
                for term in p[1:]:
                    if not isinstance(term,str):
                        self.log.error("precedence items must be strings")
                        self.error = 1
                        return
                    preclist.append((term,assoc,level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        """Collect every p_* function (except p_error) as (line,file,name,doc)."""
        p_functions = []
        for name, item in self.pdict.items():
            if name[:2] != 'p_': continue
            if name == 'p_error': continue
            if isinstance(item,(types.FunctionType,types.MethodType)):
                line = func_code(item).co_firstlineno
                file = func_code(item).co_filename
                p_functions.append((line,file,name,item.__doc__))

        # Sort all of the actions by line number
        p_functions.sort()
        self.pfuncs = p_functions


    # Validate all of the p_functions
    def validate_pfunctions(self):
        """Check rule-function signatures and parse their docstring grammar."""
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error("no rules of the form p_rulename are defined")
            self.error = 1
            return

        for line, file, name, doc in self.pfuncs:
            func = self.pdict[name]
            # Bound methods carry an implicit 'self' in addition to 'p'.
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func_code(func).co_argcount > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
                self.error = 1
            elif func_code(func).co_argcount < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
                self.error = 1
            elif not func.__doc__:
                self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc,file,line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError:
                    e = sys.exc_info()[1]
                    self.log.error(str(e))
                    self.error = 1

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.files[file] = 1

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n,v in self.pdict.items():
            if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
            if n[0:2] == 't_': continue          # lexer rules are someone else's problem
            if n[0:2] == 'p_' and n != 'p_error':
                self.log.warning("'%s' not defined as a function", n)
            # A function with a rule-signature argcount whose docstring's
            # second word is ':' probably wanted a p_ prefix.
            if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
                (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
                try:
                    doc = v.__doc__.split(" ")
                    if doc[1] == ':':
                        self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
                                         func_code(v).co_filename, func_code(v).co_firstlineno,n)
                except Exception:
                    # No docstring or too short -- not a grammar rule; ignore.
                    pass

        self.grammar = grammar
3029 | |
3030 # ----------------------------------------------------------------------------- | |
3031 # yacc(module) | |
3032 # | |
3033 # Build a parser | |
3034 # ----------------------------------------------------------------------------- | |
3035 | |
def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file, outputdir='',
         debuglog=None, errorlog = None, picklefile=None):
    """Build and return an LRParser from grammar rules found in *module*
    (or in the caller's namespace when no module is given).

    Tries to reuse previously written table files (or *picklefile*) when
    their signature matches the current grammar; otherwise regenerates
    the tables with the requested *method* ('LALR' or 'SLR'), optionally
    writing them back out.  Raises YaccError if the grammar is invalid.
    """
    global parse                 # Reference to the parsing method of the last built parser

    # If pickling is enabled, table files are not created

    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k,getattr(module,k)) for k in dir(module)]
        pdict = dict(_items)
    else:
        # No module: harvest symbols from the frame that called yacc().
        pdict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict,log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError("Unable to build parser")

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        # With optimize on, the cached tables are trusted without a
        # signature match.
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr,pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception:
                e = sys.exc_info()[1]
                errorlog.warning("There was a problem loading the table file: %s", repr(e))
    except VersionError:
        # NOTE(review): sys.exc_info() returns the whole (type, value, tb)
        # tuple, so str(e) prints the tuple; sys.exc_info()[1] may have
        # been intended -- confirm before changing.
        e = sys.exc_info()
        errorlog.warning(str(e))
    except Exception:
        # Any other failure to load cached tables just falls through to
        # a full regeneration below.
        pass

    if debuglog is None:
        if debug:
            debuglog = PlyLogger(open(debugfile,"w"))
        else:
            debuglog = NullLogger()

    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)


    errors = 0

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError("Unable to build parser")

    if not pinfo.error_func:
        errorlog.warning("no p_error() function is defined")

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term,assoc,level)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.warning("%s",str(e))

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname,syms,funcname,file,line)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.error("%s",str(e))
            errors = 1

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError:
        e = sys.exc_info()[1]
        errorlog.error(str(e))
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
        errors = 1

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info("")
        debuglog.info("Unused terminals:")
        debuglog.info("")
        for term in unused_terminals:
            errorlog.warning("Token '%s' defined, but not used", term)
            debuglog.info("    %s", term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info("")
        debuglog.info("Grammar")
        debuglog.info("")
        for n,p in enumerate(grammar.Productions):
            debuglog.info("Rule %-5d %s", n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning("There is 1 unused token")
    if len(unused_terminals) > 1:
        errorlog.warning("There are %d unused tokens", len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning("There is 1 unused rule")
    if len(unused_rules) > 1:
        errorlog.warning("There are %d unused rules", len(unused_rules))

    # Cross-reference dump: where each terminal/nonterminal is used.
    if debug:
        debuglog.info("")
        debuglog.info("Terminals, with rules where they appear")
        debuglog.info("")
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info("")
        debuglog.info("Nonterminals, with rules where they appear")
        debuglog.info("")
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info("")

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning("Symbol '%s' is unreachable",u)

        # Infinite recursion (a nonterminal that can never derive a
        # terminal string) is fatal.
        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error("Infinite recursion detected for symbol '%s'", inf)
            errors = 1

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug("Generating %s tables", method)

    lr = LRGeneratedTable(grammar,method,debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning("1 shift/reduce conflict")
        elif num_sr > 1:
            errorlog.warning("%d shift/reduce conflicts", num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning("1 reduce/reduce conflict")
        elif num_rr > 1:
            errorlog.warning("%d reduce/reduce conflicts", num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning("")
        debuglog.warning("Conflicts:")
        debuglog.warning("")

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s",  tok, state, resolution)

        # Deduplicate reduce/reduce reports by (state, rule, rejected).
        already_reported = {}
        for state, rule, rejected in lr.rr_conflicts:
            if (state,id(rule),id(rejected)) in already_reported:
                continue
            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            debuglog.warning("rejected rule (%s) in state %d", rejected,state)
            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
            already_reported[state,id(rule),id(rejected)] = 1

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning("Rule (%s) is never reduced", rejected)
                errorlog.warning("Rule (%s) is never reduced", rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        lr.write_table(tabmodule,outputdir,signature)

    # Write a pickled version of the tables
    if picklefile:
        lr.pickle_table(picklefile,signature)

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr,pinfo.error_func)

    parse = parser.parse
    return parser
OLD | NEW |