third_party/ply/yacc.py - Issue 6673006: Add PLY (Python Lex Yacc)

Side by Side Diff: third_party/ply/yacc.py

Issue 6673006: Add PLY (Python Lex Yacc) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 9 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # -----------------------------------------------------------------------------

	2 # ply: yacc.py

	3 #

	4 # Copyright (C) 2001-2011,

	5 # David M. Beazley (Dabeaz LLC)

	6 # All rights reserved.

	7 #

	8 # Redistribution and use in source and binary forms, with or without

	9 # modification, are permitted provided that the following conditions are

	10 # met:

	11 #

	12 # * Redistributions of source code must retain the above copyright notice,

	13 # this list of conditions and the following disclaimer.

	14 # * Redistributions in binary form must reproduce the above copyright notice,

	15 # this list of conditions and the following disclaimer in the documentation

	16 # and/or other materials provided with the distribution.

	17 # * Neither the name of the David Beazley or Dabeaz LLC may be used to

	18 # endorse or promote products derived from this software without

	19 # specific prior written permission.

	20 #

	21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	24 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

	25 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

	26 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

	27 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

	28 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

	29 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	30 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

	31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	32 # -----------------------------------------------------------------------------

	33 #

	34 # This implements an LR parser that is constructed from grammar rules defined

	35 # as Python functions. The grammer is specified by supplying the BNF inside

	36 # Python documentation strings. The inspiration for this technique was borrowed

	37 # from John Aycock's Spark parsing system. PLY might be viewed as cross between

	38 # Spark and the GNU bison utility.

	39 #

	40 # The current implementation is only somewhat object-oriented. The

	41 # LR parser itself is defined in terms of an object (which allows multiple

	42 # parsers to co-exist). However, most of the variables used during table

	43 # construction are defined in terms of global variables. Users shouldn't

	44 # notice unless they are trying to define multiple parsers at the same

	45 # time using threads (in which case they should have their head examined).

	46 #

	47 # This implementation supports both SLR and LALR(1) parsing. LALR(1)

	48 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),

	49 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,

	50 # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced

	51 # by the more efficient DeRemer and Pennello algorithm.

	52 #

	53 # :::::::: WARNING :::::::

	54 #

	55 # Construction of LR parsing tables is fairly complicated and expensive.

	56 # To make this module run fast, a LOT of work has been put into

	57 # optimization---often at the expensive of readability and what might

	58 # consider to be good Python "coding style." Modify the code at your

	59 # own risk!

	60 # ----------------------------------------------------------------------------

	61

	62 __version__ = "3.4"

	63 __tabversion__ = "3.2" # Table version

	64

	65 #-----------------------------------------------------------------------------

	66 # === User configurable parameters ===

	67 #

	68 # Change these to modify the default behavior of yacc (if you wish)

	69 #-----------------------------------------------------------------------------

	70

	71 yaccdebug = 1 # Debugging mode. If set, yacc generates a

	72 # a 'parser.out' file in the current directory

	73

	74 debug_file = 'parser.out' # Default name of the debugging file

	75 tab_module = 'parsetab' # Default name of the table module

	76 default_lr = 'LALR' # Default LR table generation method

	77

	78 error_count = 3 # Number of symbols that must be shifted to leave recovery mode

	79

	80 yaccdevel = 0 # Set to True if developing yacc. This turns off optimized

	81 # implementations of certain functions.

	82

	83 resultlimit = 40 # Size limit of results when running in debug mod e.

	84

	85 pickle_protocol = 0 # Protocol to use when writing pickle files

	86

	87 import re, types, sys, os.path

	88

	89 # Compatibility function for python 2.6/3.0

	90 if sys.version_info[0] < 3:

	91 def func_code(f):

	92 return f.func_code

	93 else:

	94 def func_code(f):

	95 return f.__code__

	96

	97 # Compatibility

	98 try:

	99 MAXINT = sys.maxint

	100 except AttributeError:

	101 MAXINT = sys.maxsize

	102

	103 # Python 2.x/3.0 compatibility.

	104 def load_ply_lex():

	105 if sys.version_info[0] < 3:

	106 import lex

	107 else:

	108 import ply.lex as lex

	109 return lex

	110

	111 # This object is a stand-in for a logging object created by the

	112 # logging module. PLY will use this by default to create things

	113 # such as the parser.out file. If a user wants more detailed

	114 # information, they can create their own logging object and pass

	115 # it into PLY.

	116

	117 class PlyLogger(object):

	118 def __init__(self,f):

	119 self.f = f

	120 def debug(self,msg,args,*kwargs):

	121 self.f.write((msg % args) + "\n")

	122 info = debug

	123

	124 def warning(self,msg,args,*kwargs):

	125 self.f.write("WARNING: "+ (msg % args) + "\n")

	126

	127 def error(self,msg,args,*kwargs):

	128 self.f.write("ERROR: " + (msg % args) + "\n")

	129

	130 critical = debug

	131

	132 # Null logger is used when no output is generated. Does nothing.

	133 class NullLogger(object):

	134 def __getattribute__(self,name):

	135 return self

	136 def __call__(self,args,*kwargs):

	137 return self

	138

	139 # Exception raised for yacc-related errors

	140 class YaccError(Exception): pass

	141

	142 # Format the result message that the parser produces when running in debug mode.

	143 def format_result(r):

	144 repr_str = repr(r)

	145 if '\n' in repr_str: repr_str = repr(repr_str)

	146 if len(repr_str) > resultlimit:

	147 repr_str = repr_str[:resultlimit]+" ..."

	148 result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)

	149 return result

	150

	151

	152 # Format stack entries when the parser is running in debug mode

	153 def format_stack_entry(r):

	154 repr_str = repr(r)

	155 if '\n' in repr_str: repr_str = repr(repr_str)

	156 if len(repr_str) < 16:

	157 return repr_str

	158 else:

	159 return "<%s @ 0x%x>" % (type(r).__name__,id(r))

	160

	161 #-----------------------------------------------------------------------------

	162 # === LR Parsing Engine ===

	163 #

	164 # The following classes are used for the LR parser itself. These are not

	165 # used during table construction and are independent of the actual LR

	166 # table generation algorithm

	167 #-----------------------------------------------------------------------------

	168

	169 # This class is used to hold non-terminal grammar symbols during parsing.

	170 # It normally has the following attributes set:

	171 # .type = Grammar symbol type

	172 # .value = Symbol value

	173 # .lineno = Starting line number

	174 # .endlineno = Ending line number (optional, set automatically)

	175 # .lexpos = Starting lex position

	176 # .endlexpos = Ending lex position (optional, set automatically)

	177

	178 class YaccSymbol:

	179 def __str__(self): return self.type

	180 def __repr__(self): return str(self)

	181

	182 # This class is a wrapper around the objects actually passed to each

	183 # grammar rule. Index lookup and assignment actually assign the

	184 # .value attribute of the underlying YaccSymbol object.

	185 # The lineno() method returns the line number of a given

	186 # item (or 0 if not defined). The linespan() method returns

	187 # a tuple of (startline,endline) representing the range of lines

	188 # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)

	189 # representing the range of positional information for a symbol.

	190

	191 class YaccProduction:

	192 def __init__(self,s,stack=None):

	193 self.slice = s

	194 self.stack = stack

	195 self.lexer = None

	196 self.parser= None

	197 def __getitem__(self,n):

	198 if n >= 0: return self.slice[n].value

	199 else: return self.stack[n].value

	200

	201 def __setitem__(self,n,v):

	202 self.slice[n].value = v

	203

	204 def __getslice__(self,i,j):

	205 return [s.value for s in self.slice[i:j]]

	206

	207 def __len__(self):

	208 return len(self.slice)

	209

	210 def lineno(self,n):

	211 return getattr(self.slice[n],"lineno",0)

	212

	213 def set_lineno(self,n,lineno):

	214 self.slice[n].lineno = lineno

	215

	216 def linespan(self,n):

	217 startline = getattr(self.slice[n],"lineno",0)

	218 endline = getattr(self.slice[n],"endlineno",startline)

	219 return startline,endline

	220

	221 def lexpos(self,n):

	222 return getattr(self.slice[n],"lexpos",0)

	223

	224 def lexspan(self,n):

	225 startpos = getattr(self.slice[n],"lexpos",0)

	226 endpos = getattr(self.slice[n],"endlexpos",startpos)

	227 return startpos,endpos

	228

	229 def error(self):

	230 raise SyntaxError

	231

	232

	233 # -----------------------------------------------------------------------------

	234 # == LRParser ==

	235 #

	236 # The LR Parsing engine.

	237 # -----------------------------------------------------------------------------

	238

	239 class LRParser:

	240 def __init__(self,lrtab,errorf):

	241 self.productions = lrtab.lr_productions

	242 self.action = lrtab.lr_action

	243 self.goto = lrtab.lr_goto

	244 self.errorfunc = errorf

	245

	246 def errok(self):

	247 self.errorok = 1

	248

	249 def restart(self):

	250 del self.statestack[:]

	251 del self.symstack[:]

	252 sym = YaccSymbol()

	253 sym.type = '$end'

	254 self.symstack.append(sym)

	255 self.statestack.append(0)

	256

	257 def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):

	258 if debug or yaccdevel:

	259 if isinstance(debug,int):

	260 debug = PlyLogger(sys.stderr)

	261 return self.parsedebug(input,lexer,debug,tracking,tokenfunc)

	262 elif tracking:

	263 return self.parseopt(input,lexer,debug,tracking,tokenfunc)

	264 else:

	265 return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)

	266

	267

	268 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	269 # parsedebug().

	270 #

	271 # This is the debugging enabled version of parse(). All changes made to the

	272 # parsing engine should be made here. For the non-debugging version,

	273 # copy this code to a method parseopt() and delete all of the sections

	274 # enclosed in:

	275 #

	276 # #--! DEBUG

	277 # statements

	278 # #--! DEBUG

	279 #

	280 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	281

	282 def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=No ne):

	283 lookahead = None # Current lookahead symbol

	284 lookaheadstack = [ ] # Stack of lookahead symbols

	285 actions = self.action # Local reference to action table (to a void lookup on self.)

	286 goto = self.goto # Local reference to goto table (to avo id lookup on self.)

	287 prod = self.productions # Local reference to production list (t o avoid lookup on self.)

	288 pslice = YaccProduction(None) # Production object passed to grammar r ules

	289 errorcount = 0 # Used during error recovery

	290

	291 # --! DEBUG

	292 debug.info("PLY: PARSE DEBUG START")

	293 # --! DEBUG

	294

	295 # If no lexer was given, we will try to use the lex module

	296 if not lexer:

	297 lex = load_ply_lex()

	298 lexer = lex.lexer

	299

	300 # Set up the lexer and parser objects on pslice

	301 pslice.lexer = lexer

	302 pslice.parser = self

	303

	304 # If input was supplied, pass to lexer

	305 if input is not None:

	306 lexer.input(input)

	307

	308 if tokenfunc is None:

	309 # Tokenize function

	310 get_token = lexer.token

	311 else:

	312 get_token = tokenfunc

	313

	314 # Set up the state and symbol stacks

	315

	316 statestack = [ ] # Stack of parsing states

	317 self.statestack = statestack

	318 symstack = [ ] # Stack of grammar symbols

	319 self.symstack = symstack

	320

	321 pslice.stack = symstack # Put in the production

	322 errtoken = None # Err token

	323

	324 # The start state is assumed to be (0,$end)

	325

	326 statestack.append(0)

	327 sym = YaccSymbol()

	328 sym.type = "$end"

	329 symstack.append(sym)

	330 state = 0

	331 while 1:

	332 # Get the next symbol on the input. If a lookahead symbol

	333 # is already set, we just use that. Otherwise, we'll pull

	334 # the next token off of the lookaheadstack or from the lexer

	335

	336 # --! DEBUG

	337 debug.debug('')

	338 debug.debug('State : %s', state)

	339 # --! DEBUG

	340

	341 if not lookahead:

	342 if not lookaheadstack:

	343 lookahead = get_token() # Get the next token

	344 else:

	345 lookahead = lookaheadstack.pop()

	346 if not lookahead:

	347 lookahead = YaccSymbol()

	348 lookahead.type = "$end"

	349

	350 # --! DEBUG

	351 debug.debug('Stack : %s',

	352 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]) , str(lookahead))).lstrip())

	353 # --! DEBUG

	354

	355 # Check the action table

	356 ltype = lookahead.type

	357 t = actions[state].get(ltype)

	358

	359 if t is not None:

	360 if t > 0:

	361 # shift a symbol on the stack

	362 statestack.append(t)

	363 state = t

	364

	365 # --! DEBUG

	366 debug.debug("Action : Shift and goto state %s", t)

	367 # --! DEBUG

	368

	369 symstack.append(lookahead)

	370 lookahead = None

	371

	372 # Decrease error count on successful shift

	373 if errorcount: errorcount -=1

	374 continue

	375

	376 if t < 0:

	377 # reduce a symbol on the stack, emit a production

	378 p = prod[-t]

	379 pname = p.name

	380 plen = p.len

	381

	382 # Get production function

	383 sym = YaccSymbol()

	384 sym.type = pname # Production name

	385 sym.value = None

	386

	387 # --! DEBUG

	388 if plen:

	389 debug.info("Action : Reduce rule [%s] with %s and goto s tate %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[- plen:]])+"]",-t)

	390 else:

	391 debug.info("Action : Reduce rule [%s] with %s and goto s tate %d", p.str, [],-t)

	392

	393 # --! DEBUG

	394

	395 if plen:

	396 targ = symstack[-plen-1:]

	397 targ[0] = sym

	398

	399 # --! TRACKING

	400 if tracking:

	401 t1 = targ[1]

	402 sym.lineno = t1.lineno

	403 sym.lexpos = t1.lexpos

	404 t1 = targ[-1]

	405 sym.endlineno = getattr(t1,"endlineno",t1.lineno)

	406 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

	407

	408 # --! TRACKING

	409

	410 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	411 # The code enclosed in this section is duplicated

	412 # below as a performance optimization. Make sure

	413 # changes get made in both locations.

	414

	415 pslice.slice = targ

	416

	417 try:

	418 # Call the grammar rule with our special slice objec t

	419 del symstack[-plen:]

	420 del statestack[-plen:]

	421 p.callable(pslice)

	422 # --! DEBUG

	423 debug.info("Result : %s", format_result(pslice[0]))

	424 # --! DEBUG

	425 symstack.append(sym)

	426 state = goto[statestack[-1]][pname]

	427 statestack.append(state)

	428 except SyntaxError:

	429 # If an error was set. Enter error recovery state

	430 lookaheadstack.append(lookahead)

	431 symstack.pop()

	432 statestack.pop()

	433 state = statestack[-1]

	434 sym.type = 'error'

	435 lookahead = sym

	436 errorcount = error_count

	437 self.errorok = 0

	438 continue

	439 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	440

	441 else:

	442

	443 # --! TRACKING

	444 if tracking:

	445 sym.lineno = lexer.lineno

	446 sym.lexpos = lexer.lexpos

	447 # --! TRACKING

	448

	449 targ = [ sym ]

	450

	451 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	452 # The code enclosed in this section is duplicated

	453 # above as a performance optimization. Make sure

	454 # changes get made in both locations.

	455

	456 pslice.slice = targ

	457

	458 try:

	459 # Call the grammar rule with our special slice objec t

	460 p.callable(pslice)

	461 # --! DEBUG

	462 debug.info("Result : %s", format_result(pslice[0]))

	463 # --! DEBUG

	464 symstack.append(sym)

	465 state = goto[statestack[-1]][pname]

	466 statestack.append(state)

	467 except SyntaxError:

	468 # If an error was set. Enter error recovery state

	469 lookaheadstack.append(lookahead)

	470 symstack.pop()

	471 statestack.pop()

	472 state = statestack[-1]

	473 sym.type = 'error'

	474 lookahead = sym

	475 errorcount = error_count

	476 self.errorok = 0

	477 continue

	478 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	479

	480 if t == 0:

	481 n = symstack[-1]

	482 result = getattr(n,"value",None)

	483 # --! DEBUG

	484 debug.info("Done : Returning %s", format_result(result))

	485 debug.info("PLY: PARSE DEBUG END")

	486 # --! DEBUG

	487 return result

	488

	489 if t == None:

	490

	491 # --! DEBUG

	492 debug.error('Error : %s',

	493 ("%s . %s" % (" ".join([xx.type for xx in symstack][ 1:]), str(lookahead))).lstrip())

	494 # --! DEBUG

	495

	496 # We have some kind of parsing error here. To handle

	497 # this, we are going to push the current token onto

	498 # the tokenstack and replace it with an 'error' token.

	499 # If there are any synchronization rules, they may

	500 # catch it.

	501 #

	502 # In addition to pushing the error token, we call call

	503 # the user defined p_error() function if this is the

	504 # first syntax error. This function is only called if

	505 # errorcount == 0.

	506 if errorcount == 0 or self.errorok:

	507 errorcount = error_count

	508 self.errorok = 0

	509 errtoken = lookahead

	510 if errtoken.type == "$end":

	511 errtoken = None # End of file!

	512 if self.errorfunc:

	513 global errok,token,restart

	514 errok = self.errok # Set some special functions a vailable in error recovery

	515 token = get_token

	516 restart = self.restart

	517 if errtoken and not hasattr(errtoken,'lexer'):

	518 errtoken.lexer = lexer

	519 tok = self.errorfunc(errtoken)

	520 del errok, token, restart # Delete special functions

	521

	522 if self.errorok:

	523 # User must have done some kind of panic

	524 # mode recovery on their own. The

	525 # returned token is the next lookahead

	526 lookahead = tok

	527 errtoken = None

	528 continue

	529 else:

	530 if errtoken:

	531 if hasattr(errtoken,"lineno"): lineno = lookahead.li neno

	532 else: lineno = 0

	533 if lineno:

	534 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))

	535 else:

	536 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)

	537 else:

	538 sys.stderr.write("yacc: Parse error in input. EOF\n" )

	539 return

	540

	541 else:

	542 errorcount = error_count

	543

	544 # case 1: the statestack only has 1 entry on it. If we're in t his state, the

	545 # entire parse has been rolled back and we're completely hosed. The token is

	546 # discarded and we just keep going.

	547

	548 if len(statestack) <= 1 and lookahead.type != "$end":

	549 lookahead = None

	550 errtoken = None

	551 state = 0

	552 # Nuke the pushback stack

	553 del lookaheadstack[:]

	554 continue

	555

	556 # case 2: the statestack has a couple of entries on it, but we'r e

	557 # at the end of the file. nuke the top entry and generate an err or token

	558

	559 # Start nuking entries on the stack

	560 if lookahead.type == "$end":

	561 # Whoa. We're really hosed here. Bail out

	562 return

	563

	564 if lookahead.type != 'error':

	565 sym = symstack[-1]

	566 if sym.type == 'error':

	567 # Hmmm. Error is on top of stack, we'll just nuke input

	568 # symbol and continue

	569 lookahead = None

	570 continue

	571 t = YaccSymbol()

	572 t.type = 'error'

	573 if hasattr(lookahead,"lineno"):

	574 t.lineno = lookahead.lineno

	575 t.value = lookahead

	576 lookaheadstack.append(lookahead)

	577 lookahead = t

	578 else:

	579 symstack.pop()

	580 statestack.pop()

	581 state = statestack[-1] # Potential bug fix

	582

	583 continue

	584

	585 # Call an error function here

	586 raise RuntimeError("yacc: internal parser error!!!\n")

	587

	588 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	589 # parseopt().

	590 #

	591 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.

	592 # Edit the debug version above, then copy any modifications to the method

	593 # below while removing #--! DEBUG sections.

	594 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	595

	596

	597 def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):

	598 lookahead = None # Current lookahead symbol

	599 lookaheadstack = [ ] # Stack of lookahead symbols

	600 actions = self.action # Local reference to action table (to a void lookup on self.)

	601 goto = self.goto # Local reference to goto table (to avo id lookup on self.)

	602 prod = self.productions # Local reference to production list (t o avoid lookup on self.)

	603 pslice = YaccProduction(None) # Production object passed to grammar r ules

	604 errorcount = 0 # Used during error recovery

	605

	606 # If no lexer was given, we will try to use the lex module

	607 if not lexer:

	608 lex = load_ply_lex()

	609 lexer = lex.lexer

	610

	611 # Set up the lexer and parser objects on pslice

	612 pslice.lexer = lexer

	613 pslice.parser = self

	614

	615 # If input was supplied, pass to lexer

	616 if input is not None:

	617 lexer.input(input)

	618

	619 if tokenfunc is None:

	620 # Tokenize function

	621 get_token = lexer.token

	622 else:

	623 get_token = tokenfunc

	624

	625 # Set up the state and symbol stacks

	626

	627 statestack = [ ] # Stack of parsing states

	628 self.statestack = statestack

	629 symstack = [ ] # Stack of grammar symbols

	630 self.symstack = symstack

	631

	632 pslice.stack = symstack # Put in the production

	633 errtoken = None # Err token

	634

	635 # The start state is assumed to be (0,$end)

	636

	637 statestack.append(0)

	638 sym = YaccSymbol()

	639 sym.type = '$end'

	640 symstack.append(sym)

	641 state = 0

	642 while 1:

	643 # Get the next symbol on the input. If a lookahead symbol

	644 # is already set, we just use that. Otherwise, we'll pull

	645 # the next token off of the lookaheadstack or from the lexer

	646

	647 if not lookahead:

	648 if not lookaheadstack:

	649 lookahead = get_token() # Get the next token

	650 else:

	651 lookahead = lookaheadstack.pop()

	652 if not lookahead:

	653 lookahead = YaccSymbol()

	654 lookahead.type = '$end'

	655

	656 # Check the action table

	657 ltype = lookahead.type

	658 t = actions[state].get(ltype)

	659

	660 if t is not None:

	661 if t > 0:

	662 # shift a symbol on the stack

	663 statestack.append(t)

	664 state = t

	665

	666 symstack.append(lookahead)

	667 lookahead = None

	668

	669 # Decrease error count on successful shift

	670 if errorcount: errorcount -=1

	671 continue

	672

	673 if t < 0:

	674 # reduce a symbol on the stack, emit a production

	675 p = prod[-t]

	676 pname = p.name

	677 plen = p.len

	678

	679 # Get production function

	680 sym = YaccSymbol()

	681 sym.type = pname # Production name

	682 sym.value = None

	683

	684 if plen:

	685 targ = symstack[-plen-1:]

	686 targ[0] = sym

	687

	688 # --! TRACKING

	689 if tracking:

	690 t1 = targ[1]

	691 sym.lineno = t1.lineno

	692 sym.lexpos = t1.lexpos

	693 t1 = targ[-1]

	694 sym.endlineno = getattr(t1,"endlineno",t1.lineno)

	695 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

	696

	697 # --! TRACKING

	698

	699 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	700 # The code enclosed in this section is duplicated

	701 # below as a performance optimization. Make sure

	702 # changes get made in both locations.

	703

	704 pslice.slice = targ

	705

	706 try:

	707 # Call the grammar rule with our special slice objec t

	708 del symstack[-plen:]

	709 del statestack[-plen:]

	710 p.callable(pslice)

	711 symstack.append(sym)

	712 state = goto[statestack[-1]][pname]

	713 statestack.append(state)

	714 except SyntaxError:

	715 # If an error was set. Enter error recovery state

	716 lookaheadstack.append(lookahead)

	717 symstack.pop()

	718 statestack.pop()

	719 state = statestack[-1]

	720 sym.type = 'error'

	721 lookahead = sym

	722 errorcount = error_count

	723 self.errorok = 0

	724 continue

	725 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	726

	727 else:

	728

	729 # --! TRACKING

	730 if tracking:

	731 sym.lineno = lexer.lineno

	732 sym.lexpos = lexer.lexpos

	733 # --! TRACKING

	734

	735 targ = [ sym ]

	736

	737 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	738 # The code enclosed in this section is duplicated

	739 # above as a performance optimization. Make sure

	740 # changes get made in both locations.

	741

	742 pslice.slice = targ

	743

	744 try:

	745 # Call the grammar rule with our special slice objec t

	746 p.callable(pslice)

	747 symstack.append(sym)

	748 state = goto[statestack[-1]][pname]

	749 statestack.append(state)

	750 except SyntaxError:

	751 # If an error was set. Enter error recovery state

	752 lookaheadstack.append(lookahead)

	753 symstack.pop()

	754 statestack.pop()

	755 state = statestack[-1]

	756 sym.type = 'error'

	757 lookahead = sym

	758 errorcount = error_count

	759 self.errorok = 0

	760 continue

	761 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	762

	763 if t == 0:

	764 n = symstack[-1]

	765 return getattr(n,"value",None)

	766

	767 if t == None:

	768

	769 # We have some kind of parsing error here. To handle

	770 # this, we are going to push the current token onto

	771 # the tokenstack and replace it with an 'error' token.

	772 # If there are any synchronization rules, they may

	773 # catch it.

	774 #

	775 # In addition to pushing the error token, we call call

	776 # the user defined p_error() function if this is the

	777 # first syntax error. This function is only called if

	778 # errorcount == 0.

	779 if errorcount == 0 or self.errorok:

	780 errorcount = error_count

	781 self.errorok = 0

	782 errtoken = lookahead

	783 if errtoken.type == '$end':

	784 errtoken = None # End of file!

	785 if self.errorfunc:

	786 global errok,token,restart

	787 errok = self.errok # Set some special functions a vailable in error recovery

	788 token = get_token

	789 restart = self.restart

	790 if errtoken and not hasattr(errtoken,'lexer'):

	791 errtoken.lexer = lexer

	792 tok = self.errorfunc(errtoken)

	793 del errok, token, restart # Delete special functions

	794

	795 if self.errorok:

	796 # User must have done some kind of panic

	797 # mode recovery on their own. The

	798 # returned token is the next lookahead

	799 lookahead = tok

	800 errtoken = None

	801 continue

	802 else:

	803 if errtoken:

	804 if hasattr(errtoken,"lineno"): lineno = lookahead.li neno

	805 else: lineno = 0

	806 if lineno:

	807 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))

	808 else:

	809 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)

	810 else:

	811 sys.stderr.write("yacc: Parse error in input. EOF\n" )

	812 return

	813

	814 else:

	815 errorcount = error_count

	816

	817 # case 1: the statestack only has 1 entry on it. If we're in t his state, the

	818 # entire parse has been rolled back and we're completely hosed. The token is

	819 # discarded and we just keep going.

	820

	821 if len(statestack) <= 1 and lookahead.type != '$end':

	822 lookahead = None

	823 errtoken = None

	824 state = 0

	825 # Nuke the pushback stack

	826 del lookaheadstack[:]

	827 continue

	828

	829 # case 2: the statestack has a couple of entries on it, but we'r e

	830 # at the end of the file. nuke the top entry and generate an err or token

	831

	832 # Start nuking entries on the stack

	833 if lookahead.type == '$end':

	834 # Whoa. We're really hosed here. Bail out

	835 return

	836

	837 if lookahead.type != 'error':

	838 sym = symstack[-1]

	839 if sym.type == 'error':

	840 # Hmmm. Error is on top of stack, we'll just nuke input

	841 # symbol and continue

	842 lookahead = None

	843 continue

	844 t = YaccSymbol()

	845 t.type = 'error'

	846 if hasattr(lookahead,"lineno"):

	847 t.lineno = lookahead.lineno

	848 t.value = lookahead

	849 lookaheadstack.append(lookahead)

	850 lookahead = t

	851 else:

	852 symstack.pop()

	853 statestack.pop()

	854 state = statestack[-1] # Potential bug fix

	855

	856 continue

	857

	858 # Call an error function here

	859 raise RuntimeError("yacc: internal parser error!!!\n")

	860

	861 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	862 # parseopt_notrack().

	863 #

	864 # Optimized version of parseopt() with line number tracking removed.

	865 # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove

	866 # code in the #--! TRACKING sections

	867 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	868

	869 def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc =None):

	870 lookahead = None # Current lookahead symbol

	871 lookaheadstack = [ ] # Stack of lookahead symbols

	872 actions = self.action # Local reference to action table (to a void lookup on self.)

	873 goto = self.goto # Local reference to goto table (to avo id lookup on self.)

	874 prod = self.productions # Local reference to production list (t o avoid lookup on self.)

	875 pslice = YaccProduction(None) # Production object passed to grammar r ules

	876 errorcount = 0 # Used during error recovery

	877

	878 # If no lexer was given, we will try to use the lex module

	879 if not lexer:

	880 lex = load_ply_lex()

	881 lexer = lex.lexer

	882

	883 # Set up the lexer and parser objects on pslice

	884 pslice.lexer = lexer

	885 pslice.parser = self

	886

	887 # If input was supplied, pass to lexer

	888 if input is not None:

	889 lexer.input(input)

	890

	891 if tokenfunc is None:

	892 # Tokenize function

	893 get_token = lexer.token

	894 else:

	895 get_token = tokenfunc

	896

	897 # Set up the state and symbol stacks

	898

	899 statestack = [ ] # Stack of parsing states

	900 self.statestack = statestack

	901 symstack = [ ] # Stack of grammar symbols

	902 self.symstack = symstack

	903

	904 pslice.stack = symstack # Put in the production

	905 errtoken = None # Err token

	906

	907 # The start state is assumed to be (0,$end)

	908

	909 statestack.append(0)

	910 sym = YaccSymbol()

	911 sym.type = '$end'

	912 symstack.append(sym)

	913 state = 0

	914 while 1:

	915 # Get the next symbol on the input. If a lookahead symbol

	916 # is already set, we just use that. Otherwise, we'll pull

	917 # the next token off of the lookaheadstack or from the lexer

	918

	919 if not lookahead:

	920 if not lookaheadstack:

	921 lookahead = get_token() # Get the next token

	922 else:

	923 lookahead = lookaheadstack.pop()

	924 if not lookahead:

	925 lookahead = YaccSymbol()

	926 lookahead.type = '$end'

	927

	928 # Check the action table

	929 ltype = lookahead.type

	930 t = actions[state].get(ltype)

	931

	932 if t is not None:

	933 if t > 0:

	934 # shift a symbol on the stack

	935 statestack.append(t)

	936 state = t

	937

	938 symstack.append(lookahead)

	939 lookahead = None

	940

	941 # Decrease error count on successful shift

	942 if errorcount: errorcount -=1

	943 continue

	944

	945 if t < 0:

	946 # reduce a symbol on the stack, emit a production

	947 p = prod[-t]

	948 pname = p.name

	949 plen = p.len

	950

	951 # Get production function

	952 sym = YaccSymbol()

	953 sym.type = pname # Production name

	954 sym.value = None

	955

	956 if plen:

	957 targ = symstack[-plen-1:]

	958 targ[0] = sym

	959

	960 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	961 # The code enclosed in this section is duplicated

	962 # below as a performance optimization. Make sure

	963 # changes get made in both locations.

	964

	965 pslice.slice = targ

	966

	967 try:

	968 # Call the grammar rule with our special slice objec t

	969 del symstack[-plen:]

	970 del statestack[-plen:]

	971 p.callable(pslice)

	972 symstack.append(sym)

	973 state = goto[statestack[-1]][pname]

	974 statestack.append(state)

	975 except SyntaxError:

	976 # If an error was set. Enter error recovery state

	977 lookaheadstack.append(lookahead)

	978 symstack.pop()

	979 statestack.pop()

	980 state = statestack[-1]

	981 sym.type = 'error'

	982 lookahead = sym

	983 errorcount = error_count

	984 self.errorok = 0

	985 continue

	986 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	987

	988 else:

	989

	990 targ = [ sym ]

	991

	992 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	993 # The code enclosed in this section is duplicated

	994 # above as a performance optimization. Make sure

	995 # changes get made in both locations.

	996

	997 pslice.slice = targ

	998

	999 try:

	1000 # Call the grammar rule with our special slice objec t

	1001 p.callable(pslice)

	1002 symstack.append(sym)

	1003 state = goto[statestack[-1]][pname]

	1004 statestack.append(state)

	1005 except SyntaxError:

	1006 # If an error was set. Enter error recovery state

	1007 lookaheadstack.append(lookahead)

	1008 symstack.pop()

	1009 statestack.pop()

	1010 state = statestack[-1]

	1011 sym.type = 'error'

	1012 lookahead = sym

	1013 errorcount = error_count

	1014 self.errorok = 0

	1015 continue

	1016 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	1017

	1018 if t == 0:

	1019 n = symstack[-1]

	1020 return getattr(n,"value",None)

	1021

	1022 if t == None:

	1023

	1024 # We have some kind of parsing error here. To handle

	1025 # this, we are going to push the current token onto

	1026 # the tokenstack and replace it with an 'error' token.

	1027 # If there are any synchronization rules, they may

	1028 # catch it.

	1029 #

	1030 # In addition to pushing the error token, we call call

	1031 # the user defined p_error() function if this is the

	1032 # first syntax error. This function is only called if

	1033 # errorcount == 0.

	1034 if errorcount == 0 or self.errorok:

	1035 errorcount = error_count

	1036 self.errorok = 0

	1037 errtoken = lookahead

	1038 if errtoken.type == '$end':

	1039 errtoken = None # End of file!

	1040 if self.errorfunc:

	1041 global errok,token,restart

	1042 errok = self.errok # Set some special functions a vailable in error recovery

	1043 token = get_token

	1044 restart = self.restart

	1045 if errtoken and not hasattr(errtoken,'lexer'):

	1046 errtoken.lexer = lexer

	1047 tok = self.errorfunc(errtoken)

	1048 del errok, token, restart # Delete special functions

	1049

	1050 if self.errorok:

	1051 # User must have done some kind of panic

	1052 # mode recovery on their own. The

	1053 # returned token is the next lookahead

	1054 lookahead = tok

	1055 errtoken = None

	1056 continue

	1057 else:

	1058 if errtoken:

	1059 if hasattr(errtoken,"lineno"): lineno = lookahead.li neno

	1060 else: lineno = 0

	1061 if lineno:

	1062 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))

	1063 else:

	1064 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)

	1065 else:

	1066 sys.stderr.write("yacc: Parse error in input. EOF\n" )

	1067 return

	1068

	1069 else:

	1070 errorcount = error_count

	1071

	1072 # case 1: the statestack only has 1 entry on it. If we're in t his state, the

	1073 # entire parse has been rolled back and we're completely hosed. The token is

	1074 # discarded and we just keep going.

	1075

	1076 if len(statestack) <= 1 and lookahead.type != '$end':

	1077 lookahead = None

	1078 errtoken = None

	1079 state = 0

	1080 # Nuke the pushback stack

	1081 del lookaheadstack[:]

	1082 continue

	1083

	1084 # case 2: the statestack has a couple of entries on it, but we'r e

	1085 # at the end of the file. nuke the top entry and generate an err or token

	1086

	1087 # Start nuking entries on the stack

	1088 if lookahead.type == '$end':

	1089 # Whoa. We're really hosed here. Bail out

	1090 return

	1091

	1092 if lookahead.type != 'error':

	1093 sym = symstack[-1]

	1094 if sym.type == 'error':

	1095 # Hmmm. Error is on top of stack, we'll just nuke input

	1096 # symbol and continue

	1097 lookahead = None

	1098 continue

	1099 t = YaccSymbol()

	1100 t.type = 'error'

	1101 if hasattr(lookahead,"lineno"):

	1102 t.lineno = lookahead.lineno

	1103 t.value = lookahead

	1104 lookaheadstack.append(lookahead)

	1105 lookahead = t

	1106 else:

	1107 symstack.pop()

	1108 statestack.pop()

	1109 state = statestack[-1] # Potential bug fix

	1110

	1111 continue

	1112

	1113 # Call an error function here

	1114 raise RuntimeError("yacc: internal parser error!!!\n")

	1115

	1116 # -----------------------------------------------------------------------------

	1117 # === Grammar Representation ===

	1118 #

	1119 # The following functions, classes, and variables are used to represent and

	1120 # manipulate the rules that make up a grammar.

	1121 # -----------------------------------------------------------------------------

	1122

	1123 import re

	1124

	1125 # regex matching identifiers

	1126 _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

	1127

	1128 # -----------------------------------------------------------------------------

	1129 # class Production:

	1130 #

	1131 # This class stores the raw information about a single production or grammar rul e.

	1132 # A grammar rule refers to a specification such as this:

	1133 #

	1134 # expr : expr PLUS term

	1135 #

	1136 # Here are the basic attributes defined on all productions

	1137 #

	1138 # name - Name of the production. For example 'expr'

	1139 # prod - A list of symbols on the right side ['expr','PLUS','term']

	1140 # prec - Production precedence level

	1141 # number - Production number.

	1142 # func - Function that executes on reduce

	1143 # file - File where production function is defined

	1144 # lineno - Line number where production function is defined

	1145 #

	1146 # The following attributes are defined or optional.

	1147 #

	1148 # len - Length of the production (number of symbols on right hand si de)

	1149 # usyms - Set of unique symbols found in the production

	1150 # -----------------------------------------------------------------------------

	1151

	1152 class Production(object):

	1153 reduced = 0

	1154 def __init__(self,number,name,prod,precedence=('right',0),func=None,file='', line=0):

	1155 self.name = name

	1156 self.prod = tuple(prod)

	1157 self.number = number

	1158 self.func = func

	1159 self.callable = None

	1160 self.file = file

	1161 self.line = line

	1162 self.prec = precedence

	1163

	1164 # Internal settings used during table construction

	1165

	1166 self.len = len(self.prod) # Length of the production

	1167

	1168 # Create a list of unique production symbols used in the production

	1169 self.usyms = [ ]

	1170 for s in self.prod:

	1171 if s not in self.usyms:

	1172 self.usyms.append(s)

	1173

	1174 # List of all LR items for the production

	1175 self.lr_items = []

	1176 self.lr_next = None

	1177

	1178 # Create a string representation

	1179 if self.prod:

	1180 self.str = "%s -> %s" % (self.name," ".join(self.prod))

	1181 else:

	1182 self.str = "%s -> <empty>" % self.name

	1183

	1184 def __str__(self):

	1185 return self.str

	1186

	1187 def __repr__(self):

	1188 return "Production("+str(self)+")"

	1189

	1190 def __len__(self):

	1191 return len(self.prod)

	1192

	1193 def __nonzero__(self):

	1194 return 1

	1195

	1196 def __getitem__(self,index):

	1197 return self.prod[index]

	1198

	1199 # Return the nth lr_item from the production (or None if at the end)

	1200 def lr_item(self,n):

	1201 if n > len(self.prod): return None

	1202 p = LRItem(self,n)

	1203

	1204 # Precompute the list of productions immediately following. Hack. Remov e later

	1205 try:

	1206 p.lr_after = Prodnames[p.prod[n+1]]

	1207 except (IndexError,KeyError):

	1208 p.lr_after = []

	1209 try:

	1210 p.lr_before = p.prod[n-1]

	1211 except IndexError:

	1212 p.lr_before = None

	1213

	1214 return p

	1215

	1216 # Bind the production function name to a callable

	1217 def bind(self,pdict):

	1218 if self.func:

	1219 self.callable = pdict[self.func]

	1220

	1221 # This class serves as a minimal standin for Production objects when

	1222 # reading table data from files. It only contains information

	1223 # actually used by the LR parsing engine, plus some additional

	1224 # debugging information.

	1225 class MiniProduction(object):

	1226 def __init__(self,str,name,len,func,file,line):

	1227 self.name = name

	1228 self.len = len

	1229 self.func = func

	1230 self.callable = None

	1231 self.file = file

	1232 self.line = line

	1233 self.str = str

	1234 def __str__(self):

	1235 return self.str

	1236 def __repr__(self):

	1237 return "MiniProduction(%s)" % self.str

	1238

	1239 # Bind the production function name to a callable

	1240 def bind(self,pdict):

	1241 if self.func:

	1242 self.callable = pdict[self.func]

	1243

	1244

	1245 # -----------------------------------------------------------------------------

	1246 # class LRItem

	1247 #

	1248 # This class represents a specific stage of parsing a production rule. For

	1249 # example:

	1250 #

	1251 # expr : expr . PLUS term

	1252 #

	1253 # In the above, the "." represents the current location of the parse. Here

	1254 # basic attributes:

	1255 #

	1256 # name - Name of the production. For example 'expr'

	1257 # prod - A list of symbols on the right side ['expr','.', 'PLUS','te rm']

	1258 # number - Production number.

	1259 #

	1260 # lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term '

	1261 # then lr_next refers to 'expr -> expr PLUS . term'

	1262 # lr_index - LR item index (location of the ".") in the prod list.

	1263 # lookaheads - LALR lookahead symbols for this item

	1264 # len - Length of the production (number of symbols on right hand s ide)

	1265 # lr_after - List of all productions that immediately follow

	1266 # lr_before - Grammar symbol immediately before

	1267 # -----------------------------------------------------------------------------

	1268

	1269 class LRItem(object):

	1270 def __init__(self,p,n):

	1271 self.name = p.name

	1272 self.prod = list(p.prod)

	1273 self.number = p.number

	1274 self.lr_index = n

	1275 self.lookaheads = { }

	1276 self.prod.insert(n,".")

	1277 self.prod = tuple(self.prod)

	1278 self.len = len(self.prod)

	1279 self.usyms = p.usyms

	1280

	1281 def __str__(self):

	1282 if self.prod:

	1283 s = "%s -> %s" % (self.name," ".join(self.prod))

	1284 else:

	1285 s = "%s -> <empty>" % self.name

	1286 return s

	1287

	1288 def __repr__(self):

	1289 return "LRItem("+str(self)+")"

	1290

	1291 # -----------------------------------------------------------------------------

	1292 # rightmost_terminal()

	1293 #

	1294 # Return the rightmost terminal from a list of symbols. Used in add_production( )

	1295 # -----------------------------------------------------------------------------

	1296 def rightmost_terminal(symbols, terminals):

	1297 i = len(symbols) - 1

	1298 while i >= 0:

	1299 if symbols[i] in terminals:

	1300 return symbols[i]

	1301 i -= 1

	1302 return None

	1303

	1304 # -----------------------------------------------------------------------------

	1305 # === GRAMMAR CLASS ===

	1306 #

	1307 # The following class represents the contents of the specified grammar along

	1308 # with various computed properties such as first sets, follow sets, LR items, et c.

	1309 # This data is used for critical parts of the table generation process later.

	1310 # -----------------------------------------------------------------------------

	1311

	1312 class GrammarError(YaccError): pass

	1313

	1314 class Grammar(object):

	1315 def __init__(self,terminals):

	1316 self.Productions = [None] # A list of all of the productions. The fir st

	1317 # entry is always reserved for the purpose o f

	1318 # building an augmented grammar

	1319

	1320 self.Prodnames = { } # A dictionary mapping the names of nontermi nals to a list of all

	1321 # productions of that nonterminal.

	1322

	1323 self.Prodmap = { } # A dictionary that is only used to detect d uplicate

	1324 # productions.

	1325

	1326 self.Terminals = { } # A dictionary mapping the names of terminal symbols to a

	1327 # list of the rules where they are used.

	1328

	1329 for term in terminals:

	1330 self.Terminals[term] = []

	1331

	1332 self.Terminals['error'] = []

	1333

	1334 self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list

	1335 # of rule numbers where they are used.

	1336

	1337 self.First = { } # A dictionary of precomputed FIRST(x) symbo ls

	1338

	1339 self.Follow = { } # A dictionary of precomputed FOLLOW(x) symb ols

	1340

	1341 self.Precedence = { } # Precedence rules for each terminal. Contai ns tuples of the

	1342 # form ('right',level) or ('nonassoc', level ) or ('left',level)

	1343

	1344 self.UsedPrecedence = { } # Precedence rules that were actually used b y the grammer.

	1345 # This is only used to provide error checkin g and to generate

	1346 # a warning about unused precedence rules.

	1347

	1348 self.Start = None # Starting symbol for the grammar

	1349

	1350

	1351 def __len__(self):

	1352 return len(self.Productions)

	1353

	1354 def __getitem__(self,index):

	1355 return self.Productions[index]

	1356

	1357 # -------------------------------------------------------------------------- ---

	1358 # set_precedence()

	1359 #

	1360 # Sets the precedence for a given terminal. assoc is the associativity such as

	1361 # 'left','right', or 'nonassoc'. level is a numeric level.

	1362 #

	1363 # -------------------------------------------------------------------------- ---

	1364

	1365 def set_precedence(self,term,assoc,level):

	1366 assert self.Productions == [None],"Must call set_precedence() before add _production()"

	1367 if term in self.Precedence:

	1368 raise GrammarError("Precedence already specified for terminal '%s'" % term)

	1369 if assoc not in ['left','right','nonassoc']:

	1370 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")

	1371 self.Precedence[term] = (assoc,level)

	1372

	1373 # -------------------------------------------------------------------------- ---

	1374 # add_production()

	1375 #

	1376 # Given an action function, this function assembles a production rule and

	1377 # computes its precedence level.

	1378 #

	1379 # The production rule is supplied as a list of symbols. For example,

	1380 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and

	1381 # symbols ['expr','PLUS','term'].

	1382 #

	1383 # Precedence is determined by the precedence of the right-most non-terminal

	1384 # or the precedence of a terminal specified by %prec.

	1385 #

	1386 # A variety of error checks are performed to make sure production symbols

	1387 # are valid and that %prec is used correctly.

	1388 # -------------------------------------------------------------------------- ---

	1389

	1390 def add_production(self,prodname,syms,func=None,file='',line=0):

	1391

	1392 if prodname in self.Terminals:

	1393 raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined a s a token" % (file,line,prodname))

	1394 if prodname == 'error':

	1395 raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserv ed word" % (file,line,prodname))

	1396 if not _is_identifier.match(prodname):

	1397 raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prod name))

	1398

	1399 # Look for literal tokens

	1400 for n,s in enumerate(syms):

	1401 if s[0] in "'\"":

	1402 try:

	1403 c = eval(s)

	1404 if (len(c) > 1):

	1405 raise GrammarError("%s:%d: Literal token %s in rule '% s' may only be a single character" % (file,line,s, prodname))

	1406 if not c in self.Terminals:

	1407 self.Terminals[c] = []

	1408 syms[n] = c

	1409 continue

	1410 except SyntaxError:

	1411 pass

	1412 if not _is_identifier.match(s) and s != '%prec':

	1413 raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (fi le,line,s, prodname))

	1414

	1415 # Determine the precedence level

	1416 if '%prec' in syms:

	1417 if syms[-1] == '%prec':

	1418 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))

	1419 if syms[-2] != '%prec':

	1420 raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))

	1421 precname = syms[-1]

	1422 prodprec = self.Precedence.get(precname,None)

	1423 if not prodprec:

	1424 raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))

	1425 else:

	1426 self.UsedPrecedence[precname] = 1

	1427 del syms[-2:] # Drop %prec from the rule

	1428 else:

	1429 # If no %prec, precedence is determined by the rightmost terminal sy mbol

	1430 precname = rightmost_terminal(syms,self.Terminals)

	1431 prodprec = self.Precedence.get(precname,('right',0))

	1432

	1433 # See if the rule is already in the rulemap

	1434 map = "%s -> %s" % (prodname,syms)

	1435 if map in self.Prodmap:

	1436 m = self.Prodmap[map]

	1437 raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +

	1438 "Previous definition at %s:%d" % (m.file, m.line) )

	1439

	1440 # From this point on, everything is valid. Create a new Production inst ance

	1441 pnumber = len(self.Productions)

	1442 if not prodname in self.Nonterminals:

	1443 self.Nonterminals[prodname] = [ ]

	1444

	1445 # Add the production number to Terminals and Nonterminals

	1446 for t in syms:

	1447 if t in self.Terminals:

	1448 self.Terminals[t].append(pnumber)

	1449 else:

	1450 if not t in self.Nonterminals:

	1451 self.Nonterminals[t] = [ ]

	1452 self.Nonterminals[t].append(pnumber)

	1453

	1454 # Create a production and add it to the list of productions

	1455 p = Production(pnumber,prodname,syms,prodprec,func,file,line)

	1456 self.Productions.append(p)

	1457 self.Prodmap[map] = p

	1458

	1459 # Add to the global productions list

	1460 try:

	1461 self.Prodnames[prodname].append(p)

	1462 except KeyError:

	1463 self.Prodnames[prodname] = [ p ]

	1464 return 0

	1465

	1466 # -------------------------------------------------------------------------- ---

	1467 # set_start()

	1468 #

	1469 # Sets the starting symbol and creates the augmented grammar. Production

	1470 # rule 0 is S' -> start where start is the start symbol.

	1471 # -------------------------------------------------------------------------- ---

	1472

	1473 def set_start(self,start=None):

	1474 if not start:

	1475 start = self.Productions[1].name

	1476 if start not in self.Nonterminals:

	1477 raise GrammarError("start symbol %s undefined" % start)

	1478 self.Productions[0] = Production(0,"S'",[start])

	1479 self.Nonterminals[start].append(0)

	1480 self.Start = start

	1481

	1482 # -------------------------------------------------------------------------- ---

	1483 # find_unreachable()

	1484 #

	1485 # Find all of the nonterminal symbols that can't be reached from the startin g

	1486 # symbol. Returns a list of nonterminals that can't be reached.

	1487 # -------------------------------------------------------------------------- ---

	1488

	1489 def find_unreachable(self):

	1490

	1491 # Mark all symbols that are reachable from a symbol s

	1492 def mark_reachable_from(s):

	1493 if reachable[s]:

	1494 # We've already reached symbol s.

	1495 return

	1496 reachable[s] = 1

	1497 for p in self.Prodnames.get(s,[]):

	1498 for r in p.prod:

	1499 mark_reachable_from(r)

	1500

	1501 reachable = { }

	1502 for s in list(self.Terminals) + list(self.Nonterminals):

	1503 reachable[s] = 0

	1504

	1505 mark_reachable_from( self.Productions[0].prod[0] )

	1506

	1507 return [s for s in list(self.Nonterminals)

	1508 if not reachable[s]]

	1509

	1510 # -------------------------------------------------------------------------- ---

	1511 # infinite_cycles()

	1512 #

	1513 # This function looks at the various parsing rules and tries to detect

	1514 # infinite recursion cycles (grammar rules where there is no possible way

	1515 # to derive a string of only terminals).

	1516 # -------------------------------------------------------------------------- ---

	1517

	1518 def infinite_cycles(self):

	1519 terminates = {}

	1520

	1521 # Terminals:

	1522 for t in self.Terminals:

	1523 terminates[t] = 1

	1524

	1525 terminates['$end'] = 1

	1526

	1527 # Nonterminals:

	1528

	1529 # Initialize to false:

	1530 for n in self.Nonterminals:

	1531 terminates[n] = 0

	1532

	1533 # Then propagate termination until no change:

	1534 while 1:

	1535 some_change = 0

	1536 for (n,pl) in self.Prodnames.items():

	1537 # Nonterminal n terminates iff any of its productions terminates .

	1538 for p in pl:

	1539 # Production p terminates iff all of its rhs symbols termina te.

	1540 for s in p.prod:

	1541 if not terminates[s]:

	1542 # The symbol s does not terminate,

	1543 # so production p does not terminate.

	1544 p_terminates = 0

	1545 break

	1546 else:

	1547 # didn't break from the loop,

	1548 # so every symbol s terminates

	1549 # so production p terminates.

	1550 p_terminates = 1

	1551

	1552 if p_terminates:

	1553 # symbol n terminates!

	1554 if not terminates[n]:

	1555 terminates[n] = 1

	1556 some_change = 1

	1557 # Don't need to consider any more productions for this n .

	1558 break

	1559

	1560 if not some_change:

	1561 break

	1562

	1563 infinite = []

	1564 for (s,term) in terminates.items():

	1565 if not term:

	1566 if not s in self.Prodnames and not s in self.Terminals and s != 'error':

	1567 # s is used-but-not-defined, and we've already warned of tha t,

	1568 # so it would be overkill to say that it's also non-terminat ing.

	1569 pass

	1570 else:

	1571 infinite.append(s)

	1572

	1573 return infinite

	1574

	1575

	1576 # -------------------------------------------------------------------------- ---

	1577 # undefined_symbols()

	1578 #

	1579 # Find all symbols that were used the grammar, but not defined as tokens or

	1580 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symb ol

	1581 # and prod is the production where the symbol was used.

	1582 # -------------------------------------------------------------------------- ---

	1583 def undefined_symbols(self):

	1584 result = []

	1585 for p in self.Productions:

	1586 if not p: continue

	1587

	1588 for s in p.prod:

	1589 if not s in self.Prodnames and not s in self.Terminals and s != 'error':

	1590 result.append((s,p))

	1591 return result

	1592

	1593 # -------------------------------------------------------------------------- ---

	1594 # unused_terminals()

	1595 #

	1596 # Find all terminals that were defined, but not used by the grammar. Return s

	1597 # a list of all symbols.

	1598 # -------------------------------------------------------------------------- ---

	1599 def unused_terminals(self):

	1600 unused_tok = []

	1601 for s,v in self.Terminals.items():

	1602 if s != 'error' and not v:

	1603 unused_tok.append(s)

	1604

	1605 return unused_tok

	1606

	1607 # -------------------------------------------------------------------------- ----

	1608 # unused_rules()

	1609 #

	1610 # Find all grammar rules that were defined, but not used (maybe not reachab le)

	1611 # Returns a list of productions.

	1612 # -------------------------------------------------------------------------- ----

	1613

	1614 def unused_rules(self):

	1615 unused_prod = []

	1616 for s,v in self.Nonterminals.items():

	1617 if not v:

	1618 p = self.Prodnames[s][0]

	1619 unused_prod.append(p)

	1620 return unused_prod

	1621

	1622 # -------------------------------------------------------------------------- ---

	1623 # unused_precedence()

	1624 #

	1625 # Returns a list of tuples (term,precedence) corresponding to precedence

	1626 # rules that were never used by the grammar. term is the name of the termin al

	1627 # on which precedence was applied and precedence is a string such as 'left' or

	1628 # 'right' corresponding to the type of precedence.

	1629 # -------------------------------------------------------------------------- ---

	1630

	1631 def unused_precedence(self):

	1632 unused = []

	1633 for termname in self.Precedence:

	1634 if not (termname in self.Terminals or termname in self.UsedPrecedenc e):

	1635 unused.append((termname,self.Precedence[termname][0]))

	1636

	1637 return unused

	1638

	1639 # -------------------------------------------------------------------------

	1640 # _first()

	1641 #

	1642 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.

	1643 #

	1644 # During execution of compute_first1, the result may be incomplete.

	1645 # Afterward (e.g., when called from compute_follow()), it will be complete.

	1646 # -------------------------------------------------------------------------

	1647 def _first(self,beta):

	1648

	1649 # We are computing First(x1,x2,x3,...,xn)

	1650 result = [ ]

	1651 for x in beta:

	1652 x_produces_empty = 0

	1653

	1654 # Add all the non-<empty> symbols of First[x] to the result.

	1655 for f in self.First[x]:

	1656 if f == '<empty>':

	1657 x_produces_empty = 1

	1658 else:

	1659 if f not in result: result.append(f)

	1660

	1661 if x_produces_empty:

	1662 # We have to consider the next x in beta,

	1663 # i.e. stay in the loop.

	1664 pass

	1665 else:

	1666 # We don't have to consider any further symbols in beta.

	1667 break

	1668 else:

	1669 # There was no 'break' from the loop,

	1670 # so x_produces_empty was true for all x in beta,

	1671 # so beta produces empty as well.

	1672 result.append('<empty>')

	1673

	1674 return result

	1675

	1676 # -------------------------------------------------------------------------

	1677 # compute_first()

	1678 #

	1679 # Compute the value of FIRST1(X) for all symbols

	1680 # -------------------------------------------------------------------------

	1681 def compute_first(self):

	1682 if self.First:

	1683 return self.First

	1684

	1685 # Terminals:

	1686 for t in self.Terminals:

	1687 self.First[t] = [t]

	1688

	1689 self.First['$end'] = ['$end']

	1690

	1691 # Nonterminals:

	1692

	1693 # Initialize to the empty set:

	1694 for n in self.Nonterminals:

	1695 self.First[n] = []

	1696

	1697 # Then propagate symbols until no change:

	1698 while 1:

	1699 some_change = 0

	1700 for n in self.Nonterminals:

	1701 for p in self.Prodnames[n]:

	1702 for f in self._first(p.prod):

	1703 if f not in self.First[n]:

	1704 self.First[n].append( f )

	1705 some_change = 1

	1706 if not some_change:

	1707 break

	1708

	1709 return self.First

	1710

	1711 # ---------------------------------------------------------------------

	1712 # compute_follow()

	1713 #

	1714 # Computes all of the follow sets for every non-terminal symbol. The

	1715 # follow set is the set of all symbols that might follow a given

	1716 # non-terminal. See the Dragon book, 2nd Ed. p. 189.

	1717 # ---------------------------------------------------------------------

	1718 def compute_follow(self,start=None):

	1719 # If already computed, return the result

	1720 if self.Follow:

	1721 return self.Follow

	1722

	1723 # If first sets not computed yet, do that first.

	1724 if not self.First:

	1725 self.compute_first()

	1726

	1727 # Add '$end' to the follow list of the start symbol

	1728 for k in self.Nonterminals:

	1729 self.Follow[k] = [ ]

	1730

	1731 if not start:

	1732 start = self.Productions[1].name

	1733

	1734 self.Follow[start] = [ '$end' ]

	1735

	1736 while 1:

	1737 didadd = 0

	1738 for p in self.Productions[1:]:

	1739 # Here is the production set

	1740 for i in range(len(p.prod)):

	1741 B = p.prod[i]

	1742 if B in self.Nonterminals:

	1743 # Okay. We got a non-terminal in a production

	1744 fst = self._first(p.prod[i+1:])

	1745 hasempty = 0

	1746 for f in fst:

	1747 if f != '<empty>' and f not in self.Follow[B]:

	1748 self.Follow[B].append(f)

	1749 didadd = 1

	1750 if f == '<empty>':

	1751 hasempty = 1

	1752 if hasempty or i == (len(p.prod)-1):

	1753 # Add elements of follow(a) to follow(b)

	1754 for f in self.Follow[p.name]:

	1755 if f not in self.Follow[B]:

	1756 self.Follow[B].append(f)

	1757 didadd = 1

	1758 if not didadd: break

	1759 return self.Follow

	1760

	1761

	1762 # -------------------------------------------------------------------------- ---

	1763 # build_lritems()

	1764 #

	1765 # This function walks the list of productions and builds a complete set of t he

	1766 # LR items. The LR items are stored in two ways: First, they are uniquely

	1767 # numbered and placed in the list _lritems. Second, a linked list of LR ite ms

	1768 # is built for each production. For example:

	1769 #

	1770 # E -> E PLUS E

	1771 #

	1772 # Creates the list

	1773 #

	1774 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]

	1775 # -------------------------------------------------------------------------- ---

	1776

	1777 def build_lritems(self):

	1778 for p in self.Productions:

	1779 lastlri = p

	1780 i = 0

	1781 lr_items = []

	1782 while 1:

	1783 if i > len(p):

	1784 lri = None

	1785 else:

	1786 lri = LRItem(p,i)

	1787 # Precompute the list of productions immediately following

	1788 try:

	1789 lri.lr_after = self.Prodnames[lri.prod[i+1]]

	1790 except (IndexError,KeyError):

	1791 lri.lr_after = []

	1792 try:

	1793 lri.lr_before = lri.prod[i-1]

	1794 except IndexError:

	1795 lri.lr_before = None

	1796

	1797 lastlri.lr_next = lri

	1798 if not lri: break

	1799 lr_items.append(lri)

	1800 lastlri = lri

	1801 i += 1

	1802 p.lr_items = lr_items

	1803

	1804 # -----------------------------------------------------------------------------

	1805 # == Class LRTable ==

	1806 #

	1807 # This basic class represents a basic table of LR parsing information.

	1808 # Methods for generating the tables are not defined here. They are defined

	1809 # in the derived class LRGeneratedTable.

	1810 # -----------------------------------------------------------------------------

	1811

	1812 class VersionError(YaccError): pass

	1813

	1814 class LRTable(object):

	1815 def __init__(self):

	1816 self.lr_action = None

	1817 self.lr_goto = None

	1818 self.lr_productions = None

	1819 self.lr_method = None

	1820

	1821 def read_table(self,module):

	1822 if isinstance(module,types.ModuleType):

	1823 parsetab = module

	1824 else:

	1825 if sys.version_info[0] < 3:

	1826 exec("import %s as parsetab" % module)

	1827 else:

	1828 env = { }

	1829 exec("import %s as parsetab" % module, env, env)

	1830 parsetab = env['parsetab']

	1831

	1832 if parsetab._tabversion != __tabversion__:

	1833 raise VersionError("yacc table file version is out of date")

	1834

	1835 self.lr_action = parsetab._lr_action

	1836 self.lr_goto = parsetab._lr_goto

	1837

	1838 self.lr_productions = []

	1839 for p in parsetab._lr_productions:

	1840 self.lr_productions.append(MiniProduction(*p))

	1841

	1842 self.lr_method = parsetab._lr_method

	1843 return parsetab._lr_signature

	1844

	1845 def read_pickle(self,filename):

	1846 try:

	1847 import cPickle as pickle

	1848 except ImportError:

	1849 import pickle

	1850

	1851 in_f = open(filename,"rb")

	1852

	1853 tabversion = pickle.load(in_f)

	1854 if tabversion != __tabversion__:

	1855 raise VersionError("yacc table file version is out of date")

	1856 self.lr_method = pickle.load(in_f)

	1857 signature = pickle.load(in_f)

	1858 self.lr_action = pickle.load(in_f)

	1859 self.lr_goto = pickle.load(in_f)

	1860 productions = pickle.load(in_f)

	1861

	1862 self.lr_productions = []

	1863 for p in productions:

	1864 self.lr_productions.append(MiniProduction(*p))

	1865

	1866 in_f.close()

	1867 return signature

	1868

	1869 # Bind all production function names to callable objects in pdict

	1870 def bind_callables(self,pdict):

	1871 for p in self.lr_productions:

	1872 p.bind(pdict)

	1873

	1874 # -----------------------------------------------------------------------------

	1875 # === LR Generator ===

	1876 #

	1877 # The following classes and functions are used to generate LR parsing tables on

	1878 # a grammar.

	1879 # -----------------------------------------------------------------------------

	1880

	1881 # -----------------------------------------------------------------------------

	1882 # digraph()

	1883 # traverse()

	1884 #

	1885 # The following two functions are used to compute set valued functions

	1886 # of the form:

	1887 #

	1888 # F(x) = F'(x) U U{F(y) \| x R y}

	1889 #

	1890 # This is used to compute the values of Read() sets as well as FOLLOW sets

	1891 # in LALR(1) generation.

	1892 #

	1893 # Inputs: X - An input set

	1894 # R - A relation

	1895 # FP - Set-valued function

	1896 # ------------------------------------------------------------------------------

	1897

	1898 def digraph(X,R,FP):

	1899 N = { }

	1900 for x in X:

	1901 N[x] = 0

	1902 stack = []

	1903 F = { }

	1904 for x in X:

	1905 if N[x] == 0: traverse(x,N,stack,F,X,R,FP)

	1906 return F

	1907

	1908 def traverse(x,N,stack,F,X,R,FP):

	1909 stack.append(x)

	1910 d = len(stack)

	1911 N[x] = d

	1912 F[x] = FP(x) # F(X) <- F'(x)

	1913

	1914 rel = R(x) # Get y's related to x

	1915 for y in rel:

	1916 if N[y] == 0:

	1917 traverse(y,N,stack,F,X,R,FP)

	1918 N[x] = min(N[x],N[y])

	1919 for a in F.get(y,[]):

	1920 if a not in F[x]: F[x].append(a)

	1921 if N[x] == d:

	1922 N[stack[-1]] = MAXINT

	1923 F[stack[-1]] = F[x]

	1924 element = stack.pop()

	1925 while element != x:

	1926 N[stack[-1]] = MAXINT

	1927 F[stack[-1]] = F[x]

	1928 element = stack.pop()

	1929

	1930 class LALRError(YaccError): pass

	1931

	1932 # -----------------------------------------------------------------------------

	1933 # == LRGeneratedTable ==

	1934 #

	1935 # This class implements the LR table generation algorithm. There are no

	1936 # public methods except for write()

	1937 # -----------------------------------------------------------------------------

	1938

	1939 class LRGeneratedTable(LRTable):

	1940 def __init__(self,grammar,method='LALR',log=None):

	1941 if method not in ['SLR','LALR']:

	1942 raise LALRError("Unsupported method %s" % method)

	1943

	1944 self.grammar = grammar

	1945 self.lr_method = method

	1946

	1947 # Set up the logger

	1948 if not log:

	1949 log = NullLogger()

	1950 self.log = log

	1951

	1952 # Internal attributes

	1953 self.lr_action = {} # Action table

	1954 self.lr_goto = {} # Goto table

	1955 self.lr_productions = grammar.Productions # Copy of grammar Producti on array

	1956 self.lr_goto_cache = {} # Cache of computed gotos

	1957 self.lr0_cidhash = {} # Cache of closures

	1958

	1959 self._add_count = 0 # Internal counter used to detect cycles

	1960

	1961 # Diagonistic information filled in by the table generator

	1962 self.sr_conflict = 0

	1963 self.rr_conflict = 0

	1964 self.conflicts = [] # List of conflicts

	1965

	1966 self.sr_conflicts = []

	1967 self.rr_conflicts = []

	1968

	1969 # Build the tables

	1970 self.grammar.build_lritems()

	1971 self.grammar.compute_first()

	1972 self.grammar.compute_follow()

	1973 self.lr_parse_table()

	1974

	1975 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

	1976

	1977 def lr0_closure(self,I):

	1978 self._add_count += 1

	1979

	1980 # Add everything in I to J

	1981 J = I[:]

	1982 didadd = 1

	1983 while didadd:

	1984 didadd = 0

	1985 for j in J:

	1986 for x in j.lr_after:

	1987 if getattr(x,"lr0_added",0) == self._add_count: continue

	1988 # Add B --> .G to J

	1989 J.append(x.lr_next)

	1990 x.lr0_added = self._add_count

	1991 didadd = 1

	1992

	1993 return J

	1994

	1995 # Compute the LR(0) goto function goto(I,X) where I is a set

	1996 # of LR(0) items and X is a grammar symbol. This function is written

	1997 # in a way that guarantees uniqueness of the generated goto sets

	1998 # (i.e. the same goto set will never be returned as two different Python

	1999 # objects). With uniqueness, we can later do fast set comparisons using

	2000 # id(obj) instead of element-wise comparison.

	2001

	2002 def lr0_goto(self,I,x):

	2003 # First we look for a previously cached entry

	2004 g = self.lr_goto_cache.get((id(I),x),None)

	2005 if g: return g

	2006

	2007 # Now we generate the goto set in a way that guarantees uniqueness

	2008 # of the result

	2009

	2010 s = self.lr_goto_cache.get(x,None)

	2011 if not s:

	2012 s = { }

	2013 self.lr_goto_cache[x] = s

	2014

	2015 gs = [ ]

	2016 for p in I:

	2017 n = p.lr_next

	2018 if n and n.lr_before == x:

	2019 s1 = s.get(id(n),None)

	2020 if not s1:

	2021 s1 = { }

	2022 s[id(n)] = s1

	2023 gs.append(n)

	2024 s = s1

	2025 g = s.get('$end',None)

	2026 if not g:

	2027 if gs:

	2028 g = self.lr0_closure(gs)

	2029 s['$end'] = g

	2030 else:

	2031 s['$end'] = gs

	2032 self.lr_goto_cache[(id(I),x)] = g

	2033 return g

	2034

	2035 # Compute the LR(0) sets of item function

	2036 def lr0_items(self):

	2037

	2038 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]

	2039 i = 0

	2040 for I in C:

	2041 self.lr0_cidhash[id(I)] = i

	2042 i += 1

	2043

	2044 # Loop over the items in C and each grammar symbols

	2045 i = 0

	2046 while i < len(C):

	2047 I = C[i]

	2048 i += 1

	2049

	2050 # Collect all of the symbols that could possibly be in the goto(I,X) sets

	2051 asyms = { }

	2052 for ii in I:

	2053 for s in ii.usyms:

	2054 asyms[s] = None

	2055

	2056 for x in asyms:

	2057 g = self.lr0_goto(I,x)

	2058 if not g: continue

	2059 if id(g) in self.lr0_cidhash: continue

	2060 self.lr0_cidhash[id(g)] = len(C)

	2061 C.append(g)

	2062

	2063 return C

	2064

	2065 # -------------------------------------------------------------------------- ---

	2066 # ==== LALR(1) Parsing ====

	2067 #

	2068 # LALR(1) parsing is almost exactly the same as SLR except that instead of

	2069 # relying upon Follow() sets when performing reductions, a more selective

	2070 # lookahead set that incorporates the state of the LR(0) machine is utilized .

	2071 # Thus, we mainly just have to focus on calculating the lookahead sets.

	2072 #

	2073 # The method used here is due to DeRemer and Pennelo (1982).

	2074 #

	2075 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)

	2076 # Lookahead Sets", ACM Transactions on Programming Languages and Systems ,

	2077 # Vol. 4, No. 4, Oct. 1982, pp. 615-649

	2078 #

	2079 # Further details can also be found in:

	2080 #

	2081 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing ",

	2082 # McGraw-Hill Book Company, (1985).

	2083 #

	2084 # -------------------------------------------------------------------------- ---

	2085

	2086 # -------------------------------------------------------------------------- ---

	2087 # compute_nullable_nonterminals()

	2088 #

	2089 # Creates a dictionary containing all of the non-terminals that might produc e

	2090 # an empty production.

	2091 # -------------------------------------------------------------------------- ---

	2092

	2093 def compute_nullable_nonterminals(self):

	2094 nullable = {}

	2095 num_nullable = 0

	2096 while 1:

	2097 for p in self.grammar.Productions[1:]:

	2098 if p.len == 0:

	2099 nullable[p.name] = 1

	2100 continue

	2101 for t in p.prod:

	2102 if not t in nullable: break

	2103 else:

	2104 nullable[p.name] = 1

	2105 if len(nullable) == num_nullable: break

	2106 num_nullable = len(nullable)

	2107 return nullable

	2108

	2109 # -------------------------------------------------------------------------- ---

	2110 # find_nonterminal_trans(C)

	2111 #

	2112 # Given a set of LR(0) items, this functions finds all of the non-terminal

	2113 # transitions. These are transitions in which a dot appears immediately b efore

	2114 # a non-terminal. Returns a list of tuples of the form (state,N) where sta te

	2115 # is the state number and N is the nonterminal symbol.

	2116 #

	2117 # The input C is the set of LR(0) items.

	2118 # -------------------------------------------------------------------------- ---

	2119

	2120 def find_nonterminal_transitions(self,C):

	2121 trans = []

	2122 for state in range(len(C)):

	2123 for p in C[state]:

	2124 if p.lr_index < p.len - 1:

	2125 t = (state,p.prod[p.lr_index+1])

	2126 if t[1] in self.grammar.Nonterminals:

	2127 if t not in trans: trans.append(t)

	2128 state = state + 1

	2129 return trans

	2130

	2131 # -------------------------------------------------------------------------- ---

	2132 # dr_relation()

	2133 #

	2134 # Computes the DR(p,A) relationships for non-terminal transitions. The inpu t

	2135 # is a tuple (state,N) where state is a number and N is a nonterminal symbol .

	2136 #

	2137 # Returns a list of terminals.

	2138 # -------------------------------------------------------------------------- ---

	2139

	2140 def dr_relation(self,C,trans,nullable):

	2141 dr_set = { }

	2142 state,N = trans

	2143 terms = []

	2144

	2145 g = self.lr0_goto(C[state],N)

	2146 for p in g:

	2147 if p.lr_index < p.len - 1:

	2148 a = p.prod[p.lr_index+1]

	2149 if a in self.grammar.Terminals:

	2150 if a not in terms: terms.append(a)

	2151

	2152 # This extra bit is to handle the start state

	2153 if state == 0 and N == self.grammar.Productions[0].prod[0]:

	2154 terms.append('$end')

	2155

	2156 return terms

	2157

	2158 # -------------------------------------------------------------------------- ---

	2159 # reads_relation()

	2160 #

	2161 # Computes the READS() relation (p,A) READS (t,C).

	2162 # -------------------------------------------------------------------------- ---

	2163

	2164 def reads_relation(self,C, trans, empty):

	2165 # Look for empty transitions

	2166 rel = []

	2167 state, N = trans

	2168

	2169 g = self.lr0_goto(C[state],N)

	2170 j = self.lr0_cidhash.get(id(g),-1)

	2171 for p in g:

	2172 if p.lr_index < p.len - 1:

	2173 a = p.prod[p.lr_index + 1]

	2174 if a in empty:

	2175 rel.append((j,a))

	2176

	2177 return rel

	2178

	2179 # -------------------------------------------------------------------------- ---

	2180 # compute_lookback_includes()

	2181 #

	2182 # Determines the lookback and includes relations

	2183 #

	2184 # LOOKBACK:

	2185 #

	2186 # This relation is determined by running the LR(0) state machine forward.

	2187 # For example, starting with a production "N : . A B C", we run it forward

	2188 # to obtain "N : A B C ." We then build a relationship between this final

	2189 # state and the starting state. These relationships are stored in a dictio nary

	2190 # lookdict.

	2191 #

	2192 # INCLUDES:

	2193 #

	2194 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).

	2195 #

	2196 # This relation is used to determine non-terminal transitions that occur

	2197 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)

	2198 # if the following holds:

	2199 #

	2200 # B -> LAT, where T -> epsilon and p' -L-> p

	2201 #

	2202 # L is essentially a prefix (which may be empty), T is a suffix that must be

	2203 # able to derive an empty string. State p' must lead to state p with the st ring L.

	2204 #

	2205 # -------------------------------------------------------------------------- ---

	2206

	2207 def compute_lookback_includes(self,C,trans,nullable):

	2208

	2209 lookdict = {} # Dictionary of lookback relations

	2210 includedict = {} # Dictionary of include relations

	2211

	2212 # Make a dictionary of non-terminal transitions

	2213 dtrans = {}

	2214 for t in trans:

	2215 dtrans[t] = 1

	2216

	2217 # Loop over all transitions and compute lookbacks and includes

	2218 for state,N in trans:

	2219 lookb = []

	2220 includes = []

	2221 for p in C[state]:

	2222 if p.name != N: continue

	2223

	2224 # Okay, we have a name match. We now follow the production all the way

	2225 # through the state machine until we get the . on the right hand side

	2226

	2227 lr_index = p.lr_index

	2228 j = state

	2229 while lr_index < p.len - 1:

	2230 lr_index = lr_index + 1

	2231 t = p.prod[lr_index]

	2232

	2233 # Check to see if this symbol and state are a non-terminal transition

	2234 if (j,t) in dtrans:

	2235 # Yes. Okay, there is some chance that this is an in cludes relation

	2236 # the only way to know for certain is whether the res t of the

	2237 # production derives empty

	2238

	2239 li = lr_index + 1

	2240 while li < p.len:

	2241 if p.prod[li] in self.grammar.Terminals: break # No forget it

	2242 if not p.prod[li] in nullable: break

	2243 li = li + 1

	2244 else:

	2245 # Appears to be a relation between (j,t) and (st ate,N)

	2246 includes.append((j,t))

	2247

	2248 g = self.lr0_goto(C[j],t) # Go to next set

	2249 j = self.lr0_cidhash.get(id(g),-1) # Go to next state

	2250

	2251 # When we get here, j is the final state, now we have to locate the production

	2252 for r in C[j]:

	2253 if r.name != p.name: continue

	2254 if r.len != p.len: continue

	2255 i = 0

	2256 # This look is comparing a production ". A B C" with "A B C ."

	2257 while i < r.lr_index:

	2258 if r.prod[i] != p.prod[i+1]: break

	2259 i = i + 1

	2260 else:

	2261 lookb.append((j,r))

	2262 for i in includes:

	2263 if not i in includedict: includedict[i] = []

	2264 includedict[i].append((state,N))

	2265 lookdict[(state,N)] = lookb

	2266

	2267 return lookdict,includedict

	2268

	2269 # -------------------------------------------------------------------------- ---

	2270 # compute_read_sets()

	2271 #

	2272 # Given a set of LR(0) items, this function computes the read sets.

	2273 #

	2274 # Inputs: C = Set of LR(0) items

	2275 # ntrans = Set of nonterminal transitions

	2276 # nullable = Set of empty transitions

	2277 #

	2278 # Returns a set containing the read sets

	2279 # -------------------------------------------------------------------------- ---

	2280

	2281 def compute_read_sets(self,C, ntrans, nullable):

	2282 FP = lambda x: self.dr_relation(C,x,nullable)

	2283 R = lambda x: self.reads_relation(C,x,nullable)

	2284 F = digraph(ntrans,R,FP)

	2285 return F

	2286

	2287 # -------------------------------------------------------------------------- ---

	2288 # compute_follow_sets()

	2289 #

	2290 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,

	2291 # and an include set, this function computes the follow sets

	2292 #

	2293 # Follow(p,A) = Read(p,A) U U {Follow(p',B) \| (p,A) INCLUDES (p',B)}

	2294 #

	2295 # Inputs:

	2296 # ntrans = Set of nonterminal transitions

	2297 # readsets = Readset (previously computed)

	2298 # inclsets = Include sets (previously computed)

	2299 #

	2300 # Returns a set containing the follow sets

	2301 # -------------------------------------------------------------------------- ---

	2302

	2303 def compute_follow_sets(self,ntrans,readsets,inclsets):

	2304 FP = lambda x: readsets[x]

	2305 R = lambda x: inclsets.get(x,[])

	2306 F = digraph(ntrans,R,FP)

	2307 return F

	2308

	2309 # -------------------------------------------------------------------------- ---

	2310 # add_lookaheads()

	2311 #

	2312 # Attaches the lookahead symbols to grammar rules.

	2313 #

	2314 # Inputs: lookbacks - Set of lookback relations

	2315 # followset - Computed follow set

	2316 #

	2317 # This function directly attaches the lookaheads to productions contained

	2318 # in the lookbacks set

	2319 # -------------------------------------------------------------------------- ---

	2320

	2321 def add_lookaheads(self,lookbacks,followset):

	2322 for trans,lb in lookbacks.items():

	2323 # Loop over productions in lookback

	2324 for state,p in lb:

	2325 if not state in p.lookaheads:

	2326 p.lookaheads[state] = []

	2327 f = followset.get(trans,[])

	2328 for a in f:

	2329 if a not in p.lookaheads[state]: p.lookaheads[state].appen d(a)

	2330

	2331 # -------------------------------------------------------------------------- ---

	2332 # add_lalr_lookaheads()

	2333 #

	2334 # This function does all of the work of adding lookahead information for use

	2335 # with LALR parsing

	2336 # -------------------------------------------------------------------------- ---

	2337

	2338 def add_lalr_lookaheads(self,C):

	2339 # Determine all of the nullable nonterminals

	2340 nullable = self.compute_nullable_nonterminals()

	2341

	2342 # Find all non-terminal transitions

	2343 trans = self.find_nonterminal_transitions(C)

	2344

	2345 # Compute read sets

	2346 readsets = self.compute_read_sets(C,trans,nullable)

	2347

	2348 # Compute lookback/includes relations

	2349 lookd, included = self.compute_lookback_includes(C,trans,nullable)

	2350

	2351 # Compute LALR FOLLOW sets

	2352 followsets = self.compute_follow_sets(trans,readsets,included)

	2353

	2354 # Add all of the lookaheads

	2355 self.add_lookaheads(lookd,followsets)

	2356

	2357 # -------------------------------------------------------------------------- ---

	2358 # lr_parse_table()

	2359 #

	2360 # This function constructs the parse tables for SLR or LALR

	2361 # -------------------------------------------------------------------------- ---

	2362 def lr_parse_table(self):

	2363 Productions = self.grammar.Productions

	2364 Precedence = self.grammar.Precedence

	2365 goto = self.lr_goto # Goto array

	2366 action = self.lr_action # Action array

	2367 log = self.log # Logger for output

	2368

	2369 actionp = { } # Action production array (temporary)

	2370

	2371 log.info("Parsing method: %s", self.lr_method)

	2372

	2373 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items

	2374 # This determines the number of states

	2375

	2376 C = self.lr0_items()

	2377

	2378 if self.lr_method == 'LALR':

	2379 self.add_lalr_lookaheads(C)

	2380

	2381 # Build the parser table, state by state

	2382 st = 0

	2383 for I in C:

	2384 # Loop over each production in I

	2385 actlist = [ ] # List of actions

	2386 st_action = { }

	2387 st_actionp = { }

	2388 st_goto = { }

	2389 log.info("")

	2390 log.info("state %d", st)

	2391 log.info("")

	2392 for p in I:

	2393 log.info(" (%d) %s", p.number, str(p))

	2394 log.info("")

	2395

	2396 for p in I:

	2397 if p.len == p.lr_index + 1:

	2398 if p.name == "S'":

	2399 # Start symbol. Accept!

	2400 st_action["$end"] = 0

	2401 st_actionp["$end"] = p

	2402 else:

	2403 # We are at the end of a production. Reduce!

	2404 if self.lr_method == 'LALR':

	2405 laheads = p.lookaheads[st]

	2406 else:

	2407 laheads = self.grammar.Follow[p.name]

	2408 for a in laheads:

	2409 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))

	2410 r = st_action.get(a,None)

	2411 if r is not None:

	2412 # Whoa. Have a shift/reduce or reduce/reduce conflict

	2413 if r > 0:

	2414 # Need to decide on shift or reduce here

	2415 # By default we favor shifting. Need to add

	2416 # some precedence rules here.

	2417 sprec,slevel = Productions[st_actionp[a] .number].prec

	2418 rprec,rlevel = Precedence.get(a,('right' ,0))

	2419 if (slevel < rlevel) or ((slevel == rlev el) and (rprec == 'left')):

	2420 # We really need to reduce here.

	2421 st_action[a] = -p.number

	2422 st_actionp[a] = p

	2423 if not slevel and not rlevel:

	2424 log.info(" ! shift/reduce confl ict for %s resolved as reduce",a)

	2425 self.sr_conflicts.append((st,a,' reduce'))

	2426 Productions[p.number].reduced += 1

	2427 elif (slevel == rlevel) and (rprec == 'n onassoc'):

	2428 st_action[a] = None

	2429 else:

	2430 # Hmmm. Guess we'll keep the shift

	2431 if not rlevel:

	2432 log.info(" ! shift/reduce confl ict for %s resolved as shift",a)

	2433 self.sr_conflicts.append((st,a,' shift'))

	2434 elif r < 0:

	2435 # Reduce/reduce conflict. In this case , we favor the rule

	2436 # that was defined first in the grammar file

	2437 oldp = Productions[-r]

	2438 pp = Productions[p.number]

	2439 if oldp.line > pp.line:

	2440 st_action[a] = -p.number

	2441 st_actionp[a] = p

	2442 chosenp,rejectp = pp,oldp

	2443 Productions[p.number].reduced += 1

	2444 Productions[oldp.number].reduced -= 1

	2445 else:

	2446 chosenp,rejectp = oldp,pp

	2447 self.rr_conflicts.append((st,chosenp,rej ectp))

	2448 log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])

	2449 else:

	2450 raise LALRError("Unknown conflict in sta te %d" % st)

	2451 else:

	2452 st_action[a] = -p.number

	2453 st_actionp[a] = p

	2454 Productions[p.number].reduced += 1

	2455 else:

	2456 i = p.lr_index

	2457 a = p.prod[i+1] # Get symbol right after the "."

	2458 if a in self.grammar.Terminals:

	2459 g = self.lr0_goto(I,a)

	2460 j = self.lr0_cidhash.get(id(g),-1)

	2461 if j >= 0:

	2462 # We are in a shift state

	2463 actlist.append((a,p,"shift and go to state %d" % j))

	2464 r = st_action.get(a,None)

	2465 if r is not None:

	2466 # Whoa have a shift/reduce or shift/shift co nflict

	2467 if r > 0:

	2468 if r != j:

	2469 raise LALRError("Shift/shift conflic t in state %d" % st)

	2470 elif r < 0:

	2471 # Do a precedence check.

	2472 # - if precedence of reduce rule is h igher, we reduce.

	2473 # - if precedence of reduce is same a nd left assoc, we reduce.

	2474 # - otherwise we shift

	2475 rprec,rlevel = Productions[st_actionp[a] .number].prec

	2476 sprec,slevel = Precedence.get(a,('right' ,0))

	2477 if (slevel > rlevel) or ((slevel == rlev el) and (rprec == 'right')):

	2478 # We decide to shift here... highest precedence to shift

	2479 Productions[st_actionp[a].number].re duced -= 1

	2480 st_action[a] = j

	2481 st_actionp[a] = p

	2482 if not rlevel:

	2483 log.info(" ! shift/reduce confl ict for %s resolved as shift",a)

	2484 self.sr_conflicts.append((st,a,' shift'))

	2485 elif (slevel == rlevel) and (rprec == 'n onassoc'):

	2486 st_action[a] = None

	2487 else:

	2488 # Hmmm. Guess we'll keep the reduce

	2489 if not slevel and not rlevel:

	2490 log.info(" ! shift/reduce confl ict for %s resolved as reduce",a)

	2491 self.sr_conflicts.append((st,a,' reduce'))

	2492

	2493 else:

	2494 raise LALRError("Unknown conflict in sta te %d" % st)

	2495 else:

	2496 st_action[a] = j

	2497 st_actionp[a] = p

	2498

	2499 # Print the actions associated with each terminal

	2500 _actprint = { }

	2501 for a,p,m in actlist:

	2502 if a in st_action:

	2503 if p is st_actionp[a]:

	2504 log.info(" %-15s %s",a,m)

	2505 _actprint[(a,m)] = 1

	2506 log.info("")

	2507 # Print the actions that were not used. (debugging)

	2508 not_used = 0

	2509 for a,p,m in actlist:

	2510 if a in st_action:

	2511 if p is not st_actionp[a]:

	2512 if not (a,m) in _actprint:

	2513 log.debug(" ! %-15s [ %s ]",a,m)

	2514 not_used = 1

	2515 _actprint[(a,m)] = 1

	2516 if not_used:

	2517 log.debug("")

	2518

	2519 # Construct the goto table for this state

	2520

	2521 nkeys = { }

	2522 for ii in I:

	2523 for s in ii.usyms:

	2524 if s in self.grammar.Nonterminals:

	2525 nkeys[s] = None

	2526 for n in nkeys:

	2527 g = self.lr0_goto(I,n)

	2528 j = self.lr0_cidhash.get(id(g),-1)

	2529 if j >= 0:

	2530 st_goto[n] = j

	2531 log.info(" %-30s shift and go to state %d",n,j)

	2532

	2533 action[st] = st_action

	2534 actionp[st] = st_actionp

	2535 goto[st] = st_goto

	2536 st += 1

	2537

	2538

	2539 # -------------------------------------------------------------------------- ---

	2540 # write()

	2541 #

	2542 # This function writes the LR parsing tables to a file

	2543 # -------------------------------------------------------------------------- ---

	2544

	2545 def write_table(self,modulename,outputdir='',signature=""):

	2546 basemodulename = modulename.split(".")[-1]

	2547 filename = os.path.join(outputdir,basemodulename) + ".py"

	2548 try:

	2549 f = open(filename,"w")

	2550

	2551 f.write("""

	2552 # %s

	2553 # This file is automatically generated. Do not edit.

	2554 _tabversion = %r

	2555

	2556 _lr_method = %r

	2557

	2558 _lr_signature = %r

	2559 """ % (filename, __tabversion__, self.lr_method, signature))

	2560

	2561 # Change smaller to 0 to go back to original tables

	2562 smaller = 1

	2563

	2564 # Factor out names to try and make smaller

	2565 if smaller:

	2566 items = { }

	2567

	2568 for s,nd in self.lr_action.items():

	2569 for name,v in nd.items():

	2570 i = items.get(name)

	2571 if not i:

	2572 i = ([],[])

	2573 items[name] = i

	2574 i[0].append(s)

	2575 i[1].append(v)

	2576

	2577 f.write("\n_lr_action_items = {")

	2578 for k,v in items.items():

	2579 f.write("%r:([" % k)

	2580 for i in v[0]:

	2581 f.write("%r," % i)

	2582 f.write("],[")

	2583 for i in v[1]:

	2584 f.write("%r," % i)

	2585

	2586 f.write("]),")

	2587 f.write("}\n")

	2588

	2589 f.write("""

	2590 _lr_action = { }

	2591 for _k, _v in _lr_action_items.items():

	2592 for _x,_y in zip(_v[0],_v[1]):

	2593 if not _x in _lr_action: _lr_action[_x] = { }

	2594 _lr_action[_x][_k] = _y

	2595 del _lr_action_items

	2596 """)

	2597

	2598 else:

	2599 f.write("\n_lr_action = { ");

	2600 for k,v in self.lr_action.items():

	2601 f.write("(%r,%r):%r," % (k[0],k[1],v))

	2602 f.write("}\n");

	2603

	2604 if smaller:

	2605 # Factor out names to try and make smaller

	2606 items = { }

	2607

	2608 for s,nd in self.lr_goto.items():

	2609 for name,v in nd.items():

	2610 i = items.get(name)

	2611 if not i:

	2612 i = ([],[])

	2613 items[name] = i

	2614 i[0].append(s)

	2615 i[1].append(v)

	2616

	2617 f.write("\n_lr_goto_items = {")

	2618 for k,v in items.items():

	2619 f.write("%r:([" % k)

	2620 for i in v[0]:

	2621 f.write("%r," % i)

	2622 f.write("],[")

	2623 for i in v[1]:

	2624 f.write("%r," % i)

	2625

	2626 f.write("]),")

	2627 f.write("}\n")

	2628

	2629 f.write("""

	2630 _lr_goto = { }

	2631 for _k, _v in _lr_goto_items.items():

	2632 for _x,_y in zip(_v[0],_v[1]):

	2633 if not _x in _lr_goto: _lr_goto[_x] = { }

	2634 _lr_goto[_x][_k] = _y

	2635 del _lr_goto_items

	2636 """)

	2637 else:

	2638 f.write("\n_lr_goto = { ");

	2639 for k,v in self.lr_goto.items():

	2640 f.write("(%r,%r):%r," % (k[0],k[1],v))

	2641 f.write("}\n");

	2642

	2643 # Write production table

	2644 f.write("_lr_productions = [\n")

	2645 for p in self.lr_productions:

	2646 if p.func:

	2647 f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p .func,p.file,p.line))

	2648 else:

	2649 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p .len))

	2650 f.write("]\n")

	2651 f.close()

	2652

	2653 except IOError:

	2654 e = sys.exc_info()[1]

	2655 sys.stderr.write("Unable to create '%s'\n" % filename)

	2656 sys.stderr.write(str(e)+"\n")

	2657 return

	2658

	2659

	2660 # -------------------------------------------------------------------------- ---

	2661 # pickle_table()

	2662 #

	2663 # This function pickles the LR parsing tables to a supplied file object

	2664 # -------------------------------------------------------------------------- ---

	2665

	2666 def pickle_table(self,filename,signature=""):

	2667 try:

	2668 import cPickle as pickle

	2669 except ImportError:

	2670 import pickle

	2671 outf = open(filename,"wb")

	2672 pickle.dump(__tabversion__,outf,pickle_protocol)

	2673 pickle.dump(self.lr_method,outf,pickle_protocol)

	2674 pickle.dump(signature,outf,pickle_protocol)

	2675 pickle.dump(self.lr_action,outf,pickle_protocol)

	2676 pickle.dump(self.lr_goto,outf,pickle_protocol)

	2677

	2678 outp = []

	2679 for p in self.lr_productions:

	2680 if p.func:

	2681 outp.append((p.str,p.name, p.len, p.func,p.file,p.line))

	2682 else:

	2683 outp.append((str(p),p.name,p.len,None,None,None))

	2684 pickle.dump(outp,outf,pickle_protocol)

	2685 outf.close()

	2686

	2687 # -----------------------------------------------------------------------------

	2688 # === INTROSPECTION ===

	2689 #

	2690 # The following functions and classes are used to implement the PLY

	2691 # introspection features followed by the yacc() function itself.

	2692 # -----------------------------------------------------------------------------

	2693

	2694 # -----------------------------------------------------------------------------

	2695 # get_caller_module_dict()

	2696 #

	2697 # This function returns a dictionary containing all of the symbols defined withi n

	2698 # a caller further down the call stack. This is used to get the environment

	2699 # associated with the yacc() call if none was provided.

	2700 # -----------------------------------------------------------------------------

	2701

	2702 def get_caller_module_dict(levels):

	2703 try:

	2704 raise RuntimeError

	2705 except RuntimeError:

	2706 e,b,t = sys.exc_info()

	2707 f = t.tb_frame

	2708 while levels > 0:

	2709 f = f.f_back

	2710 levels -= 1

	2711 ldict = f.f_globals.copy()

	2712 if f.f_globals != f.f_locals:

	2713 ldict.update(f.f_locals)

	2714

	2715 return ldict

	2716

	2717 # -----------------------------------------------------------------------------

	2718 # parse_grammar()

	2719 #

	2720 # This takes a raw grammar rule string and parses it into production data

	2721 # -----------------------------------------------------------------------------

	2722 def parse_grammar(doc,file,line):

	2723 grammar = []

	2724 # Split the doc string into lines

	2725 pstrings = doc.splitlines()

	2726 lastp = None

	2727 dline = line

	2728 for ps in pstrings:

	2729 dline += 1

	2730 p = ps.split()

	2731 if not p: continue

	2732 try:

	2733 if p[0] == '\|':

	2734 # This is a continuation of a previous rule

	2735 if not lastp:

	2736 raise SyntaxError("%s:%d: Misplaced '\|'" % (file,dline))

	2737 prodname = lastp

	2738 syms = p[1:]

	2739 else:

	2740 prodname = p[0]

	2741 lastp = prodname

	2742 syms = p[2:]

	2743 assign = p[1]

	2744 if assign != ':' and assign != '::=':

	2745 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (fil e,dline))

	2746

	2747 grammar.append((file,dline,prodname,syms))

	2748 except SyntaxError:

	2749 raise

	2750 except Exception:

	2751 raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,p s.strip()))

	2752

	2753 return grammar

	2754

	2755 # -----------------------------------------------------------------------------

	2756 # ParserReflect()

	2757 #

	2758 # This class represents information extracted for building a parser including

	2759 # start symbol, error function, tokens, precedence list, action functions,

	2760 # etc.

	2761 # -----------------------------------------------------------------------------

	2762 class ParserReflect(object):

	2763 def __init__(self,pdict,log=None):

	2764 self.pdict = pdict

	2765 self.start = None

	2766 self.error_func = None

	2767 self.tokens = None

	2768 self.files = {}

	2769 self.grammar = []

	2770 self.error = 0

	2771

	2772 if log is None:

	2773 self.log = PlyLogger(sys.stderr)

	2774 else:

	2775 self.log = log

	2776

	2777 # Get all of the basic information

	2778 def get_all(self):

	2779 self.get_start()

	2780 self.get_error_func()

	2781 self.get_tokens()

	2782 self.get_precedence()

	2783 self.get_pfunctions()

	2784

	2785 # Validate all of the information

	2786 def validate_all(self):

	2787 self.validate_start()

	2788 self.validate_error_func()

	2789 self.validate_tokens()

	2790 self.validate_precedence()

	2791 self.validate_pfunctions()

	2792 self.validate_files()

	2793 return self.error

	2794

	2795 # Compute a signature over the grammar

	2796 def signature(self):

	2797 try:

	2798 from hashlib import md5

	2799 except ImportError:

	2800 from md5 import md5

	2801 try:

	2802 sig = md5()

	2803 if self.start:

	2804 sig.update(self.start.encode('latin-1'))

	2805 if self.prec:

	2806 sig.update("".join(["".join(p) for p in self.prec]).encode('lati n-1'))

	2807 if self.tokens:

	2808 sig.update(" ".join(self.tokens).encode('latin-1'))

	2809 for f in self.pfuncs:

	2810 if f[3]:

	2811 sig.update(f[3].encode('latin-1'))

	2812 except (TypeError,ValueError):

	2813 pass

	2814 return sig.digest()

	2815

	2816 # -------------------------------------------------------------------------- ---

	2817 # validate_file()

	2818 #

	2819 # This method checks to see if there are duplicated p_rulename() functions

	2820 # in the parser module file. Without this function, it is really easy for

	2821 # users to make mistakes by cutting and pasting code fragments (and it's a r eal

	2822 # bugger to try and figure out why the resulting parser doesn't work). Ther efore,

	2823 # we just do a little regular expression pattern matching of def statements

	2824 # to try and detect duplicates.

	2825 # -------------------------------------------------------------------------- ---

	2826

	2827 def validate_files(self):

	2828 # Match def p_funcname(

	2829 fre = re.compile(r'\sdef\s+(p_[a-zA-Z_0-9])\(')

	2830

	2831 for filename in self.files.keys():

	2832 base,ext = os.path.splitext(filename)

	2833 if ext != '.py': return 1 # No idea. Assume it's okay.

	2834

	2835 try:

	2836 f = open(filename)

	2837 lines = f.readlines()

	2838 f.close()

	2839 except IOError:

	2840 continue

	2841

	2842 counthash = { }

	2843 for linen,l in enumerate(lines):

	2844 linen += 1

	2845 m = fre.match(l)

	2846 if m:

	2847 name = m.group(1)

	2848 prev = counthash.get(name)

	2849 if not prev:

	2850 counthash[name] = linen

	2851 else:

	2852 self.log.warning("%s:%d: Function %s redefined. Previous ly defined on line %d", filename,linen,name,prev)

	2853

	2854 # Get the start symbol

	2855 def get_start(self):

	2856 self.start = self.pdict.get('start')

	2857

	2858 # Validate the start symbol

	2859 def validate_start(self):

	2860 if self.start is not None:

	2861 if not isinstance(self.start,str):

	2862 self.log.error("'start' must be a string")

	2863

	2864 # Look for error handler

	2865 def get_error_func(self):

	2866 self.error_func = self.pdict.get('p_error')

	2867

	2868 # Validate the error function

	2869 def validate_error_func(self):

	2870 if self.error_func:

	2871 if isinstance(self.error_func,types.FunctionType):

	2872 ismethod = 0

	2873 elif isinstance(self.error_func, types.MethodType):

	2874 ismethod = 1

	2875 else:

	2876 self.log.error("'p_error' defined, but is not a function or meth od")

	2877 self.error = 1

	2878 return

	2879

	2880 eline = func_code(self.error_func).co_firstlineno

	2881 efile = func_code(self.error_func).co_filename

	2882 self.files[efile] = 1

	2883

	2884 if (func_code(self.error_func).co_argcount != 1+ismethod):

	2885 self.log.error("%s:%d: p_error() requires 1 argument",efile,elin e)

	2886 self.error = 1

	2887

	2888 # Get the tokens map

	2889 def get_tokens(self):

	2890 tokens = self.pdict.get("tokens",None)

	2891 if not tokens:

	2892 self.log.error("No token list is defined")

	2893 self.error = 1

	2894 return

	2895

	2896 if not isinstance(tokens,(list, tuple)):

	2897 self.log.error("tokens must be a list or tuple")

	2898 self.error = 1

	2899 return

	2900

	2901 if not tokens:

	2902 self.log.error("tokens is empty")

	2903 self.error = 1

	2904 return

	2905

	2906 self.tokens = tokens

	2907

	2908 # Validate the tokens

	2909 def validate_tokens(self):

	2910 # Validate the tokens.

	2911 if 'error' in self.tokens:

	2912 self.log.error("Illegal token name 'error'. Is a reserved word")

	2913 self.error = 1

	2914 return

	2915

	2916 terminals = {}

	2917 for n in self.tokens:

	2918 if n in terminals:

	2919 self.log.warning("Token '%s' multiply defined", n)

	2920 terminals[n] = 1

	2921

	2922 # Get the precedence map (if any)

	2923 def get_precedence(self):

	2924 self.prec = self.pdict.get("precedence",None)

	2925

	2926 # Validate and parse the precedence map

	2927 def validate_precedence(self):

	2928 preclist = []

	2929 if self.prec:

	2930 if not isinstance(self.prec,(list,tuple)):

	2931 self.log.error("precedence must be a list or tuple")

	2932 self.error = 1

	2933 return

	2934 for level,p in enumerate(self.prec):

	2935 if not isinstance(p,(list,tuple)):

	2936 self.log.error("Bad precedence table")

	2937 self.error = 1

	2938 return

	2939

	2940 if len(p) < 2:

	2941 self.log.error("Malformed precedence entry %s. Must be (asso c, term, ..., term)",p)

	2942 self.error = 1

	2943 return

	2944 assoc = p[0]

	2945 if not isinstance(assoc,str):

	2946 self.log.error("precedence associativity must be a string")

	2947 self.error = 1

	2948 return

	2949 for term in p[1:]:

	2950 if not isinstance(term,str):

	2951 self.log.error("precedence items must be strings")

	2952 self.error = 1

	2953 return

	2954 preclist.append((term,assoc,level+1))

	2955 self.preclist = preclist

	2956

	2957 # Get all p_functions from the grammar

	2958 def get_pfunctions(self):

	2959 p_functions = []

	2960 for name, item in self.pdict.items():

	2961 if name[:2] != 'p_': continue

	2962 if name == 'p_error': continue

	2963 if isinstance(item,(types.FunctionType,types.MethodType)):

	2964 line = func_code(item).co_firstlineno

	2965 file = func_code(item).co_filename

	2966 p_functions.append((line,file,name,item.__doc__))

	2967

	2968 # Sort all of the actions by line number

	2969 p_functions.sort()

	2970 self.pfuncs = p_functions

	2971

	2972

	2973 # Validate all of the p_functions

	2974 def validate_pfunctions(self):

	2975 grammar = []

	2976 # Check for non-empty symbols

	2977 if len(self.pfuncs) == 0:

	2978 self.log.error("no rules of the form p_rulename are defined")

	2979 self.error = 1

	2980 return

	2981

	2982 for line, file, name, doc in self.pfuncs:

	2983 func = self.pdict[name]

	2984 if isinstance(func, types.MethodType):

	2985 reqargs = 2

	2986 else:

	2987 reqargs = 1

	2988 if func_code(func).co_argcount > reqargs:

	2989 self.log.error("%s:%d: Rule '%s' has too many arguments",file,li ne,func.__name__)

	2990 self.error = 1

	2991 elif func_code(func).co_argcount < reqargs:

	2992 self.log.error("%s:%d: Rule '%s' requires an argument",file,line ,func.__name__)

	2993 self.error = 1

	2994 elif not func.__doc__:

	2995 self.log.warning("%s:%d: No documentation string specified in fu nction '%s' (ignored)",file,line,func.__name__)

	2996 else:

	2997 try:

	2998 parsed_g = parse_grammar(doc,file,line)

	2999 for g in parsed_g:

	3000 grammar.append((name, g))

	3001 except SyntaxError:

	3002 e = sys.exc_info()[1]

	3003 self.log.error(str(e))

	3004 self.error = 1

	3005

	3006 # Looks like a valid grammar rule

	3007 # Mark the file in which defined.

	3008 self.files[file] = 1

	3009

	3010 # Secondary validation step that looks for p_ definitions that are not f unctions

	3011 # or functions that look like they might be grammar rules.

	3012

	3013 for n,v in self.pdict.items():

	3014 if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.Metho dType)): continue

	3015 if n[0:2] == 't_': continue

	3016 if n[0:2] == 'p_' and n != 'p_error':

	3017 self.log.warning("'%s' not defined as a function", n)

	3018 if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount = = 1) or

	3019 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):

	3020 try:

	3021 doc = v.__doc__.split(" ")

	3022 if doc[1] == ':':

	3023 self.log.warning("%s:%d: Possible grammar rule '%s' defi ned without p_ prefix",

	3024 func_code(v).co_filename, func_code(v). co_firstlineno,n)

	3025 except Exception:

	3026 pass

	3027

	3028 self.grammar = grammar

	3029

	3030 # -----------------------------------------------------------------------------

	3031 # yacc(module)

	3032 #

	3033 # Build a parser

	3034 # -----------------------------------------------------------------------------

	3035

	3036 def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star t=None,

	3037 check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,out putdir='',

	3038 debuglog=None, errorlog = None, picklefile=None):

	3039

	3040 global parse # Reference to the parsing method of the last b uilt parser

	3041

	3042 # If pickling is enabled, table files are not created

	3043

	3044 if picklefile:

	3045 write_tables = 0

	3046

	3047 if errorlog is None:

	3048 errorlog = PlyLogger(sys.stderr)

	3049

	3050 # Get the module dictionary used for the parser

	3051 if module:

	3052 _items = [(k,getattr(module,k)) for k in dir(module)]

	3053 pdict = dict(_items)

	3054 else:

	3055 pdict = get_caller_module_dict(2)

	3056

	3057 # Collect parser information from the dictionary

	3058 pinfo = ParserReflect(pdict,log=errorlog)

	3059 pinfo.get_all()

	3060

	3061 if pinfo.error:

	3062 raise YaccError("Unable to build parser")

	3063

	3064 # Check signature against table files (if any)

	3065 signature = pinfo.signature()

	3066

	3067 # Read the tables

	3068 try:

	3069 lr = LRTable()

	3070 if picklefile:

	3071 read_signature = lr.read_pickle(picklefile)

	3072 else:

	3073 read_signature = lr.read_table(tabmodule)

	3074 if optimize or (read_signature == signature):

	3075 try:

	3076 lr.bind_callables(pinfo.pdict)

	3077 parser = LRParser(lr,pinfo.error_func)

	3078 parse = parser.parse

	3079 return parser

	3080 except Exception:

	3081 e = sys.exc_info()[1]

	3082 errorlog.warning("There was a problem loading the table file: %s ", repr(e))

	3083 except VersionError:

	3084 e = sys.exc_info()

	3085 errorlog.warning(str(e))

	3086 except Exception:

	3087 pass

	3088

	3089 if debuglog is None:

	3090 if debug:

	3091 debuglog = PlyLogger(open(debugfile,"w"))

	3092 else:

	3093 debuglog = NullLogger()

	3094

	3095 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __ver sion__)

	3096

	3097

	3098 errors = 0

	3099

	3100 # Validate the parser information

	3101 if pinfo.validate_all():

	3102 raise YaccError("Unable to build parser")

	3103

	3104 if not pinfo.error_func:

	3105 errorlog.warning("no p_error() function is defined")

	3106

	3107 # Create a grammar object

	3108 grammar = Grammar(pinfo.tokens)

	3109

	3110 # Set precedence level for terminals

	3111 for term, assoc, level in pinfo.preclist:

	3112 try:

	3113 grammar.set_precedence(term,assoc,level)

	3114 except GrammarError:

	3115 e = sys.exc_info()[1]

	3116 errorlog.warning("%s",str(e))

	3117

	3118 # Add productions to the grammar

	3119 for funcname, gram in pinfo.grammar:

	3120 file, line, prodname, syms = gram

	3121 try:

	3122 grammar.add_production(prodname,syms,funcname,file,line)

	3123 except GrammarError:

	3124 e = sys.exc_info()[1]

	3125 errorlog.error("%s",str(e))

	3126 errors = 1

	3127

	3128 # Set the grammar start symbols

	3129 try:

	3130 if start is None:

	3131 grammar.set_start(pinfo.start)

	3132 else:

	3133 grammar.set_start(start)

	3134 except GrammarError:

	3135 e = sys.exc_info()[1]

	3136 errorlog.error(str(e))

	3137 errors = 1

	3138

	3139 if errors:

	3140 raise YaccError("Unable to build parser")

	3141

	3142 # Verify the grammar structure

	3143 undefined_symbols = grammar.undefined_symbols()

	3144 for sym, prod in undefined_symbols:

	3145 errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)

	3146 errors = 1

	3147

	3148 unused_terminals = grammar.unused_terminals()

	3149 if unused_terminals:

	3150 debuglog.info("")

	3151 debuglog.info("Unused terminals:")

	3152 debuglog.info("")

	3153 for term in unused_terminals:

	3154 errorlog.warning("Token '%s' defined, but not used", term)

	3155 debuglog.info(" %s", term)

	3156

	3157 # Print out all productions to the debug log

	3158 if debug:

	3159 debuglog.info("")

	3160 debuglog.info("Grammar")

	3161 debuglog.info("")

	3162 for n,p in enumerate(grammar.Productions):

	3163 debuglog.info("Rule %-5d %s", n, p)

	3164

	3165 # Find unused non-terminals

	3166 unused_rules = grammar.unused_rules()

	3167 for prod in unused_rules:

	3168 errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, pr od.line, prod.name)

	3169

	3170 if len(unused_terminals) == 1:

	3171 errorlog.warning("There is 1 unused token")

	3172 if len(unused_terminals) > 1:

	3173 errorlog.warning("There are %d unused tokens", len(unused_terminals))

	3174

	3175 if len(unused_rules) == 1:

	3176 errorlog.warning("There is 1 unused rule")

	3177 if len(unused_rules) > 1:

	3178 errorlog.warning("There are %d unused rules", len(unused_rules))

	3179

	3180 if debug:

	3181 debuglog.info("")

	3182 debuglog.info("Terminals, with rules where they appear")

	3183 debuglog.info("")

	3184 terms = list(grammar.Terminals)

	3185 terms.sort()

	3186 for term in terms:

	3187 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar. Terminals[term]]))

	3188

	3189 debuglog.info("")

	3190 debuglog.info("Nonterminals, with rules where they appear")

	3191 debuglog.info("")

	3192 nonterms = list(grammar.Nonterminals)

	3193 nonterms.sort()

	3194 for nonterm in nonterms:

	3195 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in gramm ar.Nonterminals[nonterm]]))

	3196 debuglog.info("")

	3197

	3198 if check_recursion:

	3199 unreachable = grammar.find_unreachable()

	3200 for u in unreachable:

	3201 errorlog.warning("Symbol '%s' is unreachable",u)

	3202

	3203 infinite = grammar.infinite_cycles()

	3204 for inf in infinite:

	3205 errorlog.error("Infinite recursion detected for symbol '%s'", inf)

	3206 errors = 1

	3207

	3208 unused_prec = grammar.unused_precedence()

	3209 for term, assoc in unused_prec:

	3210 errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", a ssoc, term)

	3211 errors = 1

	3212

	3213 if errors:

	3214 raise YaccError("Unable to build parser")

	3215

	3216 # Run the LRGeneratedTable on the grammar

	3217 if debug:

	3218 errorlog.debug("Generating %s tables", method)

	3219

	3220 lr = LRGeneratedTable(grammar,method,debuglog)

	3221

	3222 if debug:

	3223 num_sr = len(lr.sr_conflicts)

	3224

	3225 # Report shift/reduce and reduce/reduce conflicts

	3226 if num_sr == 1:

	3227 errorlog.warning("1 shift/reduce conflict")

	3228 elif num_sr > 1:

	3229 errorlog.warning("%d shift/reduce conflicts", num_sr)

	3230

	3231 num_rr = len(lr.rr_conflicts)

	3232 if num_rr == 1:

	3233 errorlog.warning("1 reduce/reduce conflict")

	3234 elif num_rr > 1:

	3235 errorlog.warning("%d reduce/reduce conflicts", num_rr)

	3236

	3237 # Write out conflicts to the output file

	3238 if debug and (lr.sr_conflicts or lr.rr_conflicts):

	3239 debuglog.warning("")

	3240 debuglog.warning("Conflicts:")

	3241 debuglog.warning("")

	3242

	3243 for state, tok, resolution in lr.sr_conflicts:

	3244 debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)

	3245

	3246 already_reported = {}

	3247 for state, rule, rejected in lr.rr_conflicts:

	3248 if (state,id(rule),id(rejected)) in already_reported:

	3249 continue

	3250 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)

	3251 debuglog.warning("rejected rule (%s) in state %d", rejected,state)

	3252 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)

	3253 errorlog.warning("rejected rule (%s) in state %d", rejected, state)

	3254 already_reported[state,id(rule),id(rejected)] = 1

	3255

	3256 warned_never = []

	3257 for state, rule, rejected in lr.rr_conflicts:

	3258 if not rejected.reduced and (rejected not in warned_never):

	3259 debuglog.warning("Rule (%s) is never reduced", rejected)

	3260 errorlog.warning("Rule (%s) is never reduced", rejected)

	3261 warned_never.append(rejected)

	3262

	3263 # Write the table file if requested

	3264 if write_tables:

	3265 lr.write_table(tabmodule,outputdir,signature)

	3266

	3267 # Write a pickled version of the tables

	3268 if picklefile:

	3269 lr.pickle_table(picklefile,signature)

	3270

	3271 # Build the parser

	3272 lr.bind_callables(pinfo.pdict)

	3273 parser = LRParser(lr,pinfo.error_func)

	3274

	3275 parse = parser.parse

	3276 return parser

OLD	NEW

« no previous file with comments | « third_party/ply/lex.py ('k') | no next file » | no next file with comments »