tools/nixysa/third_party/ply-3.1/ply/yacc.py - Issue 2043006: WTF NPAPI extension. Early draft.

Side by Side Diff: tools/nixysa/third_party/ply-3.1/ply/yacc.py

Issue 2043006: WTF NPAPI extension. Early draft. Base URL: http://src.chromium.org/svn/trunk/src/

Patch Set: Created 10 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 # -----------------------------------------------------------------------------

	2 # ply: yacc.py

	3 #

	4 # Author(s): David M. Beazley (dave@dabeaz.com)

	5 #

	6 # Copyright (C) 2001-2009, David M. Beazley

	7 #

	8 # This library is free software; you can redistribute it and/or

	9 # modify it under the terms of the GNU Lesser General Public

	10 # License as published by the Free Software Foundation; either

	11 # version 2.1 of the License, or (at your option) any later version.

	12 #

	13 # This library is distributed in the hope that it will be useful,

	14 # but WITHOUT ANY WARRANTY; without even the implied warranty of

	15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

	16 # Lesser General Public License for more details.

	17 #

	18 # You should have received a copy of the GNU Lesser General Public

	19 # License along with this library; if not, write to the Free Software

	20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

	21 #

	22 # See the file COPYING for a complete copy of the LGPL.

	23 #

	24 #

	25 # This implements an LR parser that is constructed from grammar rules defined

	26 # as Python functions. The grammer is specified by supplying the BNF inside

	27 # Python documentation strings. The inspiration for this technique was borrowed

	28 # from John Aycock's Spark parsing system. PLY might be viewed as cross between

	29 # Spark and the GNU bison utility.

	30 #

	31 # The current implementation is only somewhat object-oriented. The

	32 # LR parser itself is defined in terms of an object (which allows multiple

	33 # parsers to co-exist). However, most of the variables used during table

	34 # construction are defined in terms of global variables. Users shouldn't

	35 # notice unless they are trying to define multiple parsers at the same

	36 # time using threads (in which case they should have their head examined).

	37 #

	38 # This implementation supports both SLR and LALR(1) parsing. LALR(1)

	39 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),

	40 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,

	41 # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced

	42 # by the more efficient DeRemer and Pennello algorithm.

	43 #

	44 # :::::::: WARNING :::::::

	45 #

	46 # Construction of LR parsing tables is fairly complicated and expensive.

	47 # To make this module run fast, a LOT of work has been put into

	48 # optimization---often at the expensive of readability and what might

	49 # consider to be good Python "coding style." Modify the code at your

	50 # own risk!

	51 # ----------------------------------------------------------------------------

	52

	53 __version__ = "3.0"

	54 __tabversion__ = "3.0" # Table version

	55

	56 #-----------------------------------------------------------------------------

	57 # === User configurable parameters ===

	58 #

	59 # Change these to modify the default behavior of yacc (if you wish)

	60 #-----------------------------------------------------------------------------

	61

	62 yaccdebug = 1 # Debugging mode. If set, yacc generates a

	63 # a 'parser.out' file in the current directory

	64

	65 debug_file = 'parser.out' # Default name of the debugging file

	66 tab_module = 'parsetab' # Default name of the table module

	67 default_lr = 'LALR' # Default LR table generation method

	68

	69 error_count = 3 # Number of symbols that must be shifted to leave recovery mode

	70

	71 yaccdevel = 0 # Set to True if developing yacc. This turns off optimized

	72 # implementations of certain functions.

	73

	74 resultlimit = 40 # Size limit of results when running in debug mod e.

	75

	76 import re, types, sys, os.path

	77

	78 # Compatibility function for python 2.6/3.0

	79 if sys.version_info[0] < 3:

	80 def func_code(f):

	81 return f.func_code

	82 else:

	83 def func_code(f):

	84 return f.__code__

	85

	86 # Compatibility

	87 try:

	88 MAXINT = sys.maxint

	89 except AttributeError:

	90 MAXINT = sys.maxsize

	91

	92 # Python 2.x/3.0 compatibility.

	93 def load_ply_lex():

	94 if sys.version_info[0] < 3:

	95 import lex

	96 else:

	97 import ply.lex as lex

	98 return lex

	99

	100 # This object is a stand-in for a logging object created by the

	101 # logging module. PLY will use this by default to create things

	102 # such as the parser.out file. If a user wants more detailed

	103 # information, they can create their own logging object and pass

	104 # it into PLY.

	105

	106 class PlyLogger(object):

	107 def __init__(self,f):

	108 self.f = f

	109 def debug(self,msg,args,*kwargs):

	110 self.f.write((msg % args) + "\n")

	111 info = debug

	112

	113 def warning(self,msg,args,*kwargs):

	114 self.f.write("WARNING: "+ (msg % args) + "\n")

	115

	116 def error(self,msg,args,*kwargs):

	117 self.f.write("ERROR: " + (msg % args) + "\n")

	118

	119 critical = debug

	120

	121 # Null logger is used when no output is generated. Does nothing.

	122 class NullLogger(object):

	123 def __getattribute__(self,name):

	124 return self

	125 def __call__(self,args,*kwargs):

	126 return self

	127

	128 # Exception raised for yacc-related errors

	129 class YaccError(Exception): pass

	130

	131 # Format the result message that the parser produces when running in debug mode.

	132 def format_result(r):

	133 repr_str = repr(r)

	134 if '\n' in repr_str: repr_str = repr(repr_str)

	135 if len(repr_str) > resultlimit:

	136 repr_str = repr_str[:resultlimit]+" ..."

	137 result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)

	138 return result

	139

	140

	141 # Format stack entries when the parser is running in debug mode

	142 def format_stack_entry(r):

	143 repr_str = repr(r)

	144 if '\n' in repr_str: repr_str = repr(repr_str)

	145 if len(repr_str) < 16:

	146 return repr_str

	147 else:

	148 return "<%s @ 0x%x>" % (type(r).__name__,id(r))

	149

	150 #-----------------------------------------------------------------------------

	151 # === LR Parsing Engine ===

	152 #

	153 # The following classes are used for the LR parser itself. These are not

	154 # used during table construction and are independent of the actual LR

	155 # table generation algorithm

	156 #-----------------------------------------------------------------------------

	157

	158 # This class is used to hold non-terminal grammar symbols during parsing.

	159 # It normally has the following attributes set:

	160 # .type = Grammar symbol type

	161 # .value = Symbol value

	162 # .lineno = Starting line number

	163 # .endlineno = Ending line number (optional, set automatically)

	164 # .lexpos = Starting lex position

	165 # .endlexpos = Ending lex position (optional, set automatically)

	166

	167 class YaccSymbol:

	168 def __str__(self): return self.type

	169 def __repr__(self): return str(self)

	170

	171 # This class is a wrapper around the objects actually passed to each

	172 # grammar rule. Index lookup and assignment actually assign the

	173 # .value attribute of the underlying YaccSymbol object.

	174 # The lineno() method returns the line number of a given

	175 # item (or 0 if not defined). The linespan() method returns

	176 # a tuple of (startline,endline) representing the range of lines

	177 # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)

	178 # representing the range of positional information for a symbol.

	179

	180 class YaccProduction:

	181 def __init__(self,s,stack=None):

	182 self.slice = s

	183 self.stack = stack

	184 self.lexer = None

	185 self.parser= None

	186 def __getitem__(self,n):

	187 if n >= 0: return self.slice[n].value

	188 else: return self.stack[n].value

	189

	190 def __setitem__(self,n,v):

	191 self.slice[n].value = v

	192

	193 def __getslice__(self,i,j):

	194 return [s.value for s in self.slice[i:j]]

	195

	196 def __len__(self):

	197 return len(self.slice)

	198

	199 def lineno(self,n):

	200 return getattr(self.slice[n],"lineno",0)

	201

	202 def set_lineno(self,n,lineno):

	203 self.slice[n].lineno = n

	204

	205 def linespan(self,n):

	206 startline = getattr(self.slice[n],"lineno",0)

	207 endline = getattr(self.slice[n],"endlineno",startline)

	208 return startline,endline

	209

	210 def lexpos(self,n):

	211 return getattr(self.slice[n],"lexpos",0)

	212

	213 def lexspan(self,n):

	214 startpos = getattr(self.slice[n],"lexpos",0)

	215 endpos = getattr(self.slice[n],"endlexpos",startpos)

	216 return startpos,endpos

	217

	218 def error(self):

	219 raise SyntaxError

	220

	221

	222 # -----------------------------------------------------------------------------

	223 # == LRParser ==

	224 #

	225 # The LR Parsing engine.

	226 # -----------------------------------------------------------------------------

	227

	228 class LRParser:

	229 def __init__(self,lrtab,errorf):

	230 self.productions = lrtab.lr_productions

	231 self.action = lrtab.lr_action

	232 self.goto = lrtab.lr_goto

	233 self.errorfunc = errorf

	234

	235 def errok(self):

	236 self.errorok = 1

	237

	238 def restart(self):

	239 del self.statestack[:]

	240 del self.symstack[:]

	241 sym = YaccSymbol()

	242 sym.type = '$end'

	243 self.symstack.append(sym)

	244 self.statestack.append(0)

	245

	246 def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):

	247 if debug or yaccdevel:

	248 if isinstance(debug,int):

	249 debug = PlyLogger(sys.stderr)

	250 return self.parsedebug(input,lexer,debug,tracking,tokenfunc)

	251 elif tracking:

	252 return self.parseopt(input,lexer,debug,tracking,tokenfunc)

	253 else:

	254 return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)

	255

	256

	257 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	258 # parsedebug().

	259 #

	260 # This is the debugging enabled version of parse(). All changes made to the

	261 # parsing engine should be made here. For the non-debugging version,

	262 # copy this code to a method parseopt() and delete all of the sections

	263 # enclosed in:

	264 #

	265 # #--! DEBUG

	266 # statements

	267 # #--! DEBUG

	268 #

	269 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	270

	271 def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=No ne):

	272 lookahead = None # Current lookahead symbol

	273 lookaheadstack = [ ] # Stack of lookahead symbols

	274 actions = self.action # Local reference to action table (to a void lookup on self.)

	275 goto = self.goto # Local reference to goto table (to avo id lookup on self.)

	276 prod = self.productions # Local reference to production list (t o avoid lookup on self.)

	277 pslice = YaccProduction(None) # Production object passed to grammar r ules

	278 errorcount = 0 # Used during error recovery

	279

	280 # --! DEBUG

	281 debug.info("PLY: PARSE DEBUG START")

	282 # --! DEBUG

	283

	284 # If no lexer was given, we will try to use the lex module

	285 if not lexer:

	286 lex = load_ply_lex()

	287 lexer = lex.lexer

	288

	289 # Set up the lexer and parser objects on pslice

	290 pslice.lexer = lexer

	291 pslice.parser = self

	292

	293 # If input was supplied, pass to lexer

	294 if input is not None:

	295 lexer.input(input)

	296

	297 if tokenfunc is None:

	298 # Tokenize function

	299 get_token = lexer.token

	300 else:

	301 get_token = tokenfunc

	302

	303 # Set up the state and symbol stacks

	304

	305 statestack = [ ] # Stack of parsing states

	306 self.statestack = statestack

	307 symstack = [ ] # Stack of grammar symbols

	308 self.symstack = symstack

	309

	310 pslice.stack = symstack # Put in the production

	311 errtoken = None # Err token

	312

	313 # The start state is assumed to be (0,$end)

	314

	315 statestack.append(0)

	316 sym = YaccSymbol()

	317 sym.type = "$end"

	318 symstack.append(sym)

	319 state = 0

	320 while 1:

	321 # Get the next symbol on the input. If a lookahead symbol

	322 # is already set, we just use that. Otherwise, we'll pull

	323 # the next token off of the lookaheadstack or from the lexer

	324

	325 # --! DEBUG

	326 debug.debug('')

	327 debug.debug('State : %s', state)

	328 # --! DEBUG

	329

	330 if not lookahead:

	331 if not lookaheadstack:

	332 lookahead = get_token() # Get the next token

	333 else:

	334 lookahead = lookaheadstack.pop()

	335 if not lookahead:

	336 lookahead = YaccSymbol()

	337 lookahead.type = "$end"

	338

	339 # --! DEBUG

	340 debug.debug('Stack : %s',

	341 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]) , str(lookahead))).lstrip())

	342 # --! DEBUG

	343

	344 # Check the action table

	345 ltype = lookahead.type

	346 t = actions[state].get(ltype)

	347

	348 if t is not None:

	349 if t > 0:

	350 # shift a symbol on the stack

	351 statestack.append(t)

	352 state = t

	353

	354 # --! DEBUG

	355 debug.debug("Action : Shift and goto state %s", t)

	356 # --! DEBUG

	357

	358 symstack.append(lookahead)

	359 lookahead = None

	360

	361 # Decrease error count on successful shift

	362 if errorcount: errorcount -=1

	363 continue

	364

	365 if t < 0:

	366 # reduce a symbol on the stack, emit a production

	367 p = prod[-t]

	368 pname = p.name

	369 plen = p.len

	370

	371 # Get production function

	372 sym = YaccSymbol()

	373 sym.type = pname # Production name

	374 sym.value = None

	375

	376 # --! DEBUG

	377 if plen:

	378 debug.info("Action : Reduce rule [%s] with %s and goto s tate %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[- plen:]])+"]",-t)

	379 else:

	380 debug.info("Action : Reduce rule [%s] with %s and goto s tate %d", p.str, [],-t)

	381

	382 # --! DEBUG

	383

	384 if plen:

	385 targ = symstack[-plen-1:]

	386 targ[0] = sym

	387

	388 # --! TRACKING

	389 if tracking:

	390 t1 = targ[1]

	391 sym.lineno = t1.lineno

	392 sym.lexpos = t1.lexpos

	393 t1 = targ[-1]

	394 sym.endlineno = getattr(t1,"endlineno",t1.lineno)

	395 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

	396

	397 # --! TRACKING

	398

	399 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	400 # The code enclosed in this section is duplicated

	401 # below as a performance optimization. Make sure

	402 # changes get made in both locations.

	403

	404 pslice.slice = targ

	405

	406 try:

	407 # Call the grammar rule with our special slice objec t

	408 del symstack[-plen:]

	409 del statestack[-plen:]

	410 p.callable(pslice)

	411 # --! DEBUG

	412 debug.info("Result : %s", format_result(pslice[0]))

	413 # --! DEBUG

	414 symstack.append(sym)

	415 state = goto[statestack[-1]][pname]

	416 statestack.append(state)

	417 except SyntaxError:

	418 # If an error was set. Enter error recovery state

	419 lookaheadstack.append(lookahead)

	420 symstack.pop()

	421 statestack.pop()

	422 state = statestack[-1]

	423 sym.type = 'error'

	424 lookahead = sym

	425 errorcount = error_count

	426 self.errorok = 0

	427 continue

	428 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	429

	430 else:

	431

	432 # --! TRACKING

	433 if tracking:

	434 sym.lineno = lexer.lineno

	435 sym.lexpos = lexer.lexpos

	436 # --! TRACKING

	437

	438 targ = [ sym ]

	439

	440 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	441 # The code enclosed in this section is duplicated

	442 # above as a performance optimization. Make sure

	443 # changes get made in both locations.

	444

	445 pslice.slice = targ

	446

	447 try:

	448 # Call the grammar rule with our special slice objec t

	449 p.callable(pslice)

	450 # --! DEBUG

	451 debug.info("Result : %s", format_result(pslice[0]))

	452 # --! DEBUG

	453 symstack.append(sym)

	454 state = goto[statestack[-1]][pname]

	455 statestack.append(state)

	456 except SyntaxError:

	457 # If an error was set. Enter error recovery state

	458 lookaheadstack.append(lookahead)

	459 symstack.pop()

	460 statestack.pop()

	461 state = statestack[-1]

	462 sym.type = 'error'

	463 lookahead = sym

	464 errorcount = error_count

	465 self.errorok = 0

	466 continue

	467 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	468

	469 if t == 0:

	470 n = symstack[-1]

	471 result = getattr(n,"value",None)

	472 # --! DEBUG

	473 debug.info("Done : Returning %s", format_result(result))

	474 debug.info("PLY: PARSE DEBUG END")

	475 # --! DEBUG

	476 return result

	477

	478 if t == None:

	479

	480 # --! DEBUG

	481 debug.error('Error : %s',

	482 ("%s . %s" % (" ".join([xx.type for xx in symstack][ 1:]), str(lookahead))).lstrip())

	483 # --! DEBUG

	484

	485 # We have some kind of parsing error here. To handle

	486 # this, we are going to push the current token onto

	487 # the tokenstack and replace it with an 'error' token.

	488 # If there are any synchronization rules, they may

	489 # catch it.

	490 #

	491 # In addition to pushing the error token, we call call

	492 # the user defined p_error() function if this is the

	493 # first syntax error. This function is only called if

	494 # errorcount == 0.

	495 if errorcount == 0 or self.errorok:

	496 errorcount = error_count

	497 self.errorok = 0

	498 errtoken = lookahead

	499 if errtoken.type == "$end":

	500 errtoken = None # End of file!

	501 if self.errorfunc:

	502 global errok,token,restart

	503 errok = self.errok # Set some special functions a vailable in error recovery

	504 token = get_token

	505 restart = self.restart

	506 if errtoken and not hasattr(errtoken,'lexer'):

	507 errtoken.lexer = lexer

	508 tok = self.errorfunc(errtoken)

	509 del errok, token, restart # Delete special functions

	510

	511 if self.errorok:

	512 # User must have done some kind of panic

	513 # mode recovery on their own. The

	514 # returned token is the next lookahead

	515 lookahead = tok

	516 errtoken = None

	517 continue

	518 else:

	519 if errtoken:

	520 if hasattr(errtoken,"lineno"): lineno = lookahead.li neno

	521 else: lineno = 0

	522 if lineno:

	523 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))

	524 else:

	525 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)

	526 else:

	527 sys.stderr.write("yacc: Parse error in input. EOF\n" )

	528 return

	529

	530 else:

	531 errorcount = error_count

	532

	533 # case 1: the statestack only has 1 entry on it. If we're in t his state, the

	534 # entire parse has been rolled back and we're completely hosed. The token is

	535 # discarded and we just keep going.

	536

	537 if len(statestack) <= 1 and lookahead.type != "$end":

	538 lookahead = None

	539 errtoken = None

	540 state = 0

	541 # Nuke the pushback stack

	542 del lookaheadstack[:]

	543 continue

	544

	545 # case 2: the statestack has a couple of entries on it, but we'r e

	546 # at the end of the file. nuke the top entry and generate an err or token

	547

	548 # Start nuking entries on the stack

	549 if lookahead.type == "$end":

	550 # Whoa. We're really hosed here. Bail out

	551 return

	552

	553 if lookahead.type != 'error':

	554 sym = symstack[-1]

	555 if sym.type == 'error':

	556 # Hmmm. Error is on top of stack, we'll just nuke input

	557 # symbol and continue

	558 lookahead = None

	559 continue

	560 t = YaccSymbol()

	561 t.type = 'error'

	562 if hasattr(lookahead,"lineno"):

	563 t.lineno = lookahead.lineno

	564 t.value = lookahead

	565 lookaheadstack.append(lookahead)

	566 lookahead = t

	567 else:

	568 symstack.pop()

	569 statestack.pop()

	570 state = statestack[-1] # Potential bug fix

	571

	572 continue

	573

	574 # Call an error function here

	575 raise RuntimeError("yacc: internal parser error!!!\n")

	576

	577 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	578 # parseopt().

	579 #

	580 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.

	581 # Edit the debug version above, then copy any modifications to the method

	582 # below while removing #--! DEBUG sections.

	583 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	584

	585

	586 def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):

	587 lookahead = None # Current lookahead symbol

	588 lookaheadstack = [ ] # Stack of lookahead symbols

	589 actions = self.action # Local reference to action table (to a void lookup on self.)

	590 goto = self.goto # Local reference to goto table (to avo id lookup on self.)

	591 prod = self.productions # Local reference to production list (t o avoid lookup on self.)

	592 pslice = YaccProduction(None) # Production object passed to grammar r ules

	593 errorcount = 0 # Used during error recovery

	594

	595 # If no lexer was given, we will try to use the lex module

	596 if not lexer:

	597 lex = load_ply_lex()

	598 lexer = lex.lexer

	599

	600 # Set up the lexer and parser objects on pslice

	601 pslice.lexer = lexer

	602 pslice.parser = self

	603

	604 # If input was supplied, pass to lexer

	605 if input is not None:

	606 lexer.input(input)

	607

	608 if tokenfunc is None:

	609 # Tokenize function

	610 get_token = lexer.token

	611 else:

	612 get_token = tokenfunc

	613

	614 # Set up the state and symbol stacks

	615

	616 statestack = [ ] # Stack of parsing states

	617 self.statestack = statestack

	618 symstack = [ ] # Stack of grammar symbols

	619 self.symstack = symstack

	620

	621 pslice.stack = symstack # Put in the production

	622 errtoken = None # Err token

	623

	624 # The start state is assumed to be (0,$end)

	625

	626 statestack.append(0)

	627 sym = YaccSymbol()

	628 sym.type = '$end'

	629 symstack.append(sym)

	630 state = 0

	631 while 1:

	632 # Get the next symbol on the input. If a lookahead symbol

	633 # is already set, we just use that. Otherwise, we'll pull

	634 # the next token off of the lookaheadstack or from the lexer

	635

	636 if not lookahead:

	637 if not lookaheadstack:

	638 lookahead = get_token() # Get the next token

	639 else:

	640 lookahead = lookaheadstack.pop()

	641 if not lookahead:

	642 lookahead = YaccSymbol()

	643 lookahead.type = '$end'

	644

	645 # Check the action table

	646 ltype = lookahead.type

	647 t = actions[state].get(ltype)

	648

	649 if t is not None:

	650 if t > 0:

	651 # shift a symbol on the stack

	652 statestack.append(t)

	653 state = t

	654

	655 symstack.append(lookahead)

	656 lookahead = None

	657

	658 # Decrease error count on successful shift

	659 if errorcount: errorcount -=1

	660 continue

	661

	662 if t < 0:

	663 # reduce a symbol on the stack, emit a production

	664 p = prod[-t]

	665 pname = p.name

	666 plen = p.len

	667

	668 # Get production function

	669 sym = YaccSymbol()

	670 sym.type = pname # Production name

	671 sym.value = None

	672

	673 if plen:

	674 targ = symstack[-plen-1:]

	675 targ[0] = sym

	676

	677 # --! TRACKING

	678 if tracking:

	679 t1 = targ[1]

	680 sym.lineno = t1.lineno

	681 sym.lexpos = t1.lexpos

	682 t1 = targ[-1]

	683 sym.endlineno = getattr(t1,"endlineno",t1.lineno)

	684 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

	685

	686 # --! TRACKING

	687

	688 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	689 # The code enclosed in this section is duplicated

	690 # below as a performance optimization. Make sure

	691 # changes get made in both locations.

	692

	693 pslice.slice = targ

	694

	695 try:

	696 # Call the grammar rule with our special slice objec t

	697 del symstack[-plen:]

	698 del statestack[-plen:]

	699 p.callable(pslice)

	700 symstack.append(sym)

	701 state = goto[statestack[-1]][pname]

	702 statestack.append(state)

	703 except SyntaxError:

	704 # If an error was set. Enter error recovery state

	705 lookaheadstack.append(lookahead)

	706 symstack.pop()

	707 statestack.pop()

	708 state = statestack[-1]

	709 sym.type = 'error'

	710 lookahead = sym

	711 errorcount = error_count

	712 self.errorok = 0

	713 continue

	714 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	715

	716 else:

	717

	718 # --! TRACKING

	719 if tracking:

	720 sym.lineno = lexer.lineno

	721 sym.lexpos = lexer.lexpos

	722 # --! TRACKING

	723

	724 targ = [ sym ]

	725

	726 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	727 # The code enclosed in this section is duplicated

	728 # above as a performance optimization. Make sure

	729 # changes get made in both locations.

	730

	731 pslice.slice = targ

	732

	733 try:

	734 # Call the grammar rule with our special slice objec t

	735 p.callable(pslice)

	736 symstack.append(sym)

	737 state = goto[statestack[-1]][pname]

	738 statestack.append(state)

	739 except SyntaxError:

	740 # If an error was set. Enter error recovery state

	741 lookaheadstack.append(lookahead)

	742 symstack.pop()

	743 statestack.pop()

	744 state = statestack[-1]

	745 sym.type = 'error'

	746 lookahead = sym

	747 errorcount = error_count

	748 self.errorok = 0

	749 continue

	750 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	751

	752 if t == 0:

	753 n = symstack[-1]

	754 return getattr(n,"value",None)

	755

	756 if t == None:

	757

	758 # We have some kind of parsing error here. To handle

	759 # this, we are going to push the current token onto

	760 # the tokenstack and replace it with an 'error' token.

	761 # If there are any synchronization rules, they may

	762 # catch it.

	763 #

	764 # In addition to pushing the error token, we call call

	765 # the user defined p_error() function if this is the

	766 # first syntax error. This function is only called if

	767 # errorcount == 0.

	768 if errorcount == 0 or self.errorok:

	769 errorcount = error_count

	770 self.errorok = 0

	771 errtoken = lookahead

	772 if errtoken.type == '$end':

	773 errtoken = None # End of file!

	774 if self.errorfunc:

	775 global errok,token,restart

	776 errok = self.errok # Set some special functions a vailable in error recovery

	777 token = get_token

	778 restart = self.restart

	779 if errtoken and not hasattr(errtoken,'lexer'):

	780 errtoken.lexer = lexer

	781 tok = self.errorfunc(errtoken)

	782 del errok, token, restart # Delete special functions

	783

	784 if self.errorok:

	785 # User must have done some kind of panic

	786 # mode recovery on their own. The

	787 # returned token is the next lookahead

	788 lookahead = tok

	789 errtoken = None

	790 continue

	791 else:

	792 if errtoken:

	793 if hasattr(errtoken,"lineno"): lineno = lookahead.li neno

	794 else: lineno = 0

	795 if lineno:

	796 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))

	797 else:

	798 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)

	799 else:

	800 sys.stderr.write("yacc: Parse error in input. EOF\n" )

	801 return

	802

	803 else:

	804 errorcount = error_count

	805

	806 # case 1: the statestack only has 1 entry on it. If we're in t his state, the

	807 # entire parse has been rolled back and we're completely hosed. The token is

	808 # discarded and we just keep going.

	809

	810 if len(statestack) <= 1 and lookahead.type != '$end':

	811 lookahead = None

	812 errtoken = None

	813 state = 0

	814 # Nuke the pushback stack

	815 del lookaheadstack[:]

	816 continue

	817

	818 # case 2: the statestack has a couple of entries on it, but we'r e

	819 # at the end of the file. nuke the top entry and generate an err or token

	820

	821 # Start nuking entries on the stack

	822 if lookahead.type == '$end':

	823 # Whoa. We're really hosed here. Bail out

	824 return

	825

	826 if lookahead.type != 'error':

	827 sym = symstack[-1]

	828 if sym.type == 'error':

	829 # Hmmm. Error is on top of stack, we'll just nuke input

	830 # symbol and continue

	831 lookahead = None

	832 continue

	833 t = YaccSymbol()

	834 t.type = 'error'

	835 if hasattr(lookahead,"lineno"):

	836 t.lineno = lookahead.lineno

	837 t.value = lookahead

	838 lookaheadstack.append(lookahead)

	839 lookahead = t

	840 else:

	841 symstack.pop()

	842 statestack.pop()

	843 state = statestack[-1] # Potential bug fix

	844

	845 continue

	846

	847 # Call an error function here

	848 raise RuntimeError("yacc: internal parser error!!!\n")

	849

	850 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	851 # parseopt_notrack().

	852 #

	853 # Optimized version of parseopt() with line number tracking removed.

	854 # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove

	855 # code in the #--! TRACKING sections

	856 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!

	857

	858 def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc =None):

	859 lookahead = None # Current lookahead symbol

	860 lookaheadstack = [ ] # Stack of lookahead symbols

	861 actions = self.action # Local reference to action table (to a void lookup on self.)

	862 goto = self.goto # Local reference to goto table (to avo id lookup on self.)

	863 prod = self.productions # Local reference to production list (t o avoid lookup on self.)

	864 pslice = YaccProduction(None) # Production object passed to grammar r ules

	865 errorcount = 0 # Used during error recovery

	866

	867 # If no lexer was given, we will try to use the lex module

	868 if not lexer:

	869 lex = load_ply_lex()

	870 lexer = lex.lexer

	871

	872 # Set up the lexer and parser objects on pslice

	873 pslice.lexer = lexer

	874 pslice.parser = self

	875

	876 # If input was supplied, pass to lexer

	877 if input is not None:

	878 lexer.input(input)

	879

	880 if tokenfunc is None:

	881 # Tokenize function

	882 get_token = lexer.token

	883 else:

	884 get_token = tokenfunc

	885

	886 # Set up the state and symbol stacks

	887

	888 statestack = [ ] # Stack of parsing states

	889 self.statestack = statestack

	890 symstack = [ ] # Stack of grammar symbols

	891 self.symstack = symstack

	892

	893 pslice.stack = symstack # Put in the production

	894 errtoken = None # Err token

	895

	896 # The start state is assumed to be (0,$end)

	897

	898 statestack.append(0)

	899 sym = YaccSymbol()

	900 sym.type = '$end'

	901 symstack.append(sym)

	902 state = 0

	903 while 1:

	904 # Get the next symbol on the input. If a lookahead symbol

	905 # is already set, we just use that. Otherwise, we'll pull

	906 # the next token off of the lookaheadstack or from the lexer

	907

	908 if not lookahead:

	909 if not lookaheadstack:

	910 lookahead = get_token() # Get the next token

	911 else:

	912 lookahead = lookaheadstack.pop()

	913 if not lookahead:

	914 lookahead = YaccSymbol()

	915 lookahead.type = '$end'

	916

	917 # Check the action table

	918 ltype = lookahead.type

	919 t = actions[state].get(ltype)

	920

	921 if t is not None:

	922 if t > 0:

	923 # shift a symbol on the stack

	924 statestack.append(t)

	925 state = t

	926

	927 symstack.append(lookahead)

	928 lookahead = None

	929

	930 # Decrease error count on successful shift

	931 if errorcount: errorcount -=1

	932 continue

	933

	934 if t < 0:

	935 # reduce a symbol on the stack, emit a production

	936 p = prod[-t]

	937 pname = p.name

	938 plen = p.len

	939

	940 # Get production function

	941 sym = YaccSymbol()

	942 sym.type = pname # Production name

	943 sym.value = None

	944

	945 if plen:

	946 targ = symstack[-plen-1:]

	947 targ[0] = sym

	948

	949 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	950 # The code enclosed in this section is duplicated

	951 # below as a performance optimization. Make sure

	952 # changes get made in both locations.

	953

	954 pslice.slice = targ

	955

	956 try:

	957 # Call the grammar rule with our special slice objec t

	958 del symstack[-plen:]

	959 del statestack[-plen:]

	960 p.callable(pslice)

	961 symstack.append(sym)

	962 state = goto[statestack[-1]][pname]

	963 statestack.append(state)

	964 except SyntaxError:

	965 # If an error was set. Enter error recovery state

	966 lookaheadstack.append(lookahead)

	967 symstack.pop()

	968 statestack.pop()

	969 state = statestack[-1]

	970 sym.type = 'error'

	971 lookahead = sym

	972 errorcount = error_count

	973 self.errorok = 0

	974 continue

	975 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	976

	977 else:

	978

	979 targ = [ sym ]

	980

	981 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	982 # The code enclosed in this section is duplicated

	983 # above as a performance optimization. Make sure

	984 # changes get made in both locations.

	985

	986 pslice.slice = targ

	987

	988 try:

	989 # Call the grammar rule with our special slice objec t

	990 p.callable(pslice)

	991 symstack.append(sym)

	992 state = goto[statestack[-1]][pname]

	993 statestack.append(state)

	994 except SyntaxError:

	995 # If an error was set. Enter error recovery state

	996 lookaheadstack.append(lookahead)

	997 symstack.pop()

	998 statestack.pop()

	999 state = statestack[-1]

	1000 sym.type = 'error'

	1001 lookahead = sym

	1002 errorcount = error_count

	1003 self.errorok = 0

	1004 continue

	1005 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	1006

	1007 if t == 0:

	1008 n = symstack[-1]

	1009 return getattr(n,"value",None)

	1010

	1011 if t == None:

	1012

	1013 # We have some kind of parsing error here. To handle

	1014 # this, we are going to push the current token onto

	1015 # the tokenstack and replace it with an 'error' token.

	1016 # If there are any synchronization rules, they may

	1017 # catch it.

	1018 #

	1019 # In addition to pushing the error token, we call call

	1020 # the user defined p_error() function if this is the

	1021 # first syntax error. This function is only called if

	1022 # errorcount == 0.

	1023 if errorcount == 0 or self.errorok:

	1024 errorcount = error_count

	1025 self.errorok = 0

	1026 errtoken = lookahead

	1027 if errtoken.type == '$end':

	1028 errtoken = None # End of file!

	1029 if self.errorfunc:

	1030 global errok,token,restart

	1031 errok = self.errok # Set some special functions a vailable in error recovery

	1032 token = get_token

	1033 restart = self.restart

	1034 if errtoken and not hasattr(errtoken,'lexer'):

	1035 errtoken.lexer = lexer

	1036 tok = self.errorfunc(errtoken)

	1037 del errok, token, restart # Delete special functions

	1038

	1039 if self.errorok:

	1040 # User must have done some kind of panic

	1041 # mode recovery on their own. The

	1042 # returned token is the next lookahead

	1043 lookahead = tok

	1044 errtoken = None

	1045 continue

	1046 else:

	1047 if errtoken:

	1048 if hasattr(errtoken,"lineno"): lineno = lookahead.li neno

	1049 else: lineno = 0

	1050 if lineno:

	1051 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))

	1052 else:

	1053 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)

	1054 else:

	1055 sys.stderr.write("yacc: Parse error in input. EOF\n" )

	1056 return

	1057

	1058 else:

	1059 errorcount = error_count

	1060

	1061 # case 1: the statestack only has 1 entry on it. If we're in t his state, the

	1062 # entire parse has been rolled back and we're completely hosed. The token is

	1063 # discarded and we just keep going.

	1064

	1065 if len(statestack) <= 1 and lookahead.type != '$end':

	1066 lookahead = None

	1067 errtoken = None

	1068 state = 0

	1069 # Nuke the pushback stack

	1070 del lookaheadstack[:]

	1071 continue

	1072

	1073 # case 2: the statestack has a couple of entries on it, but we'r e

	1074 # at the end of the file. nuke the top entry and generate an err or token

	1075

	1076 # Start nuking entries on the stack

	1077 if lookahead.type == '$end':

	1078 # Whoa. We're really hosed here. Bail out

	1079 return

	1080

	1081 if lookahead.type != 'error':

	1082 sym = symstack[-1]

	1083 if sym.type == 'error':

	1084 # Hmmm. Error is on top of stack, we'll just nuke input

	1085 # symbol and continue

	1086 lookahead = None

	1087 continue

	1088 t = YaccSymbol()

	1089 t.type = 'error'

	1090 if hasattr(lookahead,"lineno"):

	1091 t.lineno = lookahead.lineno

	1092 t.value = lookahead

	1093 lookaheadstack.append(lookahead)

	1094 lookahead = t

	1095 else:

	1096 symstack.pop()

	1097 statestack.pop()

	1098 state = statestack[-1] # Potential bug fix

	1099

	1100 continue

	1101

	1102 # Call an error function here

	1103 raise RuntimeError("yacc: internal parser error!!!\n")

	1104

	1105 # -----------------------------------------------------------------------------

	1106 # === Grammar Representation ===

	1107 #

	1108 # The following functions, classes, and variables are used to represent and

	1109 # manipulate the rules that make up a grammar.

	1110 # -----------------------------------------------------------------------------

	1111

	1112 import re

	1113

	1114 # regex matching identifiers

	1115 _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

	1116

	1117 # -----------------------------------------------------------------------------

	1118 # class Production:

	1119 #

	1120 # This class stores the raw information about a single production or grammar rul e.

	1121 # A grammar rule refers to a specification such as this:

	1122 #

	1123 # expr : expr PLUS term

	1124 #

	1125 # Here are the basic attributes defined on all productions

	1126 #

	1127 # name - Name of the production. For example 'expr'

	1128 # prod - A list of symbols on the right side ['expr','PLUS','term']

	1129 # prec - Production precedence level

	1130 # number - Production number.

	1131 # func - Function that executes on reduce

	1132 # file - File where production function is defined

	1133 # lineno - Line number where production function is defined

	1134 #

	1135 # The following attributes are defined or optional.

	1136 #

	1137 # len - Length of the production (number of symbols on right hand si de)

	1138 # usyms - Set of unique symbols found in the production

	1139 # -----------------------------------------------------------------------------

	1140

	1141 class Production(object):

	1142 def __init__(self,number,name,prod,precedence=('right',0),func=None,file='', line=0):

	1143 self.name = name

	1144 self.prod = tuple(prod)

	1145 self.number = number

	1146 self.func = func

	1147 self.callable = None

	1148 self.file = file

	1149 self.line = line

	1150 self.prec = precedence

	1151

	1152 # Internal settings used during table construction

	1153

	1154 self.len = len(self.prod) # Length of the production

	1155

	1156 # Create a list of unique production symbols used in the production

	1157 self.usyms = [ ]

	1158 for s in self.prod:

	1159 if s not in self.usyms:

	1160 self.usyms.append(s)

	1161

	1162 # List of all LR items for the production

	1163 self.lr_items = []

	1164 self.lr_next = None

	1165

	1166 # Create a string representation

	1167 if self.prod:

	1168 self.str = "%s -> %s" % (self.name," ".join(self.prod))

	1169 else:

	1170 self.str = "%s -> <empty>" % self.name

	1171

	1172 def __str__(self):

	1173 return self.str

	1174

	1175 def __repr__(self):

	1176 return "Production("+str(self)+")"

	1177

	1178 def __len__(self):

	1179 return len(self.prod)

	1180

	1181 def __nonzero__(self):

	1182 return 1

	1183

	1184 def __getitem__(self,index):

	1185 return self.prod[index]

	1186

	1187 # Return the nth lr_item from the production (or None if at the end)

	1188 def lr_item(self,n):

	1189 if n > len(self.prod): return None

	1190 p = LRItem(self,n)

	1191

	1192 # Precompute the list of productions immediately following. Hack. Remov e later

	1193 try:

	1194 p.lr_after = Prodnames[p.prod[n+1]]

	1195 except (IndexError,KeyError):

	1196 p.lr_after = []

	1197 try:

	1198 p.lr_before = p.prod[n-1]

	1199 except IndexError:

	1200 p.lr_before = None

	1201

	1202 return p

	1203

	1204 # Bind the production function name to a callable

	1205 def bind(self,pdict):

	1206 if self.func:

	1207 self.callable = pdict[self.func]

	1208

	1209 # This class serves as a minimal standin for Production objects when

	1210 # reading table data from files. It only contains information

	1211 # actually used by the LR parsing engine, plus some additional

	1212 # debugging information.

	1213 class MiniProduction(object):

	1214 def __init__(self,str,name,len,func,file,line):

	1215 self.name = name

	1216 self.len = len

	1217 self.func = func

	1218 self.callable = None

	1219 self.file = file

	1220 self.line = line

	1221 self.str = str

	1222 def __str__(self):

	1223 return self.str

	1224 def __repr__(self):

	1225 return "MiniProduction(%s)" % self.str

	1226

	1227 # Bind the production function name to a callable

	1228 def bind(self,pdict):

	1229 if self.func:

	1230 self.callable = pdict[self.func]

	1231

	1232

	1233 # -----------------------------------------------------------------------------

	1234 # class LRItem

	1235 #

	1236 # This class represents a specific stage of parsing a production rule. For

	1237 # example:

	1238 #

	1239 # expr : expr . PLUS term

	1240 #

	1241 # In the above, the "." represents the current location of the parse. Here

	1242 # basic attributes:

	1243 #

	1244 # name - Name of the production. For example 'expr'

	1245 # prod - A list of symbols on the right side ['expr','.', 'PLUS','te rm']

	1246 # number - Production number.

	1247 #

	1248 # lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term '

	1249 # then lr_next refers to 'expr -> expr PLUS . term'

	1250 # lr_index - LR item index (location of the ".") in the prod list.

	1251 # lookaheads - LALR lookahead symbols for this item

	1252 # len - Length of the production (number of symbols on right hand s ide)

	1253 # lr_after - List of all productions that immediately follow

	1254 # lr_before - Grammar symbol immediately before

	1255 # -----------------------------------------------------------------------------

	1256

	1257 class LRItem(object):

	1258 def __init__(self,p,n):

	1259 self.name = p.name

	1260 self.prod = list(p.prod)

	1261 self.number = p.number

	1262 self.lr_index = n

	1263 self.lookaheads = { }

	1264 self.prod.insert(n,".")

	1265 self.prod = tuple(self.prod)

	1266 self.len = len(self.prod)

	1267 self.usyms = p.usyms

	1268

	1269 def __str__(self):

	1270 if self.prod:

	1271 s = "%s -> %s" % (self.name," ".join(self.prod))

	1272 else:

	1273 s = "%s -> <empty>" % self.name

	1274 return s

	1275

	1276 def __repr__(self):

	1277 return "LRItem("+str(self)+")"

	1278

	1279 def __len__(self):

	1280 return len(self.prod)

	1281

	1282 def __getitem__(self,index):

	1283 return self.prod[index]

	1284

	1285 # -----------------------------------------------------------------------------

	1286 # rightmost_terminal()

	1287 #

	1288 # Return the rightmost terminal from a list of symbols. Used in add_production( )

	1289 # -----------------------------------------------------------------------------

	1290 def rightmost_terminal(symbols, terminals):

	1291 i = len(symbols) - 1

	1292 while i >= 0:

	1293 if symbols[i] in terminals:

	1294 return symbols[i]

	1295 i -= 1

	1296 return None

	1297

	1298 # -----------------------------------------------------------------------------

	1299 # === GRAMMAR CLASS ===

	1300 #

	1301 # The following class represents the contents of the specified grammar along

	1302 # with various computed properties such as first sets, follow sets, LR items, et c.

	1303 # This data is used for critical parts of the table generation process later.

	1304 # -----------------------------------------------------------------------------

	1305

	1306 class GrammarError(YaccError): pass

	1307

	1308 class Grammar(object):

	1309 def __init__(self,terminals):

	1310 self.Productions = [None] # A list of all of the productions. The fir st

	1311 # entry is always reserved for the purpose o f

	1312 # building an augmented grammar

	1313

	1314 self.Prodnames = { } # A dictionary mapping the names of nontermi nals to a list of all

	1315 # productions of that nonterminal.

	1316

	1317 self.Prodmap = { } # A dictionary that is only used to detect d uplicate

	1318 # productions.

	1319

	1320 self.Terminals = { } # A dictionary mapping the names of terminal symbols to a

	1321 # list of the rules where they are used.

	1322

	1323 for term in terminals:

	1324 self.Terminals[term] = []

	1325

	1326 self.Terminals['error'] = []

	1327

	1328 self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list

	1329 # of rule numbers where they are used.

	1330

	1331 self.First = { } # A dictionary of precomputed FIRST(x) symbo ls

	1332

	1333 self.Follow = { } # A dictionary of precomputed FOLLOW(x) symb ols

	1334

	1335 self.Precedence = { } # Precedence rules for each terminal. Contai ns tuples of the

	1336 # form ('right',level) or ('nonassoc', level ) or ('left',level)

	1337

	1338 self.UsedPrecedence = { } # Precedence rules that were actually used b y the grammer.

	1339 # This is only used to provide error checkin g and to generate

	1340 # a warning about unused precedence rules.

	1341

	1342 self.Start = None # Starting symbol for the grammar

	1343

	1344

	1345 def __len__(self):

	1346 return len(self.Productions)

	1347

	1348 def __getitem__(self,index):

	1349 return self.Productions[index]

	1350

	1351 # -------------------------------------------------------------------------- ---

	1352 # set_precedence()

	1353 #

	1354 # Sets the precedence for a given terminal. assoc is the associativity such as

	1355 # 'left','right', or 'nonassoc'. level is a numeric level.

	1356 #

	1357 # -------------------------------------------------------------------------- ---

	1358

	1359 def set_precedence(self,term,assoc,level):

	1360 assert self.Productions == [None],"Must call set_precedence() before add _production()"

	1361 if term in self.Precedence:

	1362 raise GrammarError("Precedence already specified for terminal '%s'" % term)

	1363 if assoc not in ['left','right','nonassoc']:

	1364 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")

	1365 self.Precedence[term] = (assoc,level)

	1366

	1367 # -------------------------------------------------------------------------- ---

	1368 # add_production()

	1369 #

	1370 # Given an action function, this function assembles a production rule and

	1371 # computes its precedence level.

	1372 #

	1373 # The production rule is supplied as a list of symbols. For example,

	1374 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and

	1375 # symbols ['expr','PLUS','term'].

	1376 #

	1377 # Precedence is determined by the precedence of the right-most non-terminal

	1378 # or the precedence of a terminal specified by %prec.

	1379 #

	1380 # A variety of error checks are performed to make sure production symbols

	1381 # are valid and that %prec is used correctly.

	1382 # -------------------------------------------------------------------------- ---

	1383

	1384 def add_production(self,prodname,syms,func=None,file='',line=0):

	1385

	1386 if prodname in self.Terminals:

	1387 raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined a s a token" % (file,line,prodname))

	1388 if prodname == 'error':

	1389 raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserv ed word" % (file,line,prodname))

	1390 if not _is_identifier.match(prodname):

	1391 raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prod name))

	1392

	1393 # Look for literal tokens

	1394 for n,s in enumerate(syms):

	1395 if s[0] in "'\"":

	1396 try:

	1397 c = eval(s)

	1398 if (len(c) > 1):

	1399 raise GrammarError("%s:%d: Literal token %s in rule '% s' may only be a single character" % (file,line,s, prodname))

	1400 if not c in self.Terminals:

	1401 self.Terminals[c] = []

	1402 syms[n] = c

	1403 continue

	1404 except SyntaxError:

	1405 pass

	1406 if not _is_identifier.match(s) and s != '%prec':

	1407 raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (fi le,line,s, prodname))

	1408

	1409 # Determine the precedence level

	1410 if '%prec' in syms:

	1411 if syms[-1] == '%prec':

	1412 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))

	1413 if syms[-2] != '%prec':

	1414 raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))

	1415 precname = syms[-1]

	1416 prodprec = self.Precedence.get(precname,None)

	1417 if not prodprec:

	1418 raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))

	1419 else:

	1420 self.UsedPrecedence[precname] = 1

	1421 del syms[-2:] # Drop %prec from the rule

	1422 else:

	1423 # If no %prec, precedence is determined by the rightmost terminal sy mbol

	1424 precname = rightmost_terminal(syms,self.Terminals)

	1425 prodprec = self.Precedence.get(precname,('right',0))

	1426

	1427 # See if the rule is already in the rulemap

	1428 map = "%s -> %s" % (prodname,syms)

	1429 if map in self.Prodmap:

	1430 m = self.Prodmap[map]

	1431 raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +

	1432 "Previous definition at %s:%d" % (file,line, m.fi le, m.line))

	1433

	1434 # From this point on, everything is valid. Create a new Production inst ance

	1435 pnumber = len(self.Productions)

	1436 if not prodname in self.Nonterminals:

	1437 self.Nonterminals[prodname] = [ ]

	1438

	1439 # Add the production number to Terminals and Nonterminals

	1440 for t in syms:

	1441 if t in self.Terminals:

	1442 self.Terminals[t].append(pnumber)

	1443 else:

	1444 if not t in self.Nonterminals:

	1445 self.Nonterminals[t] = [ ]

	1446 self.Nonterminals[t].append(pnumber)

	1447

	1448 # Create a production and add it to the list of productions

	1449 p = Production(pnumber,prodname,syms,prodprec,func,file,line)

	1450 self.Productions.append(p)

	1451 self.Prodmap[map] = p

	1452

	1453 # Add to the global productions list

	1454 try:

	1455 self.Prodnames[prodname].append(p)

	1456 except KeyError:

	1457 self.Prodnames[prodname] = [ p ]

	1458 return 0

	1459

	1460 # -------------------------------------------------------------------------- ---

	1461 # set_start()

	1462 #

	1463 # Sets the starting symbol and creates the augmented grammar. Production

	1464 # rule 0 is S' -> start where start is the start symbol.

	1465 # -------------------------------------------------------------------------- ---

	1466

	1467 def set_start(self,start=None):

	1468 if not start:

	1469 start = self.Productions[1].name

	1470 if start not in self.Nonterminals:

	1471 raise GrammarError("start symbol %s undefined" % start)

	1472 self.Productions[0] = Production(0,"S'",[start])

	1473 self.Nonterminals[start].append(0)

	1474 self.Start = start

	1475

	1476 # -------------------------------------------------------------------------- ---

	1477 # find_unreachable()

	1478 #

	1479 # Find all of the nonterminal symbols that can't be reached from the startin g

	1480 # symbol. Returns a list of nonterminals that can't be reached.

	1481 # -------------------------------------------------------------------------- ---

	1482

	1483 def find_unreachable(self):

	1484

	1485 # Mark all symbols that are reachable from a symbol s

	1486 def mark_reachable_from(s):

	1487 if reachable[s]:

	1488 # We've already reached symbol s.

	1489 return

	1490 reachable[s] = 1

	1491 for p in self.Prodnames.get(s,[]):

	1492 for r in p.prod:

	1493 mark_reachable_from(r)

	1494

	1495 reachable = { }

	1496 for s in list(self.Terminals) + list(self.Nonterminals):

	1497 reachable[s] = 0

	1498

	1499 mark_reachable_from( self.Productions[0].prod[0] )

	1500

	1501 return [s for s in list(self.Nonterminals)

	1502 if not reachable[s]]

	1503

	1504 # -------------------------------------------------------------------------- ---

	1505 # infinite_cycles()

	1506 #

	1507 # This function looks at the various parsing rules and tries to detect

	1508 # infinite recursion cycles (grammar rules where there is no possible way

	1509 # to derive a string of only terminals).

	1510 # -------------------------------------------------------------------------- ---

	1511

	1512 def infinite_cycles(self):

	1513 terminates = {}

	1514

	1515 # Terminals:

	1516 for t in self.Terminals:

	1517 terminates[t] = 1

	1518

	1519 terminates['$end'] = 1

	1520

	1521 # Nonterminals:

	1522

	1523 # Initialize to false:

	1524 for n in self.Nonterminals:

	1525 terminates[n] = 0

	1526

	1527 # Then propagate termination until no change:

	1528 while 1:

	1529 some_change = 0

	1530 for (n,pl) in self.Prodnames.items():

	1531 # Nonterminal n terminates iff any of its productions terminates .

	1532 for p in pl:

	1533 # Production p terminates iff all of its rhs symbols termina te.

	1534 for s in p.prod:

	1535 if not terminates[s]:

	1536 # The symbol s does not terminate,

	1537 # so production p does not terminate.

	1538 p_terminates = 0

	1539 break

	1540 else:

	1541 # didn't break from the loop,

	1542 # so every symbol s terminates

	1543 # so production p terminates.

	1544 p_terminates = 1

	1545

	1546 if p_terminates:

	1547 # symbol n terminates!

	1548 if not terminates[n]:

	1549 terminates[n] = 1

	1550 some_change = 1

	1551 # Don't need to consider any more productions for this n .

	1552 break

	1553

	1554 if not some_change:

	1555 break

	1556

	1557 infinite = []

	1558 for (s,term) in terminates.items():

	1559 if not term:

	1560 if not s in self.Prodnames and not s in self.Terminals and s != 'error':

	1561 # s is used-but-not-defined, and we've already warned of tha t,

	1562 # so it would be overkill to say that it's also non-terminat ing.

	1563 pass

	1564 else:

	1565 infinite.append(s)

	1566

	1567 return infinite

	1568

	1569

	1570 # -------------------------------------------------------------------------- ---

	1571 # undefined_symbols()

	1572 #

	1573 # Find all symbols that were used the grammar, but not defined as tokens or

	1574 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symb ol

	1575 # and prod is the production where the symbol was used.

	1576 # -------------------------------------------------------------------------- ---

	1577 def undefined_symbols(self):

	1578 result = []

	1579 for p in self.Productions:

	1580 if not p: continue

	1581

	1582 for s in p.prod:

	1583 if not s in self.Prodnames and not s in self.Terminals and s != 'error':

	1584 result.append((s,p))

	1585 return result

	1586

	1587 # -------------------------------------------------------------------------- ---

	1588 # unused_terminals()

	1589 #

	1590 # Find all terminals that were defined, but not used by the grammar. Return s

	1591 # a list of all symbols.

	1592 # -------------------------------------------------------------------------- ---

	1593 def unused_terminals(self):

	1594 unused_tok = []

	1595 for s,v in self.Terminals.items():

	1596 if s != 'error' and not v:

	1597 unused_tok.append(s)

	1598

	1599 return unused_tok

	1600

	1601 # -------------------------------------------------------------------------- ----

	1602 # unused_rules()

	1603 #

	1604 # Find all grammar rules that were defined, but not used (maybe not reachab le)

	1605 # Returns a list of productions.

	1606 # -------------------------------------------------------------------------- ----

	1607

	1608 def unused_rules(self):

	1609 unused_prod = []

	1610 for s,v in self.Nonterminals.items():

	1611 if not v:

	1612 p = self.Prodnames[s][0]

	1613 unused_prod.append(p)

	1614 return unused_prod

	1615

	1616 # -------------------------------------------------------------------------- ---

	1617 # unused_precedence()

	1618 #

	1619 # Returns a list of tuples (term,precedence) corresponding to precedence

	1620 # rules that were never used by the grammar. term is the name of the termin al

	1621 # on which precedence was applied and precedence is a string such as 'left' or

	1622 # 'right' corresponding to the type of precedence.

	1623 # -------------------------------------------------------------------------- ---

	1624

	1625 def unused_precedence(self):

	1626 unused = []

	1627 for termname in self.Precedence:

	1628 if not (termname in self.Terminals or termname in self.UsedPrecedenc e):

	1629 unused.append((termname,self.Precedence[termname][0]))

	1630

	1631 return unused

	1632

	1633 # -------------------------------------------------------------------------

	1634 # _first()

	1635 #

	1636 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.

	1637 #

	1638 # During execution of compute_first1, the result may be incomplete.

	1639 # Afterward (e.g., when called from compute_follow()), it will be complete.

	1640 # -------------------------------------------------------------------------

	1641 def _first(self,beta):

	1642

	1643 # We are computing First(x1,x2,x3,...,xn)

	1644 result = [ ]

	1645 for x in beta:

	1646 x_produces_empty = 0

	1647

	1648 # Add all the non-<empty> symbols of First[x] to the result.

	1649 for f in self.First[x]:

	1650 if f == '<empty>':

	1651 x_produces_empty = 1

	1652 else:

	1653 if f not in result: result.append(f)

	1654

	1655 if x_produces_empty:

	1656 # We have to consider the next x in beta,

	1657 # i.e. stay in the loop.

	1658 pass

	1659 else:

	1660 # We don't have to consider any further symbols in beta.

	1661 break

	1662 else:

	1663 # There was no 'break' from the loop,

	1664 # so x_produces_empty was true for all x in beta,

	1665 # so beta produces empty as well.

	1666 result.append('<empty>')

	1667

	1668 return result

	1669

	1670 # -------------------------------------------------------------------------

	1671 # compute_first()

	1672 #

	1673 # Compute the value of FIRST1(X) for all symbols

	1674 # -------------------------------------------------------------------------

	1675 def compute_first(self):

	1676 if self.First:

	1677 return self.First

	1678

	1679 # Terminals:

	1680 for t in self.Terminals:

	1681 self.First[t] = [t]

	1682

	1683 self.First['$end'] = ['$end']

	1684

	1685 # Nonterminals:

	1686

	1687 # Initialize to the empty set:

	1688 for n in self.Nonterminals:

	1689 self.First[n] = []

	1690

	1691 # Then propagate symbols until no change:

	1692 while 1:

	1693 some_change = 0

	1694 for n in self.Nonterminals:

	1695 for p in self.Prodnames[n]:

	1696 for f in self._first(p.prod):

	1697 if f not in self.First[n]:

	1698 self.First[n].append( f )

	1699 some_change = 1

	1700 if not some_change:

	1701 break

	1702

	1703 return self.First

	1704

	1705 # ---------------------------------------------------------------------

	1706 # compute_follow()

	1707 #

	1708 # Computes all of the follow sets for every non-terminal symbol. The

	1709 # follow set is the set of all symbols that might follow a given

	1710 # non-terminal. See the Dragon book, 2nd Ed. p. 189.

	1711 # ---------------------------------------------------------------------

	1712 def compute_follow(self,start=None):

	1713 # If already computed, return the result

	1714 if self.Follow:

	1715 return self.Follow

	1716

	1717 # If first sets not computed yet, do that first.

	1718 if not self.First:

	1719 self.compute_first()

	1720

	1721 # Add '$end' to the follow list of the start symbol

	1722 for k in self.Nonterminals:

	1723 self.Follow[k] = [ ]

	1724

	1725 if not start:

	1726 start = self.Productions[1].name

	1727

	1728 self.Follow[start] = [ '$end' ]

	1729

	1730 while 1:

	1731 didadd = 0

	1732 for p in self.Productions[1:]:

	1733 # Here is the production set

	1734 for i in range(len(p.prod)):

	1735 B = p.prod[i]

	1736 if B in self.Nonterminals:

	1737 # Okay. We got a non-terminal in a production

	1738 fst = self._first(p.prod[i+1:])

	1739 hasempty = 0

	1740 for f in fst:

	1741 if f != '<empty>' and f not in self.Follow[B]:

	1742 self.Follow[B].append(f)

	1743 didadd = 1

	1744 if f == '<empty>':

	1745 hasempty = 1

	1746 if hasempty or i == (len(p.prod)-1):

	1747 # Add elements of follow(a) to follow(b)

	1748 for f in self.Follow[p.name]:

	1749 if f not in self.Follow[B]:

	1750 self.Follow[B].append(f)

	1751 didadd = 1

	1752 if not didadd: break

	1753 return self.Follow

	1754

	1755

	1756 # -------------------------------------------------------------------------- ---

	1757 # build_lritems()

	1758 #

	1759 # This function walks the list of productions and builds a complete set of t he

	1760 # LR items. The LR items are stored in two ways: First, they are uniquely

	1761 # numbered and placed in the list _lritems. Second, a linked list of LR ite ms

	1762 # is built for each production. For example:

	1763 #

	1764 # E -> E PLUS E

	1765 #

	1766 # Creates the list

	1767 #

	1768 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]

	1769 # -------------------------------------------------------------------------- ---

	1770

	1771 def build_lritems(self):

	1772 for p in self.Productions:

	1773 lastlri = p

	1774 i = 0

	1775 lr_items = []

	1776 while 1:

	1777 if i > len(p):

	1778 lri = None

	1779 else:

	1780 lri = LRItem(p,i)

	1781 # Precompute the list of productions immediately following

	1782 try:

	1783 lri.lr_after = self.Prodnames[lri.prod[i+1]]

	1784 except (IndexError,KeyError):

	1785 lri.lr_after = []

	1786 try:

	1787 lri.lr_before = lri.prod[i-1]

	1788 except IndexError:

	1789 lri.lr_before = None

	1790

	1791 lastlri.lr_next = lri

	1792 if not lri: break

	1793 lr_items.append(lri)

	1794 lastlri = lri

	1795 i += 1

	1796 p.lr_items = lr_items

	1797

	1798 # -----------------------------------------------------------------------------

	1799 # == Class LRTable ==

	1800 #

	1801 # This basic class represents a basic table of LR parsing information.

	1802 # Methods for generating the tables are not defined here. They are defined

	1803 # in the derived class LRGeneratedTable.

	1804 # -----------------------------------------------------------------------------

	1805

	1806 class VersionError(YaccError): pass

	1807

	1808 class LRTable(object):

	1809 def __init__(self):

	1810 self.lr_action = None

	1811 self.lr_goto = None

	1812 self.lr_productions = None

	1813 self.lr_method = None

	1814

	1815 def read_table(self,module):

	1816 if isinstance(module,types.ModuleType):

	1817 parsetab = module

	1818 else:

	1819 if sys.version_info[0] < 3:

	1820 exec("import %s as parsetab" % module)

	1821 else:

	1822 env = { }

	1823 exec("import %s as parsetab" % module, env, env)

	1824 parsetab = env['parsetab']

	1825

	1826 if parsetab._tabversion != __tabversion__:

	1827 raise VersionError("yacc table file version is out of date")

	1828

	1829 self.lr_action = parsetab._lr_action

	1830 self.lr_goto = parsetab._lr_goto

	1831

	1832 self.lr_productions = []

	1833 for p in parsetab._lr_productions:

	1834 self.lr_productions.append(MiniProduction(*p))

	1835

	1836 self.lr_method = parsetab._lr_method

	1837 return parsetab._lr_signature

	1838

	1839 # Bind all production function names to callable objects in pdict

	1840 def bind_callables(self,pdict):

	1841 for p in self.lr_productions:

	1842 p.bind(pdict)

	1843

	1844 # -----------------------------------------------------------------------------

	1845 # === LR Generator ===

	1846 #

	1847 # The following classes and functions are used to generate LR parsing tables on

	1848 # a grammar.

	1849 # -----------------------------------------------------------------------------

	1850

	1851 # -----------------------------------------------------------------------------

	1852 # digraph()

	1853 # traverse()

	1854 #

	1855 # The following two functions are used to compute set valued functions

	1856 # of the form:

	1857 #

	1858 # F(x) = F'(x) U U{F(y) \| x R y}

	1859 #

	1860 # This is used to compute the values of Read() sets as well as FOLLOW sets

	1861 # in LALR(1) generation.

	1862 #

	1863 # Inputs: X - An input set

	1864 # R - A relation

	1865 # FP - Set-valued function

	1866 # ------------------------------------------------------------------------------

	1867

	1868 def digraph(X,R,FP):

	1869 N = { }

	1870 for x in X:

	1871 N[x] = 0

	1872 stack = []

	1873 F = { }

	1874 for x in X:

	1875 if N[x] == 0: traverse(x,N,stack,F,X,R,FP)

	1876 return F

	1877

	1878 def traverse(x,N,stack,F,X,R,FP):

	1879 stack.append(x)

	1880 d = len(stack)

	1881 N[x] = d

	1882 F[x] = FP(x) # F(X) <- F'(x)

	1883

	1884 rel = R(x) # Get y's related to x

	1885 for y in rel:

	1886 if N[y] == 0:

	1887 traverse(y,N,stack,F,X,R,FP)

	1888 N[x] = min(N[x],N[y])

	1889 for a in F.get(y,[]):

	1890 if a not in F[x]: F[x].append(a)

	1891 if N[x] == d:

	1892 N[stack[-1]] = MAXINT

	1893 F[stack[-1]] = F[x]

	1894 element = stack.pop()

	1895 while element != x:

	1896 N[stack[-1]] = MAXINT

	1897 F[stack[-1]] = F[x]

	1898 element = stack.pop()

	1899

	1900 class LALRError(YaccError): pass

	1901

	1902 # -----------------------------------------------------------------------------

	1903 # == LRGeneratedTable ==

	1904 #

	1905 # This class implements the LR table generation algorithm. There are no

	1906 # public methods except for write()

	1907 # -----------------------------------------------------------------------------

	1908

	1909 class LRGeneratedTable(LRTable):

	1910 def __init__(self,grammar,method='LALR',log=None):

	1911 if method not in ['SLR','LALR']:

	1912 raise LALRError("Unsupported method %s" % method)

	1913

	1914 self.grammar = grammar

	1915 self.lr_method = method

	1916

	1917 # Set up the logger

	1918 if not log:

	1919 log = NullLogger()

	1920 self.log = log

	1921

	1922 # Internal attributes

	1923 self.lr_action = {} # Action table

	1924 self.lr_goto = {} # Goto table

	1925 self.lr_productions = grammar.Productions # Copy of grammar Producti on array

	1926 self.lr_goto_cache = {} # Cache of computed gotos

	1927 self.lr0_cidhash = {} # Cache of closures

	1928

	1929 self._add_count = 0 # Internal counter used to detect cycles

	1930

	1931 # Diagonistic information filled in by the table generator

	1932 self.sr_conflict = 0

	1933 self.rr_conflict = 0

	1934 self.conflicts = [] # List of conflicts

	1935

	1936 self.sr_conflicts = []

	1937 self.rr_conflicts = []

	1938

	1939 # Build the tables

	1940 self.grammar.build_lritems()

	1941 self.grammar.compute_first()

	1942 self.grammar.compute_follow()

	1943 self.lr_parse_table()

	1944

	1945 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

	1946

	1947 def lr0_closure(self,I):

	1948 self._add_count += 1

	1949

	1950 # Add everything in I to J

	1951 J = I[:]

	1952 didadd = 1

	1953 while didadd:

	1954 didadd = 0

	1955 for j in J:

	1956 for x in j.lr_after:

	1957 if getattr(x,"lr0_added",0) == self._add_count: continue

	1958 # Add B --> .G to J

	1959 J.append(x.lr_next)

	1960 x.lr0_added = self._add_count

	1961 didadd = 1

	1962

	1963 return J

	1964

	1965 # Compute the LR(0) goto function goto(I,X) where I is a set

	1966 # of LR(0) items and X is a grammar symbol. This function is written

	1967 # in a way that guarantees uniqueness of the generated goto sets

	1968 # (i.e. the same goto set will never be returned as two different Python

	1969 # objects). With uniqueness, we can later do fast set comparisons using

	1970 # id(obj) instead of element-wise comparison.

	1971

	1972 def lr0_goto(self,I,x):

	1973 # First we look for a previously cached entry

	1974 g = self.lr_goto_cache.get((id(I),x),None)

	1975 if g: return g

	1976

	1977 # Now we generate the goto set in a way that guarantees uniqueness

	1978 # of the result

	1979

	1980 s = self.lr_goto_cache.get(x,None)

	1981 if not s:

	1982 s = { }

	1983 self.lr_goto_cache[x] = s

	1984

	1985 gs = [ ]

	1986 for p in I:

	1987 n = p.lr_next

	1988 if n and n.lr_before == x:

	1989 s1 = s.get(id(n),None)

	1990 if not s1:

	1991 s1 = { }

	1992 s[id(n)] = s1

	1993 gs.append(n)

	1994 s = s1

	1995 g = s.get('$end',None)

	1996 if not g:

	1997 if gs:

	1998 g = self.lr0_closure(gs)

	1999 s['$end'] = g

	2000 else:

	2001 s['$end'] = gs

	2002 self.lr_goto_cache[(id(I),x)] = g

	2003 return g

	2004

	2005 # Compute the LR(0) sets of item function

	2006 def lr0_items(self):

	2007

	2008 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]

	2009 i = 0

	2010 for I in C:

	2011 self.lr0_cidhash[id(I)] = i

	2012 i += 1

	2013

	2014 # Loop over the items in C and each grammar symbols

	2015 i = 0

	2016 while i < len(C):

	2017 I = C[i]

	2018 i += 1

	2019

	2020 # Collect all of the symbols that could possibly be in the goto(I,X) sets

	2021 asyms = { }

	2022 for ii in I:

	2023 for s in ii.usyms:

	2024 asyms[s] = None

	2025

	2026 for x in asyms:

	2027 g = self.lr0_goto(I,x)

	2028 if not g: continue

	2029 if id(g) in self.lr0_cidhash: continue

	2030 self.lr0_cidhash[id(g)] = len(C)

	2031 C.append(g)

	2032

	2033 return C

	2034

	2035 # -------------------------------------------------------------------------- ---

	2036 # ==== LALR(1) Parsing ====

	2037 #

	2038 # LALR(1) parsing is almost exactly the same as SLR except that instead of

	2039 # relying upon Follow() sets when performing reductions, a more selective

	2040 # lookahead set that incorporates the state of the LR(0) machine is utilized .

	2041 # Thus, we mainly just have to focus on calculating the lookahead sets.

	2042 #

	2043 # The method used here is due to DeRemer and Pennelo (1982).

	2044 #

	2045 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)

	2046 # Lookahead Sets", ACM Transactions on Programming Languages and Systems ,

	2047 # Vol. 4, No. 4, Oct. 1982, pp. 615-649

	2048 #

	2049 # Further details can also be found in:

	2050 #

	2051 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing ",

	2052 # McGraw-Hill Book Company, (1985).

	2053 #

	2054 # -------------------------------------------------------------------------- ---

	2055

	2056 # -------------------------------------------------------------------------- ---

	2057 # compute_nullable_nonterminals()

	2058 #

	2059 # Creates a dictionary containing all of the non-terminals that might produc e

	2060 # an empty production.

	2061 # -------------------------------------------------------------------------- ---

	2062

	2063 def compute_nullable_nonterminals(self):

	2064 nullable = {}

	2065 num_nullable = 0

	2066 while 1:

	2067 for p in self.grammar.Productions[1:]:

	2068 if p.len == 0:

	2069 nullable[p.name] = 1

	2070 continue

	2071 for t in p.prod:

	2072 if not t in nullable: break

	2073 else:

	2074 nullable[p.name] = 1

	2075 if len(nullable) == num_nullable: break

	2076 num_nullable = len(nullable)

	2077 return nullable

	2078

	2079 # -------------------------------------------------------------------------- ---

	2080 # find_nonterminal_trans(C)

	2081 #

	2082 # Given a set of LR(0) items, this functions finds all of the non-terminal

	2083 # transitions. These are transitions in which a dot appears immediately b efore

	2084 # a non-terminal. Returns a list of tuples of the form (state,N) where sta te

	2085 # is the state number and N is the nonterminal symbol.

	2086 #

	2087 # The input C is the set of LR(0) items.

	2088 # -------------------------------------------------------------------------- ---

	2089

	2090 def find_nonterminal_transitions(self,C):

	2091 trans = []

	2092 for state in range(len(C)):

	2093 for p in C[state]:

	2094 if p.lr_index < p.len - 1:

	2095 t = (state,p.prod[p.lr_index+1])

	2096 if t[1] in self.grammar.Nonterminals:

	2097 if t not in trans: trans.append(t)

	2098 state = state + 1

	2099 return trans

	2100

	2101 # -------------------------------------------------------------------------- ---

	2102 # dr_relation()

	2103 #

	2104 # Computes the DR(p,A) relationships for non-terminal transitions. The inpu t

	2105 # is a tuple (state,N) where state is a number and N is a nonterminal symbol .

	2106 #

	2107 # Returns a list of terminals.

	2108 # -------------------------------------------------------------------------- ---

	2109

	2110 def dr_relation(self,C,trans,nullable):

	2111 dr_set = { }

	2112 state,N = trans

	2113 terms = []

	2114

	2115 g = self.lr0_goto(C[state],N)

	2116 for p in g:

	2117 if p.lr_index < p.len - 1:

	2118 a = p.prod[p.lr_index+1]

	2119 if a in self.grammar.Terminals:

	2120 if a not in terms: terms.append(a)

	2121

	2122 # This extra bit is to handle the start state

	2123 if state == 0 and N == self.grammar.Productions[0].prod[0]:

	2124 terms.append('$end')

	2125

	2126 return terms

	2127

	2128 # -------------------------------------------------------------------------- ---

	2129 # reads_relation()

	2130 #

	2131 # Computes the READS() relation (p,A) READS (t,C).

	2132 # -------------------------------------------------------------------------- ---

	2133

	2134 def reads_relation(self,C, trans, empty):

	2135 # Look for empty transitions

	2136 rel = []

	2137 state, N = trans

	2138

	2139 g = self.lr0_goto(C[state],N)

	2140 j = self.lr0_cidhash.get(id(g),-1)

	2141 for p in g:

	2142 if p.lr_index < p.len - 1:

	2143 a = p.prod[p.lr_index + 1]

	2144 if a in empty:

	2145 rel.append((j,a))

	2146

	2147 return rel

	2148

	2149 # -------------------------------------------------------------------------- ---

	2150 # compute_lookback_includes()

	2151 #

	2152 # Determines the lookback and includes relations

	2153 #

	2154 # LOOKBACK:

	2155 #

	2156 # This relation is determined by running the LR(0) state machine forward.

	2157 # For example, starting with a production "N : . A B C", we run it forward

	2158 # to obtain "N : A B C ." We then build a relationship between this final

	2159 # state and the starting state. These relationships are stored in a dictio nary

	2160 # lookdict.

	2161 #

	2162 # INCLUDES:

	2163 #

	2164 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).

	2165 #

	2166 # This relation is used to determine non-terminal transitions that occur

	2167 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)

	2168 # if the following holds:

	2169 #

	2170 # B -> LAT, where T -> epsilon and p' -L-> p

	2171 #

	2172 # L is essentially a prefix (which may be empty), T is a suffix that must be

	2173 # able to derive an empty string. State p' must lead to state p with the st ring L.

	2174 #

	2175 # -------------------------------------------------------------------------- ---

	2176

	2177 def compute_lookback_includes(self,C,trans,nullable):

	2178

	2179 lookdict = {} # Dictionary of lookback relations

	2180 includedict = {} # Dictionary of include relations

	2181

	2182 # Make a dictionary of non-terminal transitions

	2183 dtrans = {}

	2184 for t in trans:

	2185 dtrans[t] = 1

	2186

	2187 # Loop over all transitions and compute lookbacks and includes

	2188 for state,N in trans:

	2189 lookb = []

	2190 includes = []

	2191 for p in C[state]:

	2192 if p.name != N: continue

	2193

	2194 # Okay, we have a name match. We now follow the production all the way

	2195 # through the state machine until we get the . on the right hand side

	2196

	2197 lr_index = p.lr_index

	2198 j = state

	2199 while lr_index < p.len - 1:

	2200 lr_index = lr_index + 1

	2201 t = p.prod[lr_index]

	2202

	2203 # Check to see if this symbol and state are a non-terminal transition

	2204 if (j,t) in dtrans:

	2205 # Yes. Okay, there is some chance that this is an in cludes relation

	2206 # the only way to know for certain is whether the res t of the

	2207 # production derives empty

	2208

	2209 li = lr_index + 1

	2210 while li < p.len:

	2211 if p.prod[li] in self.grammar.Terminals: break # No forget it

	2212 if not p.prod[li] in nullable: break

	2213 li = li + 1

	2214 else:

	2215 # Appears to be a relation between (j,t) and (st ate,N)

	2216 includes.append((j,t))

	2217

	2218 g = self.lr0_goto(C[j],t) # Go to next set

	2219 j = self.lr0_cidhash.get(id(g),-1) # Go to next state

	2220

	2221 # When we get here, j is the final state, now we have to locate the production

	2222 for r in C[j]:

	2223 if r.name != p.name: continue

	2224 if r.len != p.len: continue

	2225 i = 0

	2226 # This look is comparing a production ". A B C" with "A B C ."

	2227 while i < r.lr_index:

	2228 if r.prod[i] != p.prod[i+1]: break

	2229 i = i + 1

	2230 else:

	2231 lookb.append((j,r))

	2232 for i in includes:

	2233 if not i in includedict: includedict[i] = []

	2234 includedict[i].append((state,N))

	2235 lookdict[(state,N)] = lookb

	2236

	2237 return lookdict,includedict

	2238

	2239 # -------------------------------------------------------------------------- ---

	2240 # compute_read_sets()

	2241 #

	2242 # Given a set of LR(0) items, this function computes the read sets.

	2243 #

	2244 # Inputs: C = Set of LR(0) items

	2245 # ntrans = Set of nonterminal transitions

	2246 # nullable = Set of empty transitions

	2247 #

	2248 # Returns a set containing the read sets

	2249 # -------------------------------------------------------------------------- ---

	2250

	2251 def compute_read_sets(self,C, ntrans, nullable):

	2252 FP = lambda x: self.dr_relation(C,x,nullable)

	2253 R = lambda x: self.reads_relation(C,x,nullable)

	2254 F = digraph(ntrans,R,FP)

	2255 return F

	2256

	2257 # -------------------------------------------------------------------------- ---

	2258 # compute_follow_sets()

	2259 #

	2260 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,

	2261 # and an include set, this function computes the follow sets

	2262 #

	2263 # Follow(p,A) = Read(p,A) U U {Follow(p',B) \| (p,A) INCLUDES (p',B)}

	2264 #

	2265 # Inputs:

	2266 # ntrans = Set of nonterminal transitions

	2267 # readsets = Readset (previously computed)

	2268 # inclsets = Include sets (previously computed)

	2269 #

	2270 # Returns a set containing the follow sets

	2271 # -------------------------------------------------------------------------- ---

	2272

	2273 def compute_follow_sets(self,ntrans,readsets,inclsets):

	2274 FP = lambda x: readsets[x]

	2275 R = lambda x: inclsets.get(x,[])

	2276 F = digraph(ntrans,R,FP)

	2277 return F

	2278

	2279 # -------------------------------------------------------------------------- ---

	2280 # add_lookaheads()

	2281 #

	2282 # Attaches the lookahead symbols to grammar rules.

	2283 #

	2284 # Inputs: lookbacks - Set of lookback relations

	2285 # followset - Computed follow set

	2286 #

	2287 # This function directly attaches the lookaheads to productions contained

	2288 # in the lookbacks set

	2289 # -------------------------------------------------------------------------- ---

	2290

	2291 def add_lookaheads(self,lookbacks,followset):

	2292 for trans,lb in lookbacks.items():

	2293 # Loop over productions in lookback

	2294 for state,p in lb:

	2295 if not state in p.lookaheads:

	2296 p.lookaheads[state] = []

	2297 f = followset.get(trans,[])

	2298 for a in f:

	2299 if a not in p.lookaheads[state]: p.lookaheads[state].appen d(a)

	2300

	2301 # -------------------------------------------------------------------------- ---

	2302 # add_lalr_lookaheads()

	2303 #

	2304 # This function does all of the work of adding lookahead information for use

	2305 # with LALR parsing

	2306 # -------------------------------------------------------------------------- ---

	2307

	2308 def add_lalr_lookaheads(self,C):

	2309 # Determine all of the nullable nonterminals

	2310 nullable = self.compute_nullable_nonterminals()

	2311

	2312 # Find all non-terminal transitions

	2313 trans = self.find_nonterminal_transitions(C)

	2314

	2315 # Compute read sets

	2316 readsets = self.compute_read_sets(C,trans,nullable)

	2317

	2318 # Compute lookback/includes relations

	2319 lookd, included = self.compute_lookback_includes(C,trans,nullable)

	2320

	2321 # Compute LALR FOLLOW sets

	2322 followsets = self.compute_follow_sets(trans,readsets,included)

	2323

	2324 # Add all of the lookaheads

	2325 self.add_lookaheads(lookd,followsets)

	2326

	2327 # -------------------------------------------------------------------------- ---

	2328 # lr_parse_table()

	2329 #

	2330 # This function constructs the parse tables for SLR or LALR

	2331 # -------------------------------------------------------------------------- ---

	2332 def lr_parse_table(self):

	2333 goto = self.lr_goto # Goto array

	2334 action = self.lr_action # Action array

	2335 log = self.log # Logger for output

	2336

	2337 actionp = { } # Action production array (temporary)

	2338

	2339 log.info("Parsing method: %s", self.lr_method)

	2340

	2341 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items

	2342 # This determines the number of states

	2343

	2344 C = self.lr0_items()

	2345

	2346 if self.lr_method == 'LALR':

	2347 self.add_lalr_lookaheads(C)

	2348

	2349 # Build the parser table, state by state

	2350 st = 0

	2351 for I in C:

	2352 # Loop over each production in I

	2353 actlist = [ ] # List of actions

	2354 st_action = { }

	2355 st_actionp = { }

	2356 st_goto = { }

	2357 log.info("")

	2358 log.info("state %d", st)

	2359 log.info("")

	2360 for p in I:

	2361 log.info(" (%d) %s", p.number, str(p))

	2362 log.info("")

	2363

	2364 for p in I:

	2365 if p.len == p.lr_index + 1:

	2366 if p.name == "S'":

	2367 # Start symbol. Accept!

	2368 st_action["$end"] = 0

	2369 st_actionp["$end"] = p

	2370 else:

	2371 # We are at the end of a production. Reduce!

	2372 if self.lr_method == 'LALR':

	2373 laheads = p.lookaheads[st]

	2374 else:

	2375 laheads = self.grammar.Follow[p.name]

	2376 for a in laheads:

	2377 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))

	2378 r = st_action.get(a,None)

	2379 if r is not None:

	2380 # Whoa. Have a shift/reduce or reduce/reduce conflict

	2381 if r > 0:

	2382 # Need to decide on shift or reduce here

	2383 # By default we favor shifting. Need to add

	2384 # some precedence rules here.

	2385 sprec,slevel = self.grammar.Productions[ st_actionp[a].number].prec

	2386 rprec,rlevel = self.grammar.Precedence.g et(a,('right',0))

	2387 if (slevel < rlevel) or ((slevel == rlev el) and (rprec == 'left')):

	2388 # We really need to reduce here.

	2389 st_action[a] = -p.number

	2390 st_actionp[a] = p

	2391 if not slevel and not rlevel:

	2392 log.info(" ! shift/reduce confl ict for %s resolved as reduce",a)

	2393 self.sr_conflicts.append((st,a,' reduce'))

	2394 elif (slevel == rlevel) and (rprec == 'n onassoc'):

	2395 st_action[a] = None

	2396 else:

	2397 # Hmmm. Guess we'll keep the shift

	2398 if not rlevel:

	2399 log.info(" ! shift/reduce confl ict for %s resolved as shift",a)

	2400 self.sr_conflicts.append((st,a,' shift'))

	2401 elif r < 0:

	2402 # Reduce/reduce conflict. In this case , we favor the rule

	2403 # that was defined first in the grammar file

	2404 oldp = self.grammar.Productions[-r]

	2405 pp = self.grammar.Productions[p.number]

	2406 if oldp.line > pp.line:

	2407 st_action[a] = -p.number

	2408 st_actionp[a] = p

	2409 chosenp,rejectp = pp,oldp

	2410 else:

	2411 chosenp,rejectp = oldp,pp

	2412 self.rr_conflicts.append((st,chosenp,rej ectp))

	2413 log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])

	2414 else:

	2415 raise LALRError("Unknown conflict in sta te %d" % st)

	2416 else:

	2417 st_action[a] = -p.number

	2418 st_actionp[a] = p

	2419 else:

	2420 i = p.lr_index

	2421 a = p.prod[i+1] # Get symbol right after the "."

	2422 if a in self.grammar.Terminals:

	2423 g = self.lr0_goto(I,a)

	2424 j = self.lr0_cidhash.get(id(g),-1)

	2425 if j >= 0:

	2426 # We are in a shift state

	2427 actlist.append((a,p,"shift and go to state %d" % j))

	2428 r = st_action.get(a,None)

	2429 if r is not None:

	2430 # Whoa have a shift/reduce or shift/shift co nflict

	2431 if r > 0:

	2432 if r != j:

	2433 raise LALRError("Shift/shift conflic t in state %d" % st)

	2434 elif r < 0:

	2435 # Do a precedence check.

	2436 # - if precedence of reduce rule is h igher, we reduce.

	2437 # - if precedence of reduce is same a nd left assoc, we reduce.

	2438 # - otherwise we shift

	2439 rprec,rlevel = self.grammar.Productions[ st_actionp[a].number].prec

	2440 sprec,slevel = self.grammar.Precedence.g et(a,('right',0))

	2441 if (slevel > rlevel) or ((slevel == rlev el) and (rprec == 'right')):

	2442 # We decide to shift here... highest precedence to shift

	2443 st_action[a] = j

	2444 st_actionp[a] = p

	2445 if not rlevel:

	2446 log.info(" ! shift/reduce confl ict for %s resolved as shift",a)

	2447 self.sr_conflicts.append((st,a,' shift'))

	2448 elif (slevel == rlevel) and (rprec == 'n onassoc'):

	2449 st_action[a] = None

	2450 else:

	2451 # Hmmm. Guess we'll keep the reduce

	2452 if not slevel and not rlevel:

	2453 log.info(" ! shift/reduce confl ict for %s resolved as reduce",a)

	2454 self.sr_conflicts.append((st,a,' reduce'))

	2455

	2456 else:

	2457 raise LALRError("Unknown conflict in sta te %d" % st)

	2458 else:

	2459 st_action[a] = j

	2460 st_actionp[a] = p

	2461

	2462 # Print the actions associated with each terminal

	2463 _actprint = { }

	2464 for a,p,m in actlist:

	2465 if a in st_action:

	2466 if p is st_actionp[a]:

	2467 log.info(" %-15s %s",a,m)

	2468 _actprint[(a,m)] = 1

	2469 log.info("")

	2470 # Print the actions that were not used. (debugging)

	2471 not_used = 0

	2472 for a,p,m in actlist:

	2473 if a in st_action:

	2474 if p is not st_actionp[a]:

	2475 if not (a,m) in _actprint:

	2476 log.debug(" ! %-15s [ %s ]",a,m)

	2477 not_used = 1

	2478 _actprint[(a,m)] = 1

	2479 if not_used:

	2480 log.debug("")

	2481

	2482 # Construct the goto table for this state

	2483

	2484 nkeys = { }

	2485 for ii in I:

	2486 for s in ii.usyms:

	2487 if s in self.grammar.Nonterminals:

	2488 nkeys[s] = None

	2489 for n in nkeys:

	2490 g = self.lr0_goto(I,n)

	2491 j = self.lr0_cidhash.get(id(g),-1)

	2492 if j >= 0:

	2493 st_goto[n] = j

	2494 log.info(" %-30s shift and go to state %d",n,j)

	2495

	2496 action[st] = st_action

	2497 actionp[st] = st_actionp

	2498 goto[st] = st_goto

	2499 st += 1

	2500

	2501

	2502 # -------------------------------------------------------------------------- ---

	2503 # write()

	2504 #

	2505 # This function writes the LR parsing tables to a file

	2506 # -------------------------------------------------------------------------- ---

	2507

	2508 def write_table(self,modulename,outputdir='',signature=""):

	2509 basemodulename = modulename.split(".")[-1]

	2510 filename = os.path.join(outputdir,basemodulename) + ".py"

	2511 try:

	2512 f = open(filename,"w")

	2513

	2514 f.write("""

	2515 # %s

	2516 # This file is automatically generated. Do not edit.

	2517 _tabversion = %r

	2518

	2519 _lr_method = %r

	2520

	2521 _lr_signature = %r

	2522 """ % (filename, __tabversion__, self.lr_method, signature))

	2523

	2524 # Change smaller to 0 to go back to original tables

	2525 smaller = 1

	2526

	2527 # Factor out names to try and make smaller

	2528 if smaller:

	2529 items = { }

	2530

	2531 for s,nd in self.lr_action.items():

	2532 for name,v in nd.items():

	2533 i = items.get(name)

	2534 if not i:

	2535 i = ([],[])

	2536 items[name] = i

	2537 i[0].append(s)

	2538 i[1].append(v)

	2539

	2540 f.write("\n_lr_action_items = {")

	2541 for k,v in items.items():

	2542 f.write("%r:([" % k)

	2543 for i in v[0]:

	2544 f.write("%r," % i)

	2545 f.write("],[")

	2546 for i in v[1]:

	2547 f.write("%r," % i)

	2548

	2549 f.write("]),")

	2550 f.write("}\n")

	2551

	2552 f.write("""

	2553 _lr_action = { }

	2554 for _k, _v in _lr_action_items.items():

	2555 for _x,_y in zip(_v[0],_v[1]):

	2556 if not _x in _lr_action: _lr_action[_x] = { }

	2557 _lr_action[_x][_k] = _y

	2558 del _lr_action_items

	2559 """)

	2560

	2561 else:

	2562 f.write("\n_lr_action = { ");

	2563 for k,v in self.lr_action.items():

	2564 f.write("(%r,%r):%r," % (k[0],k[1],v))

	2565 f.write("}\n");

	2566

	2567 if smaller:

	2568 # Factor out names to try and make smaller

	2569 items = { }

	2570

	2571 for s,nd in self.lr_goto.items():

	2572 for name,v in nd.items():

	2573 i = items.get(name)

	2574 if not i:

	2575 i = ([],[])

	2576 items[name] = i

	2577 i[0].append(s)

	2578 i[1].append(v)

	2579

	2580 f.write("\n_lr_goto_items = {")

	2581 for k,v in items.items():

	2582 f.write("%r:([" % k)

	2583 for i in v[0]:

	2584 f.write("%r," % i)

	2585 f.write("],[")

	2586 for i in v[1]:

	2587 f.write("%r," % i)

	2588

	2589 f.write("]),")

	2590 f.write("}\n")

	2591

	2592 f.write("""

	2593 _lr_goto = { }

	2594 for _k, _v in _lr_goto_items.items():

	2595 for _x,_y in zip(_v[0],_v[1]):

	2596 if not _x in _lr_goto: _lr_goto[_x] = { }

	2597 _lr_goto[_x][_k] = _y

	2598 del _lr_goto_items

	2599 """)

	2600 else:

	2601 f.write("\n_lr_goto = { ");

	2602 for k,v in self.lr_goto.items():

	2603 f.write("(%r,%r):%r," % (k[0],k[1],v))

	2604 f.write("}\n");

	2605

	2606 # Write production table

	2607 f.write("_lr_productions = [\n")

	2608 for p in self.lr_productions:

	2609 if p.func:

	2610 f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p .func,p.file,p.line))

	2611 else:

	2612 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p .len))

	2613 f.write("]\n")

	2614 f.close()

	2615

	2616 except IOError:

	2617 e = sys.exc_info()[1]

	2618 sys.stderr.write("Unable to create '%s'\n" % filename)

	2619 sys.stderr.write(str(e)+"\n")

	2620 return

	2621

	2622

	2623 # -----------------------------------------------------------------------------

	2624 # === INTROSPECTION ===

	2625 #

	2626 # The following functions and classes are used to implement the PLY

	2627 # introspection features followed by the yacc() function itself.

	2628 # -----------------------------------------------------------------------------

	2629

	2630 # -----------------------------------------------------------------------------

	2631 # get_caller_module_dict()

	2632 #

	2633 # This function returns a dictionary containing all of the symbols defined withi n

	2634 # a caller further down the call stack. This is used to get the environment

	2635 # associated with the yacc() call if none was provided.

	2636 # -----------------------------------------------------------------------------

	2637

	2638 def get_caller_module_dict(levels):

	2639 try:

	2640 raise RuntimeError

	2641 except RuntimeError:

	2642 e,b,t = sys.exc_info()

	2643 f = t.tb_frame

	2644 while levels > 0:

	2645 f = f.f_back

	2646 levels -= 1

	2647 ldict = f.f_globals.copy()

	2648 if f.f_globals != f.f_locals:

	2649 ldict.update(f.f_locals)

	2650

	2651 return ldict

	2652

	2653 # -----------------------------------------------------------------------------

	2654 # parse_grammar()

	2655 #

	2656 # This takes a raw grammar rule string and parses it into production data

	2657 # -----------------------------------------------------------------------------

	2658 def parse_grammar(doc,file,line):

	2659 grammar = []

	2660 # Split the doc string into lines

	2661 pstrings = doc.splitlines()

	2662 lastp = None

	2663 dline = line

	2664 for ps in pstrings:

	2665 dline += 1

	2666 p = ps.split()

	2667 if not p: continue

	2668 try:

	2669 if p[0] == '\|':

	2670 # This is a continuation of a previous rule

	2671 if not lastp:

	2672 raise SyntaxError("%s:%d: Misplaced '\|'" % (file,dline))

	2673 prodname = lastp

	2674 syms = p[1:]

	2675 else:

	2676 prodname = p[0]

	2677 lastp = prodname

	2678 syms = p[2:]

	2679 assign = p[1]

	2680 if assign != ':' and assign != '::=':

	2681 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (fil e,dline))

	2682

	2683 grammar.append((file,dline,prodname,syms))

	2684 except SyntaxError:

	2685 raise

	2686 except Exception:

	2687 raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,p s.strip()))

	2688

	2689 return grammar

	2690

	2691 # -----------------------------------------------------------------------------

	2692 # ParserReflect()

	2693 #

	2694 # This class represents information extracted for building a parser including

	2695 # start symbol, error function, tokens, precedence list, action functions,

	2696 # etc.

	2697 # -----------------------------------------------------------------------------

	2698 class ParserReflect(object):

	2699 def __init__(self,pdict,log=None):

	2700 self.pdict = pdict

	2701 self.start = None

	2702 self.error_func = None

	2703 self.tokens = None

	2704 self.files = {}

	2705 self.grammar = []

	2706 self.error = 0

	2707

	2708 if log is None:

	2709 self.log = PlyLogger(sys.stderr)

	2710 else:

	2711 self.log = log

	2712

	2713 # Get all of the basic information

	2714 def get_all(self):

	2715 self.get_start()

	2716 self.get_error_func()

	2717 self.get_tokens()

	2718 self.get_precedence()

	2719 self.get_pfunctions()

	2720

	2721 # Validate all of the information

	2722 def validate_all(self):

	2723 self.validate_start()

	2724 self.validate_error_func()

	2725 self.validate_tokens()

	2726 self.validate_precedence()

	2727 self.validate_pfunctions()

	2728 self.validate_files()

	2729 return self.error

	2730

	2731 # Compute a signature over the grammar

	2732 def signature(self):

	2733 from binascii import crc32

	2734 sig = 0

	2735 try:

	2736 if self.start:

	2737 sig = crc32(self.start.encode('latin-1'),sig)

	2738 if self.prec:

	2739 sig = crc32("".join(["".join(p) for p in self.prec]).encode('lat in-1'),sig)

	2740 if self.tokens:

	2741 sig = crc32(" ".join(self.tokens).encode('latin-1'),sig)

	2742 for f in self.pfuncs:

	2743 if f[3]:

	2744 sig = crc32(f[3].encode('latin-1'),sig)

	2745 except (TypeError,ValueError):

	2746 pass

	2747 return sig

	2748

	2749 # -------------------------------------------------------------------------- ---

	2750 # validate_file()

	2751 #

	2752 # This method checks to see if there are duplicated p_rulename() functions

	2753 # in the parser module file. Without this function, it is really easy for

	2754 # users to make mistakes by cutting and pasting code fragments (and it's a r eal

	2755 # bugger to try and figure out why the resulting parser doesn't work). Ther efore,

	2756 # we just do a little regular expression pattern matching of def statements

	2757 # to try and detect duplicates.

	2758 # -------------------------------------------------------------------------- ---

	2759

	2760 def validate_files(self):

	2761 # Match def p_funcname(

	2762 fre = re.compile(r'\sdef\s+(p_[a-zA-Z_0-9])\(')

	2763

	2764 for filename in self.files.keys():

	2765 base,ext = os.path.splitext(filename)

	2766 if ext != '.py': return 1 # No idea. Assume it's okay.

	2767

	2768 try:

	2769 f = open(filename)

	2770 lines = f.readlines()

	2771 f.close()

	2772 except IOError:

	2773 continue

	2774

	2775 counthash = { }

	2776 for linen,l in enumerate(lines):

	2777 linen += 1

	2778 m = fre.match(l)

	2779 if m:

	2780 name = m.group(1)

	2781 prev = counthash.get(name)

	2782 if not prev:

	2783 counthash[name] = linen

	2784 else:

	2785 self.log.warning("%s:%d: Function %s redefined. Previous ly defined on line %d", filename,linen,name,prev)

	2786

	2787 # Get the start symbol

	2788 def get_start(self):

	2789 self.start = self.pdict.get('start')

	2790

	2791 # Validate the start symbol

	2792 def validate_start(self):

	2793 if self.start is not None:

	2794 if not isinstance(self.start,str):

	2795 self.log.error("'start' must be a string")

	2796

	2797 # Look for error handler

	2798 def get_error_func(self):

	2799 self.error_func = self.pdict.get('p_error')

	2800

	2801 # Validate the error function

	2802 def validate_error_func(self):

	2803 if self.error_func:

	2804 if isinstance(self.error_func,types.FunctionType):

	2805 ismethod = 0

	2806 elif isinstance(self.error_func, types.MethodType):

	2807 ismethod = 1

	2808 else:

	2809 self.log.error("'p_error' defined, but is not a function or meth od")

	2810 self.error = 1

	2811 return

	2812

	2813 eline = func_code(self.error_func).co_firstlineno

	2814 efile = func_code(self.error_func).co_filename

	2815 self.files[efile] = 1

	2816

	2817 if (func_code(self.error_func).co_argcount != 1+ismethod):

	2818 self.log.error("%s:%d: p_error() requires 1 argument",efile,elin e)

	2819 self.error = 1

	2820

	2821 # Get the tokens map

	2822 def get_tokens(self):

	2823 tokens = self.pdict.get("tokens",None)

	2824 if not tokens:

	2825 self.log.error("No token list is defined")

	2826 self.error = 1

	2827 return

	2828

	2829 if not isinstance(tokens,(list, tuple)):

	2830 self.log.error("tokens must be a list or tuple")

	2831 self.error = 1

	2832 return

	2833

	2834 if not tokens:

	2835 self.log.error("tokens is empty")

	2836 self.error = 1

	2837 return

	2838

	2839 self.tokens = tokens

	2840

	2841 # Validate the tokens

	2842 def validate_tokens(self):

	2843 # Validate the tokens.

	2844 if 'error' in self.tokens:

	2845 self.log.error("Illegal token name 'error'. Is a reserved word")

	2846 self.error = 1

	2847 return

	2848

	2849 terminals = {}

	2850 for n in self.tokens:

	2851 if n in terminals:

	2852 self.log.warning("Token '%s' multiply defined", n)

	2853 terminals[n] = 1

	2854

	2855 # Get the precedence map (if any)

	2856 def get_precedence(self):

	2857 self.prec = self.pdict.get("precedence",None)

	2858

	2859 # Validate and parse the precedence map

	2860 def validate_precedence(self):

	2861 preclist = []

	2862 if self.prec:

	2863 if not isinstance(self.prec,(list,tuple)):

	2864 self.log.error("precedence must be a list or tuple")

	2865 self.error = 1

	2866 return

	2867 for level,p in enumerate(self.prec):

	2868 if not isinstance(p,(list,tuple)):

	2869 self.log.error("Bad precedence table")

	2870 self.error = 1

	2871 return

	2872

	2873 if len(p) < 2:

	2874 self.log.error("Malformed precedence entry %s. Must be (asso c, term, ..., term)",p)

	2875 self.error = 1

	2876 return

	2877 assoc = p[0]

	2878 if not isinstance(assoc,str):

	2879 self.log.error("precedence associativity must be a string")

	2880 self.error = 1

	2881 return

	2882 for term in p[1:]:

	2883 if not isinstance(term,str):

	2884 self.log.error("precedence items must be strings")

	2885 self.error = 1

	2886 return

	2887 preclist.append((term,assoc,level+1))

	2888 self.preclist = preclist

	2889

	2890 # Get all p_functions from the grammar

	2891 def get_pfunctions(self):

	2892 p_functions = []

	2893 for name, item in self.pdict.items():

	2894 if name[:2] != 'p_': continue

	2895 if name == 'p_error': continue

	2896 if isinstance(item,(types.FunctionType,types.MethodType)):

	2897 line = func_code(item).co_firstlineno

	2898 file = func_code(item).co_filename

	2899 p_functions.append((line,file,name,item.__doc__))

	2900

	2901 # Sort all of the actions by line number

	2902 p_functions.sort()

	2903 self.pfuncs = p_functions

	2904

	2905

	2906 # Validate all of the p_functions

	2907 def validate_pfunctions(self):

	2908 grammar = []

	2909 # Check for non-empty symbols

	2910 if len(self.pfuncs) == 0:

	2911 self.log.error("no rules of the form p_rulename are defined")

	2912 self.error = 1

	2913 return

	2914

	2915 for line, file, name, doc in self.pfuncs:

	2916 func = self.pdict[name]

	2917 if isinstance(func, types.MethodType):

	2918 reqargs = 2

	2919 else:

	2920 reqargs = 1

	2921 if func_code(func).co_argcount > reqargs:

	2922 self.log.error("%s:%d: Rule '%s' has too many arguments",file,li ne,func.__name__)

	2923 self.error = 1

	2924 elif func_code(func).co_argcount < reqargs:

	2925 self.log.error("%s:%d: Rule '%s' requires an argument",file,line ,func.__name__)

	2926 self.error = 1

	2927 elif not func.__doc__:

	2928 self.log.warning("%s:%d: No documentation string specified in fu nction '%s' (ignored)",file,line,func.__name__)

	2929 else:

	2930 try:

	2931 parsed_g = parse_grammar(doc,file,line)

	2932 for g in parsed_g:

	2933 grammar.append((name, g))

	2934 except SyntaxError:

	2935 e = sys.exc_info()[1]

	2936 self.log.error(str(e))

	2937 self.error = 1

	2938

	2939 # Looks like a valid grammar rule

	2940 # Mark the file in which defined.

	2941 self.files[file] = 1

	2942

	2943 # Secondary validation step that looks for p_ definitions that are not f unctions

	2944 # or functions that look like they might be grammar rules.

	2945

	2946 for n,v in self.pdict.items():

	2947 if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.Metho dType)): continue

	2948 if n[0:2] == 't_': continue

	2949 if n[0:2] == 'p_' and n != 'p_error':

	2950 self.log.warning("'%s' not defined as a function", n)

	2951 if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount = = 1) or

	2952 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):

	2953 try:

	2954 doc = v.__doc__.split(" ")

	2955 if doc[1] == ':':

	2956 self.log.warning("%s:%d: Possible grammar rule '%s' defi ned without p_ prefix",

	2957 func_code(v).co_filename, func_code(v). co_firstlineno,n)

	2958 except Exception:

	2959 pass

	2960

	2961 self.grammar = grammar

	2962

	2963 # -----------------------------------------------------------------------------

	2964 # yacc(module)

	2965 #

	2966 # Build a parser

	2967 # -----------------------------------------------------------------------------

	2968

	2969 def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star t=None,

	2970 check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,out putdir='',

	2971 debuglog=None, errorlog = None):

	2972

	2973 global parse # Reference to the parsing method of the last b uilt parser

	2974

	2975 if errorlog is None:

	2976 errorlog = PlyLogger(sys.stderr)

	2977

	2978 # Get the module dictionary used for the parser

	2979 if module:

	2980 _items = [(k,getattr(module,k)) for k in dir(module)]

	2981 pdict = dict(_items)

	2982 else:

	2983 pdict = get_caller_module_dict(2)

	2984

	2985 # Collect parser information from the dictionary

	2986 pinfo = ParserReflect(pdict,log=errorlog)

	2987 pinfo.get_all()

	2988

	2989 if pinfo.error:

	2990 raise YaccError("Unable to build parser")

	2991

	2992 # Check signature against table files (if any)

	2993 signature = pinfo.signature()

	2994

	2995 # Read the tables

	2996 try:

	2997 lr = LRTable()

	2998 read_signature = lr.read_table(tabmodule)

	2999 if optimize or (read_signature == signature):

	3000 try:

	3001 lr.bind_callables(pinfo.pdict)

	3002 parser = LRParser(lr,pinfo.error_func)

	3003 parse = parser.parse

	3004 return parser

	3005 except Exception:

	3006 e = sys.exc_info()[1]

	3007 errorlog.warning("There was a problem loading the table file: %s ", repr(e))

	3008 except VersionError:

	3009 e = sys.exc_info()

	3010 errorlog.warning(str(e))

	3011 except Exception:

	3012 pass

	3013

	3014 if debuglog is None:

	3015 if debug:

	3016 debuglog = PlyLogger(open(debugfile,"w"))

	3017 else:

	3018 debuglog = NullLogger()

	3019

	3020 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __ver sion__)

	3021

	3022

	3023 errors = 0

	3024

	3025 # Validate the parser information

	3026 if pinfo.validate_all():

	3027 raise YaccError("Unable to build parser")

	3028

	3029 if not pinfo.error_func:

	3030 errorlog.warning("no p_error() function is defined")

	3031

	3032 # Create a grammar object

	3033 grammar = Grammar(pinfo.tokens)

	3034

	3035 # Set precedence level for terminals

	3036 for term, assoc, level in pinfo.preclist:

	3037 try:

	3038 grammar.set_precedence(term,assoc,level)

	3039 except GrammarError:

	3040 e = sys.exc_info()[1]

	3041 errorlog.warning("%s",str(e))

	3042

	3043 # Add productions to the grammar

	3044 for funcname, gram in pinfo.grammar:

	3045 file, line, prodname, syms = gram

	3046 try:

	3047 grammar.add_production(prodname,syms,funcname,file,line)

	3048 except GrammarError:

	3049 e = sys.exc_info()[1]

	3050 errorlog.error("%s",str(e))

	3051 errors = 1

	3052

	3053 # Set the grammar start symbols

	3054 try:

	3055 if start is None:

	3056 grammar.set_start(pinfo.start)

	3057 else:

	3058 grammar.set_start(start)

	3059 except GrammarError:

	3060 e = sys.exc_info()[1]

	3061 errorlog.error(str(e))

	3062 errors = 1

	3063

	3064 if errors:

	3065 raise YaccError("Unable to build parser")

	3066

	3067 # Verify the grammar structure

	3068 undefined_symbols = grammar.undefined_symbols()

	3069 for sym, prod in undefined_symbols:

	3070 errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)

	3071 errors = 1

	3072

	3073 unused_terminals = grammar.unused_terminals()

	3074 if unused_terminals:

	3075 debuglog.info("")

	3076 debuglog.info("Unused terminals:")

	3077 debuglog.info("")

	3078 for term in unused_terminals:

	3079 errorlog.warning("Token '%s' defined, but not used", term)

	3080 debuglog.info(" %s", term)

	3081

	3082 # Print out all productions to the debug log

	3083 if debug:

	3084 debuglog.info("")

	3085 debuglog.info("Grammar")

	3086 debuglog.info("")

	3087 for n,p in enumerate(grammar.Productions):

	3088 debuglog.info("Rule %-5d %s", n+1, p)

	3089

	3090 # Find unused non-terminals

	3091 unused_rules = grammar.unused_rules()

	3092 for prod in unused_rules:

	3093 errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, pr od.line, prod.name)

	3094

	3095 if len(unused_terminals) == 1:

	3096 errorlog.warning("There is 1 unused token")

	3097 if len(unused_terminals) > 1:

	3098 errorlog.warning("There are %d unused tokens", len(unused_terminals))

	3099

	3100 if len(unused_rules) == 1:

	3101 errorlog.warning("There is 1 unused rule")

	3102 if len(unused_rules) > 1:

	3103 errorlog.warning("There are %d unused rules", len(unused_rules))

	3104

	3105 if debug:

	3106 debuglog.info("")

	3107 debuglog.info("Terminals, with rules where they appear")

	3108 debuglog.info("")

	3109 terms = list(grammar.Terminals)

	3110 terms.sort()

	3111 for term in terms:

	3112 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar. Terminals[term]]))

	3113

	3114 debuglog.info("")

	3115 debuglog.info("Nonterminals, with rules where they appear")

	3116 debuglog.info("")

	3117 nonterms = list(grammar.Nonterminals)

	3118 nonterms.sort()

	3119 for nonterm in nonterms:

	3120 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in gramm ar.Nonterminals[nonterm]]))

	3121 debuglog.info("")

	3122

	3123 if check_recursion:

	3124 unreachable = grammar.find_unreachable()

	3125 for u in unreachable:

	3126 errorlog.warning("Symbol '%s' is unreachable",u)

	3127

	3128 infinite = grammar.infinite_cycles()

	3129 for inf in infinite:

	3130 errorlog.error("Infinite recursion detected for symbol '%s'", inf)

	3131 errors = 1

	3132

	3133 unused_prec = grammar.unused_precedence()

	3134 for term, assoc in unused_prec:

	3135 errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", a ssoc, term)

	3136 errors = 1

	3137

	3138 if errors:

	3139 raise YaccError("Unable to build parser")

	3140

	3141 # Run the LRGeneratedTable on the grammar

	3142 if debug:

	3143 errorlog.debug("Generating %s tables", method)

	3144

	3145 lr = LRGeneratedTable(grammar,method,debuglog)

	3146

	3147 if debug:

	3148 num_sr = len(lr.sr_conflicts)

	3149

	3150 # Report shift/reduce and reduce/reduce conflicts

	3151 if num_sr == 1:

	3152 errorlog.warning("1 shift/reduce conflict")

	3153 elif num_sr > 1:

	3154 errorlog.warning("%d shift/reduce conflicts", num_sr)

	3155

	3156 num_rr = len(lr.rr_conflicts)

	3157 if num_rr == 1:

	3158 errorlog.warning("1 reduce/reduce conflict")

	3159 elif num_rr > 1:

	3160 errorlog.warning("%d reduce/reduce conflicts", num_rr)

	3161

	3162 # Write out conflicts to the output file

	3163 if debug and (lr.sr_conflicts or lr.rr_conflicts):

	3164 debuglog.warning("")

	3165 debuglog.warning("Conflicts:")

	3166 debuglog.warning("")

	3167

	3168 for state, tok, resolution in lr.sr_conflicts:

	3169 debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)

	3170

	3171 for state, rule, rejected in lr.rr_conflicts:

	3172 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)

	3173 debuglog.warning("rejected rule (%s)", rejected)

	3174 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)

	3175 errorlog.warning("rejected rule (%s)", rejected)

	3176

	3177 # Write the table file if requested

	3178 if write_tables:

	3179 lr.write_table(tabmodule,outputdir,signature)

	3180

	3181 # Build the parser

	3182 lr.bind_callables(pinfo.pdict)

	3183 parser = LRParser(lr,pinfo.error_func)

	3184

	3185 parse = parser.parse

	3186 return parser

OLD	NEW

« no previous file with comments | « tools/nixysa/third_party/ply-3.1/ply/lex.py ('k') | tools/nixysa/third_party/ply-3.1/setup.py » ('j') | no next file with comments »