Chromium Code Reviews

Side by Side Diff: tools/nixysa/third_party/ply-3.1/ply/yacc.py

Issue 2043006: WTF NPAPI extension. Early draft.
Base URL: http://src.chromium.org/svn/trunk/src/
Patch Set: Created 10 years, 7 months ago
Property Changes:
Added: svn:eol-style
+ LF
(The previous version of this file is empty; the file is added in this patch.)
1 # -----------------------------------------------------------------------------
2 # ply: yacc.py
3 #
4 # Author(s): David M. Beazley (dave@dabeaz.com)
5 #
6 # Copyright (C) 2001-2009, David M. Beazley
7 #
8 # This library is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU Lesser General Public
10 # License as published by the Free Software Foundation; either
11 # version 2.1 of the License, or (at your option) any later version.
12 #
13 # This library is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # Lesser General Public License for more details.
17 #
18 # You should have received a copy of the GNU Lesser General Public
19 # License along with this library; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #
22 # See the file COPYING for a complete copy of the LGPL.
23 #
24 #
25 # This implements an LR parser that is constructed from grammar rules defined
26 # as Python functions. The grammar is specified by supplying the BNF inside
27 # Python documentation strings. The inspiration for this technique was borrowed
28 # from John Aycock's Spark parsing system. PLY might be viewed as a cross between
29 # Spark and the GNU bison utility.
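#
# For illustration (a sketch, not part of the original file): a user of this
# module writes each grammar rule as a function whose docstring carries the
# BNF and whose body computes the value of the left-hand side, e.g.
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]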
30 #
31 # The current implementation is only somewhat object-oriented. The
32 # LR parser itself is defined in terms of an object (which allows multiple
33 # parsers to co-exist). However, most of the variables used during table
34 # construction are defined in terms of global variables. Users shouldn't
35 # notice unless they are trying to define multiple parsers at the same
36 # time using threads (in which case they should have their head examined).
37 #
38 # This implementation supports both SLR and LALR(1) parsing. LALR(1)
39 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
40 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
41 # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
42 # by the more efficient DeRemer and Pennello algorithm.
43 #
44 # :::::::: WARNING :::::::
45 #
46 # Construction of LR parsing tables is fairly complicated and expensive.
47 # To make this module run fast, a *LOT* of work has been put into
48 # optimization---often at the expense of readability and what one might
49 # consider to be good Python "coding style." Modify the code at your
50 # own risk!
51 # ----------------------------------------------------------------------------
52
53 __version__ = "3.0"
54 __tabversion__ = "3.0" # Table version
55
56 #-----------------------------------------------------------------------------
57 # === User configurable parameters ===
58 #
59 # Change these to modify the default behavior of yacc (if you wish)
60 #-----------------------------------------------------------------------------
61
62 yaccdebug = 1 # Debugging mode. If set, yacc generates
63 # a 'parser.out' file in the current directory
64
65 debug_file = 'parser.out' # Default name of the debugging file
66 tab_module = 'parsetab' # Default name of the table module
67 default_lr = 'LALR' # Default LR table generation method
68
69 error_count = 3 # Number of symbols that must be shifted to leave recovery mode
70
71 yaccdevel = 0 # Set to True if developing yacc. This turns off optimized
72 # implementations of certain functions.
73
74 resultlimit = 40 # Size limit of results when running in debug mode.
75
76 import re, types, sys, os.path
77
78 # Compatibility function for python 2.6/3.0
79 if sys.version_info[0] < 3:
80 def func_code(f):
81 return f.func_code
82 else:
83 def func_code(f):
84 return f.__code__
85
86 # Compatibility
87 try:
88 MAXINT = sys.maxint
89 except AttributeError:
90 MAXINT = sys.maxsize
91
92 # Python 2.x/3.0 compatibility.
93 def load_ply_lex():
94 if sys.version_info[0] < 3:
95 import lex
96 else:
97 import ply.lex as lex
98 return lex
99
100 # This object is a stand-in for a logging object created by the
101 # logging module. PLY will use this by default to create things
102 # such as the parser.out file. If a user wants more detailed
103 # information, they can create their own logging object and pass
104 # it into PLY.
105
106 class PlyLogger(object):
107 def __init__(self,f):
108 self.f = f
109 def debug(self,msg,*args,**kwargs):
110 self.f.write((msg % args) + "\n")
111 info = debug
112
113 def warning(self,msg,*args,**kwargs):
114 self.f.write("WARNING: "+ (msg % args) + "\n")
115
116 def error(self,msg,*args,**kwargs):
117 self.f.write("ERROR: " + (msg % args) + "\n")
118
119 critical = debug
120
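# A sketch of the intended use (assuming the module-level yacc() entry point
# defined later in this file, outside this diff chunk): any object exposing
# debug/info/warning/error/critical methods can stand in for PlyLogger, for
# example one built with the standard logging module:
#
#     import logging
#     logging.basicConfig(level=logging.DEBUG, filename='parselog.txt')
#     log = logging.getLogger()
#     parser = yacc.yacc(debuglog=log, errorlog=log)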
121 # Null logger is used when no output is generated. Does nothing.
122 class NullLogger(object):
123 def __getattribute__(self,name):
124 return self
125 def __call__(self,*args,**kwargs):
126 return self
127
128 # Exception raised for yacc-related errors
129 class YaccError(Exception): pass
130
131 # Format the result message that the parser produces when running in debug mode.
132 def format_result(r):
133 repr_str = repr(r)
134 if '\n' in repr_str: repr_str = repr(repr_str)
135 if len(repr_str) > resultlimit:
136 repr_str = repr_str[:resultlimit]+" ..."
137 result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
138 return result
139
140
141 # Format stack entries when the parser is running in debug mode
142 def format_stack_entry(r):
143 repr_str = repr(r)
144 if '\n' in repr_str: repr_str = repr(repr_str)
145 if len(repr_str) < 16:
146 return repr_str
147 else:
148 return "<%s @ 0x%x>" % (type(r).__name__,id(r))
149
150 #-----------------------------------------------------------------------------
151 # === LR Parsing Engine ===
152 #
153 # The following classes are used for the LR parser itself. These are not
154 # used during table construction and are independent of the actual LR
155 # table generation algorithm
156 #-----------------------------------------------------------------------------
157
158 # This class is used to hold non-terminal grammar symbols during parsing.
159 # It normally has the following attributes set:
160 # .type = Grammar symbol type
161 # .value = Symbol value
162 # .lineno = Starting line number
163 # .endlineno = Ending line number (optional, set automatically)
164 # .lexpos = Starting lex position
165 # .endlexpos = Ending lex position (optional, set automatically)
166
167 class YaccSymbol:
168 def __str__(self): return self.type
169 def __repr__(self): return str(self)
170
171 # This class is a wrapper around the objects actually passed to each
172 # grammar rule. Index lookup and assignment actually assign the
173 # .value attribute of the underlying YaccSymbol object.
174 # The lineno() method returns the line number of a given
175 # item (or 0 if not defined). The linespan() method returns
176 # a tuple of (startline,endline) representing the range of lines
177 # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
178 # representing the range of positional information for a symbol.
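#
# For illustration (a sketch): inside a grammar rule, indexing and the
# position helpers are used like this:
#
#     def p_assign(p):
#         'assign : NAME EQUALS expr'
#         p[0] = ('assign', p[1], p[3])   # p[1] is the value of NAME
#         start, end = p.linespan(3)      # line range covered by expr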
179
180 class YaccProduction:
181 def __init__(self,s,stack=None):
182 self.slice = s
183 self.stack = stack
184 self.lexer = None
185 self.parser= None
186 def __getitem__(self,n):
187 if n >= 0: return self.slice[n].value
188 else: return self.stack[n].value
189
190 def __setitem__(self,n,v):
191 self.slice[n].value = v
192
193 def __getslice__(self,i,j):
194 return [s.value for s in self.slice[i:j]]
195
196 def __len__(self):
197 return len(self.slice)
198
199 def lineno(self,n):
200 return getattr(self.slice[n],"lineno",0)
201
202 def set_lineno(self,n,lineno):
203 self.slice[n].lineno = lineno
204
205 def linespan(self,n):
206 startline = getattr(self.slice[n],"lineno",0)
207 endline = getattr(self.slice[n],"endlineno",startline)
208 return startline,endline
209
210 def lexpos(self,n):
211 return getattr(self.slice[n],"lexpos",0)
212
213 def lexspan(self,n):
214 startpos = getattr(self.slice[n],"lexpos",0)
215 endpos = getattr(self.slice[n],"endlexpos",startpos)
216 return startpos,endpos
217
218 def error(self):
219 raise SyntaxError
220
221
222 # -----------------------------------------------------------------------------
223 # == LRParser ==
224 #
225 # The LR Parsing engine.
226 # -----------------------------------------------------------------------------
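#
# Sketch of typical use (illustrative): an LRParser is normally obtained from
# the module-level yacc() function (defined later in this file) rather than
# constructed directly, and is driven through its parse() method, assuming a
# lexer has already been built with ply.lex:
#
#     parser = yacc.yacc()
#     result = parser.parse("3 + 4 * 5")   # uses the most recently built lexer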
227
228 class LRParser:
229 def __init__(self,lrtab,errorf):
230 self.productions = lrtab.lr_productions
231 self.action = lrtab.lr_action
232 self.goto = lrtab.lr_goto
233 self.errorfunc = errorf
234
235 def errok(self):
236 self.errorok = 1
237
238 def restart(self):
239 del self.statestack[:]
240 del self.symstack[:]
241 sym = YaccSymbol()
242 sym.type = '$end'
243 self.symstack.append(sym)
244 self.statestack.append(0)
245
246 def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
247 if debug or yaccdevel:
248 if isinstance(debug,int):
249 debug = PlyLogger(sys.stderr)
250 return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
251 elif tracking:
252 return self.parseopt(input,lexer,debug,tracking,tokenfunc)
253 else:
254 return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
255
256
257 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
258 # parsedebug().
259 #
260 # This is the debugging enabled version of parse(). All changes made to the
261 # parsing engine should be made here. For the non-debugging version,
262 # copy this code to a method parseopt() and delete all of the sections
263 # enclosed in:
264 #
265 # #--! DEBUG
266 # statements
267 # #--! DEBUG
268 #
269 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
270
271 def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
272 lookahead = None # Current lookahead symbol
273 lookaheadstack = [ ] # Stack of lookahead symbols
274 actions = self.action # Local reference to action table (to avoid lookup on self.)
275 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
276 prod = self.productions # Local reference to production list (to avoid lookup on self.)
277 pslice = YaccProduction(None) # Production object passed to grammar rules
278 errorcount = 0 # Used during error recovery
279
280 # --! DEBUG
281 debug.info("PLY: PARSE DEBUG START")
282 # --! DEBUG
283
284 # If no lexer was given, we will try to use the lex module
285 if not lexer:
286 lex = load_ply_lex()
287 lexer = lex.lexer
288
289 # Set up the lexer and parser objects on pslice
290 pslice.lexer = lexer
291 pslice.parser = self
292
293 # If input was supplied, pass to lexer
294 if input is not None:
295 lexer.input(input)
296
297 if tokenfunc is None:
298 # Tokenize function
299 get_token = lexer.token
300 else:
301 get_token = tokenfunc
302
303 # Set up the state and symbol stacks
304
305 statestack = [ ] # Stack of parsing states
306 self.statestack = statestack
307 symstack = [ ] # Stack of grammar symbols
308 self.symstack = symstack
309
310 pslice.stack = symstack # Put in the production
311 errtoken = None # Err token
312
313 # The start state is assumed to be (0,$end)
314
315 statestack.append(0)
316 sym = YaccSymbol()
317 sym.type = "$end"
318 symstack.append(sym)
319 state = 0
320 while 1:
321 # Get the next symbol on the input. If a lookahead symbol
322 # is already set, we just use that. Otherwise, we'll pull
323 # the next token off of the lookaheadstack or from the lexer
324
325 # --! DEBUG
326 debug.debug('')
327 debug.debug('State : %s', state)
328 # --! DEBUG
329
330 if not lookahead:
331 if not lookaheadstack:
332 lookahead = get_token() # Get the next token
333 else:
334 lookahead = lookaheadstack.pop()
335 if not lookahead:
336 lookahead = YaccSymbol()
337 lookahead.type = "$end"
338
339 # --! DEBUG
340 debug.debug('Stack : %s',
341 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
342 # --! DEBUG
343
344 # Check the action table
345 ltype = lookahead.type
346 t = actions[state].get(ltype)
347
348 if t is not None:
349 if t > 0:
350 # shift a symbol on the stack
351 statestack.append(t)
352 state = t
353
354 # --! DEBUG
355 debug.debug("Action : Shift and goto state %s", t)
356 # --! DEBUG
357
358 symstack.append(lookahead)
359 lookahead = None
360
361 # Decrease error count on successful shift
362 if errorcount: errorcount -=1
363 continue
364
365 if t < 0:
366 # reduce a symbol on the stack, emit a production
367 p = prod[-t]
368 pname = p.name
369 plen = p.len
370
371 # Get production function
372 sym = YaccSymbol()
373 sym.type = pname # Production name
374 sym.value = None
375
376 # --! DEBUG
377 if plen:
378 debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
379 else:
380 debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
381
382 # --! DEBUG
383
384 if plen:
385 targ = symstack[-plen-1:]
386 targ[0] = sym
387
388 # --! TRACKING
389 if tracking:
390 t1 = targ[1]
391 sym.lineno = t1.lineno
392 sym.lexpos = t1.lexpos
393 t1 = targ[-1]
394 sym.endlineno = getattr(t1,"endlineno",t1.lineno)
395 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
396
397 # --! TRACKING
398
399 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
400 # The code enclosed in this section is duplicated
401 # below as a performance optimization. Make sure
402 # changes get made in both locations.
403
404 pslice.slice = targ
405
406 try:
407 # Call the grammar rule with our special slice object
408 del symstack[-plen:]
409 del statestack[-plen:]
410 p.callable(pslice)
411 # --! DEBUG
412 debug.info("Result : %s", format_result(pslice[0]))
413 # --! DEBUG
414 symstack.append(sym)
415 state = goto[statestack[-1]][pname]
416 statestack.append(state)
417 except SyntaxError:
418 # If an error was set, enter error recovery state
419 lookaheadstack.append(lookahead)
420 symstack.pop()
421 statestack.pop()
422 state = statestack[-1]
423 sym.type = 'error'
424 lookahead = sym
425 errorcount = error_count
426 self.errorok = 0
427 continue
428 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
429
430 else:
431
432 # --! TRACKING
433 if tracking:
434 sym.lineno = lexer.lineno
435 sym.lexpos = lexer.lexpos
436 # --! TRACKING
437
438 targ = [ sym ]
439
440 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
441 # The code enclosed in this section is duplicated
442 # above as a performance optimization. Make sure
443 # changes get made in both locations.
444
445 pslice.slice = targ
446
447 try:
448 # Call the grammar rule with our special slice object
449 p.callable(pslice)
450 # --! DEBUG
451 debug.info("Result : %s", format_result(pslice[0]))
452 # --! DEBUG
453 symstack.append(sym)
454 state = goto[statestack[-1]][pname]
455 statestack.append(state)
456 except SyntaxError:
457 # If an error was set, enter error recovery state
458 lookaheadstack.append(lookahead)
459 symstack.pop()
460 statestack.pop()
461 state = statestack[-1]
462 sym.type = 'error'
463 lookahead = sym
464 errorcount = error_count
465 self.errorok = 0
466 continue
467 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
468
469 if t == 0:
470 n = symstack[-1]
471 result = getattr(n,"value",None)
472 # --! DEBUG
473 debug.info("Done : Returning %s", format_result(result))
474 debug.info("PLY: PARSE DEBUG END")
475 # --! DEBUG
476 return result
477
478 if t is None:
479
480 # --! DEBUG
481 debug.error('Error : %s',
482 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
483 # --! DEBUG
484
485 # We have some kind of parsing error here. To handle
486 # this, we are going to push the current token onto
487 # the tokenstack and replace it with an 'error' token.
488 # If there are any synchronization rules, they may
489 # catch it.
490 #
491 # In addition to pushing the error token, we also call
492 # the user-defined p_error() function if this is the
493 # first syntax error. This function is only called if
494 # errorcount == 0.
495 if errorcount == 0 or self.errorok:
496 errorcount = error_count
497 self.errorok = 0
498 errtoken = lookahead
499 if errtoken.type == "$end":
500 errtoken = None # End of file!
501 if self.errorfunc:
502 global errok,token,restart
503 errok = self.errok # Set some special functions available in error recovery
504 token = get_token
505 restart = self.restart
506 if errtoken and not hasattr(errtoken,'lexer'):
507 errtoken.lexer = lexer
508 tok = self.errorfunc(errtoken)
509 del errok, token, restart # Delete special functions
510
511 if self.errorok:
512 # User must have done some kind of panic
513 # mode recovery on their own. The
514 # returned token is the next lookahead
515 lookahead = tok
516 errtoken = None
517 continue
518 else:
519 if errtoken:
520 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
521 else: lineno = 0
522 if lineno:
523 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
524 else:
525 sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
526 else:
527 sys.stderr.write("yacc: Parse error in input. EOF\n")
528 return
529
530 else:
531 errorcount = error_count
532
533 # case 1: the statestack only has 1 entry on it. If we're in this state, the
534 # entire parse has been rolled back and we're completely hosed. The token is
535 # discarded and we just keep going.
536
537 if len(statestack) <= 1 and lookahead.type != "$end":
538 lookahead = None
539 errtoken = None
540 state = 0
541 # Nuke the pushback stack
542 del lookaheadstack[:]
543 continue
544
545 # case 2: the statestack has a couple of entries on it, but we're
546 # at the end of the file. nuke the top entry and generate an error token
547
548 # Start nuking entries on the stack
549 if lookahead.type == "$end":
550 # Whoa. We're really hosed here. Bail out
551 return
552
553 if lookahead.type != 'error':
554 sym = symstack[-1]
555 if sym.type == 'error':
556 # Hmmm. Error is on top of stack, we'll just nuke input
557 # symbol and continue
558 lookahead = None
559 continue
560 t = YaccSymbol()
561 t.type = 'error'
562 if hasattr(lookahead,"lineno"):
563 t.lineno = lookahead.lineno
564 t.value = lookahead
565 lookaheadstack.append(lookahead)
566 lookahead = t
567 else:
568 symstack.pop()
569 statestack.pop()
570 state = statestack[-1] # Potential bug fix
571
572 continue
573
574 # Call an error function here
575 raise RuntimeError("yacc: internal parser error!!!\n")
576
577 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
578 # parseopt().
579 #
580 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.
581 # Edit the debug version above, then copy any modifications to the method
582 # below while removing #--! DEBUG sections.
583 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
584
585
586 def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
587 lookahead = None # Current lookahead symbol
588 lookaheadstack = [ ] # Stack of lookahead symbols
589 actions = self.action # Local reference to action table (to avoid lookup on self.)
590 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
591 prod = self.productions # Local reference to production list (to avoid lookup on self.)
592 pslice = YaccProduction(None) # Production object passed to grammar rules
593 errorcount = 0 # Used during error recovery
594
595 # If no lexer was given, we will try to use the lex module
596 if not lexer:
597 lex = load_ply_lex()
598 lexer = lex.lexer
599
600 # Set up the lexer and parser objects on pslice
601 pslice.lexer = lexer
602 pslice.parser = self
603
604 # If input was supplied, pass to lexer
605 if input is not None:
606 lexer.input(input)
607
608 if tokenfunc is None:
609 # Tokenize function
610 get_token = lexer.token
611 else:
612 get_token = tokenfunc
613
614 # Set up the state and symbol stacks
615
616 statestack = [ ] # Stack of parsing states
617 self.statestack = statestack
618 symstack = [ ] # Stack of grammar symbols
619 self.symstack = symstack
620
621 pslice.stack = symstack # Put in the production
622 errtoken = None # Err token
623
624 # The start state is assumed to be (0,$end)
625
626 statestack.append(0)
627 sym = YaccSymbol()
628 sym.type = '$end'
629 symstack.append(sym)
630 state = 0
631 while 1:
632 # Get the next symbol on the input. If a lookahead symbol
633 # is already set, we just use that. Otherwise, we'll pull
634 # the next token off of the lookaheadstack or from the lexer
635
636 if not lookahead:
637 if not lookaheadstack:
638 lookahead = get_token() # Get the next token
639 else:
640 lookahead = lookaheadstack.pop()
641 if not lookahead:
642 lookahead = YaccSymbol()
643 lookahead.type = '$end'
644
645 # Check the action table
646 ltype = lookahead.type
647 t = actions[state].get(ltype)
648
649 if t is not None:
650 if t > 0:
651 # shift a symbol on the stack
652 statestack.append(t)
653 state = t
654
655 symstack.append(lookahead)
656 lookahead = None
657
658 # Decrease error count on successful shift
659 if errorcount: errorcount -=1
660 continue
661
662 if t < 0:
663 # reduce a symbol on the stack, emit a production
664 p = prod[-t]
665 pname = p.name
666 plen = p.len
667
668 # Get production function
669 sym = YaccSymbol()
670 sym.type = pname # Production name
671 sym.value = None
672
673 if plen:
674 targ = symstack[-plen-1:]
675 targ[0] = sym
676
677 # --! TRACKING
678 if tracking:
679 t1 = targ[1]
680 sym.lineno = t1.lineno
681 sym.lexpos = t1.lexpos
682 t1 = targ[-1]
683 sym.endlineno = getattr(t1,"endlineno",t1.lineno)
684 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
685
686 # --! TRACKING
687
688 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
689 # The code enclosed in this section is duplicated
690 # below as a performance optimization. Make sure
691 # changes get made in both locations.
692
693 pslice.slice = targ
694
695 try:
696 # Call the grammar rule with our special slice object
697 del symstack[-plen:]
698 del statestack[-plen:]
699 p.callable(pslice)
700 symstack.append(sym)
701 state = goto[statestack[-1]][pname]
702 statestack.append(state)
703 except SyntaxError:
704 # If an error was set, enter error recovery state
705 lookaheadstack.append(lookahead)
706 symstack.pop()
707 statestack.pop()
708 state = statestack[-1]
709 sym.type = 'error'
710 lookahead = sym
711 errorcount = error_count
712 self.errorok = 0
713 continue
714 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
715
716 else:
717
718 # --! TRACKING
719 if tracking:
720 sym.lineno = lexer.lineno
721 sym.lexpos = lexer.lexpos
722 # --! TRACKING
723
724 targ = [ sym ]
725
726 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
727 # The code enclosed in this section is duplicated
728 # above as a performance optimization. Make sure
729 # changes get made in both locations.
730
731 pslice.slice = targ
732
733 try:
734 # Call the grammar rule with our special slice object
735 p.callable(pslice)
736 symstack.append(sym)
737 state = goto[statestack[-1]][pname]
738 statestack.append(state)
739 except SyntaxError:
740 # If an error was set, enter error recovery state
741 lookaheadstack.append(lookahead)
742 symstack.pop()
743 statestack.pop()
744 state = statestack[-1]
745 sym.type = 'error'
746 lookahead = sym
747 errorcount = error_count
748 self.errorok = 0
749 continue
750 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
751
752 if t == 0:
753 n = symstack[-1]
754 return getattr(n,"value",None)
755
756 if t is None:
757
758 # We have some kind of parsing error here. To handle
759 # this, we are going to push the current token onto
760 # the tokenstack and replace it with an 'error' token.
761 # If there are any synchronization rules, they may
762 # catch it.
763 #
764 # In addition to pushing the error token, we also call
765 # the user-defined p_error() function if this is the
766 # first syntax error. This function is only called if
767 # errorcount == 0.
768 if errorcount == 0 or self.errorok:
769 errorcount = error_count
770 self.errorok = 0
771 errtoken = lookahead
772 if errtoken.type == '$end':
773 errtoken = None # End of file!
774 if self.errorfunc:
775 global errok,token,restart
776 errok = self.errok # Set some special functions available in error recovery
777 token = get_token
778 restart = self.restart
779 if errtoken and not hasattr(errtoken,'lexer'):
780 errtoken.lexer = lexer
781 tok = self.errorfunc(errtoken)
782 del errok, token, restart # Delete special functions
783
784 if self.errorok:
785 # User must have done some kind of panic
786 # mode recovery on their own. The
787 # returned token is the next lookahead
788 lookahead = tok
789 errtoken = None
790 continue
791 else:
792 if errtoken:
793 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
794 else: lineno = 0
795 if lineno:
796 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
797 else:
798 sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
799 else:
800 sys.stderr.write("yacc: Parse error in input. EOF\n")
801 return
802
803 else:
804 errorcount = error_count
805
806 # case 1: the statestack only has 1 entry on it. If we're in this state, the
807 # entire parse has been rolled back and we're completely hosed. The token is
808 # discarded and we just keep going.
809
810 if len(statestack) <= 1 and lookahead.type != '$end':
811 lookahead = None
812 errtoken = None
813 state = 0
814 # Nuke the pushback stack
815 del lookaheadstack[:]
816 continue
817
818 # case 2: the statestack has a couple of entries on it, but we're
819 # at the end of the file. nuke the top entry and generate an error token
820
821 # Start nuking entries on the stack
822 if lookahead.type == '$end':
823 # Whoa. We're really hosed here. Bail out
824 return
825
826 if lookahead.type != 'error':
827 sym = symstack[-1]
828 if sym.type == 'error':
829 # Hmmm. Error is on top of stack, we'll just nuke input
830 # symbol and continue
831 lookahead = None
832 continue
833 t = YaccSymbol()
834 t.type = 'error'
835 if hasattr(lookahead,"lineno"):
836 t.lineno = lookahead.lineno
837 t.value = lookahead
838 lookaheadstack.append(lookahead)
839 lookahead = t
840 else:
841 symstack.pop()
842 statestack.pop()
843 state = statestack[-1] # Potential bug fix
844
845 continue
846
847 # Call an error function here
848 raise RuntimeError("yacc: internal parser error!!!\n")
849
850 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
851 # parseopt_notrack().
852 #
853 # Optimized version of parseopt() with line number tracking removed.
854 # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
855 # code in the #--! TRACKING sections
856 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
857
858 def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
859 lookahead = None # Current lookahead symbol
860 lookaheadstack = [ ] # Stack of lookahead symbols
861 actions = self.action # Local reference to action table (to avoid lookup on self.)
862 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
863 prod = self.productions # Local reference to production list (to avoid lookup on self.)
864 pslice = YaccProduction(None) # Production object passed to grammar rules
865 errorcount = 0 # Used during error recovery
866
867 # If no lexer was given, we will try to use the lex module
868 if not lexer:
869 lex = load_ply_lex()
870 lexer = lex.lexer
871
872 # Set up the lexer and parser objects on pslice
873 pslice.lexer = lexer
874 pslice.parser = self
875
876 # If input was supplied, pass to lexer
877 if input is not None:
878 lexer.input(input)
879
880 if tokenfunc is None:
881 # Tokenize function
882 get_token = lexer.token
883 else:
884 get_token = tokenfunc
885
886 # Set up the state and symbol stacks
887
888 statestack = [ ] # Stack of parsing states
889 self.statestack = statestack
890 symstack = [ ] # Stack of grammar symbols
891 self.symstack = symstack
892
893 pslice.stack = symstack # Put in the production
894 errtoken = None # Err token
895
896 # The start state is assumed to be (0,$end)
897
898 statestack.append(0)
899 sym = YaccSymbol()
900 sym.type = '$end'
901 symstack.append(sym)
902 state = 0
903 while 1:
904 # Get the next symbol on the input. If a lookahead symbol
905 # is already set, we just use that. Otherwise, we'll pull
906 # the next token off of the lookaheadstack or from the lexer
907
908 if not lookahead:
909 if not lookaheadstack:
910 lookahead = get_token() # Get the next token
911 else:
912 lookahead = lookaheadstack.pop()
913 if not lookahead:
914 lookahead = YaccSymbol()
915 lookahead.type = '$end'
916
917 # Check the action table
918 ltype = lookahead.type
919 t = actions[state].get(ltype)
920
921 if t is not None:
922 if t > 0:
923 # shift a symbol on the stack
924 statestack.append(t)
925 state = t
926
927 symstack.append(lookahead)
928 lookahead = None
929
930 # Decrease error count on successful shift
931 if errorcount: errorcount -=1
932 continue
933
934 if t < 0:
935 # reduce a symbol on the stack, emit a production
936 p = prod[-t]
937 pname = p.name
938 plen = p.len
939
940 # Get production function
941 sym = YaccSymbol()
942 sym.type = pname # Production name
943 sym.value = None
944
945 if plen:
946 targ = symstack[-plen-1:]
947 targ[0] = sym
948
949 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
950 # The code enclosed in this section is duplicated
951 # below as a performance optimization. Make sure
952 # changes get made in both locations.
953
954 pslice.slice = targ
955
956 try:
957 # Call the grammar rule with our special slice object
958 del symstack[-plen:]
959 del statestack[-plen:]
960 p.callable(pslice)
961 symstack.append(sym)
962 state = goto[statestack[-1]][pname]
963 statestack.append(state)
964 except SyntaxError:
965 # If an error was set, enter error recovery state
966 lookaheadstack.append(lookahead)
967 symstack.pop()
968 statestack.pop()
969 state = statestack[-1]
970 sym.type = 'error'
971 lookahead = sym
972 errorcount = error_count
973 self.errorok = 0
974 continue
975 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
976
977 else:
978
979 targ = [ sym ]
980
981 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
982 # The code enclosed in this section is duplicated
983 # above as a performance optimization. Make sure
984 # changes get made in both locations.
985
986 pslice.slice = targ
987
988 try:
989 # Call the grammar rule with our special slice object
990 p.callable(pslice)
991 symstack.append(sym)
992 state = goto[statestack[-1]][pname]
993 statestack.append(state)
994 except SyntaxError:
995 # If an error was set, enter error recovery state
996 lookaheadstack.append(lookahead)
997 symstack.pop()
998 statestack.pop()
999 state = statestack[-1]
1000 sym.type = 'error'
1001 lookahead = sym
1002 errorcount = error_count
1003 self.errorok = 0
1004 continue
1005 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1006
1007 if t == 0:
1008 n = symstack[-1]
1009 return getattr(n,"value",None)
1010
1011 if t is None:
1012
1013 # We have some kind of parsing error here. To handle
1014 # this, we are going to push the current token onto
1015 # the tokenstack and replace it with an 'error' token.
1016 # If there are any synchronization rules, they may
1017 # catch it.
1018 #
1019 # In addition to pushing the error token, we also call
1020 # the user-defined p_error() function if this is the
1021 # first syntax error. This function is only called if
1022 # errorcount == 0.
1023 if errorcount == 0 or self.errorok:
1024 errorcount = error_count
1025 self.errorok = 0
1026 errtoken = lookahead
1027 if errtoken.type == '$end':
1028 errtoken = None # End of file!
1029 if self.errorfunc:
1030 global errok,token,restart
1031 errok = self.errok # Set some special functions available in error recovery
1032 token = get_token
1033 restart = self.restart
1034 if errtoken and not hasattr(errtoken,'lexer'):
1035 errtoken.lexer = lexer
1036 tok = self.errorfunc(errtoken)
1037 del errok, token, restart # Delete special functions
1038
1039 if self.errorok:
1040 # User must have done some kind of panic
1041 # mode recovery on their own. The
1042 # returned token is the next lookahead
1043 lookahead = tok
1044 errtoken = None
1045 continue
1046 else:
1047 if errtoken:
1048 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
1049 else: lineno = 0
1050 if lineno:
1051 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
1052 else:
1053 sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
1054 else:
1055 sys.stderr.write("yacc: Parse error in input. EOF\n")
1056 return
1057
1058 else:
1059 errorcount = error_count
1060
1061 # case 1: the statestack only has 1 entry on it. If we're in this state, the
1062 # entire parse has been rolled back and we're completely hosed. The token is
1063 # discarded and we just keep going.
1064
1065 if len(statestack) <= 1 and lookahead.type != '$end':
1066 lookahead = None
1067 errtoken = None
1068 state = 0
1069 # Nuke the pushback stack
1070 del lookaheadstack[:]
1071 continue
1072
1073 # case 2: the statestack has a couple of entries on it, but we're
1074 # at the end of the file. nuke the top entry and generate an error token
1075
1076 # Start nuking entries on the stack
1077 if lookahead.type == '$end':
1078 # Whoa. We're really hosed here. Bail out
1079 return
1080
1081 if lookahead.type != 'error':
1082 sym = symstack[-1]
1083 if sym.type == 'error':
1084 # Hmmm. Error is on top of stack, we'll just nuke input
1085 # symbol and continue
1086 lookahead = None
1087 continue
1088 t = YaccSymbol()
1089 t.type = 'error'
1090 if hasattr(lookahead,"lineno"):
1091 t.lineno = lookahead.lineno
1092 t.value = lookahead
1093 lookaheadstack.append(lookahead)
1094 lookahead = t
1095 else:
1096 symstack.pop()
1097 statestack.pop()
1098 state = statestack[-1] # Potential bug fix
1099
1100 continue
1101
1102 # Call an error function here
1103 raise RuntimeError("yacc: internal parser error!!!\n")
1104
1105 # -----------------------------------------------------------------------------
1106 # === Grammar Representation ===
1107 #
1108 # The following functions, classes, and variables are used to represent and
1109 # manipulate the rules that make up a grammar.
1110 # -----------------------------------------------------------------------------
1111
1112 import re
1113
1114 # regex matching identifiers
1115 _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1116
1117 # -----------------------------------------------------------------------------
1118 # class Production:
1119 #
1120 # This class stores the raw information about a single production or grammar rule.
1121 # A grammar rule refers to a specification such as this:
1122 #
1123 # expr : expr PLUS term
1124 #
1125 # Here are the basic attributes defined on all productions
1126 #
1127 # name - Name of the production. For example 'expr'
1128 # prod - A list of symbols on the right side ['expr','PLUS','term']
1129 # prec - Production precedence level
1130 # number - Production number.
1131 # func - Function that executes on reduce
1132 # file - File where production function is defined
1133 # lineno - Line number where production function is defined
1134 #
1135 # The following attributes are defined or optional.
1136 #
1137 # len - Length of the production (number of symbols on right hand side)
1138 # usyms - Set of unique symbols found in the production
1139 # -----------------------------------------------------------------------------
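#
# For illustration (a sketch using only the constructor below): the rule
# 'expr : expr PLUS term' corresponds to
#
#     p = Production(1, 'expr', ['expr', 'PLUS', 'term'])
#     str(p)      # 'expr -> expr PLUS term'
#     p.len       # 3
#     p.usyms     # ['expr', 'PLUS', 'term']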
1140
1141 class Production(object):
1142 def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
1143 self.name = name
1144 self.prod = tuple(prod)
1145 self.number = number
1146 self.func = func
1147 self.callable = None
1148 self.file = file
1149 self.line = line
1150 self.prec = precedence
1151
1152 # Internal settings used during table construction
1153
1154 self.len = len(self.prod) # Length of the production
1155
1156 # Create a list of unique production symbols used in the production
1157 self.usyms = [ ]
1158 for s in self.prod:
1159 if s not in self.usyms:
1160 self.usyms.append(s)
1161
1162 # List of all LR items for the production
1163 self.lr_items = []
1164 self.lr_next = None
1165
1166 # Create a string representation
1167 if self.prod:
1168 self.str = "%s -> %s" % (self.name," ".join(self.prod))
1169 else:
1170 self.str = "%s -> <empty>" % self.name
1171
1172 def __str__(self):
1173 return self.str
1174
1175 def __repr__(self):
1176 return "Production("+str(self)+")"
1177
1178 def __len__(self):
1179 return len(self.prod)
1180
1181 def __nonzero__(self):
1182 return 1
1183
1184 def __getitem__(self,index):
1185 return self.prod[index]
1186
1187 # Return the nth lr_item from the production (or None if at the end)
1188 def lr_item(self,n):
1189 if n > len(self.prod): return None
1190 p = LRItem(self,n)
1191
1192 # Precompute the list of productions immediately following. Hack. Remove later
1193 try:
1194 p.lr_after = Prodnames[p.prod[n+1]]
1195 except (IndexError,KeyError):
1196 p.lr_after = []
1197 try:
1198 p.lr_before = p.prod[n-1]
1199 except IndexError:
1200 p.lr_before = None
1201
1202 return p
1203
1204 # Bind the production function name to a callable
1205 def bind(self,pdict):
1206 if self.func:
1207 self.callable = pdict[self.func]
1208
1209 # This class serves as a minimal standin for Production objects when
1210 # reading table data from files. It only contains information
1211 # actually used by the LR parsing engine, plus some additional
1212 # debugging information.
1213 class MiniProduction(object):
1214 def __init__(self,str,name,len,func,file,line):
1215 self.name = name
1216 self.len = len
1217 self.func = func
1218 self.callable = None
1219 self.file = file
1220 self.line = line
1221 self.str = str
1222 def __str__(self):
1223 return self.str
1224 def __repr__(self):
1225 return "MiniProduction(%s)" % self.str
1226
1227 # Bind the production function name to a callable
1228 def bind(self,pdict):
1229 if self.func:
1230 self.callable = pdict[self.func]
1231
1232
1233 # -----------------------------------------------------------------------------
1234 # class LRItem
1235 #
1236 # This class represents a specific stage of parsing a production rule. For
1237 # example:
1238 #
1239 # expr : expr . PLUS term
1240 #
1241 # In the above, the "." represents the current location of the parse. Here
1242 # are the basic attributes:
1243 #
1244 # name - Name of the production. For example 'expr'
1245 # prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
1246 # number - Production number.
1247 #
1248 # lr_next - Next LR item. For example, if we are 'expr -> expr . PLUS term'
1249 # then lr_next refers to 'expr -> expr PLUS . term'
1250 # lr_index - LR item index (location of the ".") in the prod list.
1251 # lookaheads - LALR lookahead symbols for this item
1252 # len - Length of the production (number of symbols on right hand side)
1253 # lr_after - List of all productions that immediately follow
1254 # lr_before - Grammar symbol immediately before
1255 # -----------------------------------------------------------------------------
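#
# For illustration (a sketch): placing the dot after the first right-hand-side
# symbol of 'expr : expr PLUS term' gives
#
#     p = Production(1, 'expr', ['expr', 'PLUS', 'term'])
#     item = LRItem(p, 1)
#     str(item)       # 'expr -> expr . PLUS term'
#     item.lr_index   # 1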
1256
1257 class LRItem(object):
1258 def __init__(self,p,n):
1259 self.name = p.name
1260 self.prod = list(p.prod)
1261 self.number = p.number
1262 self.lr_index = n
1263 self.lookaheads = { }
1264 self.prod.insert(n,".")
1265 self.prod = tuple(self.prod)
1266 self.len = len(self.prod)
1267 self.usyms = p.usyms
1268
1269 def __str__(self):
1270 if self.prod:
1271 s = "%s -> %s" % (self.name," ".join(self.prod))
1272 else:
1273 s = "%s -> <empty>" % self.name
1274 return s
1275
1276 def __repr__(self):
1277 return "LRItem("+str(self)+")"
1278
1279 def __len__(self):
1280 return len(self.prod)
1281
1282 def __getitem__(self,index):
1283 return self.prod[index]
1284
1285 # -----------------------------------------------------------------------------
1286 # rightmost_terminal()
1287 #
1288 # Return the rightmost terminal from a list of symbols. Used in add_production()
1289 # -----------------------------------------------------------------------------
1290 def rightmost_terminal(symbols, terminals):
1291 i = len(symbols) - 1
1292 while i >= 0:
1293 if symbols[i] in terminals:
1294 return symbols[i]
1295 i -= 1
1296 return None
1297
1298 # -----------------------------------------------------------------------------
1299 # === GRAMMAR CLASS ===
1300 #
1301 # The following class represents the contents of the specified grammar along
1302 # with various computed properties such as first sets, follow sets, LR items, etc.
1303 # This data is used for critical parts of the table generation process later.
1304 # -----------------------------------------------------------------------------
1305
1306 class GrammarError(YaccError): pass
1307
1308 class Grammar(object):
1309 def __init__(self,terminals):
1310 self.Productions = [None] # A list of all of the productions. The first
1311 # entry is always reserved for the purpose of
1312 # building an augmented grammar
1313
1314 self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all
1315 # productions of that nonterminal.
1316
1317 self.Prodmap = { } # A dictionary that is only used to detect duplicate
1318 # productions.
1319
1320 self.Terminals = { } # A dictionary mapping the names of terminal symbols to a
1321 # list of the rules where they are used.
1322
1323 for term in terminals:
1324 self.Terminals[term] = []
1325
1326 self.Terminals['error'] = []
1327
1328 self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list
1329 # of rule numbers where they are used.
1330
1331 self.First = { } # A dictionary of precomputed FIRST(x) symbols
1332
1333 self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols
1334
1335 self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the
1336 # form ('right',level) or ('nonassoc',level) or ('left',level)
1337
1338 self.UsedPrecedence = { } # Precedence rules that were actually used by the grammar.
1339 # This is only used to provide error checking and to generate
1340 # a warning about unused precedence rules.
1341
1342 self.Start = None # Starting symbol for the grammar
1343
1344
1345 def __len__(self):
1346 return len(self.Productions)
1347
1348 def __getitem__(self,index):
1349 return self.Productions[index]
1350
1351 # -----------------------------------------------------------------------------
1352 # set_precedence()
1353 #
1354 # Sets the precedence for a given terminal. assoc is the associativity such as
1355 # 'left','right', or 'nonassoc'. level is a numeric level.
1356 #
1357 # -----------------------------------------------------------------------------
1358
1359 def set_precedence(self,term,assoc,level):
1360 assert self.Productions == [None],"Must call set_precedence() before add_production()"
1361 if term in self.Precedence:
1362 raise GrammarError("Precedence already specified for terminal '%s'" % term)
1363 if assoc not in ['left','right','nonassoc']:
1364 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1365 self.Precedence[term] = (assoc,level)
1366
1367 # -----------------------------------------------------------------------------
1368 # add_production()
1369 #
1370 # Given an action function, this function assembles a production rule and
1371 # computes its precedence level.
1372 #
1373 # The production rule is supplied as a list of symbols. For example,
1374 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1375 # symbols ['expr','PLUS','term'].
1376 #
1377 # Precedence is determined by the precedence of the right-most non-terminal
1378 # or the precedence of a terminal specified by %prec.
1379 #
1380 # A variety of error checks are performed to make sure production symbols
1381 # are valid and that %prec is used correctly.
1382 # -----------------------------------------------------------------------------
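#
# For illustration (a sketch): assembling a small expression grammar by hand,
# with PLUS declared left-associative at level 1 (precedence must be set
# before any production is added):
#
#     g = Grammar(['PLUS', 'NUMBER'])
#     g.set_precedence('PLUS', 'left', 1)
#     g.add_production('expr', ['expr', 'PLUS', 'expr'])
#     g.add_production('expr', ['NUMBER'])
#     g.set_start('expr')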
1383
1384 def add_production(self,prodname,syms,func=None,file='',line=0):
1385
1386 if prodname in self.Terminals:
1387 raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
1388 if prodname == 'error':
1389 raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
1390 if not _is_identifier.match(prodname):
1391 raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
1392
1393 # Look for literal tokens
1394 for n,s in enumerate(syms):
1395 if s[0] in "'\"":
1396 try:
1397 c = eval(s)
1398 if (len(c) > 1):
1399 raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
1400 if not c in self.Terminals:
1401 self.Terminals[c] = []
1402 syms[n] = c
1403 continue
1404 except SyntaxError:
1405 pass
1406 if not _is_identifier.match(s) and s != '%prec':
1407 raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
1408
1409 # Determine the precedence level
1410 if '%prec' in syms:
1411 if syms[-1] == '%prec':
1412 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
1413 if syms[-2] != '%prec':
1414 raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
1415 precname = syms[-1]
1416 prodprec = self.Precedence.get(precname,None)
1417 if not prodprec:
1418 raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
1419 else:
1420 self.UsedPrecedence[precname] = 1
1421 del syms[-2:] # Drop %prec from the rule
1422 else:
1423 # If no %prec, precedence is determined by the rightmost terminal symbol
1424 precname = rightmost_terminal(syms,self.Terminals)
1425 prodprec = self.Precedence.get(precname,('right',0))
1426
1427 # See if the rule is already in the rulemap
1428 map = "%s -> %s" % (prodname,syms)
1429 if map in self.Prodmap:
1430 m = self.Prodmap[map]
1431 raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
1432 "Previous definition at %s:%d" % (m.file, m.line))
1433
1434 # From this point on, everything is valid. Create a new Production instance
1435 pnumber = len(self.Productions)
1436 if not prodname in self.Nonterminals:
1437 self.Nonterminals[prodname] = [ ]
1438
1439 # Add the production number to Terminals and Nonterminals
1440 for t in syms:
1441 if t in self.Terminals:
1442 self.Terminals[t].append(pnumber)
1443 else:
1444 if not t in self.Nonterminals:
1445 self.Nonterminals[t] = [ ]
1446 self.Nonterminals[t].append(pnumber)
1447
1448 # Create a production and add it to the list of productions
1449 p = Production(pnumber,prodname,syms,prodprec,func,file,line)
1450 self.Productions.append(p)
1451 self.Prodmap[map] = p
1452
1453 # Add to the global productions list
1454 try:
1455 self.Prodnames[prodname].append(p)
1456 except KeyError:
1457 self.Prodnames[prodname] = [ p ]
1458 return 0
1459
1460 # -----------------------------------------------------------------------------
1461 # set_start()
1462 #
1463 # Sets the starting symbol and creates the augmented grammar. Production
1464 # rule 0 is S' -> start where start is the start symbol.
1465 # -----------------------------------------------------------------------------
1466
1467 def set_start(self,start=None):
1468 if not start:
1469 start = self.Productions[1].name
1470 if start not in self.Nonterminals:
1471 raise GrammarError("start symbol %s undefined" % start)
1472 self.Productions[0] = Production(0,"S'",[start])
1473 self.Nonterminals[start].append(0)
1474 self.Start = start
1475
1476 # -----------------------------------------------------------------------------
1477 # find_unreachable()
1478 #
1479 # Find all of the nonterminal symbols that can't be reached from the starting
1480 # symbol. Returns a list of nonterminals that can't be reached.
1481 # -----------------------------------------------------------------------------
1482
1483 def find_unreachable(self):
1484
1485 # Mark all symbols that are reachable from a symbol s
1486 def mark_reachable_from(s):
1487 if reachable[s]:
1488 # We've already reached symbol s.
1489 return
1490 reachable[s] = 1
1491 for p in self.Prodnames.get(s,[]):
1492 for r in p.prod:
1493 mark_reachable_from(r)
1494
1495 reachable = { }
1496 for s in list(self.Terminals) + list(self.Nonterminals):
1497 reachable[s] = 0
1498
1499 mark_reachable_from( self.Productions[0].prod[0] )
1500
1501 return [s for s in list(self.Nonterminals)
1502 if not reachable[s]]
1503
1504 # -----------------------------------------------------------------------------
1505 # infinite_cycles()
1506 #
1507 # This function looks at the various parsing rules and tries to detect
1508 # infinite recursion cycles (grammar rules where there is no possible way
1509 # to derive a string of only terminals).
1510 # -----------------------------------------------------------------------------
1511
1512 def infinite_cycles(self):
1513 terminates = {}
1514
1515 # Terminals:
1516 for t in self.Terminals:
1517 terminates[t] = 1
1518
1519 terminates['$end'] = 1
1520
1521 # Nonterminals:
1522
1523 # Initialize to false:
1524 for n in self.Nonterminals:
1525 terminates[n] = 0
1526
1527 # Then propagate termination until no change:
1528 while 1:
1529 some_change = 0
1530 for (n,pl) in self.Prodnames.items():
1531 # Nonterminal n terminates iff any of its productions terminates.
1532 for p in pl:
1533 # Production p terminates iff all of its rhs symbols terminate.
1534 for s in p.prod:
1535 if not terminates[s]:
1536 # The symbol s does not terminate,
1537 # so production p does not terminate.
1538 p_terminates = 0
1539 break
1540 else:
1541 # didn't break from the loop,
1542 # so every symbol s terminates
1543 # so production p terminates.
1544 p_terminates = 1
1545
1546 if p_terminates:
1547 # symbol n terminates!
1548 if not terminates[n]:
1549 terminates[n] = 1
1550 some_change = 1
1551 # Don't need to consider any more productions for this n.
1552 break
1553
1554 if not some_change:
1555 break
1556
1557 infinite = []
1558 for (s,term) in terminates.items():
1559 if not term:
1560 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1561 # s is used-but-not-defined, and we've already warned of that,
1562 # so it would be overkill to say that it's also non-terminating.
1563 pass
1564 else:
1565 infinite.append(s)
1566
1567 return infinite
1568
1569
1570 # -----------------------------------------------------------------------------
1571 # undefined_symbols()
1572 #
1573 # Find all symbols that were used in the grammar, but not defined as tokens or
1574 # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
1575 # and prod is the production where the symbol was used.
1576 # -----------------------------------------------------------------------------
1577 def undefined_symbols(self):
1578 result = []
1579 for p in self.Productions:
1580 if not p: continue
1581
1582 for s in p.prod:
1583 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1584 result.append((s,p))
1585 return result
1586
1587 # -----------------------------------------------------------------------------
1588 # unused_terminals()
1589 #
1590 # Find all terminals that were defined, but not used by the grammar. Returns
1591 # a list of all symbols.
1592 # -----------------------------------------------------------------------------
1593 def unused_terminals(self):
1594 unused_tok = []
1595 for s,v in self.Terminals.items():
1596 if s != 'error' and not v:
1597 unused_tok.append(s)
1598
1599 return unused_tok
1600
1601 # ------------------------------------------------------------------------------
1602 # unused_rules()
1603 #
1604 # Find all grammar rules that were defined, but not used (maybe not reachable).
1605 # Returns a list of productions.
1606 # ------------------------------------------------------------------------------
1607
1608 def unused_rules(self):
1609 unused_prod = []
1610 for s,v in self.Nonterminals.items():
1611 if not v:
1612 p = self.Prodnames[s][0]
1613 unused_prod.append(p)
1614 return unused_prod
1615
1616 # -----------------------------------------------------------------------------
1617 # unused_precedence()
1618 #
1619 # Returns a list of tuples (term,precedence) corresponding to precedence
1620 # rules that were never used by the grammar. term is the name of the terminal
1621 # on which precedence was applied and precedence is a string such as 'left' or
1622 # 'right' corresponding to the type of precedence.
1623 # -----------------------------------------------------------------------------
1624
1625 def unused_precedence(self):
1626 unused = []
1627 for termname in self.Precedence:
1628 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1629 unused.append((termname,self.Precedence[termname][0]))
1630
1631 return unused
1632
1633 # -------------------------------------------------------------------------
1634 # _first()
1635 #
1636 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1637 #
1638 # During execution of compute_first1, the result may be incomplete.
1639 # Afterward (e.g., when called from compute_follow()), it will be complete.
1640 # -------------------------------------------------------------------------
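#
# Worked sketch (illustrative symbols): if First['X'] is ['<empty>', 'b'] and
# First['a'] is ['a'], then _first(('X', 'a')) skips past X (since it can
# derive empty) and returns ['b', 'a']. Only if every symbol of beta can
# derive empty is '<empty>' appended to the result.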
1641 def _first(self,beta):
1642
1643 # We are computing First(x1,x2,x3,...,xn)
1644 result = [ ]
1645 for x in beta:
1646 x_produces_empty = 0
1647
1648 # Add all the non-<empty> symbols of First[x] to the result.
1649 for f in self.First[x]:
1650 if f == '<empty>':
1651 x_produces_empty = 1
1652 else:
1653 if f not in result: result.append(f)
1654
1655 if x_produces_empty:
1656 # We have to consider the next x in beta,
1657 # i.e. stay in the loop.
1658 pass
1659 else:
1660 # We don't have to consider any further symbols in beta.
1661 break
1662 else:
1663 # There was no 'break' from the loop,
1664 # so x_produces_empty was true for all x in beta,
1665 # so beta produces empty as well.
1666 result.append('<empty>')
1667
1668 return result
1669
1670 # -------------------------------------------------------------------------
1671 # compute_first()
1672 #
1673 # Compute the value of FIRST1(X) for all symbols
1674 # -------------------------------------------------------------------------
1675 def compute_first(self):
1676 if self.First:
1677 return self.First
1678
1679 # Terminals:
1680 for t in self.Terminals:
1681 self.First[t] = [t]
1682
1683 self.First['$end'] = ['$end']
1684
1685 # Nonterminals:
1686
1687 # Initialize to the empty set:
1688 for n in self.Nonterminals:
1689 self.First[n] = []
1690
1691 # Then propagate symbols until no change:
1692 while 1:
1693 some_change = 0
1694 for n in self.Nonterminals:
1695 for p in self.Prodnames[n]:
1696 for f in self._first(p.prod):
1697 if f not in self.First[n]:
1698 self.First[n].append( f )
1699 some_change = 1
1700 if not some_change:
1701 break
1702
1703 return self.First
1704
1705 # ---------------------------------------------------------------------
1706 # compute_follow()
1707 #
1708 # Computes all of the follow sets for every non-terminal symbol. The
1709 # follow set is the set of all symbols that might follow a given
1710 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1711 # ---------------------------------------------------------------------
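#
# Worked sketch (illustrative grammar): for S -> ( S ) | <empty> with start
# symbol S, compute_follow() yields FOLLOW(S) = ['$end', ')']: '$end' because
# S is the start symbol, and ')' because ')' can appear immediately after S
# inside the first production.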
1712 def compute_follow(self,start=None):
1713 # If already computed, return the result
1714 if self.Follow:
1715 return self.Follow
1716
1717 # If first sets not computed yet, do that first.
1718 if not self.First:
1719 self.compute_first()
1720
1721 # Add '$end' to the follow list of the start symbol
1722 for k in self.Nonterminals:
1723 self.Follow[k] = [ ]
1724
1725 if not start:
1726 start = self.Productions[1].name
1727
1728 self.Follow[start] = [ '$end' ]
1729
1730 while 1:
1731 didadd = 0
1732 for p in self.Productions[1:]:
1733 # Here is the production set
1734 for i in range(len(p.prod)):
1735 B = p.prod[i]
1736 if B in self.Nonterminals:
1737 # Okay. We got a non-terminal in a production
1738 fst = self._first(p.prod[i+1:])
1739 hasempty = 0
1740 for f in fst:
1741 if f != '<empty>' and f not in self.Follow[B]:
1742 self.Follow[B].append(f)
1743 didadd = 1
1744 if f == '<empty>':
1745 hasempty = 1
1746 if hasempty or i == (len(p.prod)-1):
1747                         # Add elements of Follow(p.name) to Follow(B)
1748 for f in self.Follow[p.name]:
1749 if f not in self.Follow[B]:
1750 self.Follow[B].append(f)
1751 didadd = 1
1752 if not didadd: break
1753 return self.Follow
1754
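    # A worked example of the propagation above, done with the Grammar
    # helpers defined earlier in this module (a sketch: the toy grammar is
    # illustrative only, and add_production() is assumed to accept the
    # defaults for its func/file/line arguments, as seen in yacc() below):
    #
    #     g = Grammar(['a','b'])            # terminal names
    #     g.add_production('S', ['A','b'])
    #     g.add_production('A', ['a'])
    #     g.add_production('A', [])         # A -> <empty>
    #     g.set_start('S')
    #     g.compute_follow()
    #
    # In 'S -> A b', the FIRST set of what follows A is {b}, so 'b' enters
    # Follow(A); nothing but end-of-input ever follows S, so the fixed point
    # is:
    #
    #     g.Follow    # -> {'S': ['$end'], 'A': ['b']}
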
1755
1756     # -----------------------------------------------------------------------------
1757     # build_lritems()
1758     #
1759     # This function walks the list of productions and builds a complete set of the
1760     # LR items.  The LR items are stored in two ways:  First, they are uniquely
1761     # numbered and placed in the list _lritems.  Second, a linked list of LR items
1762     # is built for each production.  For example:
1763     #
1764     #   E -> E PLUS E
1765     #
1766     # Creates the list
1767     #
1768     #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1769     # -----------------------------------------------------------------------------
1770
1771 def build_lritems(self):
1772 for p in self.Productions:
1773 lastlri = p
1774 i = 0
1775 lr_items = []
1776 while 1:
1777 if i > len(p):
1778 lri = None
1779 else:
1780 lri = LRItem(p,i)
1781 # Precompute the list of productions immediately following
1782 try:
1783 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1784 except (IndexError,KeyError):
1785 lri.lr_after = []
1786 try:
1787 lri.lr_before = lri.prod[i-1]
1788 except IndexError:
1789 lri.lr_before = None
1790
1791 lastlri.lr_next = lri
1792 if not lri: break
1793 lr_items.append(lri)
1794 lastlri = lri
1795 i += 1
1796 p.lr_items = lr_items
1797
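    # Once build_lritems() has run, the linked list for a production such as
    # E -> E PLUS E can be walked like this (a sketch; p is the Production
    # object for that rule):
    #
    #     item = p.lr_next                  # E -> . E PLUS E
    #     while item:
    #         print(item, item.lr_before, len(item.lr_after))
    #         item = item.lr_next
    #
    # lr_before is the symbol just left of the dot (None at the start) and
    # lr_after lists the productions of the non-terminal just right of the
    # dot (empty when the dot faces a terminal or the end of the rule).
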
1798 # -----------------------------------------------------------------------------
1799 # == Class LRTable ==
1800 #
1801 # This class represents a table of LR parsing information.
1802 # Methods for generating the tables are not defined here. They are defined
1803 # in the derived class LRGeneratedTable.
1804 # -----------------------------------------------------------------------------
1805
1806 class VersionError(YaccError): pass
1807
1808 class LRTable(object):
1809 def __init__(self):
1810 self.lr_action = None
1811 self.lr_goto = None
1812 self.lr_productions = None
1813 self.lr_method = None
1814
1815 def read_table(self,module):
1816 if isinstance(module,types.ModuleType):
1817 parsetab = module
1818 else:
1819 if sys.version_info[0] < 3:
1820 exec("import %s as parsetab" % module)
1821 else:
1822 env = { }
1823 exec("import %s as parsetab" % module, env, env)
1824 parsetab = env['parsetab']
1825
1826 if parsetab._tabversion != __tabversion__:
1827 raise VersionError("yacc table file version is out of date")
1828
1829 self.lr_action = parsetab._lr_action
1830 self.lr_goto = parsetab._lr_goto
1831
1832 self.lr_productions = []
1833 for p in parsetab._lr_productions:
1834 self.lr_productions.append(MiniProduction(*p))
1835
1836 self.lr_method = parsetab._lr_method
1837 return parsetab._lr_signature
1838
1839 # Bind all production function names to callable objects in pdict
1840 def bind_callables(self,pdict):
1841 for p in self.lr_productions:
1842 p.bind(pdict)
1843
1844 # -----------------------------------------------------------------------------
1845 # === LR Generator ===
1846 #
1847 # The following classes and functions are used to generate LR parsing tables on
1848 # a grammar.
1849 # -----------------------------------------------------------------------------
1850
1851 # -----------------------------------------------------------------------------
1852 # digraph()
1853 # traverse()
1854 #
1855 # The following two functions are used to compute set valued functions
1856 # of the form:
1857 #
1858 # F(x) = F'(x) U U{F(y) | x R y}
1859 #
1860 # This is used to compute the values of Read() sets as well as FOLLOW sets
1861 # in LALR(1) generation.
1862 #
1863 # Inputs: X - An input set
1864 # R - A relation
1865 # FP - Set-valued function
1866 # ------------------------------------------------------------------------------
1867
1868 def digraph(X,R,FP):
1869 N = { }
1870 for x in X:
1871 N[x] = 0
1872 stack = []
1873 F = { }
1874 for x in X:
1875 if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
1876 return F
1877
1878 def traverse(x,N,stack,F,X,R,FP):
1879 stack.append(x)
1880 d = len(stack)
1881 N[x] = d
1882     F[x] = FP(x)             # F(x) <- F'(x)
1883
1884 rel = R(x) # Get y's related to x
1885 for y in rel:
1886 if N[y] == 0:
1887 traverse(y,N,stack,F,X,R,FP)
1888 N[x] = min(N[x],N[y])
1889 for a in F.get(y,[]):
1890 if a not in F[x]: F[x].append(a)
1891 if N[x] == d:
1892 N[stack[-1]] = MAXINT
1893 F[stack[-1]] = F[x]
1894 element = stack.pop()
1895 while element != x:
1896 N[stack[-1]] = MAXINT
1897 F[stack[-1]] = F[x]
1898 element = stack.pop()
1899
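# A tiny usage sketch of digraph()/traverse() with a toy relation (the
# values are illustrative, not taken from a real grammar).  With 1 R 2 and
# 2 R 3, each F(x) absorbs the F' values of everything reachable from x:
#
#     X  = [1, 2, 3]
#     R  = lambda x: {1: [2], 2: [3], 3: []}[x]
#     FP = lambda x: [10 * x]
#     digraph(X, R, FP)    # -> {1: [10, 20, 30], 2: [20, 30], 3: [30]}
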
1900 class LALRError(YaccError): pass
1901
1902 # -----------------------------------------------------------------------------
1903 # == LRGeneratedTable ==
1904 #
1905 # This class implements the LR table generation algorithm. There are no
1906 # public methods except for write()
1907 # -----------------------------------------------------------------------------
1908
1909 class LRGeneratedTable(LRTable):
1910 def __init__(self,grammar,method='LALR',log=None):
1911 if method not in ['SLR','LALR']:
1912 raise LALRError("Unsupported method %s" % method)
1913
1914 self.grammar = grammar
1915 self.lr_method = method
1916
1917 # Set up the logger
1918 if not log:
1919 log = NullLogger()
1920 self.log = log
1921
1922 # Internal attributes
1923 self.lr_action = {} # Action table
1924 self.lr_goto = {} # Goto table
1925         self.lr_productions  = grammar.Productions    # Copy of grammar Production array
1926 self.lr_goto_cache = {} # Cache of computed gotos
1927 self.lr0_cidhash = {} # Cache of closures
1928
1929 self._add_count = 0 # Internal counter used to detect cycles
1930
1931         # Diagnostic information filled in by the table generator
1932 self.sr_conflict = 0
1933 self.rr_conflict = 0
1934 self.conflicts = [] # List of conflicts
1935
1936 self.sr_conflicts = []
1937 self.rr_conflicts = []
1938
1939 # Build the tables
1940 self.grammar.build_lritems()
1941 self.grammar.compute_first()
1942 self.grammar.compute_follow()
1943 self.lr_parse_table()
1944
1945 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1946
1947 def lr0_closure(self,I):
1948 self._add_count += 1
1949
1950 # Add everything in I to J
1951 J = I[:]
1952 didadd = 1
1953 while didadd:
1954 didadd = 0
1955 for j in J:
1956 for x in j.lr_after:
1957 if getattr(x,"lr0_added",0) == self._add_count: continue
1958 # Add B --> .G to J
1959 J.append(x.lr_next)
1960 x.lr0_added = self._add_count
1961 didadd = 1
1962
1963 return J
1964
1965 # Compute the LR(0) goto function goto(I,X) where I is a set
1966 # of LR(0) items and X is a grammar symbol. This function is written
1967 # in a way that guarantees uniqueness of the generated goto sets
1968 # (i.e. the same goto set will never be returned as two different Python
1969 # objects). With uniqueness, we can later do fast set comparisons using
1970 # id(obj) instead of element-wise comparison.
1971
1972 def lr0_goto(self,I,x):
1973 # First we look for a previously cached entry
1974 g = self.lr_goto_cache.get((id(I),x),None)
1975 if g: return g
1976
1977 # Now we generate the goto set in a way that guarantees uniqueness
1978 # of the result
1979
1980 s = self.lr_goto_cache.get(x,None)
1981 if not s:
1982 s = { }
1983 self.lr_goto_cache[x] = s
1984
1985 gs = [ ]
1986 for p in I:
1987 n = p.lr_next
1988 if n and n.lr_before == x:
1989 s1 = s.get(id(n),None)
1990 if not s1:
1991 s1 = { }
1992 s[id(n)] = s1
1993 gs.append(n)
1994 s = s1
1995 g = s.get('$end',None)
1996 if not g:
1997 if gs:
1998 g = self.lr0_closure(gs)
1999 s['$end'] = g
2000 else:
2001 s['$end'] = gs
2002 self.lr_goto_cache[(id(I),x)] = g
2003 return g
2004
2005 # Compute the LR(0) sets of item function
2006 def lr0_items(self):
2007
2008 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
2009 i = 0
2010 for I in C:
2011 self.lr0_cidhash[id(I)] = i
2012 i += 1
2013
2014         # Loop over the items in C and each grammar symbol
2015 i = 0
2016 while i < len(C):
2017 I = C[i]
2018 i += 1
2019
2020 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2021 asyms = { }
2022 for ii in I:
2023 for s in ii.usyms:
2024 asyms[s] = None
2025
2026 for x in asyms:
2027 g = self.lr0_goto(I,x)
2028 if not g: continue
2029 if id(g) in self.lr0_cidhash: continue
2030 self.lr0_cidhash[id(g)] = len(C)
2031 C.append(g)
2032
2033 return C
2034
2035     # -----------------------------------------------------------------------------
2036     #                           ==== LALR(1) Parsing ====
2037     #
2038     # LALR(1) parsing is almost exactly the same as SLR except that instead of
2039     # relying upon Follow() sets when performing reductions, a more selective
2040     # lookahead set that incorporates the state of the LR(0) machine is utilized.
2041     # Thus, we mainly just have to focus on calculating the lookahead sets.
2042     #
2043     # The method used here is due to DeRemer and Pennello (1982).
2044     #
2045     #      DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2046     #          Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2047     #          Vol. 4, No. 4, Oct. 1982, pp. 615-649
2048     #
2049     # Further details can also be found in:
2050     #
2051     #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2052     #      McGraw-Hill Book Company, (1985).
2053     #
2054     # -----------------------------------------------------------------------------
2055
2056     # -----------------------------------------------------------------------------
2057     # compute_nullable_nonterminals()
2058     #
2059     # Creates a dictionary containing all of the non-terminals that might produce
2060     # an empty production.
2061     # -----------------------------------------------------------------------------
2062
2063 def compute_nullable_nonterminals(self):
2064 nullable = {}
2065 num_nullable = 0
2066 while 1:
2067 for p in self.grammar.Productions[1:]:
2068 if p.len == 0:
2069 nullable[p.name] = 1
2070 continue
2071 for t in p.prod:
2072 if not t in nullable: break
2073 else:
2074 nullable[p.name] = 1
2075 if len(nullable) == num_nullable: break
2076 num_nullable = len(nullable)
2077 return nullable
2078
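    # For example, with the toy productions A -> <empty>, B -> A A, and
    # C -> B c (illustrative names only): the first pass marks A (empty
    # right-hand side), the second pass marks B (every symbol of 'A A' is
    # now nullable), and C is never marked because 'c' is a terminal.  A
    # final pass sees no growth and the method returns {'A': 1, 'B': 1}.
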
2079     # -----------------------------------------------------------------------------
2080     # find_nonterminal_transitions(C)
2081     #
2082     # Given a set of LR(0) items, this function finds all of the non-terminal
2083     # transitions.  These are transitions in which a dot appears immediately before
2084     # a non-terminal.  Returns a list of tuples of the form (state,N) where state
2085     # is the state number and N is the nonterminal symbol.
2086     #
2087     # The input C is the set of LR(0) items.
2088     # -----------------------------------------------------------------------------
2089
2090 def find_nonterminal_transitions(self,C):
2091 trans = []
2092 for state in range(len(C)):
2093 for p in C[state]:
2094 if p.lr_index < p.len - 1:
2095 t = (state,p.prod[p.lr_index+1])
2096 if t[1] in self.grammar.Nonterminals:
2097 if t not in trans: trans.append(t)
2098 state = state + 1
2099 return trans
2100
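    # For instance, a state whose item set contains 'S -> ( . E )' would
    # contribute the tuple (state,'E'), since the dot sits immediately
    # before the non-terminal E, while an item like 'E -> E . PLUS E'
    # contributes nothing because PLUS is a terminal (a toy grammar, for
    # illustration only).
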
2101     # -----------------------------------------------------------------------------
2102     # dr_relation()
2103     #
2104     # Computes the DR(p,A) relationships for non-terminal transitions.  The input
2105     # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2106     #
2107     # Returns a list of terminals.
2108     # -----------------------------------------------------------------------------
2109
2110 def dr_relation(self,C,trans,nullable):
2111 dr_set = { }
2112 state,N = trans
2113 terms = []
2114
2115 g = self.lr0_goto(C[state],N)
2116 for p in g:
2117 if p.lr_index < p.len - 1:
2118 a = p.prod[p.lr_index+1]
2119 if a in self.grammar.Terminals:
2120 if a not in terms: terms.append(a)
2121
2122 # This extra bit is to handle the start state
2123 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2124 terms.append('$end')
2125
2126 return terms
2127
2128     # -----------------------------------------------------------------------------
2129     # reads_relation()
2130     #
2131     # Computes the READS() relation (p,A) READS (t,C).
2132     # -----------------------------------------------------------------------------
2133
2134 def reads_relation(self,C, trans, empty):
2135 # Look for empty transitions
2136 rel = []
2137 state, N = trans
2138
2139 g = self.lr0_goto(C[state],N)
2140 j = self.lr0_cidhash.get(id(g),-1)
2141 for p in g:
2142 if p.lr_index < p.len - 1:
2143 a = p.prod[p.lr_index + 1]
2144 if a in empty:
2145 rel.append((j,a))
2146
2147 return rel
2148
2149     # -----------------------------------------------------------------------------
2150 # compute_lookback_includes()
2151 #
2152 # Determines the lookback and includes relations
2153 #
2154 # LOOKBACK:
2155 #
2156 # This relation is determined by running the LR(0) state machine forward.
2157 # For example, starting with a production "N : . A B C", we run it forward
2158 # to obtain "N : A B C ." We then build a relationship between this final
2159     # state and the starting state.  These relationships are stored in a dictionary
2160 # lookdict.
2161 #
2162 # INCLUDES:
2163 #
2164 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2165 #
2166 # This relation is used to determine non-terminal transitions that occur
2167 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2168 # if the following holds:
2169 #
2170 # B -> LAT, where T -> epsilon and p' -L-> p
2171 #
2172 # L is essentially a prefix (which may be empty), T is a suffix that must be
2173     # able to derive an empty string.  State p' must lead to state p with the string L.
2174 #
2175     # -----------------------------------------------------------------------------
2176
2177 def compute_lookback_includes(self,C,trans,nullable):
2178
2179 lookdict = {} # Dictionary of lookback relations
2180 includedict = {} # Dictionary of include relations
2181
2182 # Make a dictionary of non-terminal transitions
2183 dtrans = {}
2184 for t in trans:
2185 dtrans[t] = 1
2186
2187 # Loop over all transitions and compute lookbacks and includes
2188 for state,N in trans:
2189 lookb = []
2190 includes = []
2191 for p in C[state]:
2192 if p.name != N: continue
2193
2194 # Okay, we have a name match. We now follow the production all the way
2195 # through the state machine until we get the . on the right hand side
2196
2197 lr_index = p.lr_index
2198 j = state
2199 while lr_index < p.len - 1:
2200 lr_index = lr_index + 1
2201 t = p.prod[lr_index]
2202
2203 # Check to see if this symbol and state are a non-terminal transition
2204 if (j,t) in dtrans:
2205                         # Yes.  Okay, there is some chance that this is an includes relation
2206                         # the only way to know for certain is whether the rest of the
2207 # production derives empty
2208
2209 li = lr_index + 1
2210 while li < p.len:
2211 if p.prod[li] in self.grammar.Terminals: break # No forget it
2212 if not p.prod[li] in nullable: break
2213 li = li + 1
2214 else:
2215                             # Appears to be a relation between (j,t) and (state,N)
2216 includes.append((j,t))
2217
2218 g = self.lr0_goto(C[j],t) # Go to next set
2219 j = self.lr0_cidhash.get(id(g),-1) # Go to next state
2220
2221 # When we get here, j is the final state, now we have to locate the production
2222 for r in C[j]:
2223 if r.name != p.name: continue
2224 if r.len != p.len: continue
2225 i = 0
2226                 # This loop is comparing a production ". A B C" with "A B C ."
2227 while i < r.lr_index:
2228 if r.prod[i] != p.prod[i+1]: break
2229 i = i + 1
2230 else:
2231 lookb.append((j,r))
2232 for i in includes:
2233 if not i in includedict: includedict[i] = []
2234 includedict[i].append((state,N))
2235 lookdict[(state,N)] = lookb
2236
2237 return lookdict,includedict
2238
2239     # -----------------------------------------------------------------------------
2240 # compute_read_sets()
2241 #
2242 # Given a set of LR(0) items, this function computes the read sets.
2243 #
2244 # Inputs: C = Set of LR(0) items
2245 # ntrans = Set of nonterminal transitions
2246 # nullable = Set of empty transitions
2247 #
2248 # Returns a set containing the read sets
2249     # -----------------------------------------------------------------------------
2250
2251 def compute_read_sets(self,C, ntrans, nullable):
2252 FP = lambda x: self.dr_relation(C,x,nullable)
2253 R = lambda x: self.reads_relation(C,x,nullable)
2254 F = digraph(ntrans,R,FP)
2255 return F
2256
2257     # -----------------------------------------------------------------------------
2258 # compute_follow_sets()
2259 #
2260 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2261 # and an include set, this function computes the follow sets
2262 #
2263 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2264 #
2265 # Inputs:
2266 # ntrans = Set of nonterminal transitions
2267 # readsets = Readset (previously computed)
2268 # inclsets = Include sets (previously computed)
2269 #
2270 # Returns a set containing the follow sets
2271     # -----------------------------------------------------------------------------
2272
2273 def compute_follow_sets(self,ntrans,readsets,inclsets):
2274 FP = lambda x: readsets[x]
2275 R = lambda x: inclsets.get(x,[])
2276 F = digraph(ntrans,R,FP)
2277 return F
2278
2279     # -----------------------------------------------------------------------------
2280 # add_lookaheads()
2281 #
2282 # Attaches the lookahead symbols to grammar rules.
2283 #
2284 # Inputs: lookbacks - Set of lookback relations
2285 # followset - Computed follow set
2286 #
2287 # This function directly attaches the lookaheads to productions contained
2288 # in the lookbacks set
2289     # -----------------------------------------------------------------------------
2290
2291 def add_lookaheads(self,lookbacks,followset):
2292 for trans,lb in lookbacks.items():
2293 # Loop over productions in lookback
2294 for state,p in lb:
2295 if not state in p.lookaheads:
2296 p.lookaheads[state] = []
2297 f = followset.get(trans,[])
2298 for a in f:
2299                 if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
2300
2301     # -----------------------------------------------------------------------------
2302 # add_lalr_lookaheads()
2303 #
2304 # This function does all of the work of adding lookahead information for use
2305 # with LALR parsing
2306     # -----------------------------------------------------------------------------
2307
2308 def add_lalr_lookaheads(self,C):
2309 # Determine all of the nullable nonterminals
2310 nullable = self.compute_nullable_nonterminals()
2311
2312 # Find all non-terminal transitions
2313 trans = self.find_nonterminal_transitions(C)
2314
2315 # Compute read sets
2316 readsets = self.compute_read_sets(C,trans,nullable)
2317
2318 # Compute lookback/includes relations
2319 lookd, included = self.compute_lookback_includes(C,trans,nullable)
2320
2321 # Compute LALR FOLLOW sets
2322 followsets = self.compute_follow_sets(trans,readsets,included)
2323
2324 # Add all of the lookaheads
2325 self.add_lookaheads(lookd,followsets)
2326
2327     # -----------------------------------------------------------------------------
2328 # lr_parse_table()
2329 #
2330 # This function constructs the parse tables for SLR or LALR
2331     # -----------------------------------------------------------------------------
2332 def lr_parse_table(self):
2333 goto = self.lr_goto # Goto array
2334 action = self.lr_action # Action array
2335 log = self.log # Logger for output
2336
2337 actionp = { } # Action production array (temporary)
2338
2339 log.info("Parsing method: %s", self.lr_method)
2340
2341 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2342 # This determines the number of states
2343
2344 C = self.lr0_items()
2345
2346 if self.lr_method == 'LALR':
2347 self.add_lalr_lookaheads(C)
2348
2349 # Build the parser table, state by state
2350 st = 0
2351 for I in C:
2352 # Loop over each production in I
2353 actlist = [ ] # List of actions
2354 st_action = { }
2355 st_actionp = { }
2356 st_goto = { }
2357 log.info("")
2358 log.info("state %d", st)
2359 log.info("")
2360 for p in I:
2361 log.info(" (%d) %s", p.number, str(p))
2362 log.info("")
2363
2364 for p in I:
2365 if p.len == p.lr_index + 1:
2366 if p.name == "S'":
2367 # Start symbol. Accept!
2368 st_action["$end"] = 0
2369 st_actionp["$end"] = p
2370 else:
2371 # We are at the end of a production. Reduce!
2372 if self.lr_method == 'LALR':
2373 laheads = p.lookaheads[st]
2374 else:
2375 laheads = self.grammar.Follow[p.name]
2376 for a in laheads:
2377 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
2378 r = st_action.get(a,None)
2379 if r is not None:
2380 # Whoa. Have a shift/reduce or reduce/reduce conflict
2381 if r > 0:
2382 # Need to decide on shift or reduce here
2383 # By default we favor shifting. Need to add
2384 # some precedence rules here.
2385                                     sprec,slevel = self.grammar.Productions[st_actionp[a].number].prec
2386                                     rprec,rlevel = self.grammar.Precedence.get(a,('right',0))
2387                                     if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2388                                         # We really need to reduce here.
2389                                         st_action[a] = -p.number
2390                                         st_actionp[a] = p
2391                                         if not slevel and not rlevel:
2392                                             log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
2393                                             self.sr_conflicts.append((st,a,'reduce'))
2394                                     elif (slevel == rlevel) and (rprec == 'nonassoc'):
2395                                         st_action[a] = None
2396                                     else:
2397                                         # Hmmm. Guess we'll keep the shift
2398                                         if not rlevel:
2399                                             log.info("  ! shift/reduce conflict for %s resolved as shift",a)
2400                                             self.sr_conflicts.append((st,a,'shift'))
2401 elif r < 0:
2402                                     # Reduce/reduce conflict.  In this case, we favor the rule
2403 # that was defined first in the grammar file
2404 oldp = self.grammar.Productions[-r]
2405 pp = self.grammar.Productions[p.number]
2406 if oldp.line > pp.line:
2407 st_action[a] = -p.number
2408 st_actionp[a] = p
2409 chosenp,rejectp = pp,oldp
2410 else:
2411 chosenp,rejectp = oldp,pp
2412                                     self.rr_conflicts.append((st,chosenp,rejectp))
2413 log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
2414 else:
2415                                     raise LALRError("Unknown conflict in state %d" % st)
2416 else:
2417 st_action[a] = -p.number
2418 st_actionp[a] = p
2419 else:
2420 i = p.lr_index
2421 a = p.prod[i+1] # Get symbol right after the "."
2422 if a in self.grammar.Terminals:
2423 g = self.lr0_goto(I,a)
2424 j = self.lr0_cidhash.get(id(g),-1)
2425 if j >= 0:
2426 # We are in a shift state
2427 actlist.append((a,p,"shift and go to state %d" % j))
2428 r = st_action.get(a,None)
2429 if r is not None:
2430                             # Whoa have a shift/reduce or shift/shift conflict
2431                             if r > 0:
2432                                 if r != j:
2433                                     raise LALRError("Shift/shift conflict in state %d" % st)
2434                             elif r < 0:
2435                                 # Do a precedence check.
2436                                 #   -  if precedence of reduce rule is higher, we reduce.
2437                                 #   -  if precedence of reduce is same and left assoc, we reduce.
2438                                 #   -  otherwise we shift
2439                                 rprec,rlevel = self.grammar.Productions[st_actionp[a].number].prec
2440                                 sprec,slevel = self.grammar.Precedence.get(a,('right',0))
2441                                 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2442                                     # We decide to shift here... highest precedence to shift
2443                                     st_action[a] = j
2444                                     st_actionp[a] = p
2445                                     if not rlevel:
2446                                         log.info("  ! shift/reduce conflict for %s resolved as shift",a)
2447                                         self.sr_conflicts.append((st,a,'shift'))
2448                                 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2449                                     st_action[a] = None
2450                                 else:
2451                                     # Hmmm. Guess we'll keep the reduce
2452                                     if not slevel and not rlevel:
2453                                         log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
2454                                         self.sr_conflicts.append((st,a,'reduce'))
2455 
2456                             else:
2457                                 raise LALRError("Unknown conflict in state %d" % st)
2458 else:
2459 st_action[a] = j
2460 st_actionp[a] = p
2461
2462 # Print the actions associated with each terminal
2463 _actprint = { }
2464 for a,p,m in actlist:
2465 if a in st_action:
2466 if p is st_actionp[a]:
2467 log.info(" %-15s %s",a,m)
2468 _actprint[(a,m)] = 1
2469 log.info("")
2470 # Print the actions that were not used. (debugging)
2471 not_used = 0
2472 for a,p,m in actlist:
2473 if a in st_action:
2474 if p is not st_actionp[a]:
2475 if not (a,m) in _actprint:
2476 log.debug(" ! %-15s [ %s ]",a,m)
2477 not_used = 1
2478 _actprint[(a,m)] = 1
2479 if not_used:
2480 log.debug("")
2481
2482 # Construct the goto table for this state
2483
2484 nkeys = { }
2485 for ii in I:
2486 for s in ii.usyms:
2487 if s in self.grammar.Nonterminals:
2488 nkeys[s] = None
2489 for n in nkeys:
2490 g = self.lr0_goto(I,n)
2491 j = self.lr0_cidhash.get(id(g),-1)
2492 if j >= 0:
2493 st_goto[n] = j
2494 log.info(" %-30s shift and go to state %d",n,j)
2495
2496 action[st] = st_action
2497 actionp[st] = st_actionp
2498 goto[st] = st_goto
2499 st += 1
2500
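    # The encoding of the finished tables: for t = lr_action[state][token],
    # t > 0 means "shift and go to state t", t < 0 means "reduce by
    # production number -t", t == 0 means "accept", and a missing entry is
    # a syntax error.  A driver consults the tables roughly like this (a
    # sketch only; the real driver is LRParser.parse(), defined earlier in
    # this module):
    #
    #     t = lr_action[state].get(tok.type)
    #     if t is None:  ...   # syntax error
    #     elif t > 0:    ...   # shift tok, state = t
    #     elif t < 0:    ...   # reduce by lr_productions[-t], consult lr_goto
    #     else:          ...   # accept
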
2501
2502     # -----------------------------------------------------------------------------
2503     # write()
2504     #
2505     # This function writes the LR parsing tables to a file
2506     # -----------------------------------------------------------------------------
2507
2508 def write_table(self,modulename,outputdir='',signature=""):
2509 basemodulename = modulename.split(".")[-1]
2510 filename = os.path.join(outputdir,basemodulename) + ".py"
2511 try:
2512 f = open(filename,"w")
2513
2514 f.write("""
2515 # %s
2516 # This file is automatically generated. Do not edit.
2517 _tabversion = %r
2518
2519 _lr_method = %r
2520
2521 _lr_signature = %r
2522 """ % (filename, __tabversion__, self.lr_method, signature))
2523
2524 # Change smaller to 0 to go back to original tables
2525 smaller = 1
2526
2527 # Factor out names to try and make smaller
2528 if smaller:
2529 items = { }
2530
2531 for s,nd in self.lr_action.items():
2532 for name,v in nd.items():
2533 i = items.get(name)
2534 if not i:
2535 i = ([],[])
2536 items[name] = i
2537 i[0].append(s)
2538 i[1].append(v)
2539
2540 f.write("\n_lr_action_items = {")
2541 for k,v in items.items():
2542 f.write("%r:([" % k)
2543 for i in v[0]:
2544 f.write("%r," % i)
2545 f.write("],[")
2546 for i in v[1]:
2547 f.write("%r," % i)
2548
2549 f.write("]),")
2550 f.write("}\n")
2551
2552 f.write("""
2553 _lr_action = { }
2554 for _k, _v in _lr_action_items.items():
2555 for _x,_y in zip(_v[0],_v[1]):
2556 if not _x in _lr_action: _lr_action[_x] = { }
2557 _lr_action[_x][_k] = _y
2558 del _lr_action_items
2559 """)
2560
2561 else:
2562 f.write("\n_lr_action = { ");
2563 for k,v in self.lr_action.items():
2564 f.write("(%r,%r):%r," % (k[0],k[1],v))
2565 f.write("}\n");
2566
2567 if smaller:
2568 # Factor out names to try and make smaller
2569 items = { }
2570
2571 for s,nd in self.lr_goto.items():
2572 for name,v in nd.items():
2573 i = items.get(name)
2574 if not i:
2575 i = ([],[])
2576 items[name] = i
2577 i[0].append(s)
2578 i[1].append(v)
2579
2580 f.write("\n_lr_goto_items = {")
2581 for k,v in items.items():
2582 f.write("%r:([" % k)
2583 for i in v[0]:
2584 f.write("%r," % i)
2585 f.write("],[")
2586 for i in v[1]:
2587 f.write("%r," % i)
2588
2589 f.write("]),")
2590 f.write("}\n")
2591
2592 f.write("""
2593 _lr_goto = { }
2594 for _k, _v in _lr_goto_items.items():
2595 for _x,_y in zip(_v[0],_v[1]):
2596 if not _x in _lr_goto: _lr_goto[_x] = { }
2597 _lr_goto[_x][_k] = _y
2598 del _lr_goto_items
2599 """)
2600 else:
2601 f.write("\n_lr_goto = { ");
2602 for k,v in self.lr_goto.items():
2603 f.write("(%r,%r):%r," % (k[0],k[1],v))
2604 f.write("}\n");
2605
2606 # Write production table
2607 f.write("_lr_productions = [\n")
2608 for p in self.lr_productions:
2609 if p.func:
2610                 f.write("    (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
2611             else:
2612                 f.write("    (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
2613 f.write("]\n")
2614 f.close()
2615
2616 except IOError:
2617 e = sys.exc_info()[1]
2618 sys.stderr.write("Unable to create '%s'\n" % filename)
2619 sys.stderr.write(str(e)+"\n")
2620 return
2621
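    # Trimmed to its essentials, the module written above is plain Python of
    # roughly this shape (all values are illustrative):
    #
    #     _tabversion = ...                # must match __tabversion__
    #     _lr_method = 'LALR'
    #     _lr_signature = ...              # crc32 signature of the grammar
    #     _lr_action_items = {'NUMBER': ([0, 3], [4, 4]), ...}
    #     _lr_action = { }                 # rebuilt into state -> {token: action}
    #     _lr_goto_items = {...}
    #     _lr_productions = [...]
    #
    # LRTable.read_table() above imports such a module and checks
    # _tabversion, and yacc() below compares _lr_signature against the
    # current grammar's signature before trusting the tables.
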
2622
2623 # -----------------------------------------------------------------------------
2624 # === INTROSPECTION ===
2625 #
2626 # The following functions and classes are used to implement the PLY
2627 # introspection features followed by the yacc() function itself.
2628 # -----------------------------------------------------------------------------
2629
2630 # -----------------------------------------------------------------------------
2631 # get_caller_module_dict()
2632 #
2633 # This function returns a dictionary containing all of the symbols defined within
2634 # a caller further down the call stack. This is used to get the environment
2635 # associated with the yacc() call if none was provided.
2636 # -----------------------------------------------------------------------------
2637
2638 def get_caller_module_dict(levels):
2639 try:
2640 raise RuntimeError
2641 except RuntimeError:
2642 e,b,t = sys.exc_info()
2643 f = t.tb_frame
2644 while levels > 0:
2645 f = f.f_back
2646 levels -= 1
2647 ldict = f.f_globals.copy()
2648 if f.f_globals != f.f_locals:
2649 ldict.update(f.f_locals)
2650
2651 return ldict
2652
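# A small sketch of the frame walk (the build() function below is
# hypothetical): with levels=1 the dictionary of the immediate caller comes
# back, locals layered over globals.
#
#     def build():
#         start = 'expr'
#         return get_caller_module_dict(1)['start']    # -> 'expr'
#
# yacc() below passes levels=2 so that, when called with no module=
# argument, it reaches past its own frame to the module that defines the
# p_* rules.
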
2653 # -----------------------------------------------------------------------------
2654 # parse_grammar()
2655 #
2656 # This takes a raw grammar rule string and parses it into production data
2657 # -----------------------------------------------------------------------------
2658 def parse_grammar(doc,file,line):
2659 grammar = []
2660 # Split the doc string into lines
2661 pstrings = doc.splitlines()
2662 lastp = None
2663 dline = line
2664 for ps in pstrings:
2665 dline += 1
2666 p = ps.split()
2667 if not p: continue
2668 try:
2669 if p[0] == '|':
2670 # This is a continuation of a previous rule
2671 if not lastp:
2672 raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
2673 prodname = lastp
2674 syms = p[1:]
2675 else:
2676 prodname = p[0]
2677 lastp = prodname
2678 syms = p[2:]
2679 assign = p[1]
2680 if assign != ':' and assign != '::=':
2681                     raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
2682
2683 grammar.append((file,dline,prodname,syms))
2684 except SyntaxError:
2685 raise
2686 except Exception:
2687             raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))
2688
2689 return grammar
2690
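# For example, a rule docstring such as
#
#     expression : expression PLUS term
#                | term
#
# yields the tuples (file, line, 'expression', ['expression','PLUS','term'])
# and (file, line, 'expression', ['term']); a '|' line reuses the rule name
# of the previous line, which is why a leading '|' with no prior rule is
# rejected above.
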
2691 # -----------------------------------------------------------------------------
2692 # ParserReflect()
2693 #
2694 # This class represents information extracted for building a parser including
2695 # start symbol, error function, tokens, precedence list, action functions,
2696 # etc.
2697 # -----------------------------------------------------------------------------
2698 class ParserReflect(object):
2699 def __init__(self,pdict,log=None):
2700 self.pdict = pdict
2701 self.start = None
2702 self.error_func = None
2703 self.tokens = None
2704 self.files = {}
2705 self.grammar = []
2706 self.error = 0
2707
2708 if log is None:
2709 self.log = PlyLogger(sys.stderr)
2710 else:
2711 self.log = log
2712
2713 # Get all of the basic information
2714 def get_all(self):
2715 self.get_start()
2716 self.get_error_func()
2717 self.get_tokens()
2718 self.get_precedence()
2719 self.get_pfunctions()
2720
2721 # Validate all of the information
2722 def validate_all(self):
2723 self.validate_start()
2724 self.validate_error_func()
2725 self.validate_tokens()
2726 self.validate_precedence()
2727 self.validate_pfunctions()
2728 self.validate_files()
2729 return self.error
2730
2731 # Compute a signature over the grammar
2732 def signature(self):
2733 from binascii import crc32
2734 sig = 0
2735 try:
2736 if self.start:
2737 sig = crc32(self.start.encode('latin-1'),sig)
2738 if self.prec:
2739                 sig = crc32("".join(["".join(p) for p in self.prec]).encode('latin-1'),sig)
2740 if self.tokens:
2741 sig = crc32(" ".join(self.tokens).encode('latin-1'),sig)
2742 for f in self.pfuncs:
2743 if f[3]:
2744 sig = crc32(f[3].encode('latin-1'),sig)
2745 except (TypeError,ValueError):
2746 pass
2747 return sig
2748
2749     # -----------------------------------------------------------------------------
2750     # validate_files()
2751     #
2752     # This method checks to see if there are duplicated p_rulename() functions
2753     # in the parser module file.  Without this function, it is really easy for
2754     # users to make mistakes by cutting and pasting code fragments (and it's a real
2755     # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
2756     # we just do a little regular expression pattern matching of def statements
2757     # to try and detect duplicates.
2758     # -----------------------------------------------------------------------------
2759
2760 def validate_files(self):
2761 # Match def p_funcname(
2762 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2763
2764 for filename in self.files.keys():
2765 base,ext = os.path.splitext(filename)
2766 if ext != '.py': return 1 # No idea. Assume it's okay.
2767
2768 try:
2769 f = open(filename)
2770 lines = f.readlines()
2771 f.close()
2772 except IOError:
2773 continue
2774
2775 counthash = { }
2776 for linen,l in enumerate(lines):
2777 linen += 1
2778 m = fre.match(l)
2779 if m:
2780 name = m.group(1)
2781 prev = counthash.get(name)
2782 if not prev:
2783 counthash[name] = linen
2784 else:
2785                         self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
2786
2787 # Get the start symbol
2788 def get_start(self):
2789 self.start = self.pdict.get('start')
2790
2791 # Validate the start symbol
2792 def validate_start(self):
2793 if self.start is not None:
2794 if not isinstance(self.start,str):
2795 self.log.error("'start' must be a string")
2796
2797 # Look for error handler
2798 def get_error_func(self):
2799 self.error_func = self.pdict.get('p_error')
2800
2801 # Validate the error function
2802 def validate_error_func(self):
2803 if self.error_func:
2804 if isinstance(self.error_func,types.FunctionType):
2805 ismethod = 0
2806 elif isinstance(self.error_func, types.MethodType):
2807 ismethod = 1
2808 else:
2809                 self.log.error("'p_error' defined, but is not a function or method")
2810 self.error = 1
2811 return
2812
2813 eline = func_code(self.error_func).co_firstlineno
2814 efile = func_code(self.error_func).co_filename
2815 self.files[efile] = 1
2816
2817 if (func_code(self.error_func).co_argcount != 1+ismethod):
2818                 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
2819 self.error = 1
2820
2821 # Get the tokens map
2822 def get_tokens(self):
2823 tokens = self.pdict.get("tokens",None)
2824 if not tokens:
2825 self.log.error("No token list is defined")
2826 self.error = 1
2827 return
2828
2829 if not isinstance(tokens,(list, tuple)):
2830 self.log.error("tokens must be a list or tuple")
2831 self.error = 1
2832 return
2833
2834 if not tokens:
2835 self.log.error("tokens is empty")
2836 self.error = 1
2837 return
2838
2839 self.tokens = tokens
2840
2841 # Validate the tokens
2842 def validate_tokens(self):
2843 # Validate the tokens.
2844 if 'error' in self.tokens:
2845 self.log.error("Illegal token name 'error'. Is a reserved word")
2846 self.error = 1
2847 return
2848
2849 terminals = {}
2850 for n in self.tokens:
2851 if n in terminals:
2852 self.log.warning("Token '%s' multiply defined", n)
2853 terminals[n] = 1
2854
2855 # Get the precedence map (if any)
2856 def get_precedence(self):
2857 self.prec = self.pdict.get("precedence",None)
2858
2859 # Validate and parse the precedence map
2860 def validate_precedence(self):
2861 preclist = []
2862 if self.prec:
2863 if not isinstance(self.prec,(list,tuple)):
2864 self.log.error("precedence must be a list or tuple")
2865 self.error = 1
2866 return
2867 for level,p in enumerate(self.prec):
2868 if not isinstance(p,(list,tuple)):
2869 self.log.error("Bad precedence table")
2870 self.error = 1
2871 return
2872
2873 if len(p) < 2:
2874                     self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
2875 self.error = 1
2876 return
2877 assoc = p[0]
2878 if not isinstance(assoc,str):
2879 self.log.error("precedence associativity must be a string")
2880 self.error = 1
2881 return
2882 for term in p[1:]:
2883 if not isinstance(term,str):
2884 self.log.error("precedence items must be strings")
2885 self.error = 1
2886 return
2887 preclist.append((term,assoc,level+1))
2888 self.preclist = preclist
2889
2890 # Get all p_functions from the grammar
2891 def get_pfunctions(self):
2892 p_functions = []
2893 for name, item in self.pdict.items():
2894 if name[:2] != 'p_': continue
2895 if name == 'p_error': continue
2896 if isinstance(item,(types.FunctionType,types.MethodType)):
2897 line = func_code(item).co_firstlineno
2898 file = func_code(item).co_filename
2899 p_functions.append((line,file,name,item.__doc__))
2900
2901 # Sort all of the actions by line number
2902 p_functions.sort()
2903 self.pfuncs = p_functions
2904
2905
2906 # Validate all of the p_functions
2907 def validate_pfunctions(self):
2908 grammar = []
2909 # Check for non-empty symbols
2910 if len(self.pfuncs) == 0:
2911 self.log.error("no rules of the form p_rulename are defined")
2912 self.error = 1
2913 return
2914
2915 for line, file, name, doc in self.pfuncs:
2916 func = self.pdict[name]
2917 if isinstance(func, types.MethodType):
2918 reqargs = 2
2919 else:
2920 reqargs = 1
2921 if func_code(func).co_argcount > reqargs:
2922                 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
2923                 self.error = 1
2924             elif func_code(func).co_argcount < reqargs:
2925                 self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
2926                 self.error = 1
2927             elif not func.__doc__:
2928                 self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
2929 else:
2930 try:
2931 parsed_g = parse_grammar(doc,file,line)
2932 for g in parsed_g:
2933 grammar.append((name, g))
2934 except SyntaxError:
2935 e = sys.exc_info()[1]
2936 self.log.error(str(e))
2937 self.error = 1
2938
2939 # Looks like a valid grammar rule
2940 # Mark the file in which defined.
2941 self.files[file] = 1
2942
2943         # Secondary validation step that looks for p_ definitions that are not functions
2944 # or functions that look like they might be grammar rules.
2945
2946 for n,v in self.pdict.items():
2947             if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
2948 if n[0:2] == 't_': continue
2949 if n[0:2] == 'p_' and n != 'p_error':
2950 self.log.warning("'%s' not defined as a function", n)
2951             if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
2952 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
2953 try:
2954 doc = v.__doc__.split(" ")
2955 if doc[1] == ':':
2956                     self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
2957                                      func_code(v).co_filename, func_code(v).co_firstlineno,n)
2958 except Exception:
2959 pass
2960
2961 self.grammar = grammar
2962
2963 # -----------------------------------------------------------------------------
2964 # yacc(module)
2965 #
2966 # Build a parser
2967 # -----------------------------------------------------------------------------
2968
2969 def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
2970          check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file, outputdir='',
2971 debuglog=None, errorlog = None):
2972
2973     global parse                 # Reference to the parsing method of the last built parser
2974
2975 if errorlog is None:
2976 errorlog = PlyLogger(sys.stderr)
2977
2978 # Get the module dictionary used for the parser
2979 if module:
2980 _items = [(k,getattr(module,k)) for k in dir(module)]
2981 pdict = dict(_items)
2982 else:
2983 pdict = get_caller_module_dict(2)
2984
2985 # Collect parser information from the dictionary
2986 pinfo = ParserReflect(pdict,log=errorlog)
2987 pinfo.get_all()
2988
2989 if pinfo.error:
2990 raise YaccError("Unable to build parser")
2991
2992 # Check signature against table files (if any)
2993 signature = pinfo.signature()
2994
2995 # Read the tables
2996 try:
2997 lr = LRTable()
2998 read_signature = lr.read_table(tabmodule)
2999 if optimize or (read_signature == signature):
3000 try:
3001 lr.bind_callables(pinfo.pdict)
3002 parser = LRParser(lr,pinfo.error_func)
3003 parse = parser.parse
3004 return parser
3005 except Exception:
3006 e = sys.exc_info()[1]
3007                 errorlog.warning("There was a problem loading the table file: %s", repr(e))
3008 except VersionError:
3009         e = sys.exc_info()[1]
3010 errorlog.warning(str(e))
3011 except Exception:
3012 pass
3013
3014 if debuglog is None:
3015 if debug:
3016 debuglog = PlyLogger(open(debugfile,"w"))
3017 else:
3018 debuglog = NullLogger()
3019
3020     debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
3021
3022
3023 errors = 0
3024
3025 # Validate the parser information
3026 if pinfo.validate_all():
3027 raise YaccError("Unable to build parser")
3028
3029 if not pinfo.error_func:
3030 errorlog.warning("no p_error() function is defined")
3031
3032 # Create a grammar object
3033 grammar = Grammar(pinfo.tokens)
3034
3035 # Set precedence level for terminals
3036 for term, assoc, level in pinfo.preclist:
3037 try:
3038 grammar.set_precedence(term,assoc,level)
3039 except GrammarError:
3040 e = sys.exc_info()[1]
3041 errorlog.warning("%s",str(e))
3042
3043 # Add productions to the grammar
3044 for funcname, gram in pinfo.grammar:
3045 file, line, prodname, syms = gram
3046 try:
3047 grammar.add_production(prodname,syms,funcname,file,line)
3048 except GrammarError:
3049 e = sys.exc_info()[1]
3050 errorlog.error("%s",str(e))
3051 errors = 1
3052
3053 # Set the grammar start symbols
3054 try:
3055 if start is None:
3056 grammar.set_start(pinfo.start)
3057 else:
3058 grammar.set_start(start)
3059 except GrammarError:
3060 e = sys.exc_info()[1]
3061 errorlog.error(str(e))
3062 errors = 1
3063
3064 if errors:
3065 raise YaccError("Unable to build parser")
3066
3067 # Verify the grammar structure
3068 undefined_symbols = grammar.undefined_symbols()
3069 for sym, prod in undefined_symbols:
3070 errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
3071 errors = 1
3072
3073 unused_terminals = grammar.unused_terminals()
3074 if unused_terminals:
3075 debuglog.info("")
3076 debuglog.info("Unused terminals:")
3077 debuglog.info("")
3078 for term in unused_terminals:
3079 errorlog.warning("Token '%s' defined, but not used", term)
3080 debuglog.info(" %s", term)
3081
3082 # Print out all productions to the debug log
3083 if debug:
3084 debuglog.info("")
3085 debuglog.info("Grammar")
3086 debuglog.info("")
3087 for n,p in enumerate(grammar.Productions):
3088 debuglog.info("Rule %-5d %s", n+1, p)
3089
3090 # Find unused non-terminals
3091 unused_rules = grammar.unused_rules()
3092 for prod in unused_rules:
3093         errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)
3094
3095 if len(unused_terminals) == 1:
3096 errorlog.warning("There is 1 unused token")
3097 if len(unused_terminals) > 1:
3098 errorlog.warning("There are %d unused tokens", len(unused_terminals))
3099
3100 if len(unused_rules) == 1:
3101 errorlog.warning("There is 1 unused rule")
3102 if len(unused_rules) > 1:
3103 errorlog.warning("There are %d unused rules", len(unused_rules))
3104
3105 if debug:
3106 debuglog.info("")
3107 debuglog.info("Terminals, with rules where they appear")
3108 debuglog.info("")
3109 terms = list(grammar.Terminals)
3110 terms.sort()
3111 for term in terms:
3112             debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
3113
3114 debuglog.info("")
3115 debuglog.info("Nonterminals, with rules where they appear")
3116 debuglog.info("")
3117 nonterms = list(grammar.Nonterminals)
3118 nonterms.sort()
3119 for nonterm in nonterms:
3120             debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
3121 debuglog.info("")
3122
3123 if check_recursion:
3124 unreachable = grammar.find_unreachable()
3125 for u in unreachable:
3126 errorlog.warning("Symbol '%s' is unreachable",u)
3127
3128 infinite = grammar.infinite_cycles()
3129 for inf in infinite:
3130 errorlog.error("Infinite recursion detected for symbol '%s'", inf)
3131 errors = 1
3132
3133 unused_prec = grammar.unused_precedence()
3134 for term, assoc in unused_prec:
3135         errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
3136 errors = 1
3137
3138 if errors:
3139 raise YaccError("Unable to build parser")
3140
3141 # Run the LRGeneratedTable on the grammar
3142 if debug:
3143 errorlog.debug("Generating %s tables", method)
3144
3145 lr = LRGeneratedTable(grammar,method,debuglog)
3146
3147 if debug:
3148 num_sr = len(lr.sr_conflicts)
3149
3150 # Report shift/reduce and reduce/reduce conflicts
3151 if num_sr == 1:
3152 errorlog.warning("1 shift/reduce conflict")
3153 elif num_sr > 1:
3154 errorlog.warning("%d shift/reduce conflicts", num_sr)
3155
3156 num_rr = len(lr.rr_conflicts)
3157 if num_rr == 1:
3158 errorlog.warning("1 reduce/reduce conflict")
3159 elif num_rr > 1:
3160 errorlog.warning("%d reduce/reduce conflicts", num_rr)
3161
3162 # Write out conflicts to the output file
3163 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3164 debuglog.warning("")
3165 debuglog.warning("Conflicts:")
3166 debuglog.warning("")
3167
3168 for state, tok, resolution in lr.sr_conflicts:
3169 debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)
3170
3171 for state, rule, rejected in lr.rr_conflicts:
3172 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3173 debuglog.warning("rejected rule (%s)", rejected)
3174 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3175 errorlog.warning("rejected rule (%s)", rejected)
3176
3177 # Write the table file if requested
3178 if write_tables:
3179 lr.write_table(tabmodule,outputdir,signature)
3180
3181 # Build the parser
3182 lr.bind_callables(pinfo.pdict)
3183 parser = LRParser(lr,pinfo.error_func)
3184
3185 parse = parser.parse
3186 return parser
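
# Typical use of yacc() (a minimal sketch; it assumes a matching ply.lex
# lexer that yields NUMBER and PLUS tokens -- mylexer below is hypothetical):
#
#     import ply.yacc as yacc
#
#     tokens = ('NUMBER', 'PLUS')
#
#     def p_expr_plus(p):
#         'expr : expr PLUS NUMBER'
#         p[0] = p[1] + p[3]
#
#     def p_expr_number(p):
#         'expr : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print("Syntax error at", p)
#
#     parser = yacc.yacc()                   # reflects on this module's p_* rules
#     parser.parse('1 + 2', lexer=mylexer)   # -> 3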