OLD | NEW |
| (Empty) |
1 # ----------------------------------------------------------------------------- | |
2 # ply: yacc.py | |
3 # | |
4 # Copyright (C) 2001-2011, | |
5 # David M. Beazley (Dabeaz LLC) | |
6 # All rights reserved. | |
7 # | |
8 # Redistribution and use in source and binary forms, with or without | |
9 # modification, are permitted provided that the following conditions are | |
10 # met: | |
11 # | |
12 # * Redistributions of source code must retain the above copyright notice, | |
13 # this list of conditions and the following disclaimer. | |
14 # * Redistributions in binary form must reproduce the above copyright notice, | |
15 # this list of conditions and the following disclaimer in the documentation | |
16 # and/or other materials provided with the distribution. | |
17 # * Neither the name of the David Beazley or Dabeaz LLC may be used to | |
18 # endorse or promote products derived from this software without | |
19 # specific prior written permission. | |
20 # | |
21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
26 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
27 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
28 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
29 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 # ----------------------------------------------------------------------------- | |
33 # | |
34 # This implements an LR parser that is constructed from grammar rules defined | |
35 # as Python functions. The grammar is specified by supplying the BNF inside | |
36 # Python documentation strings. The inspiration for this technique was borrowed | |
37 # from John Aycock's Spark parsing system. PLY might be viewed as cross between | |
38 # Spark and the GNU bison utility. | |
39 # | |
40 # The current implementation is only somewhat object-oriented. The | |
41 # LR parser itself is defined in terms of an object (which allows multiple | |
42 # parsers to co-exist). However, most of the variables used during table | |
43 # construction are defined in terms of global variables. Users shouldn't | |
44 # notice unless they are trying to define multiple parsers at the same | |
45 # time using threads (in which case they should have their head examined). | |
46 # | |
47 # This implementation supports both SLR and LALR(1) parsing. LALR(1) | |
48 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), | |
49 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, | |
50 # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced | |
51 # by the more efficient DeRemer and Pennello algorithm. | |
52 # | |
53 # :::::::: WARNING ::::::: | |
54 # | |
55 # Construction of LR parsing tables is fairly complicated and expensive. | |
56 # To make this module run fast, a *LOT* of work has been put into | |
57 # optimization---often at the expense of readability and what might be | |
58 # considered to be good Python "coding style." Modify the code at your | |
59 # own risk! | |
60 # ---------------------------------------------------------------------------- | |
61 | |
__version__    = "3.4"           # PLY release this module belongs to
__tabversion__ = "3.2"           # Table version (format of generated parsetab files)

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1                  # Debugging mode.  If set, yacc generates a
                                 # 'parser.out' file in the current directory

debug_file  = 'parser.out'       # Default name of the debugging file
tab_module  = 'parsetab'         # Default name of the table module
default_lr  = 'LALR'             # Default LR table generation method

error_count = 3                  # Number of symbols that must be shifted to
                                 # leave error recovery mode

yaccdevel   = 0                  # Set to True if developing yacc.  This turns
                                 # off optimized implementations of certain
                                 # functions.

resultlimit = 40                 # Size limit of results when running in debug mode.

pickle_protocol = 0              # Protocol to use when writing pickle files

87 import re, types, sys, os.path | |
88 | |
# Compatibility function for python 2.6/3.0
if sys.version_info[0] < 3:
    def func_code(f):
        # Python 2: a function's code object lives on the func_code attribute.
        return f.func_code
else:
    def func_code(f):
        # Python 3: the attribute was renamed to __code__.
        return f.__code__

# Compatibility: largest "practical" integer for this interpreter.
try:
    MAXINT = sys.maxint          # Python 2
except AttributeError:
    MAXINT = sys.maxsize         # Python 3 (ints are unbounded; this is the platform limit)
102 | |
# Python 2.x/3.0 compatibility.
def load_ply_lex():
    """Import and return the lex module appropriate for this Python version."""
    if sys.version_info[0] >= 3:
        # Python 3 installs lex inside the ply package.
        import ply.lex as lex
    else:
        # Python 2 historically imported it as a top-level module.
        import lex
    return lex
110 | |
111 # This object is a stand-in for a logging object created by the | |
112 # logging module. PLY will use this by default to create things | |
113 # such as the parser.out file. If a user wants more detailed | |
114 # information, they can create their own logging object and pass | |
115 # it into PLY. | |
116 | |
class PlyLogger(object):
    """Minimal stand-in for a logging.Logger that writes to a file-like object.

    PLY uses this by default to produce output such as the parser.out file.
    Users wanting more detailed control can pass a real logging object instead.
    """
    def __init__(self, f):
        self.f = f               # destination stream (anything with .write)

    def _emit(self, prefix, msg, args):
        # Every level funnels through here: lazy %-format, prefix, newline.
        self.f.write(prefix + (msg % args) + "\n")

    def debug(self, msg, *args, **kwargs):
        self._emit("", msg, args)

    def warning(self, msg, *args, **kwargs):
        self._emit("WARNING: ", msg, args)

    def error(self, msg, *args, **kwargs):
        self._emit("ERROR: ", msg, args)

    # info and critical are plain aliases of debug (no prefix).
    info = debug
    critical = debug
131 | |
# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    """Sink object that silently absorbs any attribute access or call."""
    def __getattribute__(self, attr):
        # Any attribute lookup (debug, info, warning, ...) yields the sink itself...
        return self

    def __call__(self, *args, **kwargs):
        # ...so chained calls like log.debug(...) are no-ops that return the sink.
        return self
138 | |
# Exception raised for yacc-related errors
class YaccError(Exception):
    """Raised for errors detected by the yacc module."""
141 | |
# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    """Return a short '<type @ addr> (repr)' description of *r* for debug logs."""
    text = repr(r)
    if '\n' in text:
        # Keep the description on a single line by re-escaping embedded newlines.
        text = repr(text)
    if len(text) > resultlimit:
        # Truncate oversized reprs to the configured limit.
        text = text[:resultlimit] + " ..."
    return "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), text)
150 | |
151 | |
# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    """Return repr(r) if short, else a compact '<type @ addr>' placeholder."""
    text = repr(r)
    if '\n' in text:
        # Flatten multi-line reprs onto one line.
        text = repr(text)
    if len(text) >= 16:
        return "<%s @ 0x%x>" % (type(r).__name__, id(r))
    return text
160 | |
161 #----------------------------------------------------------------------------- | |
162 # === LR Parsing Engine === | |
163 # | |
164 # The following classes are used for the LR parser itself. These are not | |
165 # used during table construction and are independent of the actual LR | |
166 # table generation algorithm | |
167 #----------------------------------------------------------------------------- | |
168 | |
169 # This class is used to hold non-terminal grammar symbols during parsing. | |
170 # It normally has the following attributes set: | |
171 # .type = Grammar symbol type | |
172 # .value = Symbol value | |
173 # .lineno = Starting line number | |
174 # .endlineno = Ending line number (optional, set automatically) | |
175 # .lexpos = Starting lex position | |
176 # .endlexpos = Ending lex position (optional, set automatically) | |
177 | |
class YaccSymbol:
    """Non-terminal grammar symbol held on the parsing stack.

    The parser assigns attributes such as .type, .value, .lineno and
    .lexpos (plus optional .endlineno / .endlexpos); the symbol displays
    as its grammar type.
    """
    def __str__(self):
        return self.type

    def __repr__(self):
        return self.__str__()
181 | |
182 # This class is a wrapper around the objects actually passed to each | |
183 # grammar rule. Index lookup and assignment actually assign the | |
184 # .value attribute of the underlying YaccSymbol object. | |
185 # The lineno() method returns the line number of a given | |
186 # item (or 0 if not defined). The linespan() method returns | |
187 # a tuple of (startline,endline) representing the range of lines | |
188 # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) | |
189 # representing the range of positional information for a symbol. | |
190 | |
class YaccProduction:
    """Wrapper around the grammar symbols matched by a production.

    Index lookup and assignment read/write the .value attribute of the
    underlying YaccSymbol objects: p[n] for n >= 0 refers to the
    production's own symbols, while negative indices reach back into the
    parsing stack.  lineno()/linespan() and lexpos()/lexspan() expose
    positional information for a symbol (0 when not recorded).
    """
    def __init__(self, s, stack=None):
        self.slice = s           # Symbols matched by the production
        self.stack = stack       # Parser symbol stack (for negative indexing)
        self.lexer = None        # Lexer in use, filled in by the parser
        self.parser = None       # Parser instance, filled in by the parser

    def __getitem__(self, n):
        # Bug fix: Python 3 ignores __getslice__, so p[i:j] arrives here as a
        # slice object; handle it explicitly instead of failing on
        # self.slice[n].value.
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        if n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 only; Python 3 routes slicing through __getitem__ above.
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        """Return the starting line number of symbol n (0 if unknown)."""
        return getattr(self.slice[n], "lineno", 0)

    def set_lineno(self, n, lineno):
        """Set the starting line number of symbol n."""
        self.slice[n].lineno = lineno

    def linespan(self, n):
        """Return (startline, endline) covered by symbol n."""
        startline = getattr(self.slice[n], "lineno", 0)
        endline = getattr(self.slice[n], "endlineno", startline)
        return startline, endline

    def lexpos(self, n):
        """Return the starting lex position of symbol n (0 if unknown)."""
        return getattr(self.slice[n], "lexpos", 0)

    def lexspan(self, n):
        """Return (startpos, endpos) of positional info for symbol n."""
        startpos = getattr(self.slice[n], "lexpos", 0)
        endpos = getattr(self.slice[n], "endlexpos", startpos)
        return startpos, endpos

    def error(self):
        # Signal a syntax error from inside a grammar rule; the parsing
        # engine catches SyntaxError and enters error recovery.
        raise SyntaxError
231 | |
232 | |
233 # ----------------------------------------------------------------------------- | |
234 # == LRParser == | |
235 # | |
236 # The LR Parsing engine. | |
237 # ----------------------------------------------------------------------------- | |
238 | |
239 class LRParser: | |
    def __init__(self,lrtab,errorf):
        # lrtab : object holding the generated LR tables (lr_productions,
        #         lr_action, lr_goto); errorf : user p_error() callback or None.
        self.productions = lrtab.lr_productions   # Grammar production list
        self.action      = lrtab.lr_action        # Action table: state -> {token type: action}
        self.goto        = lrtab.lr_goto          # Goto table: state -> {nonterminal: state}
        self.errorfunc   = errorf                 # User-supplied error handler
245 | |
    def errok(self):
        # Clear the error flag checked by the parse loop; exposed to the
        # user's p_error() handler to signal that recovery has succeeded.
        self.errorok = 1
248 | |
249 def restart(self): | |
250 del self.statestack[:] | |
251 del self.symstack[:] | |
252 sym = YaccSymbol() | |
253 sym.type = '$end' | |
254 self.symstack.append(sym) | |
255 self.statestack.append(0) | |
256 | |
257 def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): | |
258 if debug or yaccdevel: | |
259 if isinstance(debug,int): | |
260 debug = PlyLogger(sys.stderr) | |
261 return self.parsedebug(input,lexer,debug,tracking,tokenfunc) | |
262 elif tracking: | |
263 return self.parseopt(input,lexer,debug,tracking,tokenfunc) | |
264 else: | |
265 return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) | |
266 | |
267 | |
268 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
269 # parsedebug(). | |
270 # | |
271 # This is the debugging enabled version of parse(). All changes made to the | |
272 # parsing engine should be made here. For the non-debugging version, | |
273 # copy this code to a method parseopt() and delete all of the sections | |
274 # enclosed in: | |
275 # | |
276 # #--! DEBUG | |
277 # statements | |
278 # #--! DEBUG | |
279 # | |
280 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
281 | |
    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
        """Debug-enabled LR parsing engine.

        This is the master copy of the parse loop: parseopt() and
        parseopt_notrack() are generated from it by deleting the
        #--! DEBUG (and #--! TRACKING) sections.  All changes to the
        parsing engine must be made here first.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug('')
            debug.debug('State : %s', state)
            # --! DEBUG

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = "$end"

            # --! DEBUG
            debug.debug('Stack : %s',
                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            # --! DEBUG

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    # --! DEBUG
                    if plen:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
                    else:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)

                    # --! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            # Propagate start position from the first matched
                            # symbol and end position from the last one.
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            # Empty production: position comes from the lexer.
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            if t == 0:
                # Accept: the start symbol's value is the parse result.
                n = symstack[-1]
                result = getattr(n,"value",None)
                # --! DEBUG
                debug.info("Done : Returning %s", format_result(result))
                debug.info("PLY: PARSE DEBUG END")
                # --! DEBUG
                return result

            if t == None:

                # --! DEBUG
                debug.error('Error : %s',
                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # --! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        # No user error handler: emit a default message.
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here.   Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
587 | |
588 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
589 # parseopt(). | |
590 # | |
591 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. | |
592 # Edit the debug version above, then copy any modifications to the method | |
593 # below while removing #--! DEBUG sections. | |
594 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
595 | |
596 | |
    def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Optimized (tracking-capable) LR parsing engine.

        Generated from parsedebug() by deleting the #--! DEBUG sections.
        DO NOT EDIT THIS CODE DIRECTLY; edit parsedebug() and copy the
        changes here.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            # Propagate start position from the first matched
                            # symbol and end position from the last one.
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            # Empty production: position comes from the lexer.
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            if t == 0:
                # Accept: the start symbol's value is the parse result.
                n = symstack[-1]
                return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        # No user error handler: emit a default message.
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here.   Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
860 | |
861 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
862 # parseopt_notrack(). | |
863 # | |
864 # Optimized version of parseopt() with line number tracking removed. | |
865 # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove | |
866 # code in the #--! TRACKING sections | |
867 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! | |
868 | |
    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Optimized LR parsing engine with debugging and position
        tracking support stripped out.

        Machine-derived from the debugging parse() method; do not edit
        this body directly (see the banner comment above).  Parameters
        mirror parse():

        input     -- optional source text handed to the lexer
        lexer     -- lexer object; defaults to the module-level lexer
                     obtained via load_ply_lex()
        debug, tracking -- accepted only for signature compatibility;
                     never referenced in this variant
        tokenfunc -- optional callable used in place of lexer.token

        Returns the semantic value of the start symbol, or None on an
        unrecoverable syntax error.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = []              # Stack of pushed-back lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = []                 # Stack of parsing states
        self.statestack = statestack
        symstack   = []                 # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the start symbol is on top of the stack
                    n = symstack[-1]
                    return getattr(n,"value",None)

            # NOTE(review): '==' comparison with None retained verbatim;
            # 'is None' would be the idiomatic form.
            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            # NOTE(review): tests errtoken for a lineno but
                            # then reads it off lookahead -- at this point
                            # they appear to be the same object; confirm
                            # before changing.
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                # NOTE(review): unlike the branch above, this
                                # message carries no trailing newline.
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Wrap the offending token in a synthetic 'error'
                    # symbol and push the real token back for later.
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")
1115 | |
1116 # ----------------------------------------------------------------------------- | |
1117 # === Grammar Representation === | |
1118 # | |
1119 # The following functions, classes, and variables are used to represent and | |
1120 # manipulate the rules that make up a grammar. | |
1121 # ----------------------------------------------------------------------------- | |
1122 | |
import re

# Regular expression matching valid rule/symbol identifiers.
# NOTE(review): the character class also permits '-', which ordinary
# Python identifiers do not -- presumably intentional for symbol names.
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1127 | |
1128 # ----------------------------------------------------------------------------- | |
1129 # class Production: | |
1130 # | |
1131 # This class stores the raw information about a single production or grammar rul
e. | |
1132 # A grammar rule refers to a specification such as this: | |
1133 # | |
1134 # expr : expr PLUS term | |
1135 # | |
1136 # Here are the basic attributes defined on all productions | |
1137 # | |
1138 # name - Name of the production. For example 'expr' | |
1139 # prod - A list of symbols on the right side ['expr','PLUS','term'] | |
1140 # prec - Production precedence level | |
1141 # number - Production number. | |
1142 # func - Function that executes on reduce | |
1143 # file - File where production function is defined | |
1144 # lineno - Line number where production function is defined | |
1145 # | |
1146 # The following attributes are defined or optional. | |
1147 # | |
1148 # len - Length of the production (number of symbols on right hand si
de) | |
1149 # usyms - Set of unique symbols found in the production | |
1150 # ----------------------------------------------------------------------------- | |
1151 | |
class Production(object):
    """One grammar rule, e.g. the specification 'expr : expr PLUS term'.

    Attributes:
      name     -- name of the left-hand side, e.g. 'expr'
      prod     -- tuple of right-hand-side symbols, e.g. ('expr','PLUS','term')
      prec     -- production precedence as an (assoc, level) tuple
      number   -- production number
      func     -- name of the function that executes on reduce
      callable -- bound reduce function (resolved by bind())
      file     -- file where the production function is defined
      line     -- line number where the production function is defined
      len      -- number of symbols on the right-hand side
      usyms    -- unique symbols appearing in the production
      lr_items -- list of all LR items for the production
      reduced  -- class-level reduce counter
    """
    reduced = 0
    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Internal settings used during table construction

        self.len  = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = [ ]
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production("+str(self)+")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Py2: a Production must always be truthy -- even an epsilon
        # production with an empty right-hand side -- so that 'if not p'
        # only matches the None placeholder in Grammar.Productions[0].
        return 1

    def __bool__(self):
        # Bug fix: Python 3 consults __bool__, not __nonzero__.  Without
        # this, truthiness falls back to __len__ and an epsilon
        # production (len == 0) becomes falsy, silently breaking
        # 'if not p' checks under Python 3.
        return True

    def __getitem__(self,index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self,n):
        if n > len(self.prod): return None
        p = LRItem(self,n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable found in pdict
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]
1220 | |
# Minimal stand-in for Production objects used when table data is read
# back from a pre-generated parser file: it carries only the fields the
# LR engine actually consults, plus a little debugging information.
class MiniProduction(object):
    """Lightweight replacement for Production when loading cached tables."""

    def __init__(self,str,name,len,func,file,line):
        # NOTE: the 'str' and 'len' parameter names shadow builtins but
        # are kept for signature compatibility with the table reader.
        self.str      = str       # Preformatted "lhs -> rhs" text
        self.name     = name      # Left-hand-side name
        self.len      = len       # Number of right-hand-side symbols
        self.func     = func      # Name of the reduce function
        self.file     = file      # Defining file (debugging only)
        self.line     = line      # Defining line (debugging only)
        self.callable = None      # Resolved later by bind()

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Resolve self.func (a name) to an actual callable taken from pdict.
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]
1243 | |
1244 | |
1245 # ----------------------------------------------------------------------------- | |
1246 # class LRItem | |
1247 # | |
1248 # This class represents a specific stage of parsing a production rule. For | |
1249 # example: | |
1250 # | |
1251 # expr : expr . PLUS term | |
1252 # | |
1253 # In the above, the "." represents the current location of the parse. Here | |
1254 # basic attributes: | |
1255 # | |
1256 # name - Name of the production. For example 'expr' | |
1257 # prod - A list of symbols on the right side ['expr','.', 'PLUS','te
rm'] | |
1258 # number - Production number. | |
1259 # | |
1260 # lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term
' | |
1261 # then lr_next refers to 'expr -> expr PLUS . term' | |
1262 # lr_index - LR item index (location of the ".") in the prod list. | |
1263 # lookaheads - LALR lookahead symbols for this item | |
1264 # len - Length of the production (number of symbols on right hand s
ide) | |
1265 # lr_after - List of all productions that immediately follow | |
1266 # lr_before - Grammar symbol immediately before | |
1267 # ----------------------------------------------------------------------------- | |
1268 | |
class LRItem(object):
    """A production with a parse-position marker, e.g. 'expr -> expr . PLUS term'.

    Attributes:
      name       -- left-hand-side name of the underlying production
      prod       -- RHS symbols as a tuple, with '.' inserted at the marker
      number     -- number of the underlying production
      lr_index   -- index of the '.' within prod
      lookaheads -- LALR lookahead symbols for this item
      len        -- length of prod, marker included
      usyms      -- unique symbols of the underlying production
    """

    def __init__(self,p,n):
        symbols = list(p.prod)
        symbols.insert(n,".")          # drop the marker into position n
        self.name       = p.name
        self.number     = p.number
        self.lr_index   = n
        self.lookaheads = {}
        self.prod       = tuple(symbols)
        self.len        = len(self.prod)
        self.usyms      = p.usyms

    def __str__(self):
        if not self.prod:
            return "%s -> <empty>" % self.name
        return "%s -> %s" % (self.name," ".join(self.prod))

    def __repr__(self):
        return "LRItem(" + str(self) + ")"
1290 | |
1291 # ----------------------------------------------------------------------------- | |
1292 # rightmost_terminal() | |
1293 # | |
1294 # Return the rightmost terminal from a list of symbols. Used in add_production(
) | |
1295 # ----------------------------------------------------------------------------- | |
def rightmost_terminal(symbols, terminals):
    """Return the right-most member of *symbols* that is a terminal.

    Scans from the end of the list; returns None when no terminal is
    present.  Used by add_production() to derive a rule's default
    precedence.
    """
    for sym in reversed(symbols):
        if sym in terminals:
            return sym
    return None
1303 | |
1304 # ----------------------------------------------------------------------------- | |
1305 # === GRAMMAR CLASS === | |
1306 # | |
1307 # The following class represents the contents of the specified grammar along | |
1308 # with various computed properties such as first sets, follow sets, LR items, et
c. | |
1309 # This data is used for critical parts of the table generation process later. | |
1310 # ----------------------------------------------------------------------------- | |
1311 | |
# Raised for defects detected in the grammar specification itself
# (illegal rule names, duplicate rules, precedence problems, etc.).
class GrammarError(YaccError): pass
1313 | |
1314 class Grammar(object): | |
    def __init__(self,terminals):
        """Create an empty grammar over the terminal symbols *terminals*.

        The reserved terminal 'error' is always added.  All derived data
        (productions, FIRST/FOLLOW sets, precedence bookkeeping) starts
        empty and is populated by later method calls.
        """
        self.Productions  = [None]  # A list of all of the productions.  The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap      = { }     # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First        = { }     # A dictionary of precomputed FIRST(x) symbols

        self.Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = { }   # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar
1349 | |
1350 | |
    def __len__(self):
        """Number of entries in the production list (reserved slot 0 included)."""
        return len(self.Productions)
1353 | |
    def __getitem__(self,index):
        """Return production *index* (index 0 is the augmented S' rule)."""
        return self.Productions[index]
1356 | |
1357 # --------------------------------------------------------------------------
--- | |
1358 # set_precedence() | |
1359 # | |
1360 # Sets the precedence for a given terminal. assoc is the associativity such
as | |
1361 # 'left','right', or 'nonassoc'. level is a numeric level. | |
1362 # | |
1363 # --------------------------------------------------------------------------
--- | |
1364 | |
1365 def set_precedence(self,term,assoc,level): | |
1366 assert self.Productions == [None],"Must call set_precedence() before add
_production()" | |
1367 if term in self.Precedence: | |
1368 raise GrammarError("Precedence already specified for terminal '%s'"
% term) | |
1369 if assoc not in ['left','right','nonassoc']: | |
1370 raise GrammarError("Associativity must be one of 'left','right', or
'nonassoc'") | |
1371 self.Precedence[term] = (assoc,level) | |
1372 | |
1373 # --------------------------------------------------------------------------
--- | |
1374 # add_production() | |
1375 # | |
1376 # Given an action function, this function assembles a production rule and | |
1377 # computes its precedence level. | |
1378 # | |
1379 # The production rule is supplied as a list of symbols. For example, | |
1380 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and | |
1381 # symbols ['expr','PLUS','term']. | |
1382 # | |
1383 # Precedence is determined by the precedence of the right-most non-terminal | |
1384 # or the precedence of a terminal specified by %prec. | |
1385 # | |
1386 # A variety of error checks are performed to make sure production symbols | |
1387 # are valid and that %prec is used correctly. | |
1388 # --------------------------------------------------------------------------
--- | |
1389 | |
    def add_production(self,prodname,syms,func=None,file='',line=0):
        """Add one production rule to the grammar.

        prodname -- name of the rule's left-hand side, e.g. 'expr'
        syms     -- right-hand-side symbols, e.g. ['expr','PLUS','term'];
                    may end with '%prec' TERMINAL, which is consumed here.
                    NOTE: this list is modified in place (quoted literals
                    are normalized and any %prec suffix is deleted).
        func     -- action function executed on reduce
        file, line -- rule location, used only in error messages

        Precedence is taken from %prec if present, otherwise from the
        rightmost terminal (default ('right',0)).  Raises GrammarError
        for illegal names, bad literals, %prec misuse, or duplicate
        rules.  Returns 0 on success.
        """

        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))

        # Look for literal tokens such as 'x' or "+" and normalize them
        # to their single-character form.
        for n,s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    # NOTE(review): eval() of the quoted literal -- the
                    # grammar text is assumed to be trusted input here.
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
                    if not c in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname,None)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
            else:
                self.UsedPrecedence[precname] = 1
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms,self.Terminals)
            prodprec = self.Precedence.get(precname,('right',0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname,syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
                               "Previous definition at %s:%d" % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber  = len(self.Productions)
        if not prodname in self.Nonterminals:
            self.Nonterminals[prodname] = [ ]

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if not t in self.Nonterminals:
                    self.Nonterminals[t] = [ ]
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber,prodname,syms,prodprec,func,file,line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [ p ]
        return 0
1465 | |
1466 # --------------------------------------------------------------------------
--- | |
1467 # set_start() | |
1468 # | |
1469 # Sets the starting symbol and creates the augmented grammar. Production | |
1470 # rule 0 is S' -> start where start is the start symbol. | |
1471 # --------------------------------------------------------------------------
--- | |
1472 | |
    def set_start(self,start=None):
        """Set the grammar's start symbol and build the augmented rule.

        start defaults to the left-hand side of the first user rule.
        Production 0 becomes S' -> start, and the start symbol's usage
        list is updated accordingly.  Raises GrammarError if *start* is
        not a defined nonterminal.
        """
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError("start symbol %s undefined" % start)
        self.Productions[0] = Production(0,"S'",[start])
        self.Nonterminals[start].append(0)
        self.Start = start
1481 | |
1482 # --------------------------------------------------------------------------
--- | |
1483 # find_unreachable() | |
1484 # | |
1485 # Find all of the nonterminal symbols that can't be reached from the startin
g | |
1486 # symbol. Returns a list of nonterminals that can't be reached. | |
1487 # --------------------------------------------------------------------------
--- | |
1488 | |
1489 def find_unreachable(self): | |
1490 | |
1491 # Mark all symbols that are reachable from a symbol s | |
1492 def mark_reachable_from(s): | |
1493 if reachable[s]: | |
1494 # We've already reached symbol s. | |
1495 return | |
1496 reachable[s] = 1 | |
1497 for p in self.Prodnames.get(s,[]): | |
1498 for r in p.prod: | |
1499 mark_reachable_from(r) | |
1500 | |
1501 reachable = { } | |
1502 for s in list(self.Terminals) + list(self.Nonterminals): | |
1503 reachable[s] = 0 | |
1504 | |
1505 mark_reachable_from( self.Productions[0].prod[0] ) | |
1506 | |
1507 return [s for s in list(self.Nonterminals) | |
1508 if not reachable[s]] | |
1509 | |
1510 # --------------------------------------------------------------------------
--- | |
1511 # infinite_cycles() | |
1512 # | |
1513 # This function looks at the various parsing rules and tries to detect | |
1514 # infinite recursion cycles (grammar rules where there is no possible way | |
1515 # to derive a string of only terminals). | |
1516 # --------------------------------------------------------------------------
--- | |
1517 | |
    def infinite_cycles(self):
        """Detect grammar symbols that can never derive a terminal string.

        A nonterminal "terminates" if at least one of its productions
        consists entirely of terminating symbols; this is propagated to
        a fixed point.  Returns the list of non-terminating symbols.
        Symbols that are used-but-undefined are excluded, since they are
        reported separately by undefined_symbols().
        """
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = 1

        terminates['$end'] = 1

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = 0

        # Then propagate termination until no change:
        while 1:
            some_change = 0
            for (n,pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = 0
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = 1

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = 1
                            some_change = 1
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s,term) in terminates.items():
            if not term:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
1574 | |
1575 | |
1576 # --------------------------------------------------------------------------
--- | |
1577 # undefined_symbols() | |
1578 # | |
1579 # Find all symbols that were used the grammar, but not defined as tokens or | |
1580 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symb
ol | |
1581 # and prod is the production where the symbol was used. | |
1582 # --------------------------------------------------------------------------
--- | |
1583 def undefined_symbols(self): | |
1584 result = [] | |
1585 for p in self.Productions: | |
1586 if not p: continue | |
1587 | |
1588 for s in p.prod: | |
1589 if not s in self.Prodnames and not s in self.Terminals and s !=
'error': | |
1590 result.append((s,p)) | |
1591 return result | |
1592 | |
1593 # --------------------------------------------------------------------------
--- | |
1594 # unused_terminals() | |
1595 # | |
1596 # Find all terminals that were defined, but not used by the grammar. Return
s | |
1597 # a list of all symbols. | |
1598 # --------------------------------------------------------------------------
--- | |
1599 def unused_terminals(self): | |
1600 unused_tok = [] | |
1601 for s,v in self.Terminals.items(): | |
1602 if s != 'error' and not v: | |
1603 unused_tok.append(s) | |
1604 | |
1605 return unused_tok | |
1606 | |
1607 # --------------------------------------------------------------------------
---- | |
1608 # unused_rules() | |
1609 # | |
1610 # Find all grammar rules that were defined, but not used (maybe not reachab
le) | |
1611 # Returns a list of productions. | |
1612 # --------------------------------------------------------------------------
---- | |
1613 | |
1614 def unused_rules(self): | |
1615 unused_prod = [] | |
1616 for s,v in self.Nonterminals.items(): | |
1617 if not v: | |
1618 p = self.Prodnames[s][0] | |
1619 unused_prod.append(p) | |
1620 return unused_prod | |
1621 | |
1622 # --------------------------------------------------------------------------
--- | |
1623 # unused_precedence() | |
1624 # | |
1625 # Returns a list of tuples (term,precedence) corresponding to precedence | |
1626 # rules that were never used by the grammar. term is the name of the termin
al | |
1627 # on which precedence was applied and precedence is a string such as 'left'
or | |
1628 # 'right' corresponding to the type of precedence. | |
1629 # --------------------------------------------------------------------------
--- | |
1630 | |
1631 def unused_precedence(self): | |
1632 unused = [] | |
1633 for termname in self.Precedence: | |
1634 if not (termname in self.Terminals or termname in self.UsedPrecedenc
e): | |
1635 unused.append((termname,self.Precedence[termname][0])) | |
1636 | |
1637 return unused | |
1638 | |
1639 # ------------------------------------------------------------------------- | |
1640 # _first() | |
1641 # | |
1642 # Compute the value of FIRST1(beta) where beta is a tuple of symbols. | |
1643 # | |
1644 # During execution of compute_first1, the result may be incomplete. | |
1645 # Afterward (e.g., when called from compute_follow()), it will be complete. | |
1646 # ------------------------------------------------------------------------- | |
    def _first(self,beta):
        """Compute FIRST1(beta) for a sequence of symbols *beta*.

        Returns the list of terminals that can begin a string derived
        from beta, including '<empty>' when every symbol of beta can
        derive the empty string.  Relies on self.First: during
        compute_first() the result may be incomplete; afterwards (e.g.
        when called from compute_follow()) it is exact.
        """

        # We are computing First(x1,x2,x3,...,xn)
        result = [ ]
        for x in beta:
            x_produces_empty = 0

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = 1
                else:
                    if f not in result: result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result
1675 | |
1676 # ------------------------------------------------------------------------- | |
1677 # compute_first() | |
1678 # | |
1679 # Compute the value of FIRST1(X) for all symbols | |
1680 # ------------------------------------------------------------------------- | |
    def compute_first(self):
        """Compute FIRST1(X) for every grammar symbol X.

        Terminals map to themselves; nonterminal sets are grown by
        fixed-point iteration over all productions until stable.  The
        result is cached in self.First and returned (subsequent calls
        return the cache).
        """
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while 1:
            some_change = 0
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append( f )
                            some_change = 1
            if not some_change:
                break

        return self.First
1710 | |
1711 # --------------------------------------------------------------------- | |
1712 # compute_follow() | |
1713 # | |
1714 # Computes all of the follow sets for every non-terminal symbol. The | |
1715 # follow set is the set of all symbols that might follow a given | |
1716 # non-terminal. See the Dragon book, 2nd Ed. p. 189. | |
1717 # --------------------------------------------------------------------- | |
    def compute_follow(self,start=None):
        """Compute FOLLOW(B) for every nonterminal B.

        start names the start symbol (defaults to the LHS of rule 1);
        '$end' seeds its follow set.  Results are cached in self.Follow
        and returned.  Computes FIRST sets first if needed.  See the
        Dragon book, 2nd Ed. p. 189.
        """
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = [ ]

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = [ '$end' ]

        # Grow the follow sets by fixed-point iteration until stable.
        while 1:
            didadd = 0
            for p in self.Productions[1:]:
                # Here is the production set
                for i in range(len(p.prod)):
                    B = p.prod[i]
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production:
                        # everything in FIRST(tail after B) can follow B.
                        fst = self._first(p.prod[i+1:])
                        hasempty = 0
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = 1
                            if f == '<empty>':
                                hasempty = 1
                        if hasempty or i == (len(p.prod)-1):
                            # B can end the production: add FOLLOW(lhs) to FOLLOW(B)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = 1
            if not didadd: break
        return self.Follow
1760 | |
1761 | |
1762 # --------------------------------------------------------------------------
--- | |
1763 # build_lritems() | |
1764 # | |
1765 # This function walks the list of productions and builds a complete set of t
he | |
1766 # LR items. The LR items are stored in two ways: First, they are uniquely | |
1767 # numbered and placed in the list _lritems. Second, a linked list of LR ite
ms | |
1768 # is built for each production. For example: | |
1769 # | |
1770 # E -> E PLUS E | |
1771 # | |
1772 # Creates the list | |
1773 # | |
1774 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] | |
1775 # --------------------------------------------------------------------------
--- | |
1776 | |
    def build_lritems(self):
        """Walk self.Productions and build the complete set of LR items.

        For each production p this creates p.lr_items, the list of LRItem
        objects with the dot at every position, and chains them together
        via the lr_next attribute (the chain starts at the production
        object itself and is terminated by None).
        """
        for p in self.Productions:
            lastlri = p        # tail of the lr_next chain; starts at p itself
            i = 0              # current dot position
            lr_items = []
            while 1:
                # Dot moved past the end of the production: terminate chain.
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p,i)
                    # Precompute the list of productions immediately following
                    # the dot (empty when the next symbol is a terminal or the
                    # dot is at the end).
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError,KeyError):
                        lri.lr_after = []
                    # Symbol appearing immediately before the dot.
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri: break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
1803 | |
1804 # ----------------------------------------------------------------------------- | |
1805 # == Class LRTable == | |
1806 # | |
1807 # This basic class represents a basic table of LR parsing information. | |
1808 # Methods for generating the tables are not defined here. They are defined | |
1809 # in the derived class LRGeneratedTable. | |
1810 # ----------------------------------------------------------------------------- | |
1811 | |
# Raised when a previously generated table file was produced by an
# incompatible version of this module (its _tabversion does not match
# __tabversion__), forcing the tables to be regenerated.
class VersionError(YaccError): pass
1813 | |
class LRTable(object):
    """Basic container for LR parsing information.

    Holds the action table, goto table, production list and parsing
    method.  This class only knows how to *load* previously generated
    tables (from a module or a pickle); table construction lives in the
    derived class LRGeneratedTable.
    """
    def __init__(self):
        self.lr_action = None          # Action table
        self.lr_goto = None            # Goto table
        self.lr_productions = None     # List of MiniProduction objects
        self.lr_method = None          # Parsing method ('LALR' or 'SLR')

    def read_table(self,module):
        """Load tables from a parsetab module.

        module - either an already-imported module object or an importable
                 (possibly dotted) module name.

        Returns the table signature string.  Raises VersionError when the
        table file was generated by an incompatible version.
        """
        if isinstance(module,types.ModuleType):
            parsetab = module
        else:
            # Import by name; exec keeps this working for dotted names on
            # both Python 2 and Python 3.
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                env = { }
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self,filename):
        """Load tables from a pickle file previously written by
        pickle_table().  Returns the table signature string.  Raises
        VersionError when the pickle was written by an incompatible
        version.
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        in_f = open(filename,"rb")
        # try/finally guarantees the file handle is closed even when the
        # version check raises or a pickle.load fails (the previous code
        # leaked the handle on those paths).
        try:
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError("yacc table file version is out of date")
            self.lr_method = pickle.load(in_f)
            signature = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto = pickle.load(in_f)
            productions = pickle.load(in_f)
        finally:
            in_f.close()

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self,pdict):
        for p in self.lr_productions:
            p.bind(pdict)
1873 | |
1874 # ----------------------------------------------------------------------------- | |
1875 # === LR Generator === | |
1876 # | |
1877 # The following classes and functions are used to generate LR parsing tables on | |
1878 # a grammar. | |
1879 # ----------------------------------------------------------------------------- | |
1880 | |
1881 # ----------------------------------------------------------------------------- | |
1882 # digraph() | |
1883 # traverse() | |
1884 # | |
1885 # The following two functions are used to compute set valued functions | |
1886 # of the form: | |
1887 # | |
1888 # F(x) = F'(x) U U{F(y) | x R y} | |
1889 # | |
1890 # This is used to compute the values of Read() sets as well as FOLLOW sets | |
1891 # in LALR(1) generation. | |
1892 # | |
1893 # Inputs: X - An input set | |
1894 # R - A relation | |
1895 # FP - Set-valued function | |
1896 # ------------------------------------------------------------------------------ | |
1897 | |
def digraph(X,R,FP):
    """Compute the set-valued function F(x) = FP(x) U U{ F(y) | x R y }
    over the input set X using the digraph traversal algorithm.

    X  - input set
    R  - relation: callable returning the elements related to x
    FP - base set-valued function F'(x)

    Returns the dict F mapping each x in X to its computed set.
    """
    N = dict((node, 0) for node in X)   # traversal marks; 0 = unvisited
    stack = []
    F = {}
    for node in X:
        if N[node] == 0:
            traverse(node, N, stack, F, X, R, FP)
    return F
1907 | |
def traverse(x,N,stack,F,X,R,FP):
    # Depth-first traversal helper for digraph().  Nodes that form a
    # strongly connected component under R all end up sharing the same
    # F value (Tarjan-style lowlink bookkeeping via N and stack).
    stack.append(x)
    d = len(stack)              # depth at which x was pushed
    N[x] = d
    F[x] = FP(x)                # F(X) <- F'(x)

    rel = R(x)                  # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        # Merge F(y) into F(x)
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
        # x is the root of a strongly connected component: pop every
        # member of the component and give it x's final F value.
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
1929 | |
# Raised for internal errors during LALR/SLR table construction
# (unsupported method, unresolvable conflicts, shift/shift conflicts).
class LALRError(YaccError): pass
1931 | |
1932 # ----------------------------------------------------------------------------- | |
1933 # == LRGeneratedTable == | |
1934 # | |
1935 # This class implements the LR table generation algorithm. There are no | |
1936 # public methods except for write() | |
1937 # ----------------------------------------------------------------------------- | |
1938 | |
1939 class LRGeneratedTable(LRTable): | |
    def __init__(self,grammar,method='LALR',log=None):
        """Build the LR parsing tables for *grammar*.

        grammar - a Grammar object with productions already defined
        method  - table construction algorithm, 'SLR' or 'LALR'
        log     - optional logger for diagnostic output; a NullLogger is
                  used when omitted

        Raises LALRError for an unsupported method.  Table construction
        runs immediately; results land in lr_action / lr_goto.
        """
        if method not in ['SLR','LALR']:
            raise LALRError("Unsupported method %s" % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}                  # Action table
        self.lr_goto = {}                    # Goto table
        self.lr_productions = grammar.Productions  # Copy of grammar Production array
        self.lr_goto_cache = {}              # Cache of computed gotos
        self.lr0_cidhash = {}                # Cache of closures (id -> state number)

        self._add_count = 0                  # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []                  # List of conflicts

        self.sr_conflicts = []               # (state, token, resolution) tuples
        self.rr_conflicts = []               # (state, chosen rule, rejected rule) tuples

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()
1974 | |
1975 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. | |
1976 | |
    def lr0_closure(self,I):
        """Compute the LR(0) closure of the item set I.

        Returns a new list containing I plus every item B -> .G reachable
        through non-terminals appearing right after a dot.  The lr0_added
        stamp (compared against _add_count) prevents adding the same item
        twice within one closure computation.
        """
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = 1
        while didadd:
            didadd = 0
            # Note: J grows while being iterated; newly appended items are
            # picked up either later in this pass or on the next pass.
            for j in J:
                for x in j.lr_after:
                    if getattr(x,"lr0_added",0) == self._add_count: continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = 1

        return J
1994 | |
1995 # Compute the LR(0) goto function goto(I,X) where I is a set | |
1996 # of LR(0) items and X is a grammar symbol. This function is written | |
1997 # in a way that guarantees uniqueness of the generated goto sets | |
1998 # (i.e. the same goto set will never be returned as two different Python | |
1999 # objects). With uniqueness, we can later do fast set comparisons using | |
2000 # id(obj) instead of element-wise comparison. | |
2001 | |
    def lr0_goto(self,I,x):
        """Compute the LR(0) goto set goto(I,x) for item set I and symbol x.

        Written so that the same goto set is always returned as the same
        Python object, which lets later code compare sets cheaply with
        id(obj) instead of element-wise comparison.
        """
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I),x),None)
        if g: return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result.  lr_goto_cache[x] is the root of a trie keyed by
        # the id() of each advanced item; the '$end' key at a trie node
        # stores the finished goto set for that particular item sequence.

        s = self.lr_goto_cache.get(x,None)
        if not s:
            s = { }
            self.lr_goto_cache[x] = s

        gs = [ ]
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                # Descend one trie level per advanced item
                s1 = s.get(id(n),None)
                if not s1:
                    s1 = { }
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end',None)
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                # Empty goto: cache the empty list itself
                s['$end'] = gs
        self.lr_goto_cache[(id(I),x)] = g
        return g
2034 | |
2035 # Compute the LR(0) sets of item function | |
2036 def lr0_items(self): | |
2037 | |
2038 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] | |
2039 i = 0 | |
2040 for I in C: | |
2041 self.lr0_cidhash[id(I)] = i | |
2042 i += 1 | |
2043 | |
2044 # Loop over the items in C and each grammar symbols | |
2045 i = 0 | |
2046 while i < len(C): | |
2047 I = C[i] | |
2048 i += 1 | |
2049 | |
2050 # Collect all of the symbols that could possibly be in the goto(I,X)
sets | |
2051 asyms = { } | |
2052 for ii in I: | |
2053 for s in ii.usyms: | |
2054 asyms[s] = None | |
2055 | |
2056 for x in asyms: | |
2057 g = self.lr0_goto(I,x) | |
2058 if not g: continue | |
2059 if id(g) in self.lr0_cidhash: continue | |
2060 self.lr0_cidhash[id(g)] = len(C) | |
2061 C.append(g) | |
2062 | |
2063 return C | |
2064 | |
2065 # --------------------------------------------------------------------------
--- | |
2066 # ==== LALR(1) Parsing ==== | |
2067 # | |
2068 # LALR(1) parsing is almost exactly the same as SLR except that instead of | |
2069 # relying upon Follow() sets when performing reductions, a more selective | |
2070 # lookahead set that incorporates the state of the LR(0) machine is utilized
. | |
2071 # Thus, we mainly just have to focus on calculating the lookahead sets. | |
2072 # | |
2073 # The method used here is due to DeRemer and Pennelo (1982). | |
2074 # | |
2075 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) | |
2076 # Lookahead Sets", ACM Transactions on Programming Languages and Systems
, | |
2077 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 | |
2078 # | |
2079 # Further details can also be found in: | |
2080 # | |
2081 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing
", | |
2082 # McGraw-Hill Book Company, (1985). | |
2083 # | |
2084 # --------------------------------------------------------------------------
--- | |
2085 | |
2086 # --------------------------------------------------------------------------
--- | |
2087 # compute_nullable_nonterminals() | |
2088 # | |
2089 # Creates a dictionary containing all of the non-terminals that might produc
e | |
2090 # an empty production. | |
2091 # --------------------------------------------------------------------------
--- | |
2092 | |
2093 def compute_nullable_nonterminals(self): | |
2094 nullable = {} | |
2095 num_nullable = 0 | |
2096 while 1: | |
2097 for p in self.grammar.Productions[1:]: | |
2098 if p.len == 0: | |
2099 nullable[p.name] = 1 | |
2100 continue | |
2101 for t in p.prod: | |
2102 if not t in nullable: break | |
2103 else: | |
2104 nullable[p.name] = 1 | |
2105 if len(nullable) == num_nullable: break | |
2106 num_nullable = len(nullable) | |
2107 return nullable | |
2108 | |
    # -----------------------------------------------------------------------------
    # find_nonterminal_transitions(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions.    These are transitions in which a dot appears immediately before
    # a non-terminal.   Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
    #
    # The input C is the set of LR(0) items.
    # -----------------------------------------------------------------------------
2119 | |
2120 def find_nonterminal_transitions(self,C): | |
2121 trans = [] | |
2122 for state in range(len(C)): | |
2123 for p in C[state]: | |
2124 if p.lr_index < p.len - 1: | |
2125 t = (state,p.prod[p.lr_index+1]) | |
2126 if t[1] in self.grammar.Nonterminals: | |
2127 if t not in trans: trans.append(t) | |
2128 state = state + 1 | |
2129 return trans | |
2130 | |
2131 # --------------------------------------------------------------------------
--- | |
2132 # dr_relation() | |
2133 # | |
2134 # Computes the DR(p,A) relationships for non-terminal transitions. The inpu
t | |
2135 # is a tuple (state,N) where state is a number and N is a nonterminal symbol
. | |
2136 # | |
2137 # Returns a list of terminals. | |
2138 # --------------------------------------------------------------------------
--- | |
2139 | |
2140 def dr_relation(self,C,trans,nullable): | |
2141 dr_set = { } | |
2142 state,N = trans | |
2143 terms = [] | |
2144 | |
2145 g = self.lr0_goto(C[state],N) | |
2146 for p in g: | |
2147 if p.lr_index < p.len - 1: | |
2148 a = p.prod[p.lr_index+1] | |
2149 if a in self.grammar.Terminals: | |
2150 if a not in terms: terms.append(a) | |
2151 | |
2152 # This extra bit is to handle the start state | |
2153 if state == 0 and N == self.grammar.Productions[0].prod[0]: | |
2154 terms.append('$end') | |
2155 | |
2156 return terms | |
2157 | |
2158 # --------------------------------------------------------------------------
--- | |
2159 # reads_relation() | |
2160 # | |
2161 # Computes the READS() relation (p,A) READS (t,C). | |
2162 # --------------------------------------------------------------------------
--- | |
2163 | |
2164 def reads_relation(self,C, trans, empty): | |
2165 # Look for empty transitions | |
2166 rel = [] | |
2167 state, N = trans | |
2168 | |
2169 g = self.lr0_goto(C[state],N) | |
2170 j = self.lr0_cidhash.get(id(g),-1) | |
2171 for p in g: | |
2172 if p.lr_index < p.len - 1: | |
2173 a = p.prod[p.lr_index + 1] | |
2174 if a in empty: | |
2175 rel.append((j,a)) | |
2176 | |
2177 return rel | |
2178 | |
2179 # --------------------------------------------------------------------------
--- | |
2180 # compute_lookback_includes() | |
2181 # | |
2182 # Determines the lookback and includes relations | |
2183 # | |
2184 # LOOKBACK: | |
2185 # | |
2186 # This relation is determined by running the LR(0) state machine forward. | |
2187 # For example, starting with a production "N : . A B C", we run it forward | |
2188 # to obtain "N : A B C ." We then build a relationship between this final | |
2189 # state and the starting state. These relationships are stored in a dictio
nary | |
2190 # lookdict. | |
2191 # | |
2192 # INCLUDES: | |
2193 # | |
2194 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). | |
2195 # | |
2196 # This relation is used to determine non-terminal transitions that occur | |
2197 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) | |
2198 # if the following holds: | |
2199 # | |
2200 # B -> LAT, where T -> epsilon and p' -L-> p | |
2201 # | |
2202 # L is essentially a prefix (which may be empty), T is a suffix that must be | |
2203 # able to derive an empty string. State p' must lead to state p with the st
ring L. | |
2204 # | |
2205 # --------------------------------------------------------------------------
--- | |
2206 | |
    def compute_lookback_includes(self,C,trans,nullable):
        """Determine the LOOKBACK and INCLUDES relations.

        LOOKBACK: for each non-terminal transition (state, N), run the
        LR(0) machine forward from "N : . A B C" to "N : A B C ." and
        relate the final state back to the starting transition.

        INCLUDES: (p,A) INCLUDES (p',B) when B -> L A T with T derivable
        to empty and p' leading to p via the prefix L.

        Returns the pair (lookdict, includedict).
        """

        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions for O(1) membership
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state,N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N: continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j,t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals: break      # No forget it
                            if not p.prod[li] in nullable: break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j,t))

                    g = self.lr0_goto(C[j],t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g),-1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name: continue
                    if r.len != p.len: continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]: break
                        i = i + 1
                    else:
                        lookb.append((j,r))
            # Record the includes discovered for this transition, indexed
            # by the *included* transition.
            for i in includes:
                if not i in includedict: includedict[i] = []
                includedict[i].append((state,N))
            lookdict[(state,N)] = lookb

        return lookdict,includedict
2268 | |
2269 # --------------------------------------------------------------------------
--- | |
2270 # compute_read_sets() | |
2271 # | |
2272 # Given a set of LR(0) items, this function computes the read sets. | |
2273 # | |
2274 # Inputs: C = Set of LR(0) items | |
2275 # ntrans = Set of nonterminal transitions | |
2276 # nullable = Set of empty transitions | |
2277 # | |
2278 # Returns a set containing the read sets | |
2279 # --------------------------------------------------------------------------
--- | |
2280 | |
2281 def compute_read_sets(self,C, ntrans, nullable): | |
2282 FP = lambda x: self.dr_relation(C,x,nullable) | |
2283 R = lambda x: self.reads_relation(C,x,nullable) | |
2284 F = digraph(ntrans,R,FP) | |
2285 return F | |
2286 | |
2287 # --------------------------------------------------------------------------
--- | |
2288 # compute_follow_sets() | |
2289 # | |
2290 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, | |
2291 # and an include set, this function computes the follow sets | |
2292 # | |
2293 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} | |
2294 # | |
2295 # Inputs: | |
2296 # ntrans = Set of nonterminal transitions | |
2297 # readsets = Readset (previously computed) | |
2298 # inclsets = Include sets (previously computed) | |
2299 # | |
2300 # Returns a set containing the follow sets | |
2301 # --------------------------------------------------------------------------
--- | |
2302 | |
2303 def compute_follow_sets(self,ntrans,readsets,inclsets): | |
2304 FP = lambda x: readsets[x] | |
2305 R = lambda x: inclsets.get(x,[]) | |
2306 F = digraph(ntrans,R,FP) | |
2307 return F | |
2308 | |
2309 # --------------------------------------------------------------------------
--- | |
2310 # add_lookaheads() | |
2311 # | |
2312 # Attaches the lookahead symbols to grammar rules. | |
2313 # | |
2314 # Inputs: lookbacks - Set of lookback relations | |
2315 # followset - Computed follow set | |
2316 # | |
2317 # This function directly attaches the lookaheads to productions contained | |
2318 # in the lookbacks set | |
2319 # --------------------------------------------------------------------------
--- | |
2320 | |
2321 def add_lookaheads(self,lookbacks,followset): | |
2322 for trans,lb in lookbacks.items(): | |
2323 # Loop over productions in lookback | |
2324 for state,p in lb: | |
2325 if not state in p.lookaheads: | |
2326 p.lookaheads[state] = [] | |
2327 f = followset.get(trans,[]) | |
2328 for a in f: | |
2329 if a not in p.lookaheads[state]: p.lookaheads[state].appen
d(a) | |
2330 | |
2331 # --------------------------------------------------------------------------
--- | |
2332 # add_lalr_lookaheads() | |
2333 # | |
2334 # This function does all of the work of adding lookahead information for use | |
2335 # with LALR parsing | |
2336 # --------------------------------------------------------------------------
--- | |
2337 | |
2338 def add_lalr_lookaheads(self,C): | |
2339 # Determine all of the nullable nonterminals | |
2340 nullable = self.compute_nullable_nonterminals() | |
2341 | |
2342 # Find all non-terminal transitions | |
2343 trans = self.find_nonterminal_transitions(C) | |
2344 | |
2345 # Compute read sets | |
2346 readsets = self.compute_read_sets(C,trans,nullable) | |
2347 | |
2348 # Compute lookback/includes relations | |
2349 lookd, included = self.compute_lookback_includes(C,trans,nullable) | |
2350 | |
2351 # Compute LALR FOLLOW sets | |
2352 followsets = self.compute_follow_sets(trans,readsets,included) | |
2353 | |
2354 # Add all of the lookaheads | |
2355 self.add_lookaheads(lookd,followsets) | |
2356 | |
2357 # --------------------------------------------------------------------------
--- | |
2358 # lr_parse_table() | |
2359 # | |
2360 # This function constructs the parse tables for SLR or LALR | |
2361 # --------------------------------------------------------------------------
--- | |
    def lr_parse_table(self):
        """Construct the SLR or LALR parse tables.

        Fills in self.lr_action and self.lr_goto, one entry per LR(0)
        state.  Action encoding: positive = shift to that state,
        negative = reduce by that rule number, 0 = accept, None = error
        (used for nonassoc conflicts).  Shift/reduce and reduce/reduce
        conflicts are resolved here via precedence/associativity and
        recorded in self.sr_conflicts / self.rr_conflicts.
        """
        Productions = self.grammar.Productions
        Precedence = self.grammar.Precedence
        goto = self.lr_goto           # Goto array
        action = self.lr_action       # Action array
        log = self.log                # Logger for output

        actionp = { }                 # Action production array (temporary)

        log.info("Parsing method: %s", self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = [ ]              # List of actions
            st_action = { }
            st_actionp = { }
            st_goto = { }
            log.info("")
            log.info("state %d", st)
            log.info("")
            for p in I:
                log.info("    (%d) %s", p.number, str(p))
            log.info("")

            for p in I:
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action["$end"] = 0
                        st_actionp["$end"] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        # LALR uses the per-state lookaheads computed by
                        # add_lalr_lookaheads(); SLR falls back to FOLLOW.
                        if self.lr_method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = self.grammar.Follow[p.name]
                        for a in laheads:
                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Existing entry is a shift.  Need to decide on
                                    # shift or reduce here using precedence rules.
                                    # By default we favor shifting.
                                    sprec,slevel = Productions[st_actionp[a].number].prec
                                    rprec,rlevel = Precedence.get(a,('right',0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        # nonassoc at equal precedence: error entry
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict.   In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        chosenp,rejectp = pp,oldp
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp,rejectp = oldp,pp
                                    self.rr_conflicts.append((st,chosenp,rejectp))
                                    log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                # No conflict: plain reduce entry
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I,a)
                        j = self.lr0_cidhash.get(id(g),-1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a,p,"shift and go to state %d" % j))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        raise LALRError("Shift/shift conflict in state %d" % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   -  if precedence of reduce rule is higher, we reduce.
                                    #   -  if precedence of reduce is same and left assoc, we reduce.
                                    #   -  otherwise we shift
                                    rprec,rlevel = Productions[st_actionp[a].number].prec
                                    sprec,slevel = Precedence.get(a,('right',0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))

                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = { }
            for a,p,m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info("    %-15s %s",a,m)
                        _actprint[(a,m)] = 1
            log.info("")
            # Print the actions that were not used.   (debugging)
            not_used = 0
            for a,p,m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a,m) in _actprint:
                            log.debug("  ! %-15s [ %s ]",a,m)
                            not_used = 1
                            _actprint[(a,m)] = 1
            if not_used:
                log.debug("")

            # Construct the goto table for this state

            nkeys = { }
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I,n)
                j = self.lr0_cidhash.get(id(g),-1)
                if j >= 0:
                    st_goto[n] = j
                    log.info("    %-30s shift and go to state %d",n,j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
2537 | |
2538 | |
2539 # --------------------------------------------------------------------------
--- | |
2540 # write() | |
2541 # | |
2542 # This function writes the LR parsing tables to a file | |
2543 # --------------------------------------------------------------------------
--- | |
2544 | |
    def write_table(self,modulename,outputdir='',signature=""):
        """Write the LR parsing tables to a Python source file.

        modulename - table module name (e.g. 'parsetab'); only the last
                     dotted component is used for the file name
        outputdir  - directory in which the file is created
        signature  - grammar signature embedded in the file so later runs
                     can detect grammar changes

        An IOError while writing is reported on stderr and swallowed;
        table writing is best-effort.
        """
        basemodulename = modulename.split(".")[-1]
        filename = os.path.join(outputdir,basemodulename) + ".py"
        try:
            f = open(filename,"w")

            # Header carries the version, method, and signature that
            # LRTable.read_table() validates before using the tables.
            f.write("""
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    """ % (filename, __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try and make smaller.  Instead of one
            # {(state, symbol): value} literal, entries are grouped per
            # symbol with parallel state/value lists; the generated module
            # reassembles the full dictionary at import time.
            if smaller:
                items = { }

                for s,nd in self.lr_action.items():
                   for name,v in nd.items():
                      i = items.get(name)
                      if not i:
                         i = ([],[])
                         items[name] = i
                      i[0].append(s)
                      i[1].append(v)

                f.write("\n_lr_action_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
      if not _x in _lr_action:  _lr_action[_x] = { }
      _lr_action[_x][_k] = _y
del _lr_action_items
""")

            else:
                f.write("\n_lr_action = { ");
                for k,v in self.lr_action.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n");

            if smaller:
                # Factor out names to try and make smaller (same trick as
                # for the action table above, now for the goto table)
                items = { }

                for s,nd in self.lr_goto.items():
                   for name,v in nd.items():
                      i = items.get(name)
                      if not i:
                         i = ([],[])
                         items[name] = i
                      i[0].append(s)
                      i[1].append(v)

                f.write("\n_lr_goto_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       if not _x in _lr_goto: _lr_goto[_x] = { }
       _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
            else:
                f.write("\n_lr_goto = { ");
                for k,v in self.lr_goto.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n");

            # Write production table.  Each tuple supplies the arguments
            # that read_table() passes to MiniProduction(*p).
            f.write("_lr_productions = [\n")
            for p in self.lr_productions:
                if p.func:
                    f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
                else:
                    f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
            f.write("]\n")
            f.close()

        except IOError:
            # Report and swallow -- callers treat table caching as optional.
            e = sys.exc_info()[1]
            sys.stderr.write("Unable to create '%s'\n" % filename)
            sys.stderr.write(str(e)+"\n")
            return
2658 | |
2659 | |
2660 # --------------------------------------------------------------------------
--- | |
2661 # pickle_table() | |
2662 # | |
2663 # This function pickles the LR parsing tables to a supplied file object | |
2664 # --------------------------------------------------------------------------
--- | |
2665 | |
2666 def pickle_table(self,filename,signature=""): | |
2667 try: | |
2668 import cPickle as pickle | |
2669 except ImportError: | |
2670 import pickle | |
2671 outf = open(filename,"wb") | |
2672 pickle.dump(__tabversion__,outf,pickle_protocol) | |
2673 pickle.dump(self.lr_method,outf,pickle_protocol) | |
2674 pickle.dump(signature,outf,pickle_protocol) | |
2675 pickle.dump(self.lr_action,outf,pickle_protocol) | |
2676 pickle.dump(self.lr_goto,outf,pickle_protocol) | |
2677 | |
2678 outp = [] | |
2679 for p in self.lr_productions: | |
2680 if p.func: | |
2681 outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) | |
2682 else: | |
2683 outp.append((str(p),p.name,p.len,None,None,None)) | |
2684 pickle.dump(outp,outf,pickle_protocol) | |
2685 outf.close() | |
2686 | |
2687 # ----------------------------------------------------------------------------- | |
2688 # === INTROSPECTION === | |
2689 # | |
2690 # The following functions and classes are used to implement the PLY | |
2691 # introspection features followed by the yacc() function itself. | |
2692 # ----------------------------------------------------------------------------- | |
2693 | |
2694 # ----------------------------------------------------------------------------- | |
2695 # get_caller_module_dict() | |
2696 # | |
2697 # This function returns a dictionary containing all of the symbols defined withi
n | |
2698 # a caller further down the call stack. This is used to get the environment | |
2699 # associated with the yacc() call if none was provided. | |
2700 # ----------------------------------------------------------------------------- | |
2701 | |
def get_caller_module_dict(levels):
    """Return a dict of the symbols visible *levels* frames up the stack.

    The caller's globals are copied and then overlaid with its locals
    (when the two dictionaries differ), yielding the environment
    associated with a yacc() call when no explicit module was supplied.
    """
    try:
        raise RuntimeError
    except RuntimeError:
        # The traceback's frame is this function's own frame; walk back
        # up the stack the requested number of levels.
        frame = sys.exc_info()[2].tb_frame
        for _ in range(levels):
            frame = frame.f_back
        symbols = frame.f_globals.copy()
        if frame.f_globals != frame.f_locals:
            symbols.update(frame.f_locals)
        return symbols
2716 | |
2717 # ----------------------------------------------------------------------------- | |
2718 # parse_grammar() | |
2719 # | |
2720 # This takes a raw grammar rule string and parses it into production data | |
2721 # ----------------------------------------------------------------------------- | |
def parse_grammar(doc,file,line):
    """Parse a docstring of grammar rules into production tuples.

    Returns a list of (file, lineno, prodname, syms) tuples, one per
    production.  A leading '|' continues the previously named rule.
    Raises SyntaxError for a misplaced '|', a missing ':'/'::=', or any
    otherwise malformed rule line.
    """
    grammar = []
    lineno = line
    lastp = None
    for rawline in doc.splitlines():
        lineno += 1
        tokens = rawline.split()
        if not tokens:
            continue
        try:
            if tokens[0] == '|':
                # Continuation: reuse the name of the previous rule.
                if lastp is None:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,lineno))
                prodname = lastp
                syms = tokens[1:]
            else:
                prodname = tokens[0]
                lastp = prodname
                syms = tokens[2:]
                assign = tokens[1]
                if assign != ':' and assign != '::=':
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,lineno))

            grammar.append((file,lineno,prodname,syms))
        except SyntaxError:
            raise
        except Exception:
            # Anything else (e.g. a bare rule name with no separator)
            # is reported as a generic syntax error for this line.
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,lineno,rawline.strip()))

    return grammar
2754 | |
2755 # ----------------------------------------------------------------------------- | |
2756 # ParserReflect() | |
2757 # | |
2758 # This class represents information extracted for building a parser including | |
2759 # start symbol, error function, tokens, precedence list, action functions, | |
2760 # etc. | |
2761 # ----------------------------------------------------------------------------- | |
class ParserReflect(object):
    """Information extracted from a module for building a parser.

    Collects the start symbol, error function, token list, precedence
    table, and p_* action functions from a symbol dictionary (usually a
    module namespace), and validates all of it.  Problems are reported
    through self.log and flagged by setting self.error.
    """
    def __init__(self,pdict,log=None):
        self.pdict = pdict          # Symbol dictionary being inspected
        self.start = None           # Start symbol ('start' entry in pdict)
        self.error_func = None      # p_error() handler, if defined
        self.tokens = None          # Token list
        self.files = {}             # Files in which rules were defined (used as a set)
        self.grammar = []           # Parsed rules: (funcname, (file,line,prodname,syms))
        self.error = 0              # Set to 1 by any failed validation

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        """Extract start symbol, error handler, tokens, precedence, and rules."""
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        """Run every validation step; return the accumulated error flag."""
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_files()
        return self.error

    # Compute a signature over the grammar
    def signature(self):
        """Return an MD5 digest over the grammar definition.

        The digest covers the start symbol, precedence table, token list,
        and every rule docstring; it is compared against a stored
        signature to decide whether cached tables are stale.  Must be
        called after get_all() has populated the attributes it reads.
        """
        try:
            from hashlib import md5
        except ImportError:
            from md5 import md5      # pre-2.5 fallback
        try:
            sig = md5()
            if self.start:
                sig.update(self.start.encode('latin-1'))
            if self.prec:
                sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
            if self.tokens:
                sig.update(" ".join(self.tokens).encode('latin-1'))
            for f in self.pfuncs:
                if f[3]:
                    # f[3] is the rule function's docstring
                    sig.update(f[3].encode('latin-1'))
        except (TypeError,ValueError):
            # Malformed entries simply stop contributing to the digest.
            pass
        return sig.digest()

    # -----------------------------------------------------------------------------
    # validate_file()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_files(self):
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for filename in self.files.keys():
            base,ext = os.path.splitext(filename)
            # NOTE(review): returning here aborts the check for ALL remaining
            # files, not just this one -- a 'continue' may have been intended.
            # The return value (1) is ignored by validate_all().
            if ext != '.py': return 1          # No idea. Assume it's okay.

            try:
                f = open(filename)
                lines = f.readlines()
                f.close()
            except IOError:
                continue                       # Source unavailable; skip the check.

            # Map each p_* function name to the line where it first appeared.
            counthash = { }
            for linen,l in enumerate(lines):
                linen += 1                     # 1-based line numbers for messages
                m = fre.match(l)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start,str):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        """Check that p_error, if present, is a callable taking one argument."""
        if self.error_func:
            if isinstance(self.error_func,types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1          # bound methods have an extra 'self' arg
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = 1
                return

            eline = func_code(self.error_func).co_firstlineno
            efile = func_code(self.error_func).co_filename
            self.files[efile] = 1

            if (func_code(self.error_func).co_argcount != 1+ismethod):
                self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
                self.error = 1

    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get("tokens",None)
        if not tokens:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens,(list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = 1
            return

        # Warn (but do not fail) on duplicated token names.
        terminals = {}
        for n in self.tokens:
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get("precedence",None)

    # Validate and parse the precedence map
    def validate_precedence(self):
        """Flatten the precedence table into (term, assoc, level) triples.

        Each entry of self.prec must be (assoc, term, ..., term); levels
        are numbered from 1 in the order entries appear.
        """
        preclist = []
        if self.prec:
            if not isinstance(self.prec,(list,tuple)):
                self.log.error("precedence must be a list or tuple")
                self.error = 1
                return
            for level,p in enumerate(self.prec):
                if not isinstance(p,(list,tuple)):
                    self.log.error("Bad precedence table")
                    self.error = 1
                    return

                if len(p) < 2:
                    self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
                    self.error = 1
                    return
                assoc = p[0]
                if not isinstance(assoc,str):
                    self.log.error("precedence associativity must be a string")
                    self.error = 1
                    return
                for term in p[1:]:
                    if not isinstance(term,str):
                        self.log.error("precedence items must be strings")
                        self.error = 1
                        return
                    preclist.append((term,assoc,level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        """Collect every p_* function (except p_error) as (line,file,name,doc)."""
        p_functions = []
        for name, item in self.pdict.items():
            if name[:2] != 'p_': continue
            if name == 'p_error': continue
            if isinstance(item,(types.FunctionType,types.MethodType)):
                line = func_code(item).co_firstlineno
                file = func_code(item).co_filename
                p_functions.append((line,file,name,item.__doc__))

        # Sort all of the actions by line number
        p_functions.sort()
        self.pfuncs = p_functions


    # Validate all of the p_functions
    def validate_pfunctions(self):
        """Check rule-function signatures and parse their docstring grammar."""
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error("no rules of the form p_rulename are defined")
            self.error = 1
            return

        for line, file, name, doc in self.pfuncs:
            func = self.pdict[name]
            # Bound methods carry an implicit 'self' in addition to 'p'.
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func_code(func).co_argcount > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
                self.error = 1
            elif func_code(func).co_argcount < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
                self.error = 1
            elif not func.__doc__:
                self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc,file,line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError:
                    e = sys.exc_info()[1]
                    self.log.error(str(e))
                    self.error = 1

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.files[file] = 1

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n,v in self.pdict.items():
            if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
            if n[0:2] == 't_': continue          # lexer rules are someone else's problem
            if n[0:2] == 'p_' and n != 'p_error':
                self.log.warning("'%s' not defined as a function", n)
            # A function with a rule-signature argcount whose docstring's
            # second word is ':' probably wanted a p_ prefix.
            if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
                (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
                try:
                    doc = v.__doc__.split(" ")
                    if doc[1] == ':':
                        self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
                                         func_code(v).co_filename, func_code(v).co_firstlineno,n)
                except Exception:
                    # No docstring or too short -- not a grammar rule; ignore.
                    pass

        self.grammar = grammar
3029 | |
3030 # ----------------------------------------------------------------------------- | |
3031 # yacc(module) | |
3032 # | |
3033 # Build a parser | |
3034 # ----------------------------------------------------------------------------- | |
3035 | |
def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file, outputdir='',
         debuglog=None, errorlog = None, picklefile=None):
    """Build and return an LRParser from grammar rules found in *module*
    (or in the caller's namespace when no module is given).

    Tries to reuse previously written table files (or *picklefile*) when
    their signature matches the current grammar; otherwise regenerates
    the tables with the requested *method* ('LALR' or 'SLR'), optionally
    writing them back out.  Raises YaccError if the grammar is invalid.
    """
    global parse                 # Reference to the parsing method of the last built parser

    # If pickling is enabled, table files are not created

    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k,getattr(module,k)) for k in dir(module)]
        pdict = dict(_items)
    else:
        # No module: harvest symbols from the frame that called yacc().
        pdict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict,log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError("Unable to build parser")

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        # With optimize on, the cached tables are trusted without a
        # signature match.
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr,pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception:
                e = sys.exc_info()[1]
                errorlog.warning("There was a problem loading the table file: %s", repr(e))
    except VersionError:
        # NOTE(review): sys.exc_info() returns the whole (type, value, tb)
        # tuple, so str(e) prints the tuple; sys.exc_info()[1] may have
        # been intended -- confirm before changing.
        e = sys.exc_info()
        errorlog.warning(str(e))
    except Exception:
        # Any other failure to load cached tables just falls through to
        # a full regeneration below.
        pass

    if debuglog is None:
        if debug:
            debuglog = PlyLogger(open(debugfile,"w"))
        else:
            debuglog = NullLogger()

    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)


    errors = 0

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError("Unable to build parser")

    if not pinfo.error_func:
        errorlog.warning("no p_error() function is defined")

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term,assoc,level)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.warning("%s",str(e))

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname,syms,funcname,file,line)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.error("%s",str(e))
            errors = 1

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError:
        e = sys.exc_info()[1]
        errorlog.error(str(e))
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
        errors = 1

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info("")
        debuglog.info("Unused terminals:")
        debuglog.info("")
        for term in unused_terminals:
            errorlog.warning("Token '%s' defined, but not used", term)
            debuglog.info("    %s", term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info("")
        debuglog.info("Grammar")
        debuglog.info("")
        for n,p in enumerate(grammar.Productions):
            debuglog.info("Rule %-5d %s", n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning("There is 1 unused token")
    if len(unused_terminals) > 1:
        errorlog.warning("There are %d unused tokens", len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning("There is 1 unused rule")
    if len(unused_rules) > 1:
        errorlog.warning("There are %d unused rules", len(unused_rules))

    # Cross-reference dump: where each terminal/nonterminal is used.
    if debug:
        debuglog.info("")
        debuglog.info("Terminals, with rules where they appear")
        debuglog.info("")
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info("")
        debuglog.info("Nonterminals, with rules where they appear")
        debuglog.info("")
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info("")

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning("Symbol '%s' is unreachable",u)

        # Infinite recursion (a nonterminal that can never derive a
        # terminal string) is fatal.
        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error("Infinite recursion detected for symbol '%s'", inf)
            errors = 1

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug("Generating %s tables", method)

    lr = LRGeneratedTable(grammar,method,debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning("1 shift/reduce conflict")
        elif num_sr > 1:
            errorlog.warning("%d shift/reduce conflicts", num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning("1 reduce/reduce conflict")
        elif num_rr > 1:
            errorlog.warning("%d reduce/reduce conflicts", num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning("")
        debuglog.warning("Conflicts:")
        debuglog.warning("")

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s",  tok, state, resolution)

        # Deduplicate reduce/reduce reports by (state, rule, rejected).
        already_reported = {}
        for state, rule, rejected in lr.rr_conflicts:
            if (state,id(rule),id(rejected)) in already_reported:
                continue
            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            debuglog.warning("rejected rule (%s) in state %d", rejected,state)
            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
            already_reported[state,id(rule),id(rejected)] = 1

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning("Rule (%s) is never reduced", rejected)
                errorlog.warning("Rule (%s) is never reduced", rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        lr.write_table(tabmodule,outputdir,signature)

    # Write a pickled version of the tables
    if picklefile:
        lr.pickle_table(picklefile,signature)

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr,pinfo.error_func)

    parse = parser.parse
    return parser
OLD | NEW |