Chromium Code Reviews

Side by Side Diff: tools/nixysa/third_party/ply-3.1/ply/lex.py

Issue 2043006: WTF NPAPI extension. Early draft. Base URL: http://src.chromium.org/svn/trunk/src/
Patch Set: Created 10 years, 7 months ago
Property Changes:
Added: svn:eol-style
+ LF
1 # -----------------------------------------------------------------------------
2 # ply: lex.py
3 #
4 # Author: David M. Beazley (dave@dabeaz.com)
5 #
6 # Copyright (C) 2001-2009, David M. Beazley
7 #
8 # This library is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU Lesser General Public
10 # License as published by the Free Software Foundation; either
11 # version 2.1 of the License, or (at your option) any later version.
12 #
13 # This library is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # Lesser General Public License for more details.
17 #
18 # You should have received a copy of the GNU Lesser General Public
19 # License along with this library; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #
22 # See the file COPYING for a complete copy of the LGPL.
23 # -----------------------------------------------------------------------------
24
25 __version__ = "3.0"
26 __tabversion__ = "3.0" # Version of table file used
27
28 import re, sys, types, copy, os
29
30 # This tuple contains known string types
31 try:
32 # Python 2.6
33 StringTypes = (types.StringType, types.UnicodeType)
34 except AttributeError:
35 # Python 3.0
36 StringTypes = (str, bytes)
37
38 # Extract the code attribute of a function. Different implementations
39 # are for Python 2/3 compatibility.
40
41 if sys.version_info[0] < 3:
42 def func_code(f):
43 return f.func_code
44 else:
45 def func_code(f):
46 return f.__code__
47
48 # This regular expression is used to match valid token names
49 _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
50
51 # Exception thrown when invalid token encountered and no default error
52 # handler is defined.
53
54 class LexError(Exception):
55 def __init__(self,message,s):
56 self.args = (message,)
57 self.text = s
58
59 # Token class. This class is used to represent the tokens produced.
60 class LexToken(object):
61 def __str__(self):
62 return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self. lexpos)
63 def __repr__(self):
64 return str(self)
65
66 # This object is a stand-in for a logging object created by the
67 # logging module.
68
69 class PlyLogger(object):
70 def __init__(self,f):
71 self.f = f
72 def critical(self,msg,*args,**kwargs):
73 self.f.write((msg % args) + "\n")
74
75 def warning(self,msg,*args,**kwargs):
76 self.f.write("WARNING: "+ (msg % args) + "\n")
77
78 def error(self,msg,*args,**kwargs):
79 self.f.write("ERROR: " + (msg % args) + "\n")
80
81 info = critical
82 debug = critical
83
84 # Null logger is used when no output is generated. Does nothing.
85 class NullLogger(object):
86 def __getattribute__(self,name):
87 return self
88 def __call__(self,*args,**kwargs):
89 return self
90
91 # -----------------------------------------------------------------------------
92 # === Lexing Engine ===
93 #
94 # The following Lexer class implements the lexer runtime. There are only
95 # a few public methods and attributes:
96 #
97 # input() - Store a new string in the lexer
98 # token() - Get the next token
99 # clone() - Clone the lexer
100 #
101 # lineno - Current line number
102 # lexpos - Current position in the input string
103 # -----------------------------------------------------------------------------
104
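# A minimal usage sketch of this runtime interface (illustrative only; it
# assumes a lexer already built by lex(), defined later in this file, and an
# example input string):
#
#     lexer.input("3 + 4")
#     while True:
#         tok = lexer.token()
#         if not tok:
#             break
#         sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno, tok.lexpos))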
105 class Lexer:
106 def __init__(self):
107 self.lexre = None # Master regular expression. This is a list of
108 # tuples (re,findex) where re is a compiled
109 # regular expression and findex is a list
110 # mapping regex group numbers to rules
111 self.lexretext = None # Current regular expression strings
112 self.lexstatere = {} # Dictionary mapping lexer states to master regexs
113 self.lexstateretext = {} # Dictionary mapping lexer states to regex strings
114 self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names
115 self.lexstate = "INITIAL" # Current lexer state
116 self.lexstatestack = [] # Stack of lexer states
117 self.lexstateinfo = None # State information
118 self.lexstateignore = {} # Dictionary of ignored characters for each state
119 self.lexstateerrorf = {} # Dictionary of error functions for each state
120 self.lexreflags = 0 # Optional re compile flags
121 self.lexdata = None # Actual input data (as a string)
122 self.lexpos = 0 # Current position in input text
123 self.lexlen = 0 # Length of the input text
124 self.lexerrorf = None # Error rule (if any)
125 self.lextokens = None # List of valid tokens
126 self.lexignore = "" # Ignored characters
127 self.lexliterals = "" # Literal characters that can be passed through
128 self.lexmodule = None # Module
129 self.lineno = 1 # Current line number
130 self.lexoptimize = 0 # Optimized mode
131
132 def clone(self,object=None):
133 c = copy.copy(self)
134
135 # If the object parameter has been supplied, it means we are attaching the
136 # lexer to a new object. In this case, we have to rebind all methods in
137 # the lexstatere and lexstateerrorf tables.
138
139 if object:
140 newtab = { }
141 for key, ritem in self.lexstatere.items():
142 newre = []
143 for cre, findex in ritem:
144 newfindex = []
145 for f in findex:
146 if not f or not f[0]:
147 newfindex.append(f)
148 continue
149 newfindex.append((getattr(object,f[0].__name__),f[1]))
150 newre.append((cre,newfindex))
151 newtab[key] = newre
152 c.lexstatere = newtab
153 c.lexstateerrorf = { }
154 for key, ef in self.lexstateerrorf.items():
155 c.lexstateerrorf[key] = getattr(object,ef.__name__)
156 c.lexmodule = object
157 return c
158
159 # ------------------------------------------------------------
160 # writetab() - Write lexer information to a table file
161 # ------------------------------------------------------------
162 def writetab(self,tabfile,outputdir=""):
163 if isinstance(tabfile,types.ModuleType):
164 return
165 basetabfilename = tabfile.split(".")[-1]
166 filename = os.path.join(outputdir,basetabfilename)+".py"
167 tf = open(filename,"w")
168 tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
169 tf.write("_tabversion = %s\n" % repr(__version__))
170 tf.write("_lextokens = %s\n" % repr(self.lextokens))
171 tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
172 tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
173 tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))
174
175 tabre = { }
176 # Collect all functions in the initial state
177 initial = self.lexstatere["INITIAL"]
178 initialfuncs = []
179 for part in initial:
180 for f in part[1]:
181 if f and f[0]:
182 initialfuncs.append(f)
183
184 for key, lre in self.lexstatere.items():
185 titem = []
186 for i in range(len(lre)):
187 titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i])))
188 tabre[key] = titem
189
190 tf.write("_lexstatere = %s\n" % repr(tabre))
191 tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))
192
193 taberr = { }
194 for key, ef in self.lexstateerrorf.items():
195 if ef:
196 taberr[key] = ef.__name__
197 else:
198 taberr[key] = None
199 tf.write("_lexstateerrorf = %s\n" % repr(taberr))
200 tf.close()
201
202 # ------------------------------------------------------------
203 # readtab() - Read lexer information from a tab file
204 # ------------------------------------------------------------
205 def readtab(self,tabfile,fdict):
206 if isinstance(tabfile,types.ModuleType):
207 lextab = tabfile
208 else:
209 if sys.version_info[0] < 3:
210 exec("import %s as lextab" % tabfile)
211 else:
212 env = { }
213 exec("import %s as lextab" % tabfile, env,env)
214 lextab = env['lextab']
215
216 if getattr(lextab,"_tabversion","0.0") != __version__:
217 raise ImportError("Inconsistent PLY version")
218
219 self.lextokens = lextab._lextokens
220 self.lexreflags = lextab._lexreflags
221 self.lexliterals = lextab._lexliterals
222 self.lexstateinfo = lextab._lexstateinfo
223 self.lexstateignore = lextab._lexstateignore
224 self.lexstatere = { }
225 self.lexstateretext = { }
226 for key,lre in lextab._lexstatere.items():
227 titem = []
228 txtitem = []
229 for i in range(len(lre)):
230 titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict)))
231 txtitem.append(lre[i][0])
232 self.lexstatere[key] = titem
233 self.lexstateretext[key] = txtitem
234 self.lexstateerrorf = { }
235 for key,ef in lextab._lexstateerrorf.items():
236 self.lexstateerrorf[key] = fdict[ef]
237 self.begin('INITIAL')
238
239 # ------------------------------------------------------------
240 # input() - Push a new string into the lexer
241 # ------------------------------------------------------------
242 def input(self,s):
243 # Pull off the first character to see if s looks like a string
244 c = s[:1]
245 if not isinstance(c,StringTypes):
246 raise ValueError("Expected a string")
247 self.lexdata = s
248 self.lexpos = 0
249 self.lexlen = len(s)
250
251 # ------------------------------------------------------------
252 # begin() - Changes the lexing state
253 # ------------------------------------------------------------
254 def begin(self,state):
255 if not state in self.lexstatere:
256 raise ValueError("Undefined state")
257 self.lexre = self.lexstatere[state]
258 self.lexretext = self.lexstateretext[state]
259 self.lexignore = self.lexstateignore.get(state,"")
260 self.lexerrorf = self.lexstateerrorf.get(state,None)
261 self.lexstate = state
262
263 # ------------------------------------------------------------
264 # push_state() - Changes the lexing state and saves old on stack
265 # ------------------------------------------------------------
266 def push_state(self,state):
267 self.lexstatestack.append(self.lexstate)
268 self.begin(state)
269
270 # ------------------------------------------------------------
271 # pop_state() - Restores the previous state
272 # ------------------------------------------------------------
273 def pop_state(self):
274 self.begin(self.lexstatestack.pop())
275
276 # ------------------------------------------------------------
277 # current_state() - Returns the current lexing state
278 # ------------------------------------------------------------
279 def current_state(self):
280 return self.lexstate
281
282 # ------------------------------------------------------------
283 # skip() - Skip ahead n characters
284 # ------------------------------------------------------------
285 def skip(self,n):
286 self.lexpos += n
287
288 # ------------------------------------------------------------
289 # opttoken() - Return the next token from the Lexer
290 #
291 # Note: This function has been carefully implemented to be as fast
292 # as possible. Don't make changes unless you really know what
293 # you are doing
294 # ------------------------------------------------------------
295 def token(self):
296 # Make local copies of frequently referenced attributes
297 lexpos = self.lexpos
298 lexlen = self.lexlen
299 lexignore = self.lexignore
300 lexdata = self.lexdata
301
302 while lexpos < lexlen:
303 # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
304 if lexdata[lexpos] in lexignore:
305 lexpos += 1
306 continue
307
308 # Look for a regular expression match
309 for lexre,lexindexfunc in self.lexre:
310 m = lexre.match(lexdata,lexpos)
311 if not m: continue
312
313 # Create a token for return
314 tok = LexToken()
315 tok.value = m.group()
316 tok.lineno = self.lineno
317 tok.lexpos = lexpos
318
319 i = m.lastindex
320 func,tok.type = lexindexfunc[i]
321
322 if not func:
323 # If no token type was set, it's an ignored token
324 if tok.type:
325 self.lexpos = m.end()
326 return tok
327 else:
328 lexpos = m.end()
329 break
330
331 lexpos = m.end()
332
333 # If token is processed by a function, call it
334
335 tok.lexer = self # Set additional attributes useful in token rules
336 self.lexmatch = m
337 self.lexpos = lexpos
338
339 newtok = func(tok)
340
341 # Every function must return a token, if nothing, we just move to next token
342 if not newtok:
343 lexpos = self.lexpos # This is here in case user has updated lexpos.
344 lexignore = self.lexignore # This is here in case there was a state change
345 break
346
347 # Verify type of the token. If not in the token map, raise an error
348 if not self.lexoptimize:
349 if not newtok.type in self.lextokens:
350 raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
351 func_code(func).co_filename, func_code(func).co_firstlineno,
352 func.__name__, newtok.type),lexdata[lexpos:])
353
354 return newtok
355 else:
356 # No match, see if in literals
357 if lexdata[lexpos] in self.lexliterals:
358 tok = LexToken()
359 tok.value = lexdata[lexpos]
360 tok.lineno = self.lineno
361 tok.type = tok.value
362 tok.lexpos = lexpos
363 self.lexpos = lexpos + 1
364 return tok
365
366 # No match. Call t_error() if defined.
367 if self.lexerrorf:
368 tok = LexToken()
369 tok.value = self.lexdata[lexpos:]
370 tok.lineno = self.lineno
371 tok.type = "error"
372 tok.lexer = self
373 tok.lexpos = lexpos
374 self.lexpos = lexpos
375 newtok = self.lexerrorf(tok)
376 if lexpos == self.lexpos:
377 # Error method didn't change text position at all. This is an error.
378 raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
379 lexpos = self.lexpos
380 if not newtok: continue
381 return newtok
382
383 self.lexpos = lexpos
384 raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])
385
386 self.lexpos = lexpos + 1
387 if self.lexdata is None:
388 raise RuntimeError("No input string given with input()")
389 return None
390
391 # Iterator interface
392 def __iter__(self):
393 return self
394
395 def next(self):
396 t = self.token()
397 if t is None:
398 raise StopIteration
399 return t
400
401 __next__ = next
402
403 # -----------------------------------------------------------------------------
404 # ==== Lex Builder ===
405 #
406 # The functions and classes below are used to collect lexing information
407 # and build a Lexer object from it.
408 # -----------------------------------------------------------------------------
409
410 # -----------------------------------------------------------------------------
411 # get_caller_module_dict()
412 #
413 # This function returns a dictionary containing all of the symbols defined within
414 # a caller further down the call stack. This is used to get the environment
415 # associated with the yacc() call if none was provided.
416 # -----------------------------------------------------------------------------
417
418 def get_caller_module_dict(levels):
419 try:
420 raise RuntimeError
421 except RuntimeError:
422 e,b,t = sys.exc_info()
423 f = t.tb_frame
424 while levels > 0:
425 f = f.f_back
426 levels -= 1
427 ldict = f.f_globals.copy()
428 if f.f_globals != f.f_locals:
429 ldict.update(f.f_locals)
430
431 return ldict
432
433 # -----------------------------------------------------------------------------
434 # _funcs_to_names()
435 #
436 # Given a list of regular expression functions, this converts it to a list
437 # suitable for output to a table file
438 # -----------------------------------------------------------------------------
439
440 def _funcs_to_names(funclist,namelist):
441 result = []
442 for f,name in zip(funclist,namelist):
443 if f and f[0]:
444 result.append((name, f[1]))
445 else:
446 result.append(f)
447 return result
448
449 # -----------------------------------------------------------------------------
450 # _names_to_funcs()
451 #
452 # Given a list of regular expression function names, this converts it back to
453 # functions.
454 # -----------------------------------------------------------------------------
455
456 def _names_to_funcs(namelist,fdict):
457 result = []
458 for n in namelist:
459 if n and n[0]:
460 result.append((fdict[n[0]],n[1]))
461 else:
462 result.append(n)
463 return result
464
465 # -----------------------------------------------------------------------------
466 # _form_master_re()
467 #
468 # This function takes a list of all of the regex components and attempts to
469 # form the master regular expression. Given limitations in the Python re
470 # module, it may be necessary to break the master regex into separate expressions.
471 # -----------------------------------------------------------------------------
472
473 def _form_master_re(relist,reflags,ldict,toknames):
474 if not relist: return []
475 regex = "|".join(relist)
476 try:
477 lexre = re.compile(regex,re.VERBOSE | reflags)
478
479 # Build the index to function map for the matching engine
480 lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
481 lexindexnames = lexindexfunc[:]
482
483 for f,i in lexre.groupindex.items():
484 handle = ldict.get(f,None)
485 if type(handle) in (types.FunctionType, types.MethodType):
486 lexindexfunc[i] = (handle,toknames[f])
487 lexindexnames[i] = f
488 elif handle is not None:
489 lexindexnames[i] = f
490 if f.find("ignore_") > 0:
491 lexindexfunc[i] = (None,None)
492 else:
493 lexindexfunc[i] = (None, toknames[f])
494
495 return [(lexre,lexindexfunc)],[regex],[lexindexnames]
496 except Exception:
497 m = int(len(relist)/2)
498 if m == 0: m = 1
499 llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames)
500 rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames)
501 return llist+rlist, lre+rre, lnames+rnames
502
503 # -----------------------------------------------------------------------------
504 # def _statetoken(s,names)
505 #
506 # Given a declaration name s of the form "t_" and a dictionary whose keys are
507 # state names, this function returns a tuple (states,tokenname) where states
508 # is a tuple of state names and tokenname is the name of the token. For example,
509 # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
510 # -----------------------------------------------------------------------------
511
512 def _statetoken(s,names):
513 nonstate = 1
514 parts = s.split("_")
515 for i in range(1,len(parts)):
516 if not parts[i] in names and parts[i] != 'ANY': break
517 if i > 1:
518 states = tuple(parts[1:i])
519 else:
520 states = ('INITIAL',)
521
522 if 'ANY' in states:
523 states = tuple(names)
524
525 tokenname = "_".join(parts[i:])
526 return (states,tokenname)
527
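# Two illustrative calls of the mapping described above (the state and token
# names here are examples, not definitions from this file):
#
#     _statetoken("t_NUMBER", {'INITIAL': 'inclusive'})
#         -> (('INITIAL',), 'NUMBER')
#     _statetoken("t_comment_WORD", {'INITIAL': 'inclusive', 'comment': 'exclusive'})
#         -> (('comment',), 'WORD')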
528
529 # -----------------------------------------------------------------------------
530 # LexerReflect()
531 #
532 # This class represents information needed to build a lexer as extracted from a
533 # user's input file.
534 # -----------------------------------------------------------------------------
535 class LexerReflect(object):
536 def __init__(self,ldict,log=None,reflags=0):
537 self.ldict = ldict
538 self.error_func = None
539 self.tokens = []
540 self.reflags = reflags
541 self.stateinfo = { 'INITIAL' : 'inclusive'}
542 self.files = {}
543 self.error = 0
544
545 if log is None:
546 self.log = PlyLogger(sys.stderr)
547 else:
548 self.log = log
549
550 # Get all of the basic information
551 def get_all(self):
552 self.get_tokens()
553 self.get_literals()
554 self.get_states()
555 self.get_rules()
556
557 # Validate all of the information
558 def validate_all(self):
559 self.validate_tokens()
560 self.validate_literals()
561 self.validate_rules()
562 return self.error
563
564 # Get the tokens map
565 def get_tokens(self):
566 tokens = self.ldict.get("tokens",None)
567 if not tokens:
568 self.log.error("No token list is defined")
569 self.error = 1
570 return
571
572 if not isinstance(tokens,(list, tuple)):
573 self.log.error("tokens must be a list or tuple")
574 self.error = 1
575 return
576
577 if not tokens:
578 self.log.error("tokens is empty")
579 self.error = 1
580 return
581
582 self.tokens = tokens
583
584 # Validate the tokens
585 def validate_tokens(self):
586 terminals = {}
587 for n in self.tokens:
588 if not _is_identifier.match(n):
589 self.log.error("Bad token name '%s'",n)
590 self.error = 1
591 if n in terminals:
592 self.log.warning("Token '%s' multiply defined", n)
593 terminals[n] = 1
594
595 # Get the literals specifier
596 def get_literals(self):
597 self.literals = self.ldict.get("literals","")
598
599 # Validate literals
600 def validate_literals(self):
601 try:
602 for c in self.literals:
603 if not isinstance(c,StringTypes) or len(c) > 1:
604 self.log.error("Invalid literal %s. Must be a single charact er", repr(c))
605 self.error = 1
606 continue
607
608 except TypeError:
609 self.log.error("Invalid literals specification. literals must be a s equence of characters")
610 self.error = 1
611
612 def get_states(self):
613 self.states = self.ldict.get("states",None)
614 # Build statemap
615 if self.states:
616 if not isinstance(self.states,(tuple,list)):
617 self.log.error("states must be defined as a tuple or list")
618 self.error = 1
619 else:
620 for s in self.states:
621 if not isinstance(s,tuple) or len(s) != 2:
622 self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s))
623 self.error = 1
624 continue
625 name, statetype = s
626 if not isinstance(name,StringTypes):
627 self.log.error("State name %s must be a string", repr(name))
628 self.error = 1
629 continue
630 if not (statetype == 'inclusive' or statetype == 'exclusive'):
631 self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name)
632 self.error = 1
633 continue
634 if name in self.stateinfo:
635 self.log.error("State '%s' already defined",name)
636 self.error = 1
637 continue
638 self.stateinfo[name] = statetype
639
640 # Get all of the symbols with a t_ prefix and sort them into various
641 # categories (functions, strings, error functions, and ignore characters)
642
643 def get_rules(self):
644 tsymbols = [f for f in self.ldict if f[:2] == 't_' ]
645
646 # Now build up a list of functions and a list of strings
647
648 self.toknames = { } # Mapping of symbols to token names
649 self.funcsym = { } # Symbols defined as functions
650 self.strsym = { } # Symbols defined as strings
651 self.ignore = { } # Ignore strings by state
652 self.errorf = { } # Error functions by state
653
654 for s in self.stateinfo:
655 self.funcsym[s] = []
656 self.strsym[s] = []
657
658 if len(tsymbols) == 0:
659 self.log.error("No rules of the form t_rulename are defined")
660 self.error = 1
661 return
662
663 for f in tsymbols:
664 t = self.ldict[f]
665 states, tokname = _statetoken(f,self.stateinfo)
666 self.toknames[f] = tokname
667
668 if hasattr(t,"__call__"):
669 if tokname == 'error':
670 for s in states:
671 self.errorf[s] = t
672 elif tokname == 'ignore':
673 line = func_code(t).co_firstlineno
674 file = func_code(t).co_filename
675 self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__)
676 self.error = 1
677 else:
678 for s in states:
679 self.funcsym[s].append((f,t))
680 elif isinstance(t, StringTypes):
681 if tokname == 'ignore':
682 for s in states:
683 self.ignore[s] = t
684 if "\\" in t:
685 self.log.warning("%s contains a literal backslash '\\'", f)
686
687 elif tokname == 'error':
688 self.log.error("Rule '%s' must be defined as a function", f)
689 self.error = 1
690 else:
691 for s in states:
692 self.strsym[s].append((f,t))
693 else:
694 self.log.error("%s not defined as a function or string", f)
695 self.error = 1
696
697 # Sort the functions by line number
698 for f in self.funcsym.values():
699 if sys.version_info[0] < 3:
700 f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno))
701 else:
702 # Python 3.0
703 f.sort(key=lambda x: func_code(x[1]).co_firstlineno)
704
705 # Sort the strings by regular expression length
706 for s in self.strsym.values():
707 if sys.version_info[0] < 3:
708 s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))
709 else:
710 # Python 3.0
711 s.sort(key=lambda x: len(x[1]),reverse=True)
712
713 # Validate all of the t_rules collected
714 def validate_rules(self):
715 for state in self.stateinfo:
716 # Validate all rules defined by functions
717
718
719
720 for fname, f in self.funcsym[state]:
721 line = func_code(f).co_firstlineno
722 file = func_code(f).co_filename
723 self.files[file] = 1
724
725 tokname = self.toknames[fname]
726 if isinstance(f, types.MethodType):
727 reqargs = 2
728 else:
729 reqargs = 1
730 nargs = func_code(f).co_argcount
731 if nargs > reqargs:
732 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
733 self.error = 1
734 continue
735
736 if nargs < reqargs:
737 self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
738 self.error = 1
739 continue
740
741 if not f.__doc__:
742 self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__)
743 self.error = 1
744 continue
745
746 try:
747 c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags)
748 if c.match(""):
749 self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__)
750 self.error = 1
751 except re.error:
752 _etype, e, _etrace = sys.exc_info()
753 self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e)
754 if '#' in f.__doc__:
755 self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__)
756 self.error = 1
757
758 # Validate all rules defined by strings
759 for name,r in self.strsym[state]:
760 tokname = self.toknames[name]
761 if tokname == 'error':
762 self.log.error("Rule '%s' must be defined as a function", name)
763 self.error = 1
764 continue
765
766 if not tokname in self.tokens and tokname.find("ignore_") < 0:
767 self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname)
768 self.error = 1
769 continue
770
771 try:
772 c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags)
773 if (c.match("")):
774 self.log.error("Regular expression for rule '%s' matches empty string",name)
775 self.error = 1
776 except re.error:
777 _etype, e, _etrace = sys.exc_info()
778 self.log.error("Invalid regular expression for rule '%s'. %s",name,e)
779 if '#' in r:
780 self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name)
781 self.error = 1
782
783 if not self.funcsym[state] and not self.strsym[state]:
784 self.log.error("No rules defined for state '%s'",state)
785 self.error = 1
786
787 # Validate the error function
788 efunc = self.errorf.get(state,None)
789 if efunc:
790 f = efunc
791 line = func_code(f).co_firstlineno
792 file = func_code(f).co_filename
793 self.files[file] = 1
794
795 if isinstance(f, types.MethodType):
796 reqargs = 2
797 else:
798 reqargs = 1
799 nargs = func_code(f).co_argcount
800 if nargs > reqargs:
801 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
802 self.error = 1
803
804 if nargs < reqargs:
805 self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
806 self.error = 1
807
808 for f in self.files:
809 self.validate_file(f)
810
811
812 # -----------------------------------------------------------------------------
813 # validate_file()
814 #
815 # This checks to see if there are duplicated t_rulename() functions or strings
816 # in the parser input file. This is done using a simple regular expression
817 # match on each line in the given file.
818 # -----------------------------------------------------------------------------
819
820 def validate_file(self,filename):
821 import os.path
822 base,ext = os.path.splitext(filename)
823 if ext != '.py': return # No idea what the file is. Return OK
824
825 try:
826 f = open(filename)
827 lines = f.readlines()
828 f.close()
829 except IOError:
830 return # Couldn't find the file. Don't worry about it
831
832 fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
833 sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
834
835 counthash = { }
836 linen = 1
837 for l in lines:
838 m = fre.match(l)
839 if not m:
840 m = sre.match(l)
841 if m:
842 name = m.group(1)
843 prev = counthash.get(name)
844 if not prev:
845 counthash[name] = linen
846 else:
847 self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev)
848 self.error = 1
849 linen += 1
850
851 # -----------------------------------------------------------------------------
852 # lex(module)
853 #
854 # Build all of the regular expression rules from definitions in the supplied module
855 # -----------------------------------------------------------------------------
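# A minimal sketch of a lexer specification module that lex() can consume
# (the token and rule names below are illustrative, not part of this file):
#
#     import ply.lex as lex
#
#     tokens = ('NUMBER', 'PLUS')
#
#     t_PLUS   = r'\+'
#     t_ignore = ' \t'
#
#     def t_NUMBER(t):
#         r'\d+'
#         t.value = int(t.value)
#         return t
#
#     def t_error(t):
#         t.lexer.skip(1)
#
#     lexer = lex.lex()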
856 def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None):
857 global lexer
858 ldict = None
859 stateinfo = { 'INITIAL' : 'inclusive'}
860 lexobj = Lexer()
861 lexobj.lexoptimize = optimize
862 global token,input
863
864 if errorlog is None:
865 errorlog = PlyLogger(sys.stderr)
866
867 if debug:
868 if debuglog is None:
869 debuglog = PlyLogger(sys.stderr)
870
871 # Get the module dictionary used for the lexer
872 if object: module = object
873
874 if module:
875 _items = [(k,getattr(module,k)) for k in dir(module)]
876 ldict = dict(_items)
877 else:
878 ldict = get_caller_module_dict(2)
879
880 # Collect parser information from the dictionary
881 linfo = LexerReflect(ldict,log=errorlog,reflags=reflags)
882 linfo.get_all()
883 if not optimize:
884 if linfo.validate_all():
885 raise SyntaxError("Can't build lexer")
886
887 if optimize and lextab:
888 try:
889 lexobj.readtab(lextab,ldict)
890 token = lexobj.token
891 input = lexobj.input
892 lexer = lexobj
893 return lexobj
894
895 except ImportError:
896 pass
897
898 # Dump some basic debugging information
899 if debug:
900 debuglog.info("lex: tokens = %r", linfo.tokens)
901 debuglog.info("lex: literals = %r", linfo.literals)
902 debuglog.info("lex: states = %r", linfo.stateinfo)
903
904 # Build a dictionary of valid token names
905 lexobj.lextokens = { }
906 for n in linfo.tokens:
907 lexobj.lextokens[n] = 1
908
909 # Get literals specification
910 if isinstance(linfo.literals,(list,tuple)):
911 lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
912 else:
913 lexobj.lexliterals = linfo.literals
914
915 # Get the stateinfo dictionary
916 stateinfo = linfo.stateinfo
917
918 regexs = { }
919 # Build the master regular expressions
920 for state in stateinfo:
921 regex_list = []
922
923 # Add rules defined by functions first
924 for fname, f in linfo.funcsym[state]:
925 line = func_code(f).co_firstlineno
926 file = func_code(f).co_filename
927 regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
928 if debug:
929 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)
930
931 # Now add all of the simple rules
932 for name,r in linfo.strsym[state]:
933 regex_list.append("(?P<%s>%s)" % (name,r))
934 if debug:
935 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)
936
937 regexs[state] = regex_list
938
939 # Build the master regular expressions
940
941 if debug:
942 debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")
943
944 for state in regexs:
945 lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
946 lexobj.lexstatere[state] = lexre
947 lexobj.lexstateretext[state] = re_text
948 lexobj.lexstaterenames[state] = re_names
949 if debug:
950 for i in range(len(re_text)):
951 debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i])
952
953 # For inclusive states, we need to add the regular expressions from the INITIAL state
954 for state,stype in stateinfo.items():
955 if state != "INITIAL" and stype == 'inclusive':
956 lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
957 lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
958 lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])
959
960 lexobj.lexstateinfo = stateinfo
961 lexobj.lexre = lexobj.lexstatere["INITIAL"]
962 lexobj.lexretext = lexobj.lexstateretext["INITIAL"]
963
964 # Set up ignore variables
965 lexobj.lexstateignore = linfo.ignore
966 lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")
967
968 # Set up error functions
969 lexobj.lexstateerrorf = linfo.errorf
970 lexobj.lexerrorf = linfo.errorf.get("INITIAL",None)
971 if not lexobj.lexerrorf:
972 errorlog.warning("No t_error rule is defined")
973
974 # Check state information for ignore and error rules
975 for s,stype in stateinfo.items():
976 if stype == 'exclusive':
977 if not s in linfo.errorf:
978 errorlog.warning("No error rule is defined for exclusive state '%s'", s)
979 if not s in linfo.ignore and lexobj.lexignore:
980 errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
981 elif stype == 'inclusive':
982 if not s in linfo.errorf:
983 linfo.errorf[s] = linfo.errorf.get("INITIAL",None)
984 if not s in linfo.ignore:
985 linfo.ignore[s] = linfo.ignore.get("INITIAL","")
986
987 # Create global versions of the token() and input() functions
988 token = lexobj.token
989 input = lexobj.input
990 lexer = lexobj
991
992 # If in optimize mode, we write the lextab
993 if lextab and optimize:
994 lexobj.writetab(lextab,outputdir)
995
996 return lexobj
997
998 # -----------------------------------------------------------------------------
999 # runmain()
1000 #
1001 # This runs the lexer as a main program
1002 # -----------------------------------------------------------------------------
1003
1004 def runmain(lexer=None,data=None):
1005 if not data:
1006 try:
1007 filename = sys.argv[1]
1008 f = open(filename)
1009 data = f.read()
1010 f.close()
1011 except IndexError:
1012 sys.stdout.write("Reading from standard input (type EOF to end):\n")
1013 data = sys.stdin.read()
1014
1015 if lexer:
1016 _input = lexer.input
1017 else:
1018 _input = input
1019 _input(data)
1020 if lexer:
1021 _token = lexer.token
1022 else:
1023 _token = token
1024
1025 while 1:
1026 tok = _token()
1027 if not tok: break
1028 sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos))
1029
1030 # -----------------------------------------------------------------------------
1031 # @TOKEN(regex)
1032 #
1033 # This decorator function can be used to set the regex expression on a function
1034 # when its docstring might need to be set in an alternative way
1035 # -----------------------------------------------------------------------------
1036
1037 def TOKEN(r):
1038 def set_doc(f):
1039 if hasattr(r,"__call__"):
1040 f.__doc__ = r.__doc__
1041 else:
1042 f.__doc__ = r
1043 return f
1044 return set_doc
1045
1046 # Alternative spelling of the TOKEN decorator
1047 Token = TOKEN
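# An illustrative use of the decorator: build the regular expression
# programmatically and attach it instead of a docstring (the names below are
# examples only):
#
#     digit      = r'([0-9])'
#     nondigit   = r'([_A-Za-z])'
#     identifier = r'(' + nondigit + r'(' + digit + r'|' + nondigit + r')*)'
#
#     @TOKEN(identifier)
#     def t_ID(t):
#         return t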
1048
