OLD | NEW |
(Empty) | |
| 1 # ----------------------------------------------------------------------------- |
| 2 # ply: lex.py |
| 3 # |
| 4 # Author: David M. Beazley (dave@dabeaz.com) |
| 5 # |
| 6 # Copyright (C) 2001-2009, David M. Beazley |
| 7 # |
| 8 # This library is free software; you can redistribute it and/or |
| 9 # modify it under the terms of the GNU Lesser General Public |
| 10 # License as published by the Free Software Foundation; either |
| 11 # version 2.1 of the License, or (at your option) any later version. |
| 12 # |
| 13 # This library is distributed in the hope that it will be useful, |
| 14 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 # Lesser General Public License for more details. |
| 17 # |
| 18 # You should have received a copy of the GNU Lesser General Public |
| 19 # License along with this library; if not, write to the Free Software |
| 20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 21 # |
| 22 # See the file COPYING for a complete copy of the LGPL. |
| 23 # ----------------------------------------------------------------------------- |
| 24 |
| 25 __version__ = "3.0" |
| 26 __tabversion__ = "3.0" # Version of table file used |
| 27 |
| 28 import re, sys, types, copy, os |
| 29 |
| 30 # This tuple contains known string types |
| 31 try: |
| 32 # Python 2.6 |
| 33 StringTypes = (types.StringType, types.UnicodeType) |
| 34 except AttributeError: |
| 35 # Python 3.0 |
| 36 StringTypes = (str, bytes) |
| 37 |
| 38 # Extract the code attribute of a function. Different implementations |
| 39 # are for Python 2/3 compatibility. |
| 40 |
| 41 if sys.version_info[0] < 3: |
| 42 def func_code(f): |
| 43 return f.func_code |
| 44 else: |
| 45 def func_code(f): |
| 46 return f.__code__ |
| 47 |
| 48 # This regular expression is used to match valid token names |
| 49 _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') |
| 50 |
| 51 # Exception thrown when an invalid token is encountered and no default |
| 52 # error handler is defined. |
| 53 |
| 54 class LexError(Exception): |
| 55 def __init__(self,message,s): |
| 56 self.args = (message,) |
| 57 self.text = s |
| 58 |
| 59 # Token class. This class is used to represent the tokens produced. |
| 60 class LexToken(object): |
| 61 def __str__(self): |
| 62         return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) |
| 63 def __repr__(self): |
| 64 return str(self) |
| 65 |
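| # (Illustrative: a token scanned from the input "abc" by a hypothetical |
| #  rule t_ID would print as LexToken(ID,'abc',1,0), i.e. its type, value, |
| #  line number, and lexing position.) |
|  |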
| 66 # This object is a stand-in for a logging object created by the |
| 67 # logging module. |
| 68 |
| 69 class PlyLogger(object): |
| 70 def __init__(self,f): |
| 71 self.f = f |
| 72 def critical(self,msg,*args,**kwargs): |
| 73 self.f.write((msg % args) + "\n") |
| 74 |
| 75 def warning(self,msg,*args,**kwargs): |
| 76 self.f.write("WARNING: "+ (msg % args) + "\n") |
| 77 |
| 78 def error(self,msg,*args,**kwargs): |
| 79 self.f.write("ERROR: " + (msg % args) + "\n") |
| 80 |
| 81 info = critical |
| 82 debug = critical |
| 83 |
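| # (Usage sketch: PlyLogger(sys.stderr).warning("line %d: %s", 3, "oops") |
| #  writes "WARNING: line 3: oops" to stderr; info() and debug() alias |
| #  critical() and write the bare message.) |
|  |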
| 84 # Null logger is used when no output is generated. Does nothing. |
| 85 class NullLogger(object): |
| 86 def __getattribute__(self,name): |
| 87 return self |
| 88 def __call__(self,*args,**kwargs): |
| 89 return self |
| 90 |
| 91 # ----------------------------------------------------------------------------- |
| 92 # === Lexing Engine === |
| 93 # |
| 94 # The following Lexer class implements the lexer runtime. There are only |
| 95 # a few public methods and attributes: |
| 96 # |
| 97 # input() - Store a new string in the lexer |
| 98 # token() - Get the next token |
| 99 # clone() - Clone the lexer |
| 100 # |
| 101 # lineno - Current line number |
| 102 # lexpos - Current position in the input string |
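| # |
| # A minimal usage sketch (illustrative; assumes a lexer already built by |
| # the lex() function defined later in this file): |
| # |
| #     lexer.input("some text") |
| #     while True: |
| #         tok = lexer.token()        # returns None when input is exhausted |
| #         if tok is None: break |
| #         print(tok.type, tok.value) |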
| 103 # ----------------------------------------------------------------------------- |
| 104 |
| 105 class Lexer: |
| 106 def __init__(self): |
| 107         self.lexre = None             # Master regular expression. This is a list of |
| 108                                       # tuples (re,findex) where re is a compiled |
| 109                                       # regular expression and findex is a list |
| 110                                       # mapping regex group numbers to rules |
| 111         self.lexretext = None         # Current regular expression strings |
| 112         self.lexstatere = {}          # Dictionary mapping lexer states to master regexes |
| 113         self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings |
| 114         self.lexstaterenames = {}     # Dictionary mapping lexer states to symbol names |
| 115         self.lexstate = "INITIAL"     # Current lexer state |
| 116         self.lexstatestack = []       # Stack of lexer states |
| 117         self.lexstateinfo = None      # State information |
| 118         self.lexstateignore = {}      # Dictionary of ignored characters for each state |
| 119         self.lexstateerrorf = {}      # Dictionary of error functions for each state |
| 120 self.lexreflags = 0 # Optional re compile flags |
| 121 self.lexdata = None # Actual input data (as a string) |
| 122 self.lexpos = 0 # Current position in input text |
| 123 self.lexlen = 0 # Length of the input text |
| 124 self.lexerrorf = None # Error rule (if any) |
| 125 self.lextokens = None # List of valid tokens |
| 126 self.lexignore = "" # Ignored characters |
| 127         self.lexliterals = ""         # Literal characters that can be passed through |
| 128 self.lexmodule = None # Module |
| 129 self.lineno = 1 # Current line number |
| 130 self.lexoptimize = 0 # Optimized mode |
| 131 |
| 132 def clone(self,object=None): |
| 133 c = copy.copy(self) |
| 134 |
| 135         # If the object parameter has been supplied, it means we are attaching the |
| 136 # lexer to a new object. In this case, we have to rebind all methods in |
| 137 # the lexstatere and lexstateerrorf tables. |
| 138 |
| 139 if object: |
| 140 newtab = { } |
| 141 for key, ritem in self.lexstatere.items(): |
| 142 newre = [] |
| 143 for cre, findex in ritem: |
| 144 newfindex = [] |
| 145 for f in findex: |
| 146 if not f or not f[0]: |
| 147 newfindex.append(f) |
| 148 continue |
| 149 newfindex.append((getattr(object,f[0].__name__),f[1])) |
| 150 newre.append((cre,newfindex)) |
| 151 newtab[key] = newre |
| 152 c.lexstatere = newtab |
| 153 c.lexstateerrorf = { } |
| 154 for key, ef in self.lexstateerrorf.items(): |
| 155 c.lexstateerrorf[key] = getattr(object,ef.__name__) |
| 156 c.lexmodule = object |
| 157 return c |
| 158 |
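|     # (Illustrative: lexer.clone() yields an independent copy that shares the |
|     #  compiled master regexes, so separate inputs can be scanned in parallel; |
|     #  passing an object rebinds bound-method rules to it, as done above.) |
|  |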
| 159 # ------------------------------------------------------------ |
| 160 # writetab() - Write lexer information to a table file |
| 161 # ------------------------------------------------------------ |
| 162 def writetab(self,tabfile,outputdir=""): |
| 163 if isinstance(tabfile,types.ModuleType): |
| 164 return |
| 165 basetabfilename = tabfile.split(".")[-1] |
| 166 filename = os.path.join(outputdir,basetabfilename)+".py" |
| 167 tf = open(filename,"w") |
| 168         tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__)) |
| 169         tf.write("_tabversion = %s\n" % repr(__tabversion__)) |
| 170 tf.write("_lextokens = %s\n" % repr(self.lextokens)) |
| 171 tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) |
| 172 tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) |
| 173 tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) |
| 174 |
| 175 tabre = { } |
| 176 # Collect all functions in the initial state |
| 177 initial = self.lexstatere["INITIAL"] |
| 178 initialfuncs = [] |
| 179 for part in initial: |
| 180 for f in part[1]: |
| 181 if f and f[0]: |
| 182 initialfuncs.append(f) |
| 183 |
| 184 for key, lre in self.lexstatere.items(): |
| 185 titem = [] |
| 186 for i in range(len(lre)): |
| 187                 titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i]))) |
| 188 tabre[key] = titem |
| 189 |
| 190 tf.write("_lexstatere = %s\n" % repr(tabre)) |
| 191 tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) |
| 192 |
| 193 taberr = { } |
| 194 for key, ef in self.lexstateerrorf.items(): |
| 195 if ef: |
| 196 taberr[key] = ef.__name__ |
| 197 else: |
| 198 taberr[key] = None |
| 199 tf.write("_lexstateerrorf = %s\n" % repr(taberr)) |
| 200 tf.close() |
| 201 |
| 202 # ------------------------------------------------------------ |
| 203 # readtab() - Read lexer information from a tab file |
| 204 # ------------------------------------------------------------ |
| 205 def readtab(self,tabfile,fdict): |
| 206 if isinstance(tabfile,types.ModuleType): |
| 207 lextab = tabfile |
| 208 else: |
| 209 if sys.version_info[0] < 3: |
| 210 exec("import %s as lextab" % tabfile) |
| 211 else: |
| 212 env = { } |
| 213 exec("import %s as lextab" % tabfile, env,env) |
| 214 lextab = env['lextab'] |
| 215 |
| 216         if getattr(lextab,"_tabversion","0.0") != __tabversion__: |
| 217 raise ImportError("Inconsistent PLY version") |
| 218 |
| 219 self.lextokens = lextab._lextokens |
| 220 self.lexreflags = lextab._lexreflags |
| 221 self.lexliterals = lextab._lexliterals |
| 222 self.lexstateinfo = lextab._lexstateinfo |
| 223 self.lexstateignore = lextab._lexstateignore |
| 224 self.lexstatere = { } |
| 225 self.lexstateretext = { } |
| 226 for key,lre in lextab._lexstatere.items(): |
| 227 titem = [] |
| 228 txtitem = [] |
| 229 for i in range(len(lre)): |
| 230                 titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict))) |
| 231 txtitem.append(lre[i][0]) |
| 232 self.lexstatere[key] = titem |
| 233 self.lexstateretext[key] = txtitem |
| 234 self.lexstateerrorf = { } |
| 235 for key,ef in lextab._lexstateerrorf.items(): |
| 236 self.lexstateerrorf[key] = fdict[ef] |
| 237 self.begin('INITIAL') |
| 238 |
| 239 # ------------------------------------------------------------ |
| 240 # input() - Push a new string into the lexer |
| 241 # ------------------------------------------------------------ |
| 242 def input(self,s): |
| 243 # Pull off the first character to see if s looks like a string |
| 244 c = s[:1] |
| 245 if not isinstance(c,StringTypes): |
| 246 raise ValueError("Expected a string") |
| 247 self.lexdata = s |
| 248 self.lexpos = 0 |
| 249 self.lexlen = len(s) |
| 250 |
| 251 # ------------------------------------------------------------ |
| 252 # begin() - Changes the lexing state |
| 253 # ------------------------------------------------------------ |
| 254 def begin(self,state): |
| 255 if not state in self.lexstatere: |
| 256 raise ValueError("Undefined state") |
| 257 self.lexre = self.lexstatere[state] |
| 258 self.lexretext = self.lexstateretext[state] |
| 259 self.lexignore = self.lexstateignore.get(state,"") |
| 260 self.lexerrorf = self.lexstateerrorf.get(state,None) |
| 261 self.lexstate = state |
| 262 |
| 263 # ------------------------------------------------------------ |
| 264 # push_state() - Changes the lexing state and saves old on stack |
| 265 # ------------------------------------------------------------ |
| 266 def push_state(self,state): |
| 267 self.lexstatestack.append(self.lexstate) |
| 268 self.begin(state) |
| 269 |
| 270 # ------------------------------------------------------------ |
| 271 # pop_state() - Restores the previous state |
| 272 # ------------------------------------------------------------ |
| 273 def pop_state(self): |
| 274 self.begin(self.lexstatestack.pop()) |
| 275 |
| 276 # ------------------------------------------------------------ |
| 277 # current_state() - Returns the current lexing state |
| 278 # ------------------------------------------------------------ |
| 279 def current_state(self): |
| 280 return self.lexstate |
| 281 |
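|     # (Illustrative: inside a rule, t.lexer.push_state('comment') switches to a |
|     #  hypothetical 'comment' state and a later t.lexer.pop_state() restores the |
|     #  previous state; begin() switches without saving the old state.) |
|  |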
| 282 # ------------------------------------------------------------ |
| 283 # skip() - Skip ahead n characters |
| 284 # ------------------------------------------------------------ |
| 285 def skip(self,n): |
| 286 self.lexpos += n |
| 287 |
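|     # (Illustrative t_error handler using skip(); a sketch of what a user |
|     #  module might define: |
|     # |
|     #      def t_error(t): |
|     #          print("Illegal character %r" % t.value[0]) |
|     #          t.lexer.skip(1)       # discard one character and resume |
|     # ) |
|  |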
| 288 # ------------------------------------------------------------ |
| 289     # token() - Return the next token from the Lexer |
| 290 # |
| 291 # Note: This function has been carefully implemented to be as fast |
| 292 # as possible. Don't make changes unless you really know what |
| 293 # you are doing |
| 294 # ------------------------------------------------------------ |
| 295 def token(self): |
| 296 # Make local copies of frequently referenced attributes |
| 297 lexpos = self.lexpos |
| 298 lexlen = self.lexlen |
| 299 lexignore = self.lexignore |
| 300 lexdata = self.lexdata |
| 301 |
| 302 while lexpos < lexlen: |
| 303             # This code provides some short-circuit code for whitespace, tabs, and other ignored characters |
| 304 if lexdata[lexpos] in lexignore: |
| 305 lexpos += 1 |
| 306 continue |
| 307 |
| 308 # Look for a regular expression match |
| 309 for lexre,lexindexfunc in self.lexre: |
| 310 m = lexre.match(lexdata,lexpos) |
| 311 if not m: continue |
| 312 |
| 313 # Create a token for return |
| 314 tok = LexToken() |
| 315 tok.value = m.group() |
| 316 tok.lineno = self.lineno |
| 317 tok.lexpos = lexpos |
| 318 |
| 319 i = m.lastindex |
| 320 func,tok.type = lexindexfunc[i] |
| 321 |
| 322 if not func: |
| 323 # If no token type was set, it's an ignored token |
| 324 if tok.type: |
| 325 self.lexpos = m.end() |
| 326 return tok |
| 327 else: |
| 328 lexpos = m.end() |
| 329 break |
| 330 |
| 331 lexpos = m.end() |
| 332 |
| 333 # If token is processed by a function, call it |
| 334 |
| 335                 tok.lexer = self          # Set additional attributes useful in token rules |
| 336 self.lexmatch = m |
| 337 self.lexpos = lexpos |
| 338 |
| 339 newtok = func(tok) |
| 340 |
| 341                 # Every function must return a token; if it returns nothing, we just move on to the next token |
| 342 if not newtok: |
| 343                     lexpos = self.lexpos          # This is here in case user has updated lexpos. |
| 344                     lexignore = self.lexignore    # This is here in case there was a state change |
| 345 break |
| 346 |
| 347                 # Verify type of the token.  If not in the token map, raise an error |
| 348 if not self.lexoptimize: |
| 349 if not newtok.type in self.lextokens: |
| 350                         raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( |
| 351                             func_code(func).co_filename, func_code(func).co_firstlineno, |
| 352 func.__name__, newtok.type),lexdata[lexpos:]) |
| 353 |
| 354 return newtok |
| 355 else: |
| 356 # No match, see if in literals |
| 357 if lexdata[lexpos] in self.lexliterals: |
| 358 tok = LexToken() |
| 359 tok.value = lexdata[lexpos] |
| 360 tok.lineno = self.lineno |
| 361 tok.type = tok.value |
| 362 tok.lexpos = lexpos |
| 363 self.lexpos = lexpos + 1 |
| 364 return tok |
| 365 |
| 366 # No match. Call t_error() if defined. |
| 367 if self.lexerrorf: |
| 368 tok = LexToken() |
| 369 tok.value = self.lexdata[lexpos:] |
| 370 tok.lineno = self.lineno |
| 371 tok.type = "error" |
| 372 tok.lexer = self |
| 373 tok.lexpos = lexpos |
| 374 self.lexpos = lexpos |
| 375 newtok = self.lexerrorf(tok) |
| 376 if lexpos == self.lexpos: |
| 377                         # Error method didn't change text position at all. This is an error. |
| 378                         raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) |
| 379 lexpos = self.lexpos |
| 380 if not newtok: continue |
| 381 return newtok |
| 382 |
| 383 self.lexpos = lexpos |
| 384                 raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) |
| 385 |
| 386 self.lexpos = lexpos + 1 |
| 387 if self.lexdata is None: |
| 388 raise RuntimeError("No input string given with input()") |
| 389 return None |
| 390 |
| 391 # Iterator interface |
| 392 def __iter__(self): |
| 393 return self |
| 394 |
| 395 def next(self): |
| 396 t = self.token() |
| 397 if t is None: |
| 398 raise StopIteration |
| 399 return t |
| 400 |
| 401 __next__ = next |
| 402 |
| 403 # ----------------------------------------------------------------------------- |
| 404 # === Lex Builder === |
| 405 # |
| 406 # The functions and classes below are used to collect lexing information |
| 407 # and build a Lexer object from it. |
| 408 # ----------------------------------------------------------------------------- |
| 409 |
| 410 # ----------------------------------------------------------------------------- |
| 411 # get_caller_module_dict() |
| 412 # |
| 413 # This function returns a dictionary containing all of the symbols defined within |
| 414 # a caller further down the call stack.  This is used to get the environment |
| 415 # associated with the lex() call if none was provided. |
| 416 # ----------------------------------------------------------------------------- |
| 417 |
| 418 def get_caller_module_dict(levels): |
| 419 try: |
| 420 raise RuntimeError |
| 421 except RuntimeError: |
| 422 e,b,t = sys.exc_info() |
| 423 f = t.tb_frame |
| 424 while levels > 0: |
| 425 f = f.f_back |
| 426 levels -= 1 |
| 427 ldict = f.f_globals.copy() |
| 428 if f.f_globals != f.f_locals: |
| 429 ldict.update(f.f_locals) |
| 430 |
| 431 return ldict |
| 432 |
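| # (Illustrative: get_caller_module_dict(2) called inside lex() returns the |
| #  namespace of lex()'s own caller, with f_locals layered over f_globals.) |
|  |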
| 433 # ----------------------------------------------------------------------------- |
| 434 # _funcs_to_names() |
| 435 # |
| 436 # Given a list of regular expression functions, this converts it to a list |
| 437 # suitable for output to a table file |
| 438 # ----------------------------------------------------------------------------- |
| 439 |
| 440 def _funcs_to_names(funclist,namelist): |
| 441 result = [] |
| 442 for f,name in zip(funclist,namelist): |
| 443 if f and f[0]: |
| 444 result.append((name, f[1])) |
| 445 else: |
| 446 result.append(f) |
| 447 return result |
| 448 |
| 449 # ----------------------------------------------------------------------------- |
| 450 # _names_to_funcs() |
| 451 # |
| 452 # Given a list of regular expression function names, this converts it back to |
| 453 # functions. |
| 454 # ----------------------------------------------------------------------------- |
| 455 |
| 456 def _names_to_funcs(namelist,fdict): |
| 457 result = [] |
| 458 for n in namelist: |
| 459 if n and n[0]: |
| 460 result.append((fdict[n[0]],n[1])) |
| 461 else: |
| 462 result.append(n) |
| 463 return result |
| 464 |
| 465 # ----------------------------------------------------------------------------- |
| 466 # _form_master_re() |
| 467 # |
| 468 # This function takes a list of all of the regex components and attempts to |
| 469 # form the master regular expression. Given limitations in the Python re |
| 470 # module, it may be necessary to break the master regex into separate expressions. |
| 471 # ----------------------------------------------------------------------------- |
| 472 |
| 473 def _form_master_re(relist,reflags,ldict,toknames): |
| 474 if not relist: return [] |
| 475 regex = "|".join(relist) |
| 476 try: |
| 477 lexre = re.compile(regex,re.VERBOSE | reflags) |
| 478 |
| 479 # Build the index to function map for the matching engine |
| 480 lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) |
| 481 lexindexnames = lexindexfunc[:] |
| 482 |
| 483 for f,i in lexre.groupindex.items(): |
| 484 handle = ldict.get(f,None) |
| 485 if type(handle) in (types.FunctionType, types.MethodType): |
| 486 lexindexfunc[i] = (handle,toknames[f]) |
| 487 lexindexnames[i] = f |
| 488 elif handle is not None: |
| 489 lexindexnames[i] = f |
| 490 if f.find("ignore_") > 0: |
| 491 lexindexfunc[i] = (None,None) |
| 492 else: |
| 493 lexindexfunc[i] = (None, toknames[f]) |
| 494 |
| 495 return [(lexre,lexindexfunc)],[regex],[lexindexnames] |
| 496 except Exception: |
| 497 m = int(len(relist)/2) |
| 498 if m == 0: m = 1 |
| 499 llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) |
| 500 rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) |
| 501 return llist+rlist, lre+rre, lnames+rnames |
| 502 |
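| # (Note: re.compile() of the combined pattern can fail, for example when the |
| #  number of named groups exceeds what the re module allows; the except |
| #  branch above then halves the rule list and retries each half recursively.) |
|  |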
| 503 # ----------------------------------------------------------------------------- |
| 504 # def _statetoken(s,names) |
| 505 # |
| 506 # Given a declaration name s of the form "t_" and a dictionary whose keys are |
| 507 # state names, this function returns a tuple (states,tokenname) where states |
| 508 # is a tuple of state names and tokenname is the name of the token.  For example, |
| 509 # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') |
| 510 # ----------------------------------------------------------------------------- |
| 511 |
| 512 def _statetoken(s,names): |
| 513 nonstate = 1 |
| 514 parts = s.split("_") |
| 515 for i in range(1,len(parts)): |
| 516 if not parts[i] in names and parts[i] != 'ANY': break |
| 517 if i > 1: |
| 518 states = tuple(parts[1:i]) |
| 519 else: |
| 520 states = ('INITIAL',) |
| 521 |
| 522 if 'ANY' in states: |
| 523 states = tuple(names) |
| 524 |
| 525 tokenname = "_".join(parts[i:]) |
| 526 return (states,tokenname) |
| 527 |
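| # (Further illustration: with names = {'foo':1,'bar':1}, the call |
| #  _statetoken("t_ANY_NUMBER", names) returns (('foo','bar'), 'NUMBER'), |
| #  since 'ANY' expands to every declared state.) |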
| 528 |
| 529 # ----------------------------------------------------------------------------- |
| 530 # LexerReflect() |
| 531 # |
| 532 # This class represents information needed to build a lexer as extracted from a |
| 533 # user's input file. |
| 534 # ----------------------------------------------------------------------------- |
| 535 class LexerReflect(object): |
| 536 def __init__(self,ldict,log=None,reflags=0): |
| 537 self.ldict = ldict |
| 538 self.error_func = None |
| 539 self.tokens = [] |
| 540 self.reflags = reflags |
| 541 self.stateinfo = { 'INITIAL' : 'inclusive'} |
| 542 self.files = {} |
| 543 self.error = 0 |
| 544 |
| 545 if log is None: |
| 546 self.log = PlyLogger(sys.stderr) |
| 547 else: |
| 548 self.log = log |
| 549 |
| 550 # Get all of the basic information |
| 551 def get_all(self): |
| 552 self.get_tokens() |
| 553 self.get_literals() |
| 554 self.get_states() |
| 555 self.get_rules() |
| 556 |
| 557 # Validate all of the information |
| 558 def validate_all(self): |
| 559 self.validate_tokens() |
| 560 self.validate_literals() |
| 561 self.validate_rules() |
| 562 return self.error |
| 563 |
| 564 # Get the tokens map |
| 565 def get_tokens(self): |
| 566 tokens = self.ldict.get("tokens",None) |
| 567 if not tokens: |
| 568 self.log.error("No token list is defined") |
| 569 self.error = 1 |
| 570 return |
| 571 |
| 572 if not isinstance(tokens,(list, tuple)): |
| 573 self.log.error("tokens must be a list or tuple") |
| 574 self.error = 1 |
| 575 return |
| 576 |
| 577 if not tokens: |
| 578 self.log.error("tokens is empty") |
| 579 self.error = 1 |
| 580 return |
| 581 |
| 582 self.tokens = tokens |
| 583 |
| 584 # Validate the tokens |
| 585 def validate_tokens(self): |
| 586 terminals = {} |
| 587 for n in self.tokens: |
| 588 if not _is_identifier.match(n): |
| 589 self.log.error("Bad token name '%s'",n) |
| 590 self.error = 1 |
| 591 if n in terminals: |
| 592 self.log.warning("Token '%s' multiply defined", n) |
| 593 terminals[n] = 1 |
| 594 |
| 595 # Get the literals specifier |
| 596 def get_literals(self): |
| 597 self.literals = self.ldict.get("literals","") |
| 598 |
| 599 # Validate literals |
| 600 def validate_literals(self): |
| 601 try: |
| 602 for c in self.literals: |
| 603 if not isinstance(c,StringTypes) or len(c) > 1: |
| 604                     self.log.error("Invalid literal %s. Must be a single character", repr(c)) |
| 605 self.error = 1 |
| 606 continue |
| 607 |
| 608 except TypeError: |
| 609             self.log.error("Invalid literals specification. literals must be a sequence of characters") |
| 610 self.error = 1 |
| 611 |
| 612 def get_states(self): |
| 613 self.states = self.ldict.get("states",None) |
| 614 # Build statemap |
| 615 if self.states: |
| 616 if not isinstance(self.states,(tuple,list)): |
| 617 self.log.error("states must be defined as a tuple or list") |
| 618 self.error = 1 |
| 619 else: |
| 620 for s in self.states: |
| 621 if not isinstance(s,tuple) or len(s) != 2: |
| 622                         self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s)) |
| 623 self.error = 1 |
| 624 continue |
| 625 name, statetype = s |
| 626 if not isinstance(name,StringTypes): |
| 627                         self.log.error("State name %s must be a string", repr(name)) |
| 628 self.error = 1 |
| 629 continue |
| 630                     if not (statetype == 'inclusive' or statetype == 'exclusive'): |
| 631                         self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name) |
| 632 self.error = 1 |
| 633 continue |
| 634 if name in self.stateinfo: |
| 635 self.log.error("State '%s' already defined",name) |
| 636 self.error = 1 |
| 637 continue |
| 638 self.stateinfo[name] = statetype |
| 639 |
| 640 # Get all of the symbols with a t_ prefix and sort them into various |
| 641 # categories (functions, strings, error functions, and ignore characters) |
| 642 |
| 643 def get_rules(self): |
| 644 tsymbols = [f for f in self.ldict if f[:2] == 't_' ] |
| 645 |
| 646 # Now build up a list of functions and a list of strings |
| 647 |
| 648 self.toknames = { } # Mapping of symbols to token names |
| 649 self.funcsym = { } # Symbols defined as functions |
| 650 self.strsym = { } # Symbols defined as strings |
| 651 self.ignore = { } # Ignore strings by state |
| 652 self.errorf = { } # Error functions by state |
| 653 |
| 654 for s in self.stateinfo: |
| 655 self.funcsym[s] = [] |
| 656 self.strsym[s] = [] |
| 657 |
| 658 if len(tsymbols) == 0: |
| 659 self.log.error("No rules of the form t_rulename are defined") |
| 660 self.error = 1 |
| 661 return |
| 662 |
| 663 for f in tsymbols: |
| 664 t = self.ldict[f] |
| 665 states, tokname = _statetoken(f,self.stateinfo) |
| 666 self.toknames[f] = tokname |
| 667 |
| 668 if hasattr(t,"__call__"): |
| 669 if tokname == 'error': |
| 670 for s in states: |
| 671 self.errorf[s] = t |
| 672 elif tokname == 'ignore': |
| 673 line = func_code(t).co_firstlineno |
| 674 file = func_code(t).co_filename |
| 675                     self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__) |
| 676 self.error = 1 |
| 677 else: |
| 678 for s in states: |
| 679 self.funcsym[s].append((f,t)) |
| 680 elif isinstance(t, StringTypes): |
| 681 if tokname == 'ignore': |
| 682 for s in states: |
| 683 self.ignore[s] = t |
| 684 if "\\" in t: |
| 685                             self.log.warning("%s contains a literal backslash '\\'",f) |
| 686 |
| 687 elif tokname == 'error': |
| 688 self.log.error("Rule '%s' must be defined as a function", f) |
| 689 self.error = 1 |
| 690 else: |
| 691 for s in states: |
| 692 self.strsym[s].append((f,t)) |
| 693 else: |
| 694 self.log.error("%s not defined as a function or string", f) |
| 695 self.error = 1 |
| 696 |
| 697 # Sort the functions by line number |
| 698 for f in self.funcsym.values(): |
| 699 if sys.version_info[0] < 3: |
| 700                 f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno)) |
| 701 else: |
| 702 # Python 3.0 |
| 703 f.sort(key=lambda x: func_code(x[1]).co_firstlineno) |
| 704 |
| 705 # Sort the strings by regular expression length |
| 706 for s in self.strsym.values(): |
| 707 if sys.version_info[0] < 3: |
| 708                 s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) |
| 709 else: |
| 710 # Python 3.0 |
| 711 s.sort(key=lambda x: len(x[1]),reverse=True) |
| 712 |
| 713 # Validate all of the t_rules collected |
| 714 def validate_rules(self): |
| 715 for state in self.stateinfo: |
| 716 # Validate all rules defined by functions |
| 717 |
| 718 |
| 719 |
| 720 for fname, f in self.funcsym[state]: |
| 721 line = func_code(f).co_firstlineno |
| 722 file = func_code(f).co_filename |
| 723 self.files[file] = 1 |
| 724 |
| 725 tokname = self.toknames[fname] |
| 726 if isinstance(f, types.MethodType): |
| 727 reqargs = 2 |
| 728 else: |
| 729 reqargs = 1 |
| 730 nargs = func_code(f).co_argcount |
| 731 if nargs > reqargs: |
| 732                     self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) |
| 733 self.error = 1 |
| 734 continue |
| 735 |
| 736 if nargs < reqargs: |
| 737                     self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) |
| 738 self.error = 1 |
| 739 continue |
| 740 |
| 741 if not f.__doc__: |
| 742                     self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__) |
| 743 self.error = 1 |
| 744 continue |
| 745 |
| 746 try: |
| 747                     c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags) |
| 748                     if c.match(""): |
| 749                         self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__) |
| 750 self.error = 1 |
| 751 except re.error: |
| 752 _etype, e, _etrace = sys.exc_info() |
| 753                     self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e) |
| 754 if '#' in f.__doc__: |
| 755                         self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__) |
| 756 self.error = 1 |
| 757 |
| 758 # Validate all rules defined by strings |
| 759 for name,r in self.strsym[state]: |
| 760 tokname = self.toknames[name] |
| 761 if tokname == 'error': |
| 762                     self.log.error("Rule '%s' must be defined as a function", name) |
| 763 self.error = 1 |
| 764 continue |
| 765 |
| 766 if not tokname in self.tokens and tokname.find("ignore_") < 0: |
| 767                     self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname) |
| 768 self.error = 1 |
| 769 continue |
| 770 |
| 771 try: |
| 772                     c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags) |
| 773 if (c.match("")): |
| 774                         self.log.error("Regular expression for rule '%s' matches empty string",name) |
| 775 self.error = 1 |
| 776 except re.error: |
| 777 _etype, e, _etrace = sys.exc_info() |
| 778                     self.log.error("Invalid regular expression for rule '%s'. %s",name,e) |
| 779 if '#' in r: |
| 780                         self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name) |
| 781 self.error = 1 |
| 782 |
| 783 if not self.funcsym[state] and not self.strsym[state]: |
| 784 self.log.error("No rules defined for state '%s'",state) |
| 785 self.error = 1 |
| 786 |
| 787 # Validate the error function |
| 788 efunc = self.errorf.get(state,None) |
| 789 if efunc: |
| 790 f = efunc |
| 791 line = func_code(f).co_firstlineno |
| 792 file = func_code(f).co_filename |
| 793 self.files[file] = 1 |
| 794 |
| 795 if isinstance(f, types.MethodType): |
| 796 reqargs = 2 |
| 797 else: |
| 798 reqargs = 1 |
| 799 nargs = func_code(f).co_argcount |
| 800 if nargs > reqargs: |
| 801                 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) |
| 802 self.error = 1 |
| 803 |
| 804 if nargs < reqargs: |
| 805                 self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) |
| 806 self.error = 1 |
| 807 |
| 808 for f in self.files: |
| 809 self.validate_file(f) |
| 810 |
| 811 |
| 812     # ----------------------------------------------------------------------------- |
| 813 # validate_file() |
| 814 # |
| 815     # This checks to see if there are duplicated t_rulename() functions or strings |
| 816 # in the parser input file. This is done using a simple regular expression |
| 817 # match on each line in the given file. |
| 818     # ----------------------------------------------------------------------------- |
| 819 |
| 820 def validate_file(self,filename): |
| 821 import os.path |
| 822 base,ext = os.path.splitext(filename) |
| 823 if ext != '.py': return # No idea what the file is. Return OK |
| 824 |
| 825 try: |
| 826 f = open(filename) |
| 827 lines = f.readlines() |
| 828 f.close() |
| 829 except IOError: |
| 830             return                       # Couldn't find the file.  Don't worry about it |
| 831 |
| 832 fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') |
| 833 sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') |
| 834 |
| 835 counthash = { } |
| 836 linen = 1 |
| 837 for l in lines: |
| 838 m = fre.match(l) |
| 839 if not m: |
| 840 m = sre.match(l) |
| 841 if m: |
| 842 name = m.group(1) |
| 843 prev = counthash.get(name) |
| 844 if not prev: |
| 845 counthash[name] = linen |
| 846 else: |
| 847                 self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev) |
| 848 self.error = 1 |
| 849 linen += 1 |
| 850 |
| 851 # ----------------------------------------------------------------------------- |
| 852 # lex(module) |
| 853 # |
| 854 # Build all of the regular expression rules from definitions in the supplied module |
| 855 # ----------------------------------------------------------------------------- |
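| # (Illustrative call; the module and table names here are hypothetical: |
| #      lexer = lex(module=mylexrules, optimize=1, lextab="mylextab") |
| #  builds a lexer from the rules in mylexrules and caches its tables in |
| #  mylextab.py for faster startup on later runs.) |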
| 856 def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None): |
| 857 global lexer |
| 858 ldict = None |
| 859 stateinfo = { 'INITIAL' : 'inclusive'} |
| 860 lexobj = Lexer() |
| 861 lexobj.lexoptimize = optimize |
| 862 global token,input |
| 863 |
| 864 if errorlog is None: |
| 865 errorlog = PlyLogger(sys.stderr) |
| 866 |
| 867 if debug: |
| 868 if debuglog is None: |
| 869 debuglog = PlyLogger(sys.stderr) |
| 870 |
| 871 # Get the module dictionary used for the lexer |
| 872 if object: module = object |
| 873 |
| 874 if module: |
| 875 _items = [(k,getattr(module,k)) for k in dir(module)] |
| 876 ldict = dict(_items) |
| 877 else: |
| 878 ldict = get_caller_module_dict(2) |
| 879 |
| 880 # Collect parser information from the dictionary |
| 881 linfo = LexerReflect(ldict,log=errorlog,reflags=reflags) |
| 882 linfo.get_all() |
| 883 if not optimize: |
| 884 if linfo.validate_all(): |
| 885 raise SyntaxError("Can't build lexer") |
| 886 |
| 887 if optimize and lextab: |
| 888 try: |
| 889 lexobj.readtab(lextab,ldict) |
| 890 token = lexobj.token |
| 891 input = lexobj.input |
| 892 lexer = lexobj |
| 893 return lexobj |
| 894 |
| 895 except ImportError: |
| 896 pass |
| 897 |
| 898 # Dump some basic debugging information |
| 899 if debug: |
| 900 debuglog.info("lex: tokens = %r", linfo.tokens) |
| 901 debuglog.info("lex: literals = %r", linfo.literals) |
| 902 debuglog.info("lex: states = %r", linfo.stateinfo) |
| 903 |
| 904 # Build a dictionary of valid token names |
| 905 lexobj.lextokens = { } |
| 906 for n in linfo.tokens: |
| 907 lexobj.lextokens[n] = 1 |
| 908 |
| 909 # Get literals specification |
| 910 if isinstance(linfo.literals,(list,tuple)): |
| 911 lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) |
| 912 else: |
| 913 lexobj.lexliterals = linfo.literals |
| 914 |
| 915 # Get the stateinfo dictionary |
| 916 stateinfo = linfo.stateinfo |
| 917 |
| 918 regexs = { } |
| 919 # Build the master regular expressions |
| 920 for state in stateinfo: |
| 921 regex_list = [] |
| 922 |
| 923 # Add rules defined by functions first |
| 924 for fname, f in linfo.funcsym[state]: |
| 925 line = func_code(f).co_firstlineno |
| 926 file = func_code(f).co_filename |
| 927 regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) |
| 928 if debug: |
| 929                 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) |
| 930 |
| 931 # Now add all of the simple rules |
| 932 for name,r in linfo.strsym[state]: |
| 933 regex_list.append("(?P<%s>%s)" % (name,r)) |
| 934 if debug: |
| 935                 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r,state) |
| 936 |
| 937 regexs[state] = regex_list |
| 938 |
| 939 # Build the master regular expressions |
| 940 |
| 941 if debug: |
| 942 debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") |
| 943 |
| 944 for state in regexs: |
| 945         lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) |
| 946 lexobj.lexstatere[state] = lexre |
| 947 lexobj.lexstateretext[state] = re_text |
| 948 lexobj.lexstaterenames[state] = re_names |
| 949 if debug: |
| 950 for i in range(len(re_text)): |
| 951                 debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) |
| 952 |
| 953     # For inclusive states, we need to add the regular expressions from the INITIAL state |
| 954 for state,stype in stateinfo.items(): |
| 955 if state != "INITIAL" and stype == 'inclusive': |
| 956 lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) |
| 957             lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) |
| 958             lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) |
| 959 |
| 960 lexobj.lexstateinfo = stateinfo |
| 961 lexobj.lexre = lexobj.lexstatere["INITIAL"] |
| 962 lexobj.lexretext = lexobj.lexstateretext["INITIAL"] |
| 963 |
| 964 # Set up ignore variables |
| 965 lexobj.lexstateignore = linfo.ignore |
| 966 lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") |
| 967 |
| 968 # Set up error functions |
| 969 lexobj.lexstateerrorf = linfo.errorf |
| 970 lexobj.lexerrorf = linfo.errorf.get("INITIAL",None) |
| 971 if not lexobj.lexerrorf: |
| 972 errorlog.warning("No t_error rule is defined") |
| 973 |
| 974 # Check state information for ignore and error rules |
| 975 for s,stype in stateinfo.items(): |
| 976 if stype == 'exclusive': |
| 977 if not s in linfo.errorf: |
| 978                 errorlog.warning("No error rule is defined for exclusive state '%s'", s) |
| 979 if not s in linfo.ignore and lexobj.lexignore: |
| 980                 errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) |
| 981 elif stype == 'inclusive': |
| 982 if not s in linfo.errorf: |
| 983 linfo.errorf[s] = linfo.errorf.get("INITIAL",None) |
| 984 if not s in linfo.ignore: |
| 985 linfo.ignore[s] = linfo.ignore.get("INITIAL","") |
| 986 |
| 987 # Create global versions of the token() and input() functions |
| 988 token = lexobj.token |
| 989 input = lexobj.input |
| 990 lexer = lexobj |
| 991 |
| 992 # If in optimize mode, we write the lextab |
| 993 if lextab and optimize: |
| 994 lexobj.writetab(lextab,outputdir) |
| 995 |
| 996 return lexobj |
| 997 |
| 998 # ----------------------------------------------------------------------------- |
| 999 # runmain() |
| 1000 # |
| 1001 # This runs the lexer as a main program |
| 1002 # ----------------------------------------------------------------------------- |
| 1003 |
| 1004 def runmain(lexer=None,data=None): |
| 1005 if not data: |
| 1006 try: |
| 1007 filename = sys.argv[1] |
| 1008 f = open(filename) |
| 1009 data = f.read() |
| 1010 f.close() |
| 1011 except IndexError: |
| 1012 sys.stdout.write("Reading from standard input (type EOF to end):\n") |
| 1013 data = sys.stdin.read() |
| 1014 |
| 1015 if lexer: |
| 1016 _input = lexer.input |
| 1017 else: |
| 1018 _input = input |
| 1019 _input(data) |
| 1020 if lexer: |
| 1021 _token = lexer.token |
| 1022 else: |
| 1023 _token = token |
| 1024 |
| 1025 while 1: |
| 1026 tok = _token() |
| 1027 if not tok: break |
| 1028         sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos)) |
| 1029 |
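| # (Illustrative: lex.runmain() tokenizes sys.argv[1], or stdin when no file |
| #  is given, printing one "(type,value,lineno,lexpos)" line per token; it is |
| #  handy for quickly testing a lexer module run as a script.) |
|  |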
| 1030 # ----------------------------------------------------------------------------- |
| 1031 # @TOKEN(regex) |
| 1032 # |
| 1033 # This decorator function can be used to set the regular expression on a |
| 1034 # function when its docstring needs to be set in some alternative way |
| 1035 # ----------------------------------------------------------------------------- |
| 1036 |
| 1037 def TOKEN(r): |
| 1038 def set_doc(f): |
| 1039 if hasattr(r,"__call__"): |
| 1040 f.__doc__ = r.__doc__ |
| 1041 else: |
| 1042 f.__doc__ = r |
| 1043 return f |
| 1044 return set_doc |
| 1045 |
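| # (Illustrative use; the identifier pattern below is a hypothetical example: |
| # |
| #      identifier = r'[A-Za-z_][A-Za-z0-9_]*' |
| # |
| #      @TOKEN(identifier) |
| #      def t_ID(t): |
| #          return t |
| # |
| #  t_ID's docstring becomes the identifier pattern; passing a function to |
| #  TOKEN copies that function's docstring instead.) |
|  |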
| 1046 # Alternative spelling of the TOKEN decorator |
| 1047 Token = TOKEN |
| 1048 |