OLD | NEW |
(Empty) | |
| 1 # ----------------------------------------------------------------------------- |
| 2 # ply: lex.py |
| 3 # |
| 4 # Copyright (C) 2001-2016 |
| 5 # David M. Beazley (Dabeaz LLC) |
| 6 # All rights reserved. |
| 7 # |
| 8 # Redistribution and use in source and binary forms, with or without |
| 9 # modification, are permitted provided that the following conditions are |
| 10 # met: |
| 11 # |
| 12 # * Redistributions of source code must retain the above copyright notice, |
| 13 # this list of conditions and the following disclaimer. |
| 14 # * Redistributions in binary form must reproduce the above copyright notice, |
| 15 # this list of conditions and the following disclaimer in the documentation |
| 16 # and/or other materials provided with the distribution. |
| 17 # * Neither the name of the David Beazley or Dabeaz LLC may be used to |
| 18 # endorse or promote products derived from this software without |
| 19 # specific prior written permission. |
| 20 # |
| 21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 24 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 25 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 26 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 27 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 28 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 29 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 30 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 32 # ----------------------------------------------------------------------------- |
| 33 |
| 34 __version__ = '3.9' |
| 35 __tabversion__ = '3.8' |
| 36 |
| 37 import re |
| 38 import sys |
| 39 import types |
| 40 import copy |
| 41 import os |
| 42 import inspect |
| 43 |
| 44 # This tuple contains known string types |
| 45 try: |
| 46 # Python 2.6 |
| 47 StringTypes = (types.StringType, types.UnicodeType) |
| 48 except AttributeError: |
| 49 # Python 3.0 |
| 50 StringTypes = (str, bytes) |
| 51 |
| 52 # This regular expression is used to match valid token names |
| 53 _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') |
| 54 |
| 55 # Exception thrown when invalid token encountered and no default error |
| 56 # handler is defined. |
| 57 class LexError(Exception): |
| 58 def __init__(self, message, s): |
| 59 self.args = (message,) |
| 60 self.text = s |
| 61 |
| 62 |
| 63 # Token class. This class is used to represent the tokens produced. |
| 64 class LexToken(object): |
| 65 def __str__(self): |
| 66         return 'LexToken(%s,%r,%d,%d)' % (self.type, self.value, self.lineno, self.lexpos) |
| 67 |
| 68 def __repr__(self): |
| 69 return str(self) |
| 70 |
| 71 |
| 72 # This object is a stand-in for a logging object created by the |
| 73 # logging module. |
| 74 |
| 75 class PlyLogger(object): |
| 76 def __init__(self, f): |
| 77 self.f = f |
| 78 |
| 79 def critical(self, msg, *args, **kwargs): |
| 80 self.f.write((msg % args) + '\n') |
| 81 |
| 82 def warning(self, msg, *args, **kwargs): |
| 83 self.f.write('WARNING: ' + (msg % args) + '\n') |
| 84 |
| 85 def error(self, msg, *args, **kwargs): |
| 86 self.f.write('ERROR: ' + (msg % args) + '\n') |
| 87 |
| 88 info = critical |
| 89 debug = critical |
| 90 |
| 91 |
| 92 # Null logger is used when no output is generated. Does nothing. |
| 93 class NullLogger(object): |
| 94 def __getattribute__(self, name): |
| 95 return self |
| 96 |
| 97 def __call__(self, *args, **kwargs): |
| 98 return self |
| 99 |
| 100 |
| 101 # ----------------------------------------------------------------------------- |
| 102 # === Lexing Engine === |
| 103 # |
| 104 # The following Lexer class implements the lexer runtime. There are only |
| 105 # a few public methods and attributes: |
| 106 # |
| 107 # input() - Store a new string in the lexer |
| 108 # token() - Get the next token |
| 109 # clone() - Clone the lexer |
| 110 # |
| 111 # lineno - Current line number |
| 112 # lexpos - Current position in the input string |
| 113 # ----------------------------------------------------------------------------- |
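|     # A minimal driving loop for this runtime (a sketch; it assumes `lexer` is |
|     # an instance returned by lex() further below): |
|     # |
|     #     lexer.input('3 + 4 * 10') |
|     #     while True: |
|     #         tok = lexer.token()          # returns None when input is exhausted |
|     #         if not tok: |
|     #             break |
|     #         print(tok.type, tok.value, tok.lineno, tok.lexpos) |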
| 114 |
| 115 class Lexer: |
| 116 def __init__(self): |
| 117         self.lexre = None             # Master regular expression. This is a list of |
| 118                                       # tuples (re, findex) where re is a compiled |
| 119 # regular expression and findex is a list |
| 120 # mapping regex group numbers to rules |
| 121 self.lexretext = None # Current regular expression strings |
| 122         self.lexstatere = {}      # Dictionary mapping lexer states to master regexs |
| 123         self.lexstateretext = {}  # Dictionary mapping lexer states to regex strings |
| 124         self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names |
| 125 self.lexstate = 'INITIAL' # Current lexer state |
| 126 self.lexstatestack = [] # Stack of lexer states |
| 127 self.lexstateinfo = None # State information |
| 128         self.lexstateignore = {}  # Dictionary of ignored characters for each state |
| 129         self.lexstateerrorf = {}  # Dictionary of error functions for each state |
| 130         self.lexstateeoff = {}    # Dictionary of eof functions for each state |
| 131 self.lexreflags = 0 # Optional re compile flags |
| 132 self.lexdata = None # Actual input data (as a string) |
| 133 self.lexpos = 0 # Current position in input text |
| 134 self.lexlen = 0 # Length of the input text |
| 135 self.lexerrorf = None # Error rule (if any) |
| 136 self.lexeoff = None # EOF rule (if any) |
| 137 self.lextokens = None # List of valid tokens |
| 138 self.lexignore = '' # Ignored characters |
| 139         self.lexliterals = ''     # Literal characters that can be passed through |
| 140 self.lexmodule = None # Module |
| 141 self.lineno = 1 # Current line number |
| 142 self.lexoptimize = False # Optimized mode |
| 143 |
| 144 def clone(self, object=None): |
| 145 c = copy.copy(self) |
| 146 |
| 147         # If the object parameter has been supplied, it means we are attaching the |
| 148 # lexer to a new object. In this case, we have to rebind all methods in |
| 149 # the lexstatere and lexstateerrorf tables. |
| 150 |
| 151 if object: |
| 152 newtab = {} |
| 153 for key, ritem in self.lexstatere.items(): |
| 154 newre = [] |
| 155 for cre, findex in ritem: |
| 156 newfindex = [] |
| 157 for f in findex: |
| 158 if not f or not f[0]: |
| 159 newfindex.append(f) |
| 160 continue |
| 161 newfindex.append((getattr(object, f[0].__name__), f[1])) |
| 162 newre.append((cre, newfindex)) |
| 163 newtab[key] = newre |
| 164 c.lexstatere = newtab |
| 165 c.lexstateerrorf = {} |
| 166 for key, ef in self.lexstateerrorf.items(): |
| 167 c.lexstateerrorf[key] = getattr(object, ef.__name__) |
| 168 c.lexmodule = object |
| 169 return c |
| 170 |
| 171 # ------------------------------------------------------------ |
| 172 # writetab() - Write lexer information to a table file |
| 173 # ------------------------------------------------------------ |
| 174 def writetab(self, lextab, outputdir=''): |
| 175 if isinstance(lextab, types.ModuleType): |
| 176 raise IOError("Won't overwrite existing lextab module") |
| 177 basetabmodule = lextab.split('.')[-1] |
| 178 filename = os.path.join(outputdir, basetabmodule) + '.py' |
| 179 with open(filename, 'w') as tf: |
| 180             tf.write('# %s.py. This file automatically created by PLY (version %s). Don\'t edit!\n' % (basetabmodule, __version__)) |
| 181 tf.write('_tabversion = %s\n' % repr(__tabversion__)) |
| 182 tf.write('_lextokens = set(%s)\n' % repr(tuple(self.lextokens))) |
| 183 tf.write('_lexreflags = %s\n' % repr(self.lexreflags)) |
| 184 tf.write('_lexliterals = %s\n' % repr(self.lexliterals)) |
| 185 tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo)) |
| 186 |
| 187             # Rewrite the lexstatere table, replacing function objects with function names |
| 188 tabre = {} |
| 189 for statename, lre in self.lexstatere.items(): |
| 190 titem = [] |
| 191                 for (pat, func), retext, renames in zip(lre, self.lexstateretext[statename], self.lexstaterenames[statename]): |
| 192 titem.append((retext, _funcs_to_names(func, renames))) |
| 193 tabre[statename] = titem |
| 194 |
| 195 tf.write('_lexstatere = %s\n' % repr(tabre)) |
| 196 tf.write('_lexstateignore = %s\n' % repr(self.lexstateignore)) |
| 197 |
| 198 taberr = {} |
| 199 for statename, ef in self.lexstateerrorf.items(): |
| 200 taberr[statename] = ef.__name__ if ef else None |
| 201 tf.write('_lexstateerrorf = %s\n' % repr(taberr)) |
| 202 |
| 203 tabeof = {} |
| 204 for statename, ef in self.lexstateeoff.items(): |
| 205 tabeof[statename] = ef.__name__ if ef else None |
| 206 tf.write('_lexstateeoff = %s\n' % repr(tabeof)) |
| 207 |
| 208 # ------------------------------------------------------------ |
| 209 # readtab() - Read lexer information from a tab file |
| 210 # ------------------------------------------------------------ |
| 211 def readtab(self, tabfile, fdict): |
| 212 if isinstance(tabfile, types.ModuleType): |
| 213 lextab = tabfile |
| 214 else: |
| 215 exec('import %s' % tabfile) |
| 216 lextab = sys.modules[tabfile] |
| 217 |
| 218 if getattr(lextab, '_tabversion', '0.0') != __tabversion__: |
| 219 raise ImportError('Inconsistent PLY version') |
| 220 |
| 221 self.lextokens = lextab._lextokens |
| 222 self.lexreflags = lextab._lexreflags |
| 223 self.lexliterals = lextab._lexliterals |
| 224 self.lextokens_all = self.lextokens | set(self.lexliterals) |
| 225 self.lexstateinfo = lextab._lexstateinfo |
| 226 self.lexstateignore = lextab._lexstateignore |
| 227 self.lexstatere = {} |
| 228 self.lexstateretext = {} |
| 229 for statename, lre in lextab._lexstatere.items(): |
| 230 titem = [] |
| 231 txtitem = [] |
| 232 for pat, func_name in lre: |
| 233                 titem.append((re.compile(pat, lextab._lexreflags | re.VERBOSE), _names_to_funcs(func_name, fdict))) |
| 234 |
| 235 self.lexstatere[statename] = titem |
| 236 self.lexstateretext[statename] = txtitem |
| 237 |
| 238 self.lexstateerrorf = {} |
| 239 for statename, ef in lextab._lexstateerrorf.items(): |
| 240 self.lexstateerrorf[statename] = fdict[ef] |
| 241 |
| 242 self.lexstateeoff = {} |
| 243 for statename, ef in lextab._lexstateeoff.items(): |
| 244 self.lexstateeoff[statename] = fdict[ef] |
| 245 |
| 246 self.begin('INITIAL') |
| 247 |
| 248 # ------------------------------------------------------------ |
| 249 # input() - Push a new string into the lexer |
| 250 # ------------------------------------------------------------ |
| 251 def input(self, s): |
| 252 # Pull off the first character to see if s looks like a string |
| 253 c = s[:1] |
| 254 if not isinstance(c, StringTypes): |
| 255 raise ValueError('Expected a string') |
| 256 self.lexdata = s |
| 257 self.lexpos = 0 |
| 258 self.lexlen = len(s) |
| 259 |
| 260 # ------------------------------------------------------------ |
| 261 # begin() - Changes the lexing state |
| 262 # ------------------------------------------------------------ |
| 263 def begin(self, state): |
| 264 if state not in self.lexstatere: |
| 265 raise ValueError('Undefined state') |
| 266 self.lexre = self.lexstatere[state] |
| 267 self.lexretext = self.lexstateretext[state] |
| 268 self.lexignore = self.lexstateignore.get(state, '') |
| 269 self.lexerrorf = self.lexstateerrorf.get(state, None) |
| 270 self.lexeoff = self.lexstateeoff.get(state, None) |
| 271 self.lexstate = state |
| 272 |
| 273 # ------------------------------------------------------------ |
| 274 # push_state() - Changes the lexing state and saves old on stack |
| 275 # ------------------------------------------------------------ |
| 276 def push_state(self, state): |
| 277 self.lexstatestack.append(self.lexstate) |
| 278 self.begin(state) |
| 279 |
| 280 # ------------------------------------------------------------ |
| 281 # pop_state() - Restores the previous state |
| 282 # ------------------------------------------------------------ |
| 283 def pop_state(self): |
| 284 self.begin(self.lexstatestack.pop()) |
| 285 |
| 286 # ------------------------------------------------------------ |
| 287 # current_state() - Returns the current lexing state |
| 288 # ------------------------------------------------------------ |
| 289 def current_state(self): |
| 290 return self.lexstate |
| 291 |
| 292 # ------------------------------------------------------------ |
| 293 # skip() - Skip ahead n characters |
| 294 # ------------------------------------------------------------ |
| 295 def skip(self, n): |
| 296 self.lexpos += n |
| 297 |
| 298 # ------------------------------------------------------------ |
| 299 # opttoken() - Return the next token from the Lexer |
| 300 # |
| 301 # Note: This function has been carefully implemented to be as fast |
| 302 # as possible. Don't make changes unless you really know what |
| 303 # you are doing |
| 304 # ------------------------------------------------------------ |
| 305 def token(self): |
| 306 # Make local copies of frequently referenced attributes |
| 307 lexpos = self.lexpos |
| 308 lexlen = self.lexlen |
| 309 lexignore = self.lexignore |
| 310 lexdata = self.lexdata |
| 311 |
| 312 while lexpos < lexlen: |
| 313             # This code provides some short-circuit code for whitespace, tabs, and other ignored characters |
| 314 if lexdata[lexpos] in lexignore: |
| 315 lexpos += 1 |
| 316 continue |
| 317 |
| 318 # Look for a regular expression match |
| 319 for lexre, lexindexfunc in self.lexre: |
| 320 m = lexre.match(lexdata, lexpos) |
| 321 if not m: |
| 322 continue |
| 323 |
| 324 # Create a token for return |
| 325 tok = LexToken() |
| 326 tok.value = m.group() |
| 327 tok.lineno = self.lineno |
| 328 tok.lexpos = lexpos |
| 329 |
| 330 i = m.lastindex |
| 331 func, tok.type = lexindexfunc[i] |
| 332 |
| 333 if not func: |
| 334 # If no token type was set, it's an ignored token |
| 335 if tok.type: |
| 336 self.lexpos = m.end() |
| 337 return tok |
| 338 else: |
| 339 lexpos = m.end() |
| 340 break |
| 341 |
| 342 lexpos = m.end() |
| 343 |
| 344 # If token is processed by a function, call it |
| 345 |
| 346                 tok.lexer = self    # Set additional attributes useful in token rules |
| 347 self.lexmatch = m |
| 348 self.lexpos = lexpos |
| 349 |
| 350 newtok = func(tok) |
| 351 |
| 352                 # Every function must return a token, if nothing, we just move to next token |
| 353 if not newtok: |
| 354                     lexpos = self.lexpos        # This is here in case user has updated lexpos. |
| 355                     lexignore = self.lexignore  # This is here in case there was a state change |
| 356 break |
| 357 |
| 358                 # Verify type of the token. If not in the token map, raise an error |
| 359 if not self.lexoptimize: |
| 360 if newtok.type not in self.lextokens_all: |
| 361                         raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( |
| 362                             func.__code__.co_filename, func.__code__.co_firstlineno, |
| 363 func.__name__, newtok.type), lexdata[lexpos:]) |
| 364 |
| 365 return newtok |
| 366 else: |
| 367 # No match, see if in literals |
| 368 if lexdata[lexpos] in self.lexliterals: |
| 369 tok = LexToken() |
| 370 tok.value = lexdata[lexpos] |
| 371 tok.lineno = self.lineno |
| 372 tok.type = tok.value |
| 373 tok.lexpos = lexpos |
| 374 self.lexpos = lexpos + 1 |
| 375 return tok |
| 376 |
| 377 # No match. Call t_error() if defined. |
| 378 if self.lexerrorf: |
| 379 tok = LexToken() |
| 380 tok.value = self.lexdata[lexpos:] |
| 381 tok.lineno = self.lineno |
| 382 tok.type = 'error' |
| 383 tok.lexer = self |
| 384 tok.lexpos = lexpos |
| 385 self.lexpos = lexpos |
| 386 newtok = self.lexerrorf(tok) |
| 387 if lexpos == self.lexpos: |
| 388                     # Error method didn't change text position at all. This is an error. |
| 389                     raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) |
| 390 lexpos = self.lexpos |
| 391 if not newtok: |
| 392 continue |
| 393 return newtok |
| 394 |
| 395 self.lexpos = lexpos |
| 396                 raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:]) |
| 397 |
| 398 if self.lexeoff: |
| 399 tok = LexToken() |
| 400 tok.type = 'eof' |
| 401 tok.value = '' |
| 402 tok.lineno = self.lineno |
| 403 tok.lexpos = lexpos |
| 404 tok.lexer = self |
| 405 self.lexpos = lexpos |
| 406 newtok = self.lexeoff(tok) |
| 407 return newtok |
| 408 |
| 409 self.lexpos = lexpos + 1 |
| 410 if self.lexdata is None: |
| 411 raise RuntimeError('No input string given with input()') |
| 412 return None |
| 413 |
| 414 # Iterator interface |
| 415 def __iter__(self): |
| 416 return self |
| 417 |
| 418 def next(self): |
| 419 t = self.token() |
| 420 if t is None: |
| 421 raise StopIteration |
| 422 return t |
| 423 |
| 424 __next__ = next |
| 425 |
| 426 # ----------------------------------------------------------------------------- |
| 427 # === Lex Builder === |
| 428 # |
| 429 # The functions and classes below are used to collect lexing information |
| 430 # and build a Lexer object from it. |
| 431 # ----------------------------------------------------------------------------- |
| 432 |
| 433 # ----------------------------------------------------------------------------- |
| 434 # _get_regex(func) |
| 435 # |
| 436 # Returns the regular expression assigned to a function either as a doc string |
| 437 # or as a .regex attribute attached by the @TOKEN decorator. |
| 438 # ----------------------------------------------------------------------------- |
| 439 def _get_regex(func): |
| 440 return getattr(func, 'regex', func.__doc__) |
| 441 |
| 442 # ----------------------------------------------------------------------------- |
| 443 # get_caller_module_dict() |
| 444 # |
| 445 # This function returns a dictionary containing all of the symbols defined within |
| 446 # a caller further down the call stack. This is used to get the environment |
| 447 # associated with the lex() call if none was provided. |
| 448 # ----------------------------------------------------------------------------- |
| 449 def get_caller_module_dict(levels): |
| 450 f = sys._getframe(levels) |
| 451 ldict = f.f_globals.copy() |
| 452 if f.f_globals != f.f_locals: |
| 453 ldict.update(f.f_locals) |
| 454 return ldict |
| 455 |
| 456 # ----------------------------------------------------------------------------- |
| 457 # _funcs_to_names() |
| 458 # |
| 459 # Given a list of regular expression functions, this converts it to a list |
| 460 # suitable for output to a table file |
| 461 # ----------------------------------------------------------------------------- |
| 462 def _funcs_to_names(funclist, namelist): |
| 463 result = [] |
| 464 for f, name in zip(funclist, namelist): |
| 465 if f and f[0]: |
| 466 result.append((name, f[1])) |
| 467 else: |
| 468 result.append(f) |
| 469 return result |
| 470 |
| 471 # ----------------------------------------------------------------------------- |
| 472 # _names_to_funcs() |
| 473 # |
| 474 # Given a list of regular expression function names, this converts it back to |
| 475 # functions. |
| 476 # ----------------------------------------------------------------------------- |
| 477 def _names_to_funcs(namelist, fdict): |
| 478 result = [] |
| 479 for n in namelist: |
| 480 if n and n[0]: |
| 481 result.append((fdict[n[0]], n[1])) |
| 482 else: |
| 483 result.append(n) |
| 484 return result |
| 485 |
| 486 # ----------------------------------------------------------------------------- |
| 487 # _form_master_re() |
| 488 # |
| 489 # This function takes a list of all of the regex components and attempts to |
| 490 # form the master regular expression. Given limitations in the Python re |
| 491 # module, it may be necessary to break the master regex into separate expressions. |
| 492 # ----------------------------------------------------------------------------- |
| 493 def _form_master_re(relist, reflags, ldict, toknames): |
| 494 if not relist: |
| 495 return [] |
| 496 regex = '|'.join(relist) |
| 497 try: |
| 498 lexre = re.compile(regex, re.VERBOSE | reflags) |
| 499 |
| 500 # Build the index to function map for the matching engine |
| 501 lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) |
| 502 lexindexnames = lexindexfunc[:] |
| 503 |
| 504 for f, i in lexre.groupindex.items(): |
| 505 handle = ldict.get(f, None) |
| 506 if type(handle) in (types.FunctionType, types.MethodType): |
| 507 lexindexfunc[i] = (handle, toknames[f]) |
| 508 lexindexnames[i] = f |
| 509 elif handle is not None: |
| 510 lexindexnames[i] = f |
| 511 if f.find('ignore_') > 0: |
| 512 lexindexfunc[i] = (None, None) |
| 513 else: |
| 514 lexindexfunc[i] = (None, toknames[f]) |
| 515 |
| 516 return [(lexre, lexindexfunc)], [regex], [lexindexnames] |
| 517 except Exception: |
| 518 m = int(len(relist)/2) |
| 519 if m == 0: |
| 520 m = 1 |
| 521         llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) |
| 522         rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) |
| 523 return (llist+rlist), (lre+rre), (lnames+rnames) |
| 524 |
| 525 # ----------------------------------------------------------------------------- |
| 526 # def _statetoken(s,names) |
| 527 # |
| 528 # Given a declaration name s of the form "t_" and a dictionary whose keys are |
| 529 # state names, this function returns a tuple (states,tokenname) where states |
| 530 # is a tuple of state names and tokenname is the name of the token.  For example, |
| 531 # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') |
| 532 # ----------------------------------------------------------------------------- |
| 533 def _statetoken(s, names): |
| 534 nonstate = 1 |
| 535 parts = s.split('_') |
| 536 for i, part in enumerate(parts[1:], 1): |
| 537 if part not in names and part != 'ANY': |
| 538 break |
| 539 |
| 540 if i > 1: |
| 541 states = tuple(parts[1:i]) |
| 542 else: |
| 543 states = ('INITIAL',) |
| 544 |
| 545 if 'ANY' in states: |
| 546 states = tuple(names) |
| 547 |
| 548 tokenname = '_'.join(parts[i:]) |
| 549 return (states, tokenname) |
| 550 |
| 551 |
| 552 # ----------------------------------------------------------------------------- |
| 553 # LexerReflect() |
| 554 # |
| 555 # This class represents information needed to build a lexer as extracted from a |
| 556 # user's input file. |
| 557 # ----------------------------------------------------------------------------- |
| 558 class LexerReflect(object): |
| 559 def __init__(self, ldict, log=None, reflags=0): |
| 560 self.ldict = ldict |
| 561 self.error_func = None |
| 562 self.tokens = [] |
| 563 self.reflags = reflags |
| 564 self.stateinfo = {'INITIAL': 'inclusive'} |
| 565 self.modules = set() |
| 566 self.error = False |
| 567 self.log = PlyLogger(sys.stderr) if log is None else log |
| 568 |
| 569 # Get all of the basic information |
| 570 def get_all(self): |
| 571 self.get_tokens() |
| 572 self.get_literals() |
| 573 self.get_states() |
| 574 self.get_rules() |
| 575 |
| 576 # Validate all of the information |
| 577 def validate_all(self): |
| 578 self.validate_tokens() |
| 579 self.validate_literals() |
| 580 self.validate_rules() |
| 581 return self.error |
| 582 |
| 583 # Get the tokens map |
| 584 def get_tokens(self): |
| 585 tokens = self.ldict.get('tokens', None) |
| 586 if not tokens: |
| 587 self.log.error('No token list is defined') |
| 588 self.error = True |
| 589 return |
| 590 |
| 591 if not isinstance(tokens, (list, tuple)): |
| 592 self.log.error('tokens must be a list or tuple') |
| 593 self.error = True |
| 594 return |
| 595 |
| 596 if not tokens: |
| 597 self.log.error('tokens is empty') |
| 598 self.error = True |
| 599 return |
| 600 |
| 601 self.tokens = tokens |
| 602 |
| 603 # Validate the tokens |
| 604 def validate_tokens(self): |
| 605 terminals = {} |
| 606 for n in self.tokens: |
| 607 if not _is_identifier.match(n): |
| 608 self.log.error("Bad token name '%s'", n) |
| 609 self.error = True |
| 610 if n in terminals: |
| 611 self.log.warning("Token '%s' multiply defined", n) |
| 612 terminals[n] = 1 |
| 613 |
| 614 # Get the literals specifier |
| 615 def get_literals(self): |
| 616 self.literals = self.ldict.get('literals', '') |
| 617 if not self.literals: |
| 618 self.literals = '' |
| 619 |
| 620 # Validate literals |
| 621 def validate_literals(self): |
| 622 try: |
| 623 for c in self.literals: |
| 624 if not isinstance(c, StringTypes) or len(c) > 1: |
| 625                     self.log.error('Invalid literal %s. Must be a single character', repr(c)) |
| 626 self.error = True |
| 627 |
| 628 except TypeError: |
| 629             self.log.error('Invalid literals specification. literals must be a sequence of characters') |
| 630 self.error = True |
| 631 |
| 632 def get_states(self): |
| 633 self.states = self.ldict.get('states', None) |
| 634 # Build statemap |
| 635 if self.states: |
| 636 if not isinstance(self.states, (tuple, list)): |
| 637 self.log.error('states must be defined as a tuple or list') |
| 638 self.error = True |
| 639 else: |
| 640 for s in self.states: |
| 641 if not isinstance(s, tuple) or len(s) != 2: |
| 642                         self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')", repr(s)) |
| 643 self.error = True |
| 644 continue |
| 645 name, statetype = s |
| 646 if not isinstance(name, StringTypes): |
| 647                         self.log.error('State name %s must be a string', repr(name)) |
| 648 self.error = True |
| 649 continue |
| 650                     if not (statetype == 'inclusive' or statetype == 'exclusive'): |
| 651                         self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name) |
| 652 self.error = True |
| 653 continue |
| 654 if name in self.stateinfo: |
| 655 self.log.error("State '%s' already defined", name) |
| 656 self.error = True |
| 657 continue |
| 658 self.stateinfo[name] = statetype |
| 659 |
| 660 # Get all of the symbols with a t_ prefix and sort them into various |
| 661 # categories (functions, strings, error functions, and ignore characters) |
| 662 |
| 663 def get_rules(self): |
| 664 tsymbols = [f for f in self.ldict if f[:2] == 't_'] |
| 665 |
| 666 # Now build up a list of functions and a list of strings |
| 667 self.toknames = {} # Mapping of symbols to token names |
| 668 self.funcsym = {} # Symbols defined as functions |
| 669 self.strsym = {} # Symbols defined as strings |
| 670 self.ignore = {} # Ignore strings by state |
| 671 self.errorf = {} # Error functions by state |
| 672 self.eoff = {} # EOF functions by state |
| 673 |
| 674 for s in self.stateinfo: |
| 675 self.funcsym[s] = [] |
| 676 self.strsym[s] = [] |
| 677 |
| 678 if len(tsymbols) == 0: |
| 679 self.log.error('No rules of the form t_rulename are defined') |
| 680 self.error = True |
| 681 return |
| 682 |
| 683 for f in tsymbols: |
| 684 t = self.ldict[f] |
| 685 states, tokname = _statetoken(f, self.stateinfo) |
| 686 self.toknames[f] = tokname |
| 687 |
| 688 if hasattr(t, '__call__'): |
| 689 if tokname == 'error': |
| 690 for s in states: |
| 691 self.errorf[s] = t |
| 692 elif tokname == 'eof': |
| 693 for s in states: |
| 694 self.eoff[s] = t |
| 695 elif tokname == 'ignore': |
| 696 line = t.__code__.co_firstlineno |
| 697 file = t.__code__.co_filename |
| 698                     self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__) |
| 699 self.error = True |
| 700 else: |
| 701 for s in states: |
| 702 self.funcsym[s].append((f, t)) |
| 703 elif isinstance(t, StringTypes): |
| 704 if tokname == 'ignore': |
| 705 for s in states: |
| 706 self.ignore[s] = t |
| 707 if '\\' in t: |
| 708                             self.log.warning("%s contains a literal backslash '\\'", f) |
| 709 |
| 710 elif tokname == 'error': |
| 711 self.log.error("Rule '%s' must be defined as a function", f) |
| 712 self.error = True |
| 713 else: |
| 714 for s in states: |
| 715 self.strsym[s].append((f, t)) |
| 716 else: |
| 717 self.log.error('%s not defined as a function or string', f) |
| 718 self.error = True |
| 719 |
| 720 # Sort the functions by line number |
| 721 for f in self.funcsym.values(): |
| 722 f.sort(key=lambda x: x[1].__code__.co_firstlineno) |
| 723 |
| 724 # Sort the strings by regular expression length |
| 725 for s in self.strsym.values(): |
| 726 s.sort(key=lambda x: len(x[1]), reverse=True) |
| 727 |
| 728 # Validate all of the t_rules collected |
| 729 def validate_rules(self): |
| 730 for state in self.stateinfo: |
| 731 # Validate all rules defined by functions |
| 732 |
| 733 for fname, f in self.funcsym[state]: |
| 734 line = f.__code__.co_firstlineno |
| 735 file = f.__code__.co_filename |
| 736 module = inspect.getmodule(f) |
| 737 self.modules.add(module) |
| 738 |
| 739 tokname = self.toknames[fname] |
| 740 if isinstance(f, types.MethodType): |
| 741 reqargs = 2 |
| 742 else: |
| 743 reqargs = 1 |
| 744 nargs = f.__code__.co_argcount |
| 745 if nargs > reqargs: |
| 746                     self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) |
| 747 self.error = True |
| 748 continue |
| 749 |
| 750 if nargs < reqargs: |
| 751                     self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) |
| 752 self.error = True |
| 753 continue |
| 754 |
| 755 if not _get_regex(f): |
| 756                     self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__) |
| 757 self.error = True |
| 758 continue |
| 759 |
| 760 try: |
| 761                     c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), re.VERBOSE | self.reflags) |
| 762 if c.match(''): |
| 763                         self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) |
| 764 self.error = True |
| 765 except re.error as e: |
| 766                     self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) |
| 767 if '#' in _get_regex(f): |
| 768                         self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__) |
| 769 self.error = True |
| 770 |
| 771 # Validate all rules defined by strings |
| 772 for name, r in self.strsym[state]: |
| 773 tokname = self.toknames[name] |
| 774 if tokname == 'error': |
| 775                     self.log.error("Rule '%s' must be defined as a function", name) |
| 776 self.error = True |
| 777 continue |
| 778 |
| 779 if tokname not in self.tokens and tokname.find('ignore_') < 0: |
| 780                     self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname) |
| 781 self.error = True |
| 782 continue |
| 783 |
| 784 try: |
| 785                     c = re.compile('(?P<%s>%s)' % (name, r), re.VERBOSE | self.reflags) |
| 786 if (c.match('')): |
| 787                         self.log.error("Regular expression for rule '%s' matches empty string", name) |
| 788 self.error = True |
| 789 except re.error as e: |
| 790                     self.log.error("Invalid regular expression for rule '%s'. %s", name, e) |
| 791 if '#' in r: |
| 792                         self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name) |
| 793 self.error = True |
| 794 |
| 795 if not self.funcsym[state] and not self.strsym[state]: |
| 796 self.log.error("No rules defined for state '%s'", state) |
| 797 self.error = True |
| 798 |
| 799 # Validate the error function |
| 800 efunc = self.errorf.get(state, None) |
| 801 if efunc: |
| 802 f = efunc |
| 803 line = f.__code__.co_firstlineno |
| 804 file = f.__code__.co_filename |
| 805 module = inspect.getmodule(f) |
| 806 self.modules.add(module) |
| 807 |
| 808 if isinstance(f, types.MethodType): |
| 809 reqargs = 2 |
| 810 else: |
| 811 reqargs = 1 |
| 812 nargs = f.__code__.co_argcount |
| 813 if nargs > reqargs: |
| 814                 self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) |
| 815 self.error = True |
| 816 |
| 817 if nargs < reqargs: |
| 818                 self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) |
| 819 self.error = True |
| 820 |
| 821 for module in self.modules: |
| 822 self.validate_module(module) |
| 823 |
| 824     # ----------------------------------------------------------------------------- |
| 825 # validate_module() |
| 826 # |
| 827     # This checks to see if there are duplicated t_rulename() functions or strings |
| 828 # in the parser input file. This is done using a simple regular expression |
| 829 # match on each line in the source code of the given module. |
| 830     # ----------------------------------------------------------------------------- |
| 831 |
| 832 def validate_module(self, module): |
| 833 try: |
| 834 lines, linen = inspect.getsourcelines(module) |
| 835 except IOError: |
| 836 return |
| 837 |
| 838 fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') |
| 839 sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') |
| 840 |
| 841 counthash = {} |
| 842 linen += 1 |
| 843 for line in lines: |
| 844 m = fre.match(line) |
| 845 if not m: |
| 846 m = sre.match(line) |
| 847 if m: |
| 848 name = m.group(1) |
| 849 prev = counthash.get(name) |
| 850 if not prev: |
| 851 counthash[name] = linen |
| 852 else: |
| 853 filename = inspect.getsourcefile(module) |
| 854                     self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev) |
| 855 self.error = True |
| 856 linen += 1 |
| 857 |
| 858 # ----------------------------------------------------------------------------- |
| 859 # lex(module) |
| 860 # |
| 861 # Build all of the regular expression rules from definitions in the supplied module |
| 862 # ----------------------------------------------------------------------------- |
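|     # A hedged sketch of the calling convention; the token names and rule bodies |
|     # below are illustrative only and not part of this module: |
|     # |
|     #     import ply.lex as lex |
|     # |
|     #     tokens = ('NUMBER', 'PLUS') |
|     #     t_PLUS = r'\+' |
|     #     t_ignore = ' \t' |
|     # |
|     #     def t_NUMBER(t): |
|     #         r'\d+' |
|     #         t.value = int(t.value) |
|     #         return t |
|     # |
|     #     def t_error(t): |
|     #         print("Illegal character %r" % t.value[0]) |
|     #         t.lexer.skip(1) |
|     # |
|     #     lexer = lex.lex()      # builds a Lexer from the enclosing module |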
| 863 def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab', |
| 864 reflags=0, nowarn=False, outputdir=None, debuglog=None, errorlog=None): |
| 865 |
| 866 if lextab is None: |
| 867 lextab = 'lextab' |
| 868 |
| 869 global lexer |
| 870 |
| 871 ldict = None |
| 872 stateinfo = {'INITIAL': 'inclusive'} |
| 873 lexobj = Lexer() |
| 874 lexobj.lexoptimize = optimize |
| 875 global token, input |
| 876 |
| 877 if errorlog is None: |
| 878 errorlog = PlyLogger(sys.stderr) |
| 879 |
| 880 if debug: |
| 881 if debuglog is None: |
| 882 debuglog = PlyLogger(sys.stderr) |
| 883 |
| 884 # Get the module dictionary used for the lexer |
| 885 if object: |
| 886 module = object |
| 887 |
| 888 # Get the module dictionary used for the parser |
| 889 if module: |
| 890 _items = [(k, getattr(module, k)) for k in dir(module)] |
| 891 ldict = dict(_items) |
| 892         # If no __file__ attribute is available, try to obtain it from the __module__ instead |
| 893 if '__file__' not in ldict: |
| 894 ldict['__file__'] = sys.modules[ldict['__module__']].__file__ |
| 895 else: |
| 896 ldict = get_caller_module_dict(2) |
| 897 |
| 898     # Determine if the module is part of a package or not. |
| 899 # If so, fix the tabmodule setting so that tables load correctly |
| 900 pkg = ldict.get('__package__') |
| 901 if pkg and isinstance(lextab, str): |
| 902 if '.' not in lextab: |
| 903 lextab = pkg + '.' + lextab |
| 904 |
| 905     # Collect lexer information from the dictionary |
| 906 linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) |
| 907 linfo.get_all() |
| 908 if not optimize: |
| 909 if linfo.validate_all(): |
| 910 raise SyntaxError("Can't build lexer") |
| 911 |
| 912 if optimize and lextab: |
| 913 try: |
| 914 lexobj.readtab(lextab, ldict) |
| 915 token = lexobj.token |
| 916 input = lexobj.input |
| 917 lexer = lexobj |
| 918 return lexobj |
| 919 |
| 920 except ImportError: |
| 921 pass |
| 922 |
| 923 # Dump some basic debugging information |
| 924 if debug: |
| 925 debuglog.info('lex: tokens = %r', linfo.tokens) |
| 926 debuglog.info('lex: literals = %r', linfo.literals) |
| 927 debuglog.info('lex: states = %r', linfo.stateinfo) |
| 928 |
| 929 # Build a dictionary of valid token names |
| 930 lexobj.lextokens = set() |
| 931 for n in linfo.tokens: |
| 932 lexobj.lextokens.add(n) |
| 933 |
| 934 # Get literals specification |
| 935 if isinstance(linfo.literals, (list, tuple)): |
| 936 lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) |
| 937 else: |
| 938 lexobj.lexliterals = linfo.literals |
| 939 |
| 940 lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) |
| 941 |
| 942 # Get the stateinfo dictionary |
| 943 stateinfo = linfo.stateinfo |
| 944 |
| 945 regexs = {} |
| 946 # Build the master regular expressions |
| 947 for state in stateinfo: |
| 948 regex_list = [] |
| 949 |
| 950 # Add rules defined by functions first |
| 951 for fname, f in linfo.funcsym[state]: |
| 952 line = f.__code__.co_firstlineno |
| 953 file = f.__code__.co_filename |
| 954 regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) |
| 955 if debug: |
| 956                 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) |
| 957 |
| 958 # Now add all of the simple rules |
| 959 for name, r in linfo.strsym[state]: |
| 960 regex_list.append('(?P<%s>%s)' % (name, r)) |
| 961 if debug: |
| 962                 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) |
| 963 |
| 964 regexs[state] = regex_list |
| 965 |
| 966 # Build the master regular expressions |
| 967 |
| 968 if debug: |
| 969 debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') |
| 970 |
| 971 for state in regexs: |
| 972         lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) |
| 973 lexobj.lexstatere[state] = lexre |
| 974 lexobj.lexstateretext[state] = re_text |
| 975 lexobj.lexstaterenames[state] = re_names |
| 976 if debug: |
| 977 for i, text in enumerate(re_text): |
| 978                 debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) |
| 979 |
| 980     # For inclusive states, we need to add the regular expressions from the INITIAL state |
| 981 for state, stype in stateinfo.items(): |
| 982 if state != 'INITIAL' and stype == 'inclusive': |
| 983 lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) |
| 984             lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) |
| 985             lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) |
| 986 |
| 987 lexobj.lexstateinfo = stateinfo |
| 988 lexobj.lexre = lexobj.lexstatere['INITIAL'] |
| 989 lexobj.lexretext = lexobj.lexstateretext['INITIAL'] |
| 990 lexobj.lexreflags = reflags |
| 991 |
| 992 # Set up ignore variables |
| 993 lexobj.lexstateignore = linfo.ignore |
| 994 lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') |
| 995 |
| 996 # Set up error functions |
| 997 lexobj.lexstateerrorf = linfo.errorf |
| 998 lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) |
| 999 if not lexobj.lexerrorf: |
| 1000 errorlog.warning('No t_error rule is defined') |
| 1001 |
| 1002 # Set up eof functions |
| 1003 lexobj.lexstateeoff = linfo.eoff |
| 1004 lexobj.lexeoff = linfo.eoff.get('INITIAL', None) |
| 1005 |
| 1006 # Check state information for ignore and error rules |
| 1007 for s, stype in stateinfo.items(): |
| 1008 if stype == 'exclusive': |
| 1009 if s not in linfo.errorf: |
| 1010                 errorlog.warning("No error rule is defined for exclusive state '%s'", s) |
| 1011 if s not in linfo.ignore and lexobj.lexignore: |
| 1012                 errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) |
| 1013 elif stype == 'inclusive': |
| 1014 if s not in linfo.errorf: |
| 1015 linfo.errorf[s] = linfo.errorf.get('INITIAL', None) |
| 1016 if s not in linfo.ignore: |
| 1017 linfo.ignore[s] = linfo.ignore.get('INITIAL', '') |
| 1018 |
| 1019 # Create global versions of the token() and input() functions |
| 1020 token = lexobj.token |
| 1021 input = lexobj.input |
| 1022 lexer = lexobj |
| 1023 |
| 1024 # If in optimize mode, we write the lextab |
| 1025 if lextab and optimize: |
| 1026 if outputdir is None: |
| 1027 # If no output directory is set, the location of the output files |
| 1028 # is determined according to the following rules: |
| 1029             #     - If lextab specifies a package, files go into that package directory |
| 1030             #     - Otherwise, files go in the same directory as the specifying module |
| 1031 if isinstance(lextab, types.ModuleType): |
| 1032 srcfile = lextab.__file__ |
| 1033 else: |
| 1034 if '.' not in lextab: |
| 1035 srcfile = ldict['__file__'] |
| 1036 else: |
| 1037 parts = lextab.split('.') |
| 1038 pkgname = '.'.join(parts[:-1]) |
| 1039 exec('import %s' % pkgname) |
| 1040 srcfile = getattr(sys.modules[pkgname], '__file__', '') |
| 1041 outputdir = os.path.dirname(srcfile) |
| 1042 try: |
| 1043 lexobj.writetab(lextab, outputdir) |
| 1044 except IOError as e: |
| 1045             errorlog.warning("Couldn't write lextab module %r. %s" % (lextab, e)) |
| 1046 |
| 1047 return lexobj |
| 1048 |
| 1049 # ----------------------------------------------------------------------------- |
| 1050 # runmain() |
| 1051 # |
| 1052 # This runs the lexer as a main program |
| 1053 # ----------------------------------------------------------------------------- |
| 1054 |
| 1055 def runmain(lexer=None, data=None): |
| 1056 if not data: |
| 1057 try: |
| 1058 filename = sys.argv[1] |
| 1059 f = open(filename) |
| 1060 data = f.read() |
| 1061 f.close() |
| 1062 except IndexError: |
| 1063 sys.stdout.write('Reading from standard input (type EOF to end):\n') |
| 1064 data = sys.stdin.read() |
| 1065 |
| 1066 if lexer: |
| 1067 _input = lexer.input |
| 1068 else: |
| 1069 _input = input |
| 1070 _input(data) |
| 1071 if lexer: |
| 1072 _token = lexer.token |
| 1073 else: |
| 1074 _token = token |
| 1075 |
| 1076 while True: |
| 1077 tok = _token() |
| 1078 if not tok: |
| 1079 break |
| 1080         sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos)) |
| 1081 |
| 1082 # ----------------------------------------------------------------------------- |
| 1083 # @TOKEN(regex) |
| 1084 # |
| 1085 # This decorator function can be used to set the regex expression on a function |
| 1086 # when its docstring might need to be set in an alternative way |
| 1087 # ----------------------------------------------------------------------------- |
| 1088 |
| 1089 def TOKEN(r): |
| 1090 def set_regex(f): |
| 1091 if hasattr(r, '__call__'): |
| 1092 f.regex = _get_regex(r) |
| 1093 else: |
| 1094 f.regex = r |
| 1095 return f |
| 1096 return set_regex |
| 1097 |
| 1098 # Alternative spelling of the TOKEN decorator |
| 1099 Token = TOKEN |
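|     # An illustrative use of the decorator (the pattern and rule name are |
|     # examples, not part of this module): |
|     # |
|     #     identifier = r'[A-Za-z_][A-Za-z0-9_]*' |
|     # |
|     #     @TOKEN(identifier) |
|     #     def t_ID(t): |
|     #         return t |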
| 1100 |