Index: third_party/google-endpoints/ply/lex.py |
diff --git a/third_party/google-endpoints/ply/lex.py b/third_party/google-endpoints/ply/lex.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..0f3e464a994675dc3c0295404374121760bd4ac8 |
--- /dev/null |
+++ b/third_party/google-endpoints/ply/lex.py |
@@ -0,0 +1,1100 @@ |
+# ----------------------------------------------------------------------------- |
+# ply: lex.py |
+# |
+# Copyright (C) 2001-2016 |
+# David M. Beazley (Dabeaz LLC) |
+# All rights reserved. |
+# |
+# Redistribution and use in source and binary forms, with or without |
+# modification, are permitted provided that the following conditions are |
+# met: |
+# |
+# * Redistributions of source code must retain the above copyright notice, |
+# this list of conditions and the following disclaimer. |
+# * Redistributions in binary form must reproduce the above copyright notice, |
+# this list of conditions and the following disclaimer in the documentation |
+# and/or other materials provided with the distribution. |
+# * Neither the name of the David Beazley or Dabeaz LLC may be used to |
+# endorse or promote products derived from this software without |
+# specific prior written permission. |
+# |
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
+# ----------------------------------------------------------------------------- |
+ |
+__version__ = '3.9' |
+__tabversion__ = '3.8' |
+ |
+import re |
+import sys |
+import types |
+import copy |
+import os |
+import inspect |
+ |
+# This tuple contains known string types |
+try: |
+ # Python 2.6 |
+ StringTypes = (types.StringType, types.UnicodeType) |
+except AttributeError: |
+ # Python 3.0 |
+ StringTypes = (str, bytes) |
+ |
+# This regular expression is used to match valid token names |
+_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') |
+ |
+# Exception thrown when an invalid token is encountered and no default error |
+# handler is defined. |
+class LexError(Exception): |
+ def __init__(self, message, s): |
+ self.args = (message,) |
+ self.text = s |
+ |
+ |
+# Token class. This class is used to represent the tokens produced by the lexer. |
+class LexToken(object): |
+ def __str__(self): |
+ return 'LexToken(%s,%r,%d,%d)' % (self.type, self.value, self.lineno, self.lexpos) |
+ |
+ def __repr__(self): |
+ return str(self) |
+ |
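+# Example (illustrative): inside a token rule, these attributes can be read |
+# and modified before the token is returned.  The NUMBER token and the rule |
+# below are hypothetical: |
+# |
+#     def t_NUMBER(t): |
+#         r'\d+' |
+#         t.value = int(t.value)    # convert the matched text to an int |
+#         return t |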
+ |
+# This object is a stand-in for a logging object created by the |
+# logging module. |
+ |
+class PlyLogger(object): |
+ def __init__(self, f): |
+ self.f = f |
+ |
+ def critical(self, msg, *args, **kwargs): |
+ self.f.write((msg % args) + '\n') |
+ |
+ def warning(self, msg, *args, **kwargs): |
+ self.f.write('WARNING: ' + (msg % args) + '\n') |
+ |
+ def error(self, msg, *args, **kwargs): |
+ self.f.write('ERROR: ' + (msg % args) + '\n') |
+ |
+ info = critical |
+ debug = critical |
+ |
+ |
+# Null logger is used when no output is generated. Does nothing. |
+class NullLogger(object): |
+ def __getattribute__(self, name): |
+ return self |
+ |
+ def __call__(self, *args, **kwargs): |
+ return self |
+ |
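+# Example (illustrative): lex() accepts logger-like objects through its |
+# errorlog and debuglog parameters, so a standard logging.Logger can be |
+# used in place of PlyLogger (assuming this module is imported as lex): |
+# |
+#     import logging |
+#     logging.basicConfig(level=logging.DEBUG) |
+#     lexer = lex.lex(debug=True, debuglog=logging.getLogger('ply.lex')) |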
+ |
+# ----------------------------------------------------------------------------- |
+# === Lexing Engine === |
+# |
+# The following Lexer class implements the lexer runtime. There are only |
+# a few public methods and attributes: |
+# |
+# input() - Store a new string in the lexer |
+# token() - Get the next token |
+# clone() - Clone the lexer |
+# |
+# lineno - Current line number |
+# lexpos - Current position in the input string |
+# ----------------------------------------------------------------------------- |
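+ |
+# Example (illustrative), assuming lex() has already returned a Lexer |
+# instance bound to lexer: |
+# |
+#     lexer.input('x = 3 + 4') |
+#     while True: |
+#         tok = lexer.token() |
+#         if tok is None: |
+#             break |
+#         print(tok.type, tok.value, tok.lineno, tok.lexpos) |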
+ |
+class Lexer: |
+ def __init__(self): |
+ self.lexre = None # Master regular expression. This is a list of |
+ # tuples (re, findex) where re is a compiled |
+ # regular expression and findex is a list |
+ # mapping regex group numbers to rules |
+ self.lexretext = None # Current regular expression strings |
+        self.lexstatere = {} # Dictionary mapping lexer states to master regexes |
+ self.lexstateretext = {} # Dictionary mapping lexer states to regex strings |
+ self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names |
+ self.lexstate = 'INITIAL' # Current lexer state |
+ self.lexstatestack = [] # Stack of lexer states |
+ self.lexstateinfo = None # State information |
+ self.lexstateignore = {} # Dictionary of ignored characters for each state |
+ self.lexstateerrorf = {} # Dictionary of error functions for each state |
+ self.lexstateeoff = {} # Dictionary of eof functions for each state |
+ self.lexreflags = 0 # Optional re compile flags |
+ self.lexdata = None # Actual input data (as a string) |
+ self.lexpos = 0 # Current position in input text |
+ self.lexlen = 0 # Length of the input text |
+ self.lexerrorf = None # Error rule (if any) |
+ self.lexeoff = None # EOF rule (if any) |
+        self.lextokens = None # Set of valid token names |
+ self.lexignore = '' # Ignored characters |
+ self.lexliterals = '' # Literal characters that can be passed through |
+ self.lexmodule = None # Module |
+ self.lineno = 1 # Current line number |
+ self.lexoptimize = False # Optimized mode |
+ |
+ def clone(self, object=None): |
+ c = copy.copy(self) |
+ |
+ # If the object parameter has been supplied, it means we are attaching the |
+ # lexer to a new object. In this case, we have to rebind all methods in |
+ # the lexstatere and lexstateerrorf tables. |
+ |
+ if object: |
+ newtab = {} |
+ for key, ritem in self.lexstatere.items(): |
+ newre = [] |
+ for cre, findex in ritem: |
+ newfindex = [] |
+ for f in findex: |
+ if not f or not f[0]: |
+ newfindex.append(f) |
+ continue |
+ newfindex.append((getattr(object, f[0].__name__), f[1])) |
+ newre.append((cre, newfindex)) |
+ newtab[key] = newre |
+ c.lexstatere = newtab |
+ c.lexstateerrorf = {} |
+ for key, ef in self.lexstateerrorf.items(): |
+ c.lexstateerrorf[key] = getattr(object, ef.__name__) |
+ c.lexmodule = object |
+ return c |
+ |
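+    # Example (illustrative): clone() produces an independent copy of the |
+    # lexer that shares the compiled rules but can scan its own input: |
+    # |
+    #     lexer2 = lexer.clone() |
+    #     lexer2.input('some other text') |
+ |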
+ # ------------------------------------------------------------ |
+ # writetab() - Write lexer information to a table file |
+ # ------------------------------------------------------------ |
+ def writetab(self, lextab, outputdir=''): |
+ if isinstance(lextab, types.ModuleType): |
+ raise IOError("Won't overwrite existing lextab module") |
+ basetabmodule = lextab.split('.')[-1] |
+ filename = os.path.join(outputdir, basetabmodule) + '.py' |
+ with open(filename, 'w') as tf: |
+ tf.write('# %s.py. This file automatically created by PLY (version %s). Don\'t edit!\n' % (basetabmodule, __version__)) |
+ tf.write('_tabversion = %s\n' % repr(__tabversion__)) |
+ tf.write('_lextokens = set(%s)\n' % repr(tuple(self.lextokens))) |
+ tf.write('_lexreflags = %s\n' % repr(self.lexreflags)) |
+ tf.write('_lexliterals = %s\n' % repr(self.lexliterals)) |
+ tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo)) |
+ |
+ # Rewrite the lexstatere table, replacing function objects with function names |
+ tabre = {} |
+ for statename, lre in self.lexstatere.items(): |
+ titem = [] |
+ for (pat, func), retext, renames in zip(lre, self.lexstateretext[statename], self.lexstaterenames[statename]): |
+ titem.append((retext, _funcs_to_names(func, renames))) |
+ tabre[statename] = titem |
+ |
+ tf.write('_lexstatere = %s\n' % repr(tabre)) |
+ tf.write('_lexstateignore = %s\n' % repr(self.lexstateignore)) |
+ |
+ taberr = {} |
+ for statename, ef in self.lexstateerrorf.items(): |
+ taberr[statename] = ef.__name__ if ef else None |
+ tf.write('_lexstateerrorf = %s\n' % repr(taberr)) |
+ |
+ tabeof = {} |
+ for statename, ef in self.lexstateeoff.items(): |
+ tabeof[statename] = ef.__name__ if ef else None |
+ tf.write('_lexstateeoff = %s\n' % repr(tabeof)) |
+ |
+ # ------------------------------------------------------------ |
+ # readtab() - Read lexer information from a tab file |
+ # ------------------------------------------------------------ |
+ def readtab(self, tabfile, fdict): |
+ if isinstance(tabfile, types.ModuleType): |
+ lextab = tabfile |
+ else: |
+ exec('import %s' % tabfile) |
+ lextab = sys.modules[tabfile] |
+ |
+ if getattr(lextab, '_tabversion', '0.0') != __tabversion__: |
+ raise ImportError('Inconsistent PLY version') |
+ |
+ self.lextokens = lextab._lextokens |
+ self.lexreflags = lextab._lexreflags |
+ self.lexliterals = lextab._lexliterals |
+ self.lextokens_all = self.lextokens | set(self.lexliterals) |
+ self.lexstateinfo = lextab._lexstateinfo |
+ self.lexstateignore = lextab._lexstateignore |
+ self.lexstatere = {} |
+ self.lexstateretext = {} |
+ for statename, lre in lextab._lexstatere.items(): |
+ titem = [] |
+ txtitem = [] |
+            for pat, func_name in lre: |
+                titem.append((re.compile(pat, lextab._lexreflags | re.VERBOSE), _names_to_funcs(func_name, fdict))) |
+                # Record the pattern text so lexstateretext is populated as well |
+                txtitem.append(pat) |
+ |
+ self.lexstatere[statename] = titem |
+ self.lexstateretext[statename] = txtitem |
+ |
+ self.lexstateerrorf = {} |
+ for statename, ef in lextab._lexstateerrorf.items(): |
+ self.lexstateerrorf[statename] = fdict[ef] |
+ |
+ self.lexstateeoff = {} |
+ for statename, ef in lextab._lexstateeoff.items(): |
+ self.lexstateeoff[statename] = fdict[ef] |
+ |
+ self.begin('INITIAL') |
+ |
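+    # Example (illustrative): writetab() and readtab() are normally driven |
+    # by lex() when optimized mode is requested; the lextab name below is |
+    # hypothetical: |
+    # |
+    #     lexer = lex.lex(optimize=1, lextab='mylextab') |
+ |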
+ # ------------------------------------------------------------ |
+ # input() - Push a new string into the lexer |
+ # ------------------------------------------------------------ |
+ def input(self, s): |
+ # Pull off the first character to see if s looks like a string |
+ c = s[:1] |
+ if not isinstance(c, StringTypes): |
+ raise ValueError('Expected a string') |
+ self.lexdata = s |
+ self.lexpos = 0 |
+ self.lexlen = len(s) |
+ |
+ # ------------------------------------------------------------ |
+ # begin() - Changes the lexing state |
+ # ------------------------------------------------------------ |
+ def begin(self, state): |
+ if state not in self.lexstatere: |
+ raise ValueError('Undefined state') |
+ self.lexre = self.lexstatere[state] |
+ self.lexretext = self.lexstateretext[state] |
+ self.lexignore = self.lexstateignore.get(state, '') |
+ self.lexerrorf = self.lexstateerrorf.get(state, None) |
+ self.lexeoff = self.lexstateeoff.get(state, None) |
+ self.lexstate = state |
+ |
+ # ------------------------------------------------------------ |
+ # push_state() - Changes the lexing state and saves old on stack |
+ # ------------------------------------------------------------ |
+ def push_state(self, state): |
+ self.lexstatestack.append(self.lexstate) |
+ self.begin(state) |
+ |
+ # ------------------------------------------------------------ |
+ # pop_state() - Restores the previous state |
+ # ------------------------------------------------------------ |
+ def pop_state(self): |
+ self.begin(self.lexstatestack.pop()) |
+ |
+ # ------------------------------------------------------------ |
+ # current_state() - Returns the current lexing state |
+ # ------------------------------------------------------------ |
+ def current_state(self): |
+ return self.lexstate |
+ |
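+    # Example (illustrative): token rules in the user's module can switch |
+    # states through the lexer attached to the token; the 'comment' state |
+    # below is hypothetical: |
+    # |
+    #     def t_COMMENTSTART(t): |
+    #         r'/\*' |
+    #         t.lexer.push_state('comment') |
+    # |
+    #     def t_comment_END(t): |
+    #         r'\*/' |
+    #         t.lexer.pop_state() |
+ |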
+ # ------------------------------------------------------------ |
+ # skip() - Skip ahead n characters |
+ # ------------------------------------------------------------ |
+ def skip(self, n): |
+ self.lexpos += n |
+ |
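+    # Example (illustrative): skip() is typically called from a t_error rule |
+    # in the user's module to discard the offending character and resume: |
+    # |
+    #     def t_error(t): |
+    #         print("Illegal character %r" % t.value[0]) |
+    #         t.lexer.skip(1) |
+ |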
+ # ------------------------------------------------------------ |
+    # token() - Return the next token from the Lexer |
+ # |
+ # Note: This function has been carefully implemented to be as fast |
+ # as possible. Don't make changes unless you really know what |
+ # you are doing |
+ # ------------------------------------------------------------ |
+ def token(self): |
+ # Make local copies of frequently referenced attributes |
+ lexpos = self.lexpos |
+ lexlen = self.lexlen |
+ lexignore = self.lexignore |
+ lexdata = self.lexdata |
+ |
+ while lexpos < lexlen: |
+            # Short-circuit handling for whitespace, tabs, and other ignored characters |
+ if lexdata[lexpos] in lexignore: |
+ lexpos += 1 |
+ continue |
+ |
+ # Look for a regular expression match |
+ for lexre, lexindexfunc in self.lexre: |
+ m = lexre.match(lexdata, lexpos) |
+ if not m: |
+ continue |
+ |
+ # Create a token for return |
+ tok = LexToken() |
+ tok.value = m.group() |
+ tok.lineno = self.lineno |
+ tok.lexpos = lexpos |
+ |
+ i = m.lastindex |
+ func, tok.type = lexindexfunc[i] |
+ |
+ if not func: |
+ # If no token type was set, it's an ignored token |
+ if tok.type: |
+ self.lexpos = m.end() |
+ return tok |
+ else: |
+ lexpos = m.end() |
+ break |
+ |
+ lexpos = m.end() |
+ |
+ # If token is processed by a function, call it |
+ |
+ tok.lexer = self # Set additional attributes useful in token rules |
+ self.lexmatch = m |
+ self.lexpos = lexpos |
+ |
+ newtok = func(tok) |
+ |
+                    # Every rule function must return a token; if it returns nothing, we just move on to the next token |
+ if not newtok: |
+ lexpos = self.lexpos # This is here in case user has updated lexpos. |
+ lexignore = self.lexignore # This is here in case there was a state change |
+ break |
+ |
+ # Verify type of the token. If not in the token map, raise an error |
+ if not self.lexoptimize: |
+ if newtok.type not in self.lextokens_all: |
+ raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( |
+ func.__code__.co_filename, func.__code__.co_firstlineno, |
+ func.__name__, newtok.type), lexdata[lexpos:]) |
+ |
+ return newtok |
+ else: |
+ # No match, see if in literals |
+ if lexdata[lexpos] in self.lexliterals: |
+ tok = LexToken() |
+ tok.value = lexdata[lexpos] |
+ tok.lineno = self.lineno |
+ tok.type = tok.value |
+ tok.lexpos = lexpos |
+ self.lexpos = lexpos + 1 |
+ return tok |
+ |
+ # No match. Call t_error() if defined. |
+ if self.lexerrorf: |
+ tok = LexToken() |
+ tok.value = self.lexdata[lexpos:] |
+ tok.lineno = self.lineno |
+ tok.type = 'error' |
+ tok.lexer = self |
+ tok.lexpos = lexpos |
+ self.lexpos = lexpos |
+ newtok = self.lexerrorf(tok) |
+ if lexpos == self.lexpos: |
+ # Error method didn't change text position at all. This is an error. |
+ raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) |
+ lexpos = self.lexpos |
+ if not newtok: |
+ continue |
+ return newtok |
+ |
+ self.lexpos = lexpos |
+ raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:]) |
+ |
+ if self.lexeoff: |
+ tok = LexToken() |
+ tok.type = 'eof' |
+ tok.value = '' |
+ tok.lineno = self.lineno |
+ tok.lexpos = lexpos |
+ tok.lexer = self |
+ self.lexpos = lexpos |
+ newtok = self.lexeoff(tok) |
+ return newtok |
+ |
+ self.lexpos = lexpos + 1 |
+ if self.lexdata is None: |
+ raise RuntimeError('No input string given with input()') |
+ return None |
+ |
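+    # Example (illustrative): a t_eof rule in the user's module can supply |
+    # more input when the end of the data is reached; returning None ends |
+    # lexing.  The read_more() helper is hypothetical: |
+    # |
+    #     def t_eof(t): |
+    #         more = read_more() |
+    #         if more: |
+    #             t.lexer.input(more) |
+    #             return t.lexer.token() |
+    #         return None |
+ |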
+ # Iterator interface |
+ def __iter__(self): |
+ return self |
+ |
+ def next(self): |
+ t = self.token() |
+ if t is None: |
+ raise StopIteration |
+ return t |
+ |
+ __next__ = next |
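+ |
+    # Example (illustrative): the iterator protocol lets tokens be consumed |
+    # with an ordinary for loop: |
+    # |
+    #     lexer.input('some text') |
+    #     for tok in lexer: |
+    #         print(tok) |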
+ |
+# ----------------------------------------------------------------------------- |
+# === Lex Builder === |
+# |
+# The functions and classes below are used to collect lexing information |
+# and build a Lexer object from it. |
+# ----------------------------------------------------------------------------- |
+ |
+# ----------------------------------------------------------------------------- |
+# _get_regex(func) |
+# |
+# Returns the regular expression assigned to a function either as a doc string |
+# or as a .regex attribute attached by the @TOKEN decorator. |
+# ----------------------------------------------------------------------------- |
+def _get_regex(func): |
+ return getattr(func, 'regex', func.__doc__) |
+ |
+# ----------------------------------------------------------------------------- |
+# get_caller_module_dict() |
+# |
+# This function returns a dictionary containing all of the symbols defined within |
+# a caller further down the call stack. This is used to get the environment |
+# associated with the lex() call if none was provided. |
+# ----------------------------------------------------------------------------- |
+def get_caller_module_dict(levels): |
+ f = sys._getframe(levels) |
+ ldict = f.f_globals.copy() |
+ if f.f_globals != f.f_locals: |
+ ldict.update(f.f_locals) |
+ return ldict |
+ |
+# ----------------------------------------------------------------------------- |
+# _funcs_to_names() |
+# |
+# Given a list of regular expression functions, this converts it to a list |
+# suitable for output to a table file |
+# ----------------------------------------------------------------------------- |
+def _funcs_to_names(funclist, namelist): |
+ result = [] |
+ for f, name in zip(funclist, namelist): |
+ if f and f[0]: |
+ result.append((name, f[1])) |
+ else: |
+ result.append(f) |
+ return result |
+ |
+# ----------------------------------------------------------------------------- |
+# _names_to_funcs() |
+# |
+# Given a list of regular expression function names, this converts it back to |
+# functions. |
+# ----------------------------------------------------------------------------- |
+def _names_to_funcs(namelist, fdict): |
+ result = [] |
+ for n in namelist: |
+ if n and n[0]: |
+ result.append((fdict[n[0]], n[1])) |
+ else: |
+ result.append(n) |
+ return result |
+ |
+# ----------------------------------------------------------------------------- |
+# _form_master_re() |
+# |
+# This function takes a list of all of the regex components and attempts to |
+# form the master regular expression. Given limitations in the Python re |
+# module, it may be necessary to break the master regex into separate expressions. |
+# ----------------------------------------------------------------------------- |
+def _form_master_re(relist, reflags, ldict, toknames): |
+ if not relist: |
+ return [] |
+ regex = '|'.join(relist) |
+ try: |
+ lexre = re.compile(regex, re.VERBOSE | reflags) |
+ |
+ # Build the index to function map for the matching engine |
+ lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) |
+ lexindexnames = lexindexfunc[:] |
+ |
+ for f, i in lexre.groupindex.items(): |
+ handle = ldict.get(f, None) |
+ if type(handle) in (types.FunctionType, types.MethodType): |
+ lexindexfunc[i] = (handle, toknames[f]) |
+ lexindexnames[i] = f |
+ elif handle is not None: |
+ lexindexnames[i] = f |
+ if f.find('ignore_') > 0: |
+ lexindexfunc[i] = (None, None) |
+ else: |
+ lexindexfunc[i] = (None, toknames[f]) |
+ |
+ return [(lexre, lexindexfunc)], [regex], [lexindexnames] |
+ except Exception: |
+ m = int(len(relist)/2) |
+ if m == 0: |
+ m = 1 |
+ llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) |
+ rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) |
+ return (llist+rlist), (lre+rre), (lnames+rnames) |
+ |
+# ----------------------------------------------------------------------------- |
+# def _statetoken(s,names) |
+# |
+# Given a declaration name s of the form "t_" and a dictionary whose keys are |
+# state names, this function returns a tuple (states,tokenname) where states |
+# is a tuple of state names and tokenname is the name of the token. For example, |
+# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') |
+# ----------------------------------------------------------------------------- |
+def _statetoken(s, names): |
+ parts = s.split('_') |
+ for i, part in enumerate(parts[1:], 1): |
+ if part not in names and part != 'ANY': |
+ break |
+ |
+ if i > 1: |
+ states = tuple(parts[1:i]) |
+ else: |
+ states = ('INITIAL',) |
+ |
+ if 'ANY' in states: |
+ states = tuple(names) |
+ |
+ tokenname = '_'.join(parts[i:]) |
+ return (states, tokenname) |
+ |
+ |
+# ----------------------------------------------------------------------------- |
+# LexerReflect() |
+# |
+# This class represents information needed to build a lexer as extracted from a |
+# user's input file. |
+# ----------------------------------------------------------------------------- |
+class LexerReflect(object): |
+ def __init__(self, ldict, log=None, reflags=0): |
+ self.ldict = ldict |
+ self.error_func = None |
+ self.tokens = [] |
+ self.reflags = reflags |
+ self.stateinfo = {'INITIAL': 'inclusive'} |
+ self.modules = set() |
+ self.error = False |
+ self.log = PlyLogger(sys.stderr) if log is None else log |
+ |
+ # Get all of the basic information |
+ def get_all(self): |
+ self.get_tokens() |
+ self.get_literals() |
+ self.get_states() |
+ self.get_rules() |
+ |
+ # Validate all of the information |
+ def validate_all(self): |
+ self.validate_tokens() |
+ self.validate_literals() |
+ self.validate_rules() |
+ return self.error |
+ |
+ # Get the tokens map |
+ def get_tokens(self): |
+ tokens = self.ldict.get('tokens', None) |
+ if not tokens: |
+ self.log.error('No token list is defined') |
+ self.error = True |
+ return |
+ |
+ if not isinstance(tokens, (list, tuple)): |
+ self.log.error('tokens must be a list or tuple') |
+ self.error = True |
+ return |
+ |
+ if not tokens: |
+ self.log.error('tokens is empty') |
+ self.error = True |
+ return |
+ |
+ self.tokens = tokens |
+ |
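+    # Example (illustrative): a tokens declaration in the user's module is |
+    # simply a sequence of token names: |
+    # |
+    #     tokens = ('NUMBER', 'PLUS', 'MINUS', 'ID') |
+ |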
+ # Validate the tokens |
+ def validate_tokens(self): |
+ terminals = {} |
+ for n in self.tokens: |
+ if not _is_identifier.match(n): |
+ self.log.error("Bad token name '%s'", n) |
+ self.error = True |
+ if n in terminals: |
+ self.log.warning("Token '%s' multiply defined", n) |
+ terminals[n] = 1 |
+ |
+ # Get the literals specifier |
+ def get_literals(self): |
+ self.literals = self.ldict.get('literals', '') |
+ if not self.literals: |
+ self.literals = '' |
+ |
+ # Validate literals |
+ def validate_literals(self): |
+ try: |
+ for c in self.literals: |
+ if not isinstance(c, StringTypes) or len(c) > 1: |
+ self.log.error('Invalid literal %s. Must be a single character', repr(c)) |
+ self.error = True |
+ |
+ except TypeError: |
+ self.log.error('Invalid literals specification. literals must be a sequence of characters') |
+ self.error = True |
+ |
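+    # Example (illustrative): a literals specification may be a string or a |
+    # sequence of single-character strings: |
+    # |
+    #     literals = '+-*/' |
+ |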
+ def get_states(self): |
+ self.states = self.ldict.get('states', None) |
+ # Build statemap |
+ if self.states: |
+ if not isinstance(self.states, (tuple, list)): |
+ self.log.error('states must be defined as a tuple or list') |
+ self.error = True |
+ else: |
+ for s in self.states: |
+ if not isinstance(s, tuple) or len(s) != 2: |
+ self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')", repr(s)) |
+ self.error = True |
+ continue |
+ name, statetype = s |
+ if not isinstance(name, StringTypes): |
+ self.log.error('State name %s must be a string', repr(name)) |
+ self.error = True |
+ continue |
+ if not (statetype == 'inclusive' or statetype == 'exclusive'): |
+ self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name) |
+ self.error = True |
+ continue |
+ if name in self.stateinfo: |
+ self.log.error("State '%s' already defined", name) |
+ self.error = True |
+ continue |
+ self.stateinfo[name] = statetype |
+ |
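+    # Example (illustrative): a states declaration in the user's module has |
+    # the form validated above; the state names are hypothetical: |
+    # |
+    #     states = ( |
+    #         ('comment', 'exclusive'), |
+    #         ('python',  'inclusive'), |
+    #     ) |
+ |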
+ # Get all of the symbols with a t_ prefix and sort them into various |
+ # categories (functions, strings, error functions, and ignore characters) |
+ |
+ def get_rules(self): |
+ tsymbols = [f for f in self.ldict if f[:2] == 't_'] |
+ |
+ # Now build up a list of functions and a list of strings |
+ self.toknames = {} # Mapping of symbols to token names |
+ self.funcsym = {} # Symbols defined as functions |
+ self.strsym = {} # Symbols defined as strings |
+ self.ignore = {} # Ignore strings by state |
+ self.errorf = {} # Error functions by state |
+ self.eoff = {} # EOF functions by state |
+ |
+ for s in self.stateinfo: |
+ self.funcsym[s] = [] |
+ self.strsym[s] = [] |
+ |
+ if len(tsymbols) == 0: |
+ self.log.error('No rules of the form t_rulename are defined') |
+ self.error = True |
+ return |
+ |
+ for f in tsymbols: |
+ t = self.ldict[f] |
+ states, tokname = _statetoken(f, self.stateinfo) |
+ self.toknames[f] = tokname |
+ |
+ if hasattr(t, '__call__'): |
+ if tokname == 'error': |
+ for s in states: |
+ self.errorf[s] = t |
+ elif tokname == 'eof': |
+ for s in states: |
+ self.eoff[s] = t |
+ elif tokname == 'ignore': |
+ line = t.__code__.co_firstlineno |
+ file = t.__code__.co_filename |
+ self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__) |
+ self.error = True |
+ else: |
+ for s in states: |
+ self.funcsym[s].append((f, t)) |
+ elif isinstance(t, StringTypes): |
+ if tokname == 'ignore': |
+ for s in states: |
+ self.ignore[s] = t |
+ if '\\' in t: |
+ self.log.warning("%s contains a literal backslash '\\'", f) |
+ |
+ elif tokname == 'error': |
+ self.log.error("Rule '%s' must be defined as a function", f) |
+ self.error = True |
+ else: |
+ for s in states: |
+ self.strsym[s].append((f, t)) |
+ else: |
+ self.log.error('%s not defined as a function or string', f) |
+ self.error = True |
+ |
+ # Sort the functions by line number |
+ for f in self.funcsym.values(): |
+ f.sort(key=lambda x: x[1].__code__.co_firstlineno) |
+ |
+ # Sort the strings by regular expression length |
+ for s in self.strsym.values(): |
+ s.sort(key=lambda x: len(x[1]), reverse=True) |
+ |
+ # Validate all of the t_rules collected |
+ def validate_rules(self): |
+ for state in self.stateinfo: |
+ # Validate all rules defined by functions |
+ |
+ for fname, f in self.funcsym[state]: |
+ line = f.__code__.co_firstlineno |
+ file = f.__code__.co_filename |
+ module = inspect.getmodule(f) |
+ self.modules.add(module) |
+ |
+ tokname = self.toknames[fname] |
+ if isinstance(f, types.MethodType): |
+ reqargs = 2 |
+ else: |
+ reqargs = 1 |
+ nargs = f.__code__.co_argcount |
+ if nargs > reqargs: |
+ self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) |
+ self.error = True |
+ continue |
+ |
+ if nargs < reqargs: |
+ self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) |
+ self.error = True |
+ continue |
+ |
+ if not _get_regex(f): |
+ self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__) |
+ self.error = True |
+ continue |
+ |
+ try: |
+ c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), re.VERBOSE | self.reflags) |
+ if c.match(''): |
+ self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) |
+ self.error = True |
+ except re.error as e: |
+ self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) |
+ if '#' in _get_regex(f): |
+ self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__) |
+ self.error = True |
+ |
+ # Validate all rules defined by strings |
+ for name, r in self.strsym[state]: |
+ tokname = self.toknames[name] |
+ if tokname == 'error': |
+ self.log.error("Rule '%s' must be defined as a function", name) |
+ self.error = True |
+ continue |
+ |
+ if tokname not in self.tokens and tokname.find('ignore_') < 0: |
+ self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname) |
+ self.error = True |
+ continue |
+ |
+ try: |
+ c = re.compile('(?P<%s>%s)' % (name, r), re.VERBOSE | self.reflags) |
+                if c.match(''): |
+ self.log.error("Regular expression for rule '%s' matches empty string", name) |
+ self.error = True |
+ except re.error as e: |
+ self.log.error("Invalid regular expression for rule '%s'. %s", name, e) |
+ if '#' in r: |
+ self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name) |
+ self.error = True |
+ |
+ if not self.funcsym[state] and not self.strsym[state]: |
+ self.log.error("No rules defined for state '%s'", state) |
+ self.error = True |
+ |
+ # Validate the error function |
+ efunc = self.errorf.get(state, None) |
+ if efunc: |
+ f = efunc |
+ line = f.__code__.co_firstlineno |
+ file = f.__code__.co_filename |
+ module = inspect.getmodule(f) |
+ self.modules.add(module) |
+ |
+ if isinstance(f, types.MethodType): |
+ reqargs = 2 |
+ else: |
+ reqargs = 1 |
+ nargs = f.__code__.co_argcount |
+ if nargs > reqargs: |
+ self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) |
+ self.error = True |
+ |
+ if nargs < reqargs: |
+ self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) |
+ self.error = True |
+ |
+ for module in self.modules: |
+ self.validate_module(module) |
+ |
+ # ----------------------------------------------------------------------------- |
+ # validate_module() |
+ # |
+ # This checks to see if there are duplicated t_rulename() functions or strings |
+ # in the parser input file. This is done using a simple regular expression |
+ # match on each line in the source code of the given module. |
+ # ----------------------------------------------------------------------------- |
+ |
+ def validate_module(self, module): |
+ try: |
+ lines, linen = inspect.getsourcelines(module) |
+ except IOError: |
+ return |
+ |
+ fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') |
+ sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') |
+ |
+ counthash = {} |
+ linen += 1 |
+ for line in lines: |
+ m = fre.match(line) |
+ if not m: |
+ m = sre.match(line) |
+ if m: |
+ name = m.group(1) |
+ prev = counthash.get(name) |
+ if not prev: |
+ counthash[name] = linen |
+ else: |
+ filename = inspect.getsourcefile(module) |
+ self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev) |
+ self.error = True |
+ linen += 1 |
+ |
+# ----------------------------------------------------------------------------- |
+# lex(module) |
+# |
+# Build all of the regular expression rules from definitions in the supplied module |
+# ----------------------------------------------------------------------------- |
+def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab', |
+ reflags=0, nowarn=False, outputdir=None, debuglog=None, errorlog=None): |
+ |
+ if lextab is None: |
+ lextab = 'lextab' |
+ |
+ global lexer |
+ |
+ ldict = None |
+ stateinfo = {'INITIAL': 'inclusive'} |
+ lexobj = Lexer() |
+ lexobj.lexoptimize = optimize |
+ global token, input |
+ |
+ if errorlog is None: |
+ errorlog = PlyLogger(sys.stderr) |
+ |
+ if debug: |
+ if debuglog is None: |
+ debuglog = PlyLogger(sys.stderr) |
+ |
+ # Get the module dictionary used for the lexer |
+ if object: |
+ module = object |
+ |
+    # Get the module dictionary used for the lexer |
+ if module: |
+ _items = [(k, getattr(module, k)) for k in dir(module)] |
+ ldict = dict(_items) |
+ # If no __file__ attribute is available, try to obtain it from the __module__ instead |
+ if '__file__' not in ldict: |
+ ldict['__file__'] = sys.modules[ldict['__module__']].__file__ |
+ else: |
+ ldict = get_caller_module_dict(2) |
+ |
+    # Determine if the module is part of a package or not. |
+    # If so, fix the lextab setting so that tables load correctly |
+ pkg = ldict.get('__package__') |
+ if pkg and isinstance(lextab, str): |
+ if '.' not in lextab: |
+ lextab = pkg + '.' + lextab |
+ |
+    # Collect lexer information from the dictionary |
+ linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) |
+ linfo.get_all() |
+ if not optimize: |
+ if linfo.validate_all(): |
+ raise SyntaxError("Can't build lexer") |
+ |
+ if optimize and lextab: |
+ try: |
+ lexobj.readtab(lextab, ldict) |
+ token = lexobj.token |
+ input = lexobj.input |
+ lexer = lexobj |
+ return lexobj |
+ |
+ except ImportError: |
+ pass |
+ |
+ # Dump some basic debugging information |
+ if debug: |
+ debuglog.info('lex: tokens = %r', linfo.tokens) |
+ debuglog.info('lex: literals = %r', linfo.literals) |
+ debuglog.info('lex: states = %r', linfo.stateinfo) |
+ |
+ # Build a dictionary of valid token names |
+ lexobj.lextokens = set() |
+ for n in linfo.tokens: |
+ lexobj.lextokens.add(n) |
+ |
+ # Get literals specification |
+ if isinstance(linfo.literals, (list, tuple)): |
+ lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) |
+ else: |
+ lexobj.lexliterals = linfo.literals |
+ |
+ lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) |
+ |
+ # Get the stateinfo dictionary |
+ stateinfo = linfo.stateinfo |
+ |
+ regexs = {} |
+ # Build the master regular expressions |
+ for state in stateinfo: |
+ regex_list = [] |
+ |
+ # Add rules defined by functions first |
+ for fname, f in linfo.funcsym[state]: |
+ line = f.__code__.co_firstlineno |
+ file = f.__code__.co_filename |
+ regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) |
+ if debug: |
+ debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) |
+ |
+ # Now add all of the simple rules |
+ for name, r in linfo.strsym[state]: |
+ regex_list.append('(?P<%s>%s)' % (name, r)) |
+ if debug: |
+ debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) |
+ |
+ regexs[state] = regex_list |
+ |
+ # Build the master regular expressions |
+ |
+ if debug: |
+ debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') |
+ |
+ for state in regexs: |
+ lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) |
+ lexobj.lexstatere[state] = lexre |
+ lexobj.lexstateretext[state] = re_text |
+ lexobj.lexstaterenames[state] = re_names |
+ if debug: |
+ for i, text in enumerate(re_text): |
+ debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) |
+ |
+ # For inclusive states, we need to add the regular expressions from the INITIAL state |
+ for state, stype in stateinfo.items(): |
+ if state != 'INITIAL' and stype == 'inclusive': |
+ lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) |
+ lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) |
+ lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) |
+ |
+ lexobj.lexstateinfo = stateinfo |
+ lexobj.lexre = lexobj.lexstatere['INITIAL'] |
+ lexobj.lexretext = lexobj.lexstateretext['INITIAL'] |
+ lexobj.lexreflags = reflags |
+ |
+ # Set up ignore variables |
+ lexobj.lexstateignore = linfo.ignore |
+ lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') |
+ |
+ # Set up error functions |
+ lexobj.lexstateerrorf = linfo.errorf |
+ lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) |
+ if not lexobj.lexerrorf: |
+ errorlog.warning('No t_error rule is defined') |
+ |
+ # Set up eof functions |
+ lexobj.lexstateeoff = linfo.eoff |
+ lexobj.lexeoff = linfo.eoff.get('INITIAL', None) |
+ |
+ # Check state information for ignore and error rules |
+ for s, stype in stateinfo.items(): |
+ if stype == 'exclusive': |
+ if s not in linfo.errorf: |
+ errorlog.warning("No error rule is defined for exclusive state '%s'", s) |
+ if s not in linfo.ignore and lexobj.lexignore: |
+ errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) |
+ elif stype == 'inclusive': |
+ if s not in linfo.errorf: |
+ linfo.errorf[s] = linfo.errorf.get('INITIAL', None) |
+ if s not in linfo.ignore: |
+ linfo.ignore[s] = linfo.ignore.get('INITIAL', '') |
+ |
+ # Create global versions of the token() and input() functions |
+ token = lexobj.token |
+ input = lexobj.input |
+ lexer = lexobj |
+ |
+ # If in optimize mode, we write the lextab |
+ if lextab and optimize: |
+ if outputdir is None: |
+ # If no output directory is set, the location of the output files |
+ # is determined according to the following rules: |
+ # - If lextab specifies a package, files go into that package directory |
+ # - Otherwise, files go in the same directory as the specifying module |
+ if isinstance(lextab, types.ModuleType): |
+ srcfile = lextab.__file__ |
+ else: |
+ if '.' not in lextab: |
+ srcfile = ldict['__file__'] |
+ else: |
+ parts = lextab.split('.') |
+ pkgname = '.'.join(parts[:-1]) |
+ exec('import %s' % pkgname) |
+ srcfile = getattr(sys.modules[pkgname], '__file__', '') |
+ outputdir = os.path.dirname(srcfile) |
+ try: |
+ lexobj.writetab(lextab, outputdir) |
+ except IOError as e: |
+ errorlog.warning("Couldn't write lextab module %r. %s" % (lextab, e)) |
+ |
+ return lexobj |
+ |
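+# Example (illustrative): a minimal token module built by lex().  The token |
+# names and rules are hypothetical; lex() collects every t_ symbol from the |
+# calling module as described above: |
+# |
+#     import ply.lex as lex |
+# |
+#     tokens = ('NUMBER', 'PLUS') |
+# |
+#     t_PLUS = r'\+' |
+#     t_ignore = ' \t' |
+# |
+#     def t_NUMBER(t): |
+#         r'\d+' |
+#         t.value = int(t.value) |
+#         return t |
+# |
+#     def t_error(t): |
+#         print("Illegal character %r" % t.value[0]) |
+#         t.lexer.skip(1) |
+# |
+#     lexer = lex.lex() |
+ |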
+# ----------------------------------------------------------------------------- |
+# runmain() |
+# |
+# This runs the lexer as a main program |
+# ----------------------------------------------------------------------------- |
+ |
+def runmain(lexer=None, data=None): |
+ if not data: |
+ try: |
+ filename = sys.argv[1] |
+ f = open(filename) |
+ data = f.read() |
+ f.close() |
+ except IndexError: |
+ sys.stdout.write('Reading from standard input (type EOF to end):\n') |
+ data = sys.stdin.read() |
+ |
+ if lexer: |
+ _input = lexer.input |
+ else: |
+ _input = input |
+ _input(data) |
+ if lexer: |
+ _token = lexer.token |
+ else: |
+ _token = token |
+ |
+ while True: |
+ tok = _token() |
+ if not tok: |
+ break |
+ sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos)) |
+ |
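+# Example (illustrative): a token module can call this for quick testing |
+# from the command line (assuming this module is imported as lex): |
+# |
+#     if __name__ == '__main__': |
+#         lex.runmain() |
+ |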
+# ----------------------------------------------------------------------------- |
+# @TOKEN(regex) |
+# |
+# This decorator function can be used to set the regular expression on a |
+# function as an alternative to defining it in the function's docstring |
+# ----------------------------------------------------------------------------- |
+ |
+def TOKEN(r): |
+ def set_regex(f): |
+ if hasattr(r, '__call__'): |
+ f.regex = _get_regex(r) |
+ else: |
+ f.regex = r |
+ return f |
+ return set_regex |
+ |
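+# Example (illustrative): building a regular expression programmatically and |
+# attaching it with @TOKEN instead of a docstring.  The identifier pattern |
+# is hypothetical: |
+# |
+#     digit      = r'([0-9])' |
+#     nondigit   = r'([_A-Za-z])' |
+#     identifier = r'(' + nondigit + r'(' + digit + r'|' + nondigit + r')*)' |
+# |
+#     @TOKEN(identifier) |
+#     def t_ID(t): |
+#         return t |
+ |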
+# Alternative spelling of the TOKEN decorator |
+Token = TOKEN |
+ |