Index: mojo/public/third_party/ply/lex.py
diff --git a/mojo/public/third_party/ply/lex.py b/mojo/public/third_party/ply/lex.py
deleted file mode 100644
index bd32da932762709505415b185ee0d8c496c9b0e9..0000000000000000000000000000000000000000
--- a/mojo/public/third_party/ply/lex.py
+++ /dev/null
@@ -1,1058 +0,0 @@
-# -----------------------------------------------------------------------------
-# ply: lex.py
-#
-# Copyright (C) 2001-2011,
-# David M. Beazley (Dabeaz LLC)
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-# * Neither the name of the David Beazley or Dabeaz LLC may be used to
-#   endorse or promote products derived from this software without
-#   specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# -----------------------------------------------------------------------------
-
-__version__ = "3.4"
-__tabversion__ = "3.2"    # Version of table file used
-
-import re, sys, types, copy, os
-
-# This tuple contains known string types
-try:
-    # Python 2.6
-    StringTypes = (types.StringType, types.UnicodeType)
-except AttributeError:
-    # Python 3.0
-    StringTypes = (str, bytes)
-
-# Extract the code attribute of a function. Different implementations
-# are for Python 2/3 compatibility.
-
-if sys.version_info[0] < 3:
-    def func_code(f):
-        return f.func_code
-else:
-    def func_code(f):
-        return f.__code__
-
-# This regular expression is used to match valid token names
-_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
-
-# Exception thrown when an invalid token is encountered and no default error
-# handler is defined.
-
-class LexError(Exception):
-    def __init__(self,message,s):
-        self.args = (message,)
-        self.text = s
-
-# Token class. This class is used to represent the tokens produced.
-class LexToken(object):
-    def __str__(self):
-        return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos)
-    def __repr__(self):
-        return str(self)
-
-# This object is a stand-in for a logging object created by the
-# logging module.
-
-class PlyLogger(object):
-    def __init__(self,f):
-        self.f = f
-    def critical(self,msg,*args,**kwargs):
-        self.f.write((msg % args) + "\n")
-
-    def warning(self,msg,*args,**kwargs):
-        self.f.write("WARNING: "+ (msg % args) + "\n")
-
-    def error(self,msg,*args,**kwargs):
-        self.f.write("ERROR: " + (msg % args) + "\n")
-
-    info = critical
-    debug = critical
-
-# Null logger is used when no output is generated. Does nothing.
-class NullLogger(object):
-    def __getattribute__(self,name):
-        return self
-    def __call__(self,*args,**kwargs):
-        return self
-
-# -----------------------------------------------------------------------------
-# === Lexing Engine ===
-#
-# The following Lexer class implements the lexer runtime. There are only
-# a few public methods and attributes:
-#
-#    input()  -  Store a new string in the lexer
-#    token()  -  Get the next token
-#    clone()  -  Clone the lexer
-#
-#    lineno   -  Current line number
-#    lexpos   -  Current position in the input string
-# -----------------------------------------------------------------------------
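-
-# A minimal usage sketch of this runtime interface (the input text and token
-# names here are hypothetical; the lexer instance comes from lex() below):
-#
-#     lexer.input("3 + 4")            # store a new string
-#     while True:
-#         tok = lexer.token()         # LexToken instance, or None at EOF
-#         if tok is None:
-#             break
-#         print(tok.type, tok.value, tok.lineno, tok.lexpos)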
-
-class Lexer:
-    def __init__(self):
-        self.lexre = None             # Master regular expression. This is a list of
-                                      # tuples (re,findex) where re is a compiled
-                                      # regular expression and findex is a list
-                                      # mapping regex group numbers to rules
-        self.lexretext = None         # Current regular expression strings
-        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
-        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
-        self.lexstaterenames = {}     # Dictionary mapping lexer states to symbol names
-        self.lexstate = "INITIAL"     # Current lexer state
-        self.lexstatestack = []       # Stack of lexer states
-        self.lexstateinfo = None      # State information
-        self.lexstateignore = {}      # Dictionary of ignored characters for each state
-        self.lexstateerrorf = {}      # Dictionary of error functions for each state
-        self.lexreflags = 0           # Optional re compile flags
-        self.lexdata = None           # Actual input data (as a string)
-        self.lexpos = 0               # Current position in input text
-        self.lexlen = 0               # Length of the input text
-        self.lexerrorf = None         # Error rule (if any)
-        self.lextokens = None         # List of valid tokens
-        self.lexignore = ""           # Ignored characters
-        self.lexliterals = ""         # Literal characters that can be passed through
-        self.lexmodule = None         # Module
-        self.lineno = 1               # Current line number
-        self.lexoptimize = 0          # Optimized mode
-
-    def clone(self,object=None):
-        c = copy.copy(self)
-
-        # If the object parameter has been supplied, it means we are attaching the
-        # lexer to a new object. In this case, we have to rebind all methods in
-        # the lexstatere and lexstateerrorf tables.
-
-        if object:
-            newtab = { }
-            for key, ritem in self.lexstatere.items():
-                newre = []
-                for cre, findex in ritem:
-                    newfindex = []
-                    for f in findex:
-                        if not f or not f[0]:
-                            newfindex.append(f)
-                            continue
-                        newfindex.append((getattr(object,f[0].__name__),f[1]))
-                    newre.append((cre,newfindex))
-                newtab[key] = newre
-            c.lexstatere = newtab
-            c.lexstateerrorf = { }
-            for key, ef in self.lexstateerrorf.items():
-                c.lexstateerrorf[key] = getattr(object,ef.__name__)
-            c.lexmodule = object
-        return c
-
-    # ------------------------------------------------------------
-    # writetab() - Write lexer information to a table file
-    # ------------------------------------------------------------
-    def writetab(self,tabfile,outputdir=""):
-        if isinstance(tabfile,types.ModuleType):
-            return
-        basetabfilename = tabfile.split(".")[-1]
-        filename = os.path.join(outputdir,basetabfilename)+".py"
-        tf = open(filename,"w")
-        tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
-        tf.write("_tabversion = %s\n" % repr(__version__))
-        tf.write("_lextokens = %s\n" % repr(self.lextokens))
-        tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
-        tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
-        tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))
-
-        tabre = { }
-        # Collect all functions in the initial state
-        initial = self.lexstatere["INITIAL"]
-        initialfuncs = []
-        for part in initial:
-            for f in part[1]:
-                if f and f[0]:
-                    initialfuncs.append(f)
-
-        for key, lre in self.lexstatere.items():
-            titem = []
-            for i in range(len(lre)):
-                titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i])))
-            tabre[key] = titem
-
-        tf.write("_lexstatere = %s\n" % repr(tabre))
-        tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))
-
-        taberr = { }
-        for key, ef in self.lexstateerrorf.items():
-            if ef:
-                taberr[key] = ef.__name__
-            else:
-                taberr[key] = None
-        tf.write("_lexstateerrorf = %s\n" % repr(taberr))
-        tf.close()
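-
-    # For reference, the generated table file is plain Python assignments of
-    # the form written above; an illustrative (not literal) sketch:
-    #
-    #     _tabversion = '3.4'
-    #     _lextokens = {'NUMBER': 1, 'PLUS': 1}
-    #     _lexreflags = 0
-    #     _lexliterals = ''
-    #     _lexstateinfo = {'INITIAL': 'inclusive'}
-    #     _lexstatere = {'INITIAL': [('(?P<t_NUMBER>\\d+)|(?P<t_PLUS>\\+)', [...])]}
-    #     _lexstateignore = {'INITIAL': ' \t'}
-    #     _lexstateerrorf = {'INITIAL': 't_error'}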
-
-    # ------------------------------------------------------------
-    # readtab() - Read lexer information from a tab file
-    # ------------------------------------------------------------
-    def readtab(self,tabfile,fdict):
-        if isinstance(tabfile,types.ModuleType):
-            lextab = tabfile
-        else:
-            if sys.version_info[0] < 3:
-                exec("import %s as lextab" % tabfile)
-            else:
-                env = { }
-                exec("import %s as lextab" % tabfile, env,env)
-                lextab = env['lextab']
-
-        if getattr(lextab,"_tabversion","0.0") != __version__:
-            raise ImportError("Inconsistent PLY version")
-
-        self.lextokens = lextab._lextokens
-        self.lexreflags = lextab._lexreflags
-        self.lexliterals = lextab._lexliterals
-        self.lexstateinfo = lextab._lexstateinfo
-        self.lexstateignore = lextab._lexstateignore
-        self.lexstatere = { }
-        self.lexstateretext = { }
-        for key,lre in lextab._lexstatere.items():
-            titem = []
-            txtitem = []
-            for i in range(len(lre)):
-                titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict)))
-                txtitem.append(lre[i][0])
-            self.lexstatere[key] = titem
-            self.lexstateretext[key] = txtitem
-        self.lexstateerrorf = { }
-        for key,ef in lextab._lexstateerrorf.items():
-            self.lexstateerrorf[key] = fdict[ef]
-        self.begin('INITIAL')
-
-    # ------------------------------------------------------------
-    # input() - Push a new string into the lexer
-    # ------------------------------------------------------------
-    def input(self,s):
-        # Pull off the first character to see if s looks like a string
-        c = s[:1]
-        if not isinstance(c,StringTypes):
-            raise ValueError("Expected a string")
-        self.lexdata = s
-        self.lexpos = 0
-        self.lexlen = len(s)
-
-    # ------------------------------------------------------------
-    # begin() - Changes the lexing state
-    # ------------------------------------------------------------
-    def begin(self,state):
-        if not state in self.lexstatere:
-            raise ValueError("Undefined state")
-        self.lexre = self.lexstatere[state]
-        self.lexretext = self.lexstateretext[state]
-        self.lexignore = self.lexstateignore.get(state,"")
-        self.lexerrorf = self.lexstateerrorf.get(state,None)
-        self.lexstate = state
-
-    # ------------------------------------------------------------
-    # push_state() - Changes the lexing state and saves old on stack
-    # ------------------------------------------------------------
-    def push_state(self,state):
-        self.lexstatestack.append(self.lexstate)
-        self.begin(state)
-
-    # ------------------------------------------------------------
-    # pop_state() - Restores the previous state
-    # ------------------------------------------------------------
-    def pop_state(self):
-        self.begin(self.lexstatestack.pop())
-
-    # ------------------------------------------------------------
-    # current_state() - Returns the current lexing state
-    # ------------------------------------------------------------
-    def current_state(self):
-        return self.lexstate
-
-    # ------------------------------------------------------------
-    # skip() - Skip ahead n characters
-    # ------------------------------------------------------------
-    def skip(self,n):
-        self.lexpos += n
-
-    # ------------------------------------------------------------
-    # token() - Return the next token from the Lexer
-    #
-    # Note: This function has been carefully implemented to be as fast
-    # as possible. Don't make changes unless you really know what
-    # you are doing.
-    # ------------------------------------------------------------
-    def token(self):
-        # Make local copies of frequently referenced attributes
-        lexpos = self.lexpos
-        lexlen = self.lexlen
-        lexignore = self.lexignore
-        lexdata = self.lexdata
-
-        while lexpos < lexlen:
-            # Short-circuit whitespace, tabs, and other ignored characters
-            if lexdata[lexpos] in lexignore:
-                lexpos += 1
-                continue
-
-            # Look for a regular expression match
-            for lexre,lexindexfunc in self.lexre:
-                m = lexre.match(lexdata,lexpos)
-                if not m: continue
-
-                # Create a token for return
-                tok = LexToken()
-                tok.value = m.group()
-                tok.lineno = self.lineno
-                tok.lexpos = lexpos
-
-                i = m.lastindex
-                func,tok.type = lexindexfunc[i]
-
-                if not func:
-                    # If no token type was set, it's an ignored token
-                    if tok.type:
-                        self.lexpos = m.end()
-                        return tok
-                    else:
-                        lexpos = m.end()
-                        break
-
-                lexpos = m.end()
-
-                # If token is processed by a function, call it
-
-                tok.lexer = self      # Set additional attributes useful in token rules
-                self.lexmatch = m
-                self.lexpos = lexpos
-
-                newtok = func(tok)
-
-                # Every function must return a token. If it returns None,
-                # we just move on to the next token.
-                if not newtok:
-                    lexpos = self.lexpos        # This is here in case user has updated lexpos.
-                    lexignore = self.lexignore  # This is here in case there was a state change
-                    break
-
-                # Verify type of the token. If not in the token map, raise an error
-                if not self.lexoptimize:
-                    if not newtok.type in self.lextokens:
-                        raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
-                            func_code(func).co_filename, func_code(func).co_firstlineno,
-                            func.__name__, newtok.type),lexdata[lexpos:])
-
-                return newtok
-            else:
-                # No match, see if in literals
-                if lexdata[lexpos] in self.lexliterals:
-                    tok = LexToken()
-                    tok.value = lexdata[lexpos]
-                    tok.lineno = self.lineno
-                    tok.type = tok.value
-                    tok.lexpos = lexpos
-                    self.lexpos = lexpos + 1
-                    return tok
-
-                # No match. Call t_error() if defined.
-                if self.lexerrorf:
-                    tok = LexToken()
-                    tok.value = self.lexdata[lexpos:]
-                    tok.lineno = self.lineno
-                    tok.type = "error"
-                    tok.lexer = self
-                    tok.lexpos = lexpos
-                    self.lexpos = lexpos
-                    newtok = self.lexerrorf(tok)
-                    if lexpos == self.lexpos:
-                        # Error method didn't change text position at all. This is an error.
-                        raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
-                    lexpos = self.lexpos
-                    if not newtok: continue
-                    return newtok
-
-                self.lexpos = lexpos
-                raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])
-
-        self.lexpos = lexpos + 1
-        if self.lexdata is None:
-            raise RuntimeError("No input string given with input()")
-        return None
-
-    # Iterator interface
-    def __iter__(self):
-        return self
-
-    def next(self):
-        t = self.token()
-        if t is None:
-            raise StopIteration
-        return t
-
-    __next__ = next
-
-# -----------------------------------------------------------------------------
-# === Lex Builder ===
-#
-# The functions and classes below are used to collect lexing information
-# and build a Lexer object from it.
-# -----------------------------------------------------------------------------
-
-# -----------------------------------------------------------------------------
-# get_caller_module_dict()
-#
-# This function returns a dictionary containing all of the symbols defined within
-# a caller further down the call stack. It is used to get the environment
-# associated with the lex() call if none was provided.
-# -----------------------------------------------------------------------------
-
-def get_caller_module_dict(levels):
-    try:
-        raise RuntimeError
-    except RuntimeError:
-        e,b,t = sys.exc_info()
-        f = t.tb_frame
-        while levels > 0:
-            f = f.f_back
-            levels -= 1
-        ldict = f.f_globals.copy()
-        if f.f_globals != f.f_locals:
-            ldict.update(f.f_locals)
-
-        return ldict
-
-# -----------------------------------------------------------------------------
-# _funcs_to_names()
-#
-# Given a list of regular expression functions, this converts it to a list
-# suitable for output to a table file
-# -----------------------------------------------------------------------------
-
-def _funcs_to_names(funclist,namelist):
-    result = []
-    for f,name in zip(funclist,namelist):
-        if f and f[0]:
-            result.append((name, f[1]))
-        else:
-            result.append(f)
-    return result
-
-# -----------------------------------------------------------------------------
-# _names_to_funcs()
-#
-# Given a list of regular expression function names, this converts it back to
-# functions.
-# -----------------------------------------------------------------------------
-
-def _names_to_funcs(namelist,fdict):
-    result = []
-    for n in namelist:
-        if n and n[0]:
-            result.append((fdict[n[0]],n[1]))
-        else:
-            result.append(n)
-    return result
-
-# -----------------------------------------------------------------------------
-# _form_master_re()
-#
-# This function takes a list of all of the regex components and attempts to
-# form the master regular expression. Given limitations in the Python re
-# module, it may be necessary to break the master regex into separate expressions.
-# -----------------------------------------------------------------------------
-
-def _form_master_re(relist,reflags,ldict,toknames):
-    if not relist: return []
-    regex = "|".join(relist)
-    try:
-        lexre = re.compile(regex,re.VERBOSE | reflags)
-
-        # Build the index to function map for the matching engine
-        lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
-        lexindexnames = lexindexfunc[:]
-
-        for f,i in lexre.groupindex.items():
-            handle = ldict.get(f,None)
-            if type(handle) in (types.FunctionType, types.MethodType):
-                lexindexfunc[i] = (handle,toknames[f])
-                lexindexnames[i] = f
-            elif handle is not None:
-                lexindexnames[i] = f
-                if f.find("ignore_") > 0:
-                    lexindexfunc[i] = (None,None)
-                else:
-                    lexindexfunc[i] = (None, toknames[f])
-
-        return [(lexre,lexindexfunc)],[regex],[lexindexnames]
-    except Exception:
-        m = int(len(relist)/2)
-        if m == 0: m = 1
-        llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames)
-        rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames)
-        return llist+rlist, lre+rre, lnames+rnames
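-
-# For illustration, given two rules t_NUMBER and t_PLUS (hypothetical names),
-# the joined master regex would look like:
-#
-#     (?P<t_NUMBER>\d+)|(?P<t_PLUS>\+)
-#
-# token() then uses m.lastindex to map the winning named group back to its
-# rule through the lexindexfunc table built here.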
-
-# -----------------------------------------------------------------------------
-# def _statetoken(s,names)
-#
-# Given a declaration name s of the form "t_" and a dictionary whose keys are
-# state names, this function returns a tuple (states,tokenname) where states
-# is a tuple of state names and tokenname is the name of the token. For example,
-# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
-# -----------------------------------------------------------------------------
-
-def _statetoken(s,names):
-    nonstate = 1
-    parts = s.split("_")
-    for i in range(1,len(parts)):
-        if not parts[i] in names and parts[i] != 'ANY': break
-    if i > 1:
-        states = tuple(parts[1:i])
-    else:
-        states = ('INITIAL',)
-
-    if 'ANY' in states:
-        states = tuple(names)
-
-    tokenname = "_".join(parts[i:])
-    return (states,tokenname)
-
-
-# -----------------------------------------------------------------------------
-# LexerReflect()
-#
-# This class represents information needed to build a lexer as extracted from a
-# user's input file.
-# -----------------------------------------------------------------------------
-class LexerReflect(object):
-    def __init__(self,ldict,log=None,reflags=0):
-        self.ldict = ldict
-        self.error_func = None
-        self.tokens = []
-        self.reflags = reflags
-        self.stateinfo = { 'INITIAL' : 'inclusive'}
-        self.files = {}
-        self.error = 0
-
-        if log is None:
-            self.log = PlyLogger(sys.stderr)
-        else:
-            self.log = log
-
-    # Get all of the basic information
-    def get_all(self):
-        self.get_tokens()
-        self.get_literals()
-        self.get_states()
-        self.get_rules()
-
-    # Validate all of the information
-    def validate_all(self):
-        self.validate_tokens()
-        self.validate_literals()
-        self.validate_rules()
-        return self.error
-
-    # Get the tokens map
-    def get_tokens(self):
-        tokens = self.ldict.get("tokens",None)
-        if not tokens:
-            self.log.error("No token list is defined")
-            self.error = 1
-            return
-
-        if not isinstance(tokens,(list, tuple)):
-            self.log.error("tokens must be a list or tuple")
-            self.error = 1
-            return
-
-        if not tokens:
-            self.log.error("tokens is empty")
-            self.error = 1
-            return
-
-        self.tokens = tokens
-
-    # Validate the tokens
-    def validate_tokens(self):
-        terminals = {}
-        for n in self.tokens:
-            if not _is_identifier.match(n):
-                self.log.error("Bad token name '%s'",n)
-                self.error = 1
-            if n in terminals:
-                self.log.warning("Token '%s' multiply defined", n)
-            terminals[n] = 1
-
-    # Get the literals specifier
-    def get_literals(self):
-        self.literals = self.ldict.get("literals","")
-
-    # Validate literals
-    def validate_literals(self):
-        try:
-            for c in self.literals:
-                if not isinstance(c,StringTypes) or len(c) > 1:
-                    self.log.error("Invalid literal %s. Must be a single character", repr(c))
-                    self.error = 1
-                    continue
-
-        except TypeError:
-            self.log.error("Invalid literals specification. literals must be a sequence of characters")
-            self.error = 1
-
-    def get_states(self):
-        self.states = self.ldict.get("states",None)
-        # Build statemap
-        if self.states:
-            if not isinstance(self.states,(tuple,list)):
-                self.log.error("states must be defined as a tuple or list")
-                self.error = 1
-            else:
-                for s in self.states:
-                    if not isinstance(s,tuple) or len(s) != 2:
-                        self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s))
-                        self.error = 1
-                        continue
-                    name, statetype = s
-                    if not isinstance(name,StringTypes):
-                        self.log.error("State name %s must be a string", repr(name))
-                        self.error = 1
-                        continue
-                    if not (statetype == 'inclusive' or statetype == 'exclusive'):
-                        self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name)
-                        self.error = 1
-                        continue
-                    if name in self.stateinfo:
-                        self.log.error("State '%s' already defined",name)
-                        self.error = 1
-                        continue
-                    self.stateinfo[name] = statetype
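-
-    # For reference, a valid states declaration in the user's module looks
-    # like this (state names here are hypothetical):
-    #
-    #     states = (
-    #         ('comment', 'exclusive'),
-    #         ('python',  'inclusive'),
-    #     )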
-
-    # Get all of the symbols with a t_ prefix and sort them into various
-    # categories (functions, strings, error functions, and ignore characters)
-
-    def get_rules(self):
-        tsymbols = [f for f in self.ldict if f[:2] == 't_' ]
-
-        # Now build up a list of functions and a list of strings
-
-        self.toknames = { }     # Mapping of symbols to token names
-        self.funcsym = { }      # Symbols defined as functions
-        self.strsym = { }       # Symbols defined as strings
-        self.ignore = { }       # Ignore strings by state
-        self.errorf = { }       # Error functions by state
-
-        for s in self.stateinfo:
-            self.funcsym[s] = []
-            self.strsym[s] = []
-
-        if len(tsymbols) == 0:
-            self.log.error("No rules of the form t_rulename are defined")
-            self.error = 1
-            return
-
-        for f in tsymbols:
-            t = self.ldict[f]
-            states, tokname = _statetoken(f,self.stateinfo)
-            self.toknames[f] = tokname
-
-            if hasattr(t,"__call__"):
-                if tokname == 'error':
-                    for s in states:
-                        self.errorf[s] = t
-                elif tokname == 'ignore':
-                    line = func_code(t).co_firstlineno
-                    file = func_code(t).co_filename
-                    self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__)
-                    self.error = 1
-                else:
-                    for s in states:
-                        self.funcsym[s].append((f,t))
-            elif isinstance(t, StringTypes):
-                if tokname == 'ignore':
-                    for s in states:
-                        self.ignore[s] = t
-                    if "\\" in t:
-                        self.log.warning("%s contains a literal backslash '\\'",f)
-
-                elif tokname == 'error':
-                    self.log.error("Rule '%s' must be defined as a function", f)
-                    self.error = 1
-                else:
-                    for s in states:
-                        self.strsym[s].append((f,t))
-            else:
-                self.log.error("%s not defined as a function or string", f)
-                self.error = 1
-
-        # Sort the functions by line number
-        for f in self.funcsym.values():
-            if sys.version_info[0] < 3:
-                f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno))
-            else:
-                # Python 3.0
-                f.sort(key=lambda x: func_code(x[1]).co_firstlineno)
-
-        # Sort the strings by regular expression length
-        for s in self.strsym.values():
-            if sys.version_info[0] < 3:
-                s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))
-            else:
-                # Python 3.0
-                s.sort(key=lambda x: len(x[1]),reverse=True)
-
-    # Validate all of the t_rules collected
-    def validate_rules(self):
-        for state in self.stateinfo:
-            # Validate all rules defined by functions
-
-            for fname, f in self.funcsym[state]:
-                line = func_code(f).co_firstlineno
-                file = func_code(f).co_filename
-                self.files[file] = 1
-
-                tokname = self.toknames[fname]
-                if isinstance(f, types.MethodType):
-                    reqargs = 2
-                else:
-                    reqargs = 1
-                nargs = func_code(f).co_argcount
-                if nargs > reqargs:
-                    self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
-                    self.error = 1
-                    continue
-
-                if nargs < reqargs:
-                    self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
-                    self.error = 1
-                    continue
-
-                if not f.__doc__:
-                    self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__)
-                    self.error = 1
-                    continue
-
-                try:
-                    c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags)
-                    if c.match(""):
-                        self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__)
-                        self.error = 1
-                except re.error:
-                    _etype, e, _etrace = sys.exc_info()
-                    self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e)
-                    if '#' in f.__doc__:
-                        self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__)
-                    self.error = 1
-
-            # Validate all rules defined by strings
-            for name,r in self.strsym[state]:
-                tokname = self.toknames[name]
-                if tokname == 'error':
-                    self.log.error("Rule '%s' must be defined as a function", name)
-                    self.error = 1
-                    continue
-
-                if not tokname in self.tokens and tokname.find("ignore_") < 0:
-                    self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname)
-                    self.error = 1
-                    continue
-
-                try:
-                    c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags)
-                    if (c.match("")):
-                        self.log.error("Regular expression for rule '%s' matches empty string",name)
-                        self.error = 1
-                except re.error:
-                    _etype, e, _etrace = sys.exc_info()
-                    self.log.error("Invalid regular expression for rule '%s'. %s",name,e)
-                    if '#' in r:
-                        self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name)
-                    self.error = 1
-
-            if not self.funcsym[state] and not self.strsym[state]:
-                self.log.error("No rules defined for state '%s'",state)
-                self.error = 1
-
-            # Validate the error function
-            efunc = self.errorf.get(state,None)
-            if efunc:
-                f = efunc
-                line = func_code(f).co_firstlineno
-                file = func_code(f).co_filename
-                self.files[file] = 1
-
-                if isinstance(f, types.MethodType):
-                    reqargs = 2
-                else:
-                    reqargs = 1
-                nargs = func_code(f).co_argcount
-                if nargs > reqargs:
-                    self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
-                    self.error = 1
-
-                if nargs < reqargs:
-                    self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
-                    self.error = 1
-
-        for f in self.files:
-            self.validate_file(f)
-
-
-    # -----------------------------------------------------------------------------
-    # validate_file()
-    #
-    # This checks to see if there are duplicated t_rulename() functions or strings
-    # in the lexer input file. This is done using a simple regular expression
-    # match on each line in the given file.
-    # -----------------------------------------------------------------------------
-
-    def validate_file(self,filename):
-        import os.path
-        base,ext = os.path.splitext(filename)
-        if ext != '.py': return     # No idea what the file is. Return OK
-
-        try:
-            f = open(filename)
-            lines = f.readlines()
-            f.close()
-        except IOError:
-            return                  # Couldn't find the file. Don't worry about it
-
-        fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
-        sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
-
-        counthash = { }
-        linen = 1
-        for l in lines:
-            m = fre.match(l)
-            if not m:
-                m = sre.match(l)
-            if m:
-                name = m.group(1)
-                prev = counthash.get(name)
-                if not prev:
-                    counthash[name] = linen
-                else:
-                    self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev)
-                    self.error = 1
-            linen += 1
-
-# -----------------------------------------------------------------------------
-# lex(module)
-#
-# Build all of the regular expression rules from definitions in the supplied module
-# -----------------------------------------------------------------------------
-def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None):
-    global lexer
-    ldict = None
-    stateinfo = { 'INITIAL' : 'inclusive'}
-    lexobj = Lexer()
-    lexobj.lexoptimize = optimize
-    global token,input
-
-    if errorlog is None:
-        errorlog = PlyLogger(sys.stderr)
-
-    if debug:
-        if debuglog is None:
-            debuglog = PlyLogger(sys.stderr)
-
-    # Get the module dictionary used for the lexer
-    if object: module = object
-
-    if module:
-        _items = [(k,getattr(module,k)) for k in dir(module)]
-        ldict = dict(_items)
-    else:
-        ldict = get_caller_module_dict(2)
-
-    # Collect lexer information from the dictionary
-    linfo = LexerReflect(ldict,log=errorlog,reflags=reflags)
-    linfo.get_all()
-    if not optimize:
-        if linfo.validate_all():
-            raise SyntaxError("Can't build lexer")
-
-    if optimize and lextab:
-        try:
-            lexobj.readtab(lextab,ldict)
-            token = lexobj.token
-            input = lexobj.input
-            lexer = lexobj
-            return lexobj
-
-        except ImportError:
-            pass
-
-    # Dump some basic debugging information
-    if debug:
-        debuglog.info("lex: tokens = %r", linfo.tokens)
-        debuglog.info("lex: literals = %r", linfo.literals)
-        debuglog.info("lex: states = %r", linfo.stateinfo)
-
-    # Build a dictionary of valid token names
-    lexobj.lextokens = { }
-    for n in linfo.tokens:
-        lexobj.lextokens[n] = 1
-
-    # Get literals specification
-    if isinstance(linfo.literals,(list,tuple)):
-        lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
-    else:
-        lexobj.lexliterals = linfo.literals
-
-    # Get the stateinfo dictionary
-    stateinfo = linfo.stateinfo
-
-    regexs = { }
-    # Build the master regular expressions
-    for state in stateinfo:
-        regex_list = []
-
-        # Add rules defined by functions first
-        for fname, f in linfo.funcsym[state]:
-            line = func_code(f).co_firstlineno
-            file = func_code(f).co_filename
-            regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
-            if debug:
-                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)
-
-        # Now add all of the simple rules
-        for name,r in linfo.strsym[state]:
-            regex_list.append("(?P<%s>%s)" % (name,r))
-            if debug:
-                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)
-
-        regexs[state] = regex_list
-
-    # Build the master regular expressions
-
-    if debug:
-        debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")
-
-    for state in regexs:
-        lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
-        lexobj.lexstatere[state] = lexre
-        lexobj.lexstateretext[state] = re_text
-        lexobj.lexstaterenames[state] = re_names
-        if debug:
-            for i in range(len(re_text)):
-                debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i])
-
-    # For inclusive states, we need to add the regular expressions from the INITIAL state
-    for state,stype in stateinfo.items():
-        if state != "INITIAL" and stype == 'inclusive':
-            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
-            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
-            lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])
-
-    lexobj.lexstateinfo = stateinfo
-    lexobj.lexre = lexobj.lexstatere["INITIAL"]
-    lexobj.lexretext = lexobj.lexstateretext["INITIAL"]
-    lexobj.lexreflags = reflags
-
-    # Set up ignore variables
-    lexobj.lexstateignore = linfo.ignore
-    lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")
-
-    # Set up error functions
-    lexobj.lexstateerrorf = linfo.errorf
-    lexobj.lexerrorf = linfo.errorf.get("INITIAL",None)
-    if not lexobj.lexerrorf:
-        errorlog.warning("No t_error rule is defined")
-
-    # Check state information for ignore and error rules
-    for s,stype in stateinfo.items():
-        if stype == 'exclusive':
-            if not s in linfo.errorf:
-                errorlog.warning("No error rule is defined for exclusive state '%s'", s)
-            if not s in linfo.ignore and lexobj.lexignore:
-                errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
-        elif stype == 'inclusive':
-            if not s in linfo.errorf:
-                linfo.errorf[s] = linfo.errorf.get("INITIAL",None)
-            if not s in linfo.ignore:
-                linfo.ignore[s] = linfo.ignore.get("INITIAL","")
-
-    # Create global versions of the token() and input() functions
-    token = lexobj.token
-    input = lexobj.input
-    lexer = lexobj
-
-    # If in optimize mode, we write the lextab
-    if lextab and optimize:
-        lexobj.writetab(lextab,outputdir)
-
-    return lexobj
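-
-# A minimal sketch of a module that lex() can build a lexer from (token and
-# rule names here are hypothetical; only the t_ naming conventions above are
-# assumed):
-#
-#     import ply.lex as lex
-#
-#     tokens = ('NUMBER', 'PLUS')
-#
-#     t_PLUS   = r'\+'
-#     t_ignore = ' \t'
-#
-#     def t_NUMBER(t):
-#         r'\d+'
-#         t.value = int(t.value)
-#         return t
-#
-#     def t_error(t):
-#         t.lexer.skip(1)
-#
-#     lexer = lex.lex()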
-
-# -----------------------------------------------------------------------------
-# runmain()
-#
-# This runs the lexer as a main program
-# -----------------------------------------------------------------------------
-
-def runmain(lexer=None,data=None):
-    if not data:
-        try:
-            filename = sys.argv[1]
-            f = open(filename)
-            data = f.read()
-            f.close()
-        except IndexError:
-            sys.stdout.write("Reading from standard input (type EOF to end):\n")
-            data = sys.stdin.read()
-
-    if lexer:
-        _input = lexer.input
-    else:
-        _input = input
-    _input(data)
-    if lexer:
-        _token = lexer.token
-    else:
-        _token = token
-
-    while 1:
-        tok = _token()
-        if not tok: break
-        sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos))
-
-# -----------------------------------------------------------------------------
-# @TOKEN(regex)
-#
-# This decorator function can be used to set the regular expression of a rule
-# function when its docstring needs to be set in an alternative way
-# -----------------------------------------------------------------------------
-
-def TOKEN(r):
-    def set_doc(f):
-        if hasattr(r,"__call__"):
-            f.__doc__ = r.__doc__
-        else:
-            f.__doc__ = r
-        return f
-    return set_doc
-
-# Alternative spelling of the TOKEN decorator
-Token = TOKEN
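-
-# Example use of the decorator (the identifier pattern here is hypothetical):
-#
-#     identifier = r'[a-zA-Z_][a-zA-Z_0-9]*'
-#
-#     @TOKEN(identifier)
-#     def t_ID(t):
-#         return t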
-