third_party/google-endpoints/pyparsing.py - Issue 2666783008: Add google-endpoints to third_party/.

Side by Side Diff: third_party/google-endpoints/pyparsing.py

Issue 2666783008: Add google-endpoints to third_party/. (Closed)

Patch Set: Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/google-endpoints/pylru-1.0.9-py2.7.egg-info/top_level.txt ('k') | third_party/google-endpoints/pyparsing-2.1.10.dist-info/DESCRIPTION.rst » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 # module pyparsing.py

	2 #

	3 # Copyright (c) 2003-2016 Paul T. McGuire

	4 #

	5 # Permission is hereby granted, free of charge, to any person obtaining

	6 # a copy of this software and associated documentation files (the

	7 # "Software"), to deal in the Software without restriction, including

	8 # without limitation the rights to use, copy, modify, merge, publish,

	9 # distribute, sublicense, and/or sell copies of the Software, and to

	10 # permit persons to whom the Software is furnished to do so, subject to

	11 # the following conditions:

	12 #

	13 # The above copyright notice and this permission notice shall be

	14 # included in all copies or substantial portions of the Software.

	15 #

	16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

	17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

	18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

	19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

	20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

	21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

	22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

	23 #

	24

	25 __doc__ = \

	26 """

	27 pyparsing module - Classes and methods to define and execute parsing grammars

	28

	29 The pyparsing module is an alternative approach to creating and executing simple grammars,

	30 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you

	31 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module

	32 provides a library of classes that you use to construct the grammar directly in Python.

	33

	34 Here is a program to parse "Hello, World!" (or any greeting of the form

	35 C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements

	36 (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to

	37 L{Literal} expressions)::

	38

	39 from pyparsing import Word, alphas

	40

	41 # define grammar of a greeting

	42 greet = Word(alphas) + "," + Word(alphas) + "!"

	43

	44 hello = "Hello, World!"

	45 print (hello, "->", greet.parseString(hello))

	46

	47 The program outputs the following::

	48

	49 Hello, World! -> ['Hello', ',', 'World', '!']

	50

	51 The Python representation of the grammar is quite readable, owing to the self-explanatory

	52 class names, and the use of '+', '|' and '^' operators.

	53

	54 The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an

	55 object with named attributes.

	56

	57 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:

	58 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)

	59 - quoted strings

	60 - embedded comments

	61 """

	62

# Release identifier of this pyparsing distribution.
__version__ = "2.1.10"
# Timestamp string recorded for this release.
__versionTime__ = "07 Oct 2016 01:31 UTC"
# Author name and contact address.
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"

	66

	67 import string

	68 from weakref import ref as wkref

	69 import copy

	70 import sys

	71 import warnings

	72 import re

	73 import sre_constants

	74 import collections

	75 import pprint

	76 import traceback

	77 import types

	78 from datetime import datetime

	79

	80 try:

	81 from _thread import RLock

	82 except ImportError:

	83 from threading import RLock

	84

	85 try:

	86 from collections import OrderedDict as _OrderedDict

	87 except ImportError:

	88 try:

	89 from ordereddict import OrderedDict as _OrderedDict

	90 except ImportError:

	91 _OrderedDict = None

	92

	93 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__ ,__versionTime__ ) )

	94

	95 __all__ = [

	96 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'E ach', 'Empty',

	97 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart ', 'Literal',

	98 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',

	99 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression' , 'ParseFatalException',

	100 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'Recurs iveGrammarException',

	101 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConver ter',

	102 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',

	103 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment ', 'col',

	104 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'db lQuotedString',

	105 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnum s',

	106 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',

	107 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPrev iousLiteral',

	108 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence ', 'printables',

	109 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEn tity',

	110 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',

	111 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribu te',

	112 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'w ithClass',

	113 'CloseMatch', 'tokenMap', 'pyparsing_common',

	114 ]

	115

	116 system_version = tuple(sys.version_info)[:3]

	117 PY_3 = system_version[0] == 3

	118 if PY_3:

	119 _MAX_INT = sys.maxsize

	120 basestring = str

	121 unichr = chr

	122 _ustr = str

	123

	124 # build list of single arg builtins, that can be used as parse actions

	125 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

	126

	127 else:

	128 _MAX_INT = sys.maxint

	129 range = xrange

	130

	131 def _ustr(obj):

	132 """Drop-in replacement for str(obj) that tries to be Unicode friendly. I t first tries

	133 str(obj). If that fails with a UnicodeEncodeError, then it tries unic ode(obj). It

	134 then < returns the unicode object \| encodes it with the default encod ing \| ... >.

	135 """

	136 if isinstance(obj,unicode):

	137 return obj

	138

	139 try:

	140 # If this works, then _ustr(obj) has the same behaviour as str(obj), so

	141 # it won't break any existing code.

	142 return str(obj)

	143

	144 except UnicodeEncodeError:

	145 # Else encode it

	146 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefrepla ce')

	147 xmlcharref = Regex('&#\d+;')

	148 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:] )

	149 return xmlcharref.transformString(ret)

	150

	151 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions

	152 singleArgBuiltins = []

	153 import __builtin__

	154 for fname in "sum len sorted reversed list tuple set any all min max".split( ):

	155 try:

	156 singleArgBuiltins.append(getattr(__builtin__,fname))

	157 except AttributeError:

	158 continue

	159

	160 _generatorType = type((y for y in range(1)))

	161

	162 def _xml_escape(data):

	163 """Escape &, <, >, ", ', etc. in a string of data."""

	164

	165 # ampersand must be replaced first

	166 from_symbols = '&><"\''

	167 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())

	168 for from_,to_ in zip(from_symbols, to_symbols):

	169 data = data.replace(from_, to_)

	170 return data

	171

	172 class _Constants(object):

	173 pass

	174

	175 alphas = string.ascii_uppercase + string.ascii_lowercase

	176 nums = "0123456789"

	177 hexnums = nums + "ABCDEFabcdef"

	178 alphanums = alphas + nums

	179 _bslash = chr(92)

	180 printables = "".join(c for c in string.printable if c not in string.whitespace)

	181

	182 class ParseBaseException(Exception):

	183 """base exception class for all parsing runtime exceptions"""

	184 # Performance tuning: we construct a lot of these, so keep this

	185 # constructor as small and fast as possible

	186 def __init__( self, pstr, loc=0, msg=None, elem=None ):

	187 self.loc = loc

	188 if msg is None:

	189 self.msg = pstr

	190 self.pstr = ""

	191 else:

	192 self.msg = msg

	193 self.pstr = pstr

	194 self.parserElement = elem

	195 self.args = (pstr, loc, msg)

	196

	197 @classmethod

	198 def _from_exception(cls, pe):

	199 """

	200 internal factory method to simplify creating one type of ParseException

	201 from another - avoids having __init__ signature conflicts among subclass es

	202 """

	203 return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

	204

	205 def __getattr__( self, aname ):

	206 """supported attributes by name are:

	207 - lineno - returns the line number of the exception text

	208 - col - returns the column number of the exception text

	209 - line - returns the line containing the exception text

	210 """

	211 if( aname == "lineno" ):

	212 return lineno( self.loc, self.pstr )

	213 elif( aname in ("col", "column") ):

	214 return col( self.loc, self.pstr )

	215 elif( aname == "line" ):

	216 return line( self.loc, self.pstr )

	217 else:

	218 raise AttributeError(aname)

	219

	220 def __str__( self ):

	221 return "%s (at char %d), (line:%d, col:%d)" % \

	222 ( self.msg, self.loc, self.lineno, self.column )

	223 def __repr__( self ):

	224 return _ustr(self)

	225 def markInputline( self, markerString = ">!<" ):

	226 """Extracts the exception line from the input string, and marks

	227 the location of the exception with a special symbol.

	228 """

	229 line_str = self.line

	230 line_column = self.column - 1

	231 if markerString:

	232 line_str = "".join((line_str[:line_column],

	233 markerString, line_str[line_column:]))

	234 return line_str.strip()

	235 def __dir__(self):

	236 return "lineno col line".split() + dir(type(self))

	237

	238 class ParseException(ParseBaseException):

	239 """

	240 Exception thrown when parse expressions don't match class;

	241 supported attributes by name are:

	242 - lineno - returns the line number of the exception text

	243 - col - returns the column number of the exception text

	244 - line - returns the line containing the exception text

	245

	246 Example::

	247 try:

	248 Word(nums).setName("integer").parseString("ABC")

	249 except ParseException as pe:

	250 print(pe)

	251 print("column: {}".format(pe.col))

	252

	253 prints::

	254 Expected integer (at char 0), (line:1, col:1)

	255 column: 1

	256 """

	257 pass

	258

	259 class ParseFatalException(ParseBaseException):

	260 """user-throwable exception thrown when inconsistent parse content

	261 is found; stops all parsing immediately"""

	262 pass

	263

	264 class ParseSyntaxException(ParseFatalException):

	265 """just like L{ParseFatalException}, but thrown internally when an

	266 L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop

	267 immediately because an unbacktrackable syntax error has been found"""

	268 pass

	269

	270 #~ class ReparseException(ParseBaseException):

	271 #~ """Experimental class - parse actions can raise this exception to cause

	272 #~ pyparsing to reparse the input string:

	273 #~ - with a modified input string, and/or

	274 #~ - with a modified start location

	275 #~ Set the values of the ReparseException in the constructor, and raise the

	276 #~ exception in a parse action to cause pyparsing to use the new string/location.

	277 #~ Setting the values as None causes no change to be made.

	278 #~ """

	279 #~ def __init_( self, newstring, restartLoc ):

	280 #~ self.newParseText = newstring

	281 #~ self.reparseLoc = restartLoc

	282

	283 class RecursiveGrammarException(Exception):

	284 """exception thrown by L{ParserElement.validate} if the grammar could be imp roperly recursive"""

	285 def __init__( self, parseElementList ):

	286 self.parseElementTrace = parseElementList

	287

	288 def __str__( self ):

	289 return "RecursiveGrammarException: %s" % self.parseElementTrace

	290

	291 class _ParseResultsWithOffset(object):

	292 def __init__(self,p1,p2):

	293 self.tup = (p1,p2)

	294 def __getitem__(self,i):

	295 return self.tup[i]

	296 def __repr__(self):

	297 return repr(self.tup[0])

	298 def setOffset(self,i):

	299 self.tup = (self.tup[0],i)

	300

	301 class ParseResults(object):

	302 """

	303 Structured parse results, to provide multiple means of access to the parsed data:

	304 - as a list (C{len(results)})

	305 - by list index (C{results[0], results[1]}, etc.)

	306 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})

	307

	308 Example::

	309 integer = Word(nums)

	310 date_str = (integer.setResultsName("year") + '/'

	311 + integer.setResultsName("month") + '/'

	312 + integer.setResultsName("day"))

	313 # equivalent form:

	314 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

	315

	316 # parseString returns a ParseResults object

	317 result = date_str.parseString("1999/12/31")

	318

	319 def test(s, fn=repr):

	320 print("%s -> %s" % (s, fn(eval(s))))

	321 test("list(result)")

	322 test("result[0]")

	323 test("result['month']")

	324 test("result.day")

	325 test("'month' in result")

	326 test("'minutes' in result")

	327 test("result.dump()", str)

	328 prints::

	329 list(result) -> ['1999', '/', '12', '/', '31']

	330 result[0] -> '1999'

	331 result['month'] -> '12'

	332 result.day -> '31'

	333 'month' in result -> True

	334 'minutes' in result -> False

	335 result.dump() -> ['1999', '/', '12', '/', '31']

	336 - day: 31

	337 - month: 12

	338 - year: 1999

	339 """

	340 def __new__(cls, toklist=None, name=None, asList=True, modal=True ):

	341 if isinstance(toklist, cls):

	342 return toklist

	343 retobj = object.__new__(cls)

	344 retobj.__doinit = True

	345 return retobj

	346

	347 # Performance tuning: we construct a lot of these, so keep this

	348 # constructor as small and fast as possible

	349 def __init__( self, toklist=None, name=None, asList=True, modal=True, isinst ance=isinstance ):

	350 if self.__doinit:

	351 self.__doinit = False

	352 self.__name = None

	353 self.__parent = None

	354 self.__accumNames = {}

	355 self.__asList = asList

	356 self.__modal = modal

	357 if toklist is None:

	358 toklist = []

	359 if isinstance(toklist, list):

	360 self.__toklist = toklist[:]

	361 elif isinstance(toklist, _generatorType):

	362 self.__toklist = list(toklist)

	363 else:

	364 self.__toklist = [toklist]

	365 self.__tokdict = dict()

	366

	367 if name is not None and name:

	368 if not modal:

	369 self.__accumNames[name] = 0

	370 if isinstance(name,int):

	371 name = _ustr(name) # will always return a str, but use _ustr for consistency

	372 self.__name = name

	373 if not (isinstance(toklist, (type(None), basestring, list)) and tokl ist in (None,'',[])):

	374 if isinstance(toklist,basestring):

	375 toklist = [ toklist ]

	376 if asList:

	377 if isinstance(toklist,ParseResults):

	378 self[name] = _ParseResultsWithOffset(toklist.copy(),0)

	379 else:

	380 self[name] = _ParseResultsWithOffset(ParseResults(toklis t[0]),0)

	381 self[name].__name = name

	382 else:

	383 try:

	384 self[name] = toklist[0]

	385 except (KeyError,TypeError,IndexError):

	386 self[name] = toklist

	387

	388 def __getitem__( self, i ):

	389 if isinstance( i, (int,slice) ):

	390 return self.__toklist[i]

	391 else:

	392 if i not in self.__accumNames:

	393 return self.__tokdict[i][-1][0]

	394 else:

	395 return ParseResults([ v[0] for v in self.__tokdict[i] ])

	396

	397 def __setitem__( self, k, v, isinstance=isinstance ):

	398 if isinstance(v,_ParseResultsWithOffset):

	399 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]

	400 sub = v[0]

	401 elif isinstance(k,(int,slice)):

	402 self.__toklist[k] = v

	403 sub = v

	404 else:

	405 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWit hOffset(v,0)]

	406 sub = v

	407 if isinstance(sub,ParseResults):

	408 sub.__parent = wkref(self)

	409

	410 def __delitem__( self, i ):

	411 if isinstance(i,(int,slice)):

	412 mylen = len( self.__toklist )

	413 del self.__toklist[i]

	414

	415 # convert int to slice

	416 if isinstance(i, int):

	417 if i < 0:

	418 i += mylen

	419 i = slice(i, i+1)

	420 # get removed indices

	421 removed = list(range(*i.indices(mylen)))

	422 removed.reverse()

	423 # fixup indices in token dictionary

	424 for name,occurrences in self.__tokdict.items():

	425 for j in removed:

	426 for k, (value, position) in enumerate(occurrences):

	427 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))

	428 else:

	429 del self.__tokdict[i]

	430

	431 def __contains__( self, k ):

	432 return k in self.__tokdict

	433

	434 def __len__( self ): return len( self.__toklist )

	435 def __bool__(self): return ( not not self.__toklist )

	436 __nonzero__ = __bool__

	437 def __iter__( self ): return iter( self.__toklist )

	438 def __reversed__( self ): return iter( self.__toklist[::-1] )

	439 def _iterkeys( self ):

	440 if hasattr(self.__tokdict, "iterkeys"):

	441 return self.__tokdict.iterkeys()

	442 else:

	443 return iter(self.__tokdict)

	444

	445 def _itervalues( self ):

	446 return (self[k] for k in self._iterkeys())

	447

	448 def _iteritems( self ):

	449 return ((k, self[k]) for k in self._iterkeys())

	450

	451 if PY_3:

	452 keys = _iterkeys

	453 """Returns an iterator of all named result keys (Python 3.x only)."""

	454

	455 values = _itervalues

	456 """Returns an iterator of all named result values (Python 3.x only)."""

	457

	458 items = _iteritems

	459 """Returns an iterator of all named result key-value tuples (Python 3.x only)."""

	460

	461 else:

	462 iterkeys = _iterkeys

	463 """Returns an iterator of all named result keys (Python 2.x only)."""

	464

	465 itervalues = _itervalues

	466 """Returns an iterator of all named result values (Python 2.x only)."""

	467

	468 iteritems = _iteritems

	469 """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

	470

	471 def keys( self ):

	472 """Returns all named result keys (as a list in Python 2.x, as an ite rator in Python 3.x)."""

	473 return list(self.iterkeys())

	474

	475 def values( self ):

	476 """Returns all named result values (as a list in Python 2.x, as an i terator in Python 3.x)."""

	477 return list(self.itervalues())

	478

	479 def items( self ):

	480 """Returns all named result key-values (as a list of tuples in Pytho n 2.x, as an iterator in Python 3.x)."""

	481 return list(self.iteritems())

	482

	483 def haskeys( self ):

	484 """Since keys() returns an iterator, this method is helpful in bypassing

	485 code that looks for the existence of any defined results names."""

	486 return bool(self.__tokdict)

	487

	488 def pop( self, args, *kwargs):

	489 """

	490 Removes and returns item at specified index (default=C{last}).

	491 Supports both C{list} and C{dict} semantics for C{pop()}. If passed no

	492 argument or an integer argument, it will use C{list} semantics

	493 and pop tokens from the list of parsed tokens. If passed a

	494 non-integer argument (most likely a string), it will use C{dict}

	495 semantics and pop the corresponding value from any defined

	496 results names. A second default return value argument is

	497 supported, just as in C{dict.pop()}.

	498

	499 Example::

	500 def remove_first(tokens):

	501 tokens.pop(0)

	502 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '12 3', '321']

	503 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString ("0 123 321")) # -> ['123', '321']

	504

	505 label = Word(alphas)

	506 patt = label("LABEL") + OneOrMore(Word(nums))

	507 print(patt.parseString("AAB 123 321").dump())

	508

	509 # Use pop() in a parse action to remove named result (note that corr esponding value is not

	510 # removed from list form of results)

	511 def remove_LABEL(tokens):

	512 tokens.pop("LABEL")

	513 return tokens

	514 patt.addParseAction(remove_LABEL)

	515 print(patt.parseString("AAB 123 321").dump())

	516 prints::

	517 ['AAB', '123', '321']

	518 - LABEL: AAB

	519

	520 ['AAB', '123', '321']

	521 """

	522 if not args:

	523 args = [-1]

	524 for k,v in kwargs.items():

	525 if k == 'default':

	526 args = (args[0], v)

	527 else:

	528 raise TypeError("pop() got an unexpected keyword argument '%s'" % k)

	529 if (isinstance(args[0], int) or

	530 len(args) == 1 or

	531 args[0] in self):

	532 index = args[0]

	533 ret = self[index]

	534 del self[index]

	535 return ret

	536 else:

	537 defaultvalue = args[1]

	538 return defaultvalue

	539

	540 def get(self, key, defaultValue=None):

	541 """

	542 Returns named result matching the given key, or if there is no

	543 such name, then returns the given C{defaultValue} or C{None} if no

	544 C{defaultValue} is specified.

	545

	546 Similar to C{dict.get()}.

	547

	548 Example::

	549 integer = Word(nums)

	550 date_str = integer("year") + '/' + integer("month") + '/' + integer( "day")

	551

	552 result = date_str.parseString("1999/12/31")

	553 print(result.get("year")) # -> '1999'

	554 print(result.get("hour", "not specified")) # -> 'not specified'

	555 print(result.get("hour")) # -> None

	556 """

	557 if key in self:

	558 return self[key]

	559 else:

	560 return defaultValue

	561

	562 def insert( self, index, insStr ):

	563 """

	564 Inserts new element at location index in the list of parsed tokens.

	565

	566 Similar to C{list.insert()}.

	567

	568 Example::

	569 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '12 3', '321']

	570

	571 # use a parse action to insert the parse location in the front of th e parsed results

	572 def insert_locn(locn, tokens):

	573 tokens.insert(0, locn)

	574 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString( "0 123 321")) # -> [0, '0', '123', '321']

	575 """

	576 self.__toklist.insert(index, insStr)

	577 # fixup indices in token dictionary

	578 for name,occurrences in self.__tokdict.items():

	579 for k, (value, position) in enumerate(occurrences):

	580 occurrences[k] = _ParseResultsWithOffset(value, position + (posi tion > index))

	581

	582 def append( self, item ):

	583 """

	584 Add single element to end of ParseResults list of elements.

	585

	586 Example::

	587 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '12 3', '321']

	588

	589 # use a parse action to compute the sum of the parsed integers, and add it to the end

	590 def append_sum(tokens):

	591 tokens.append(sum(map(int, tokens)))

	592 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString(" 0 123 321")) # -> ['0', '123', '321', 444]

	593 """

	594 self.__toklist.append(item)

	595

	596 def extend( self, itemseq ):

	597 """

	598 Add sequence of elements to end of ParseResults list of elements.

	599

	600 Example::

	601 patt = OneOrMore(Word(alphas))

	602

	603 # use a parse action to append the reverse of the matched strings, t o make a palindrome

	604 def make_palindrome(tokens):

	605 tokens.extend(reversed([t[::-1] for t in tokens]))

	606 return ''.join(tokens)

	607 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'

	608 """

	609 if isinstance(itemseq, ParseResults):

	610 self += itemseq

	611 else:

	612 self.__toklist.extend(itemseq)

	613

	614 def clear( self ):

	615 """

	616 Clear all elements and results names.

	617 """

	618 del self.__toklist[:]

	619 self.__tokdict.clear()

	620

	621 def __getattr__( self, name ):

	622 try:

	623 return self[name]

	624 except KeyError:

	625 return ""

	626

	627 if name in self.__tokdict:

	628 if name not in self.__accumNames:

	629 return self.__tokdict[name][-1][0]

	630 else:

	631 return ParseResults([ v[0] for v in self.__tokdict[name] ])

	632 else:

	633 return ""

	634

	635 def __add__( self, other ):

	636 ret = self.copy()

	637 ret += other

	638 return ret

	639

	640 def __iadd__( self, other ):

	641 if other.__tokdict:

	642 offset = len(self.__toklist)

	643 addoffset = lambda a: offset if a<0 else a+offset

	644 otheritems = other.__tokdict.items()

	645 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )

	646 for (k,vlist) in otheritems for v in vlist]

	647 for k,v in otherdictitems:

	648 self[k] = v

	649 if isinstance(v[0],ParseResults):

	650 v[0].__parent = wkref(self)

	651

	652 self.__toklist += other.__toklist

	653 self.__accumNames.update( other.__accumNames )

	654 return self

	655

	656 def __radd__(self, other):

	657 if isinstance(other,int) and other == 0:

	658 # useful for merging many ParseResults using sum() builtin

	659 return self.copy()

	660 else:

	661 # this may raise a TypeError - so be it

	662 return other + self

	663

	664 def __repr__( self ):

	665 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

	666

	667 def __str__( self ):

	668 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr (i) for i in self.__toklist) + ']'

	669

	670 def _asStringList( self, sep='' ):

	671 out = []

	672 for item in self.__toklist:

	673 if out and sep:

	674 out.append(sep)

	675 if isinstance( item, ParseResults ):

	676 out += item._asStringList()

	677 else:

	678 out.append( _ustr(item) )

	679 return out

	680

	681 def asList( self ):

	682 """

	683 Returns the parse results as a nested list of matching tokens, all conve rted to strings.

	684

	685 Example::

	686 patt = OneOrMore(Word(alphas))

	687 result = patt.parseString("sldkj lsdkj sldkj")

	688 # even though the result prints in string-like form, it is actually a pyparsing ParseResults

	689 print(type(result), result) # -> <class 'pyparsing.ParseResults'> [' sldkj', 'lsdkj', 'sldkj']

	690

	691 # Use asList() to create an actual list

	692 result_list = result.asList()

	693 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']

	694 """

	695 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

	696

	697 def asDict( self ):

	698 """

	699 Returns the named parse results as a nested dictionary.

	700

	701 Example::

	702 integer = Word(nums)

	703 date_str = integer("year") + '/' + integer("month") + '/' + integer( "day")

	704

	705 result = date_str.parseString('12/31/1999')

	706 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResult s'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

	707

	708 result_dict = result.asDict()

	709 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'da y': '1999', 'year': '12', 'month': '31'}

	710

	711 # even though a ParseResults supports dict-like access, sometime you just need to have a dict

	712 import json

	713 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable

	714 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999 ", "year": "12"}

	715 """

	716 if PY_3:

	717 item_fn = self.items

	718 else:

	719 item_fn = self.iteritems

	720

	721 def toItem(obj):

	722 if isinstance(obj, ParseResults):

	723 if obj.haskeys():

	724 return obj.asDict()

	725 else:

	726 return [toItem(v) for v in obj]

	727 else:

	728 return obj

	729

	730 return dict((k,toItem(v)) for k,v in item_fn())

	731

	732 def copy( self ):

	733 """

	734 Returns a new copy of a C{ParseResults} object.

	735 """

	736 ret = ParseResults( self.__toklist )

	737 ret.__tokdict = self.__tokdict.copy()

	738 ret.__parent = self.__parent

	739 ret.__accumNames.update( self.__accumNames )

	740 ret.__name = self.__name

	741 return ret

	742

	743 def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=Tru e ):

	744 """

	745 (Deprecated) Returns the parse results as XML. Tags are created for toke ns and lists that have defined results names.

	746 """

	747 nl = "\n"

	748 out = []

	749 namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()

	750 for v in vlist)

	751 nextLevelIndent = indent + " "

	752

	753 # collapse out indents if formatting is not desired

	754 if not formatted:

	755 indent = ""

	756 nextLevelIndent = ""

	757 nl = ""

	758

	759 selfTag = None

	760 if doctag is not None:

	761 selfTag = doctag

	762 else:

	763 if self.__name:

	764 selfTag = self.__name

	765

	766 if not selfTag:

	767 if namedItemsOnly:

	768 return ""

	769 else:

	770 selfTag = "ITEM"

	771

	772 out += [ nl, indent, "<", selfTag, ">" ]

	773

	774 for i,res in enumerate(self.__toklist):

	775 if isinstance(res,ParseResults):

	776 if i in namedItems:

	777 out += [ res.asXML(namedItems[i],

	778 namedItemsOnly and doctag is None,

	779 nextLevelIndent,

	780 formatted)]

	781 else:

	782 out += [ res.asXML(None,

	783 namedItemsOnly and doctag is None,

	784 nextLevelIndent,

	785 formatted)]

	786 else:

	787 # individual token, see if there is a name for it

	788 resTag = None

	789 if i in namedItems:

	790 resTag = namedItems[i]

	791 if not resTag:

	792 if namedItemsOnly:

	793 continue

	794 else:

	795 resTag = "ITEM"

	796 xmlBodyText = _xml_escape(_ustr(res))

	797 out += [ nl, nextLevelIndent, "<", resTag, ">",

	798 xmlBodyText,

	799 "</", resTag, ">" ]

	800

	801 out += [ nl, indent, "</", selfTag, ">" ]

	802 return "".join(out)

	803

	804 def __lookup(self,sub):

	805 for k,vlist in self.__tokdict.items():

	806 for v,loc in vlist:

	807 if sub is v:

	808 return k

	809 return None

	810

	811 def getName(self):

	812 """

	813 Returns the results name for this token expression. Useful when several

	814 different expressions might match at a particular location.

	815

	816 Example::

	817 integer = Word(nums)

	818 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")

	819 house_number_expr = Suppress('#') + Word(nums, alphanums)

	820 user_data = (Group(house_number_expr)("house_number")

	821 \| Group(ssn_expr)("ssn")

	822 \| Group(integer)("age"))

	823 user_info = OneOrMore(user_data)

	824

	825 result = user_info.parseString("22 111-22-3333 #221B")

	826 for item in result:

	827 print(item.getName(), ':', item[0])

	828 prints::

	829 age : 22

	830 ssn : 111-22-3333

	831 house_number : 221B

	832 """

	833 if self.__name:

	834 return self.__name

	835 elif self.__parent:

	836 par = self.__parent()

	837 if par:

	838 return par.__lookup(self)

	839 else:

	840 return None

	841 elif (len(self) == 1 and

	842 len(self.__tokdict) == 1 and

	843 next(iter(self.__tokdict.values()))[0][1] in (0,-1)):

	844 return next(iter(self.__tokdict.keys()))

	845 else:

	846 return None

	847

	848 def dump(self, indent='', depth=0, full=True):

	849 """

	850 Diagnostic method for listing out the contents of a C{ParseResults}.

	851 Accepts an optional C{indent} argument so that this string can be embedd ed

	852 in a nested display of other data.

	853

	854 Example::

	855 integer = Word(nums)

	856 date_str = integer("year") + '/' + integer("month") + '/' + integer( "day")

	857

	858 result = date_str.parseString('12/31/1999')

	859 print(result.dump())

	860 prints::

	861 ['12', '/', '31', '/', '1999']

	862 - day: 1999

	863 - month: 31

	864 - year: 12

	865 """

	866 out = []

	867 NL = '\n'

	868 out.append( indent+_ustr(self.asList()) )

	869 if full:

	870 if self.haskeys():

	871 items = sorted((str(k), v) for k,v in self.items())

	872 for k,v in items:

	873 if out:

	874 out.append(NL)

	875 out.append( "%s%s- %s: " % (indent,(' '*depth), k) )

	876 if isinstance(v,ParseResults):

	877 if v:

	878 out.append( v.dump(indent,depth+1) )

	879 else:

	880 out.append(_ustr(v))

	881 else:

	882 out.append(repr(v))

	883 elif any(isinstance(vv,ParseResults) for vv in self):

	884 v = self

	885 for i,vv in enumerate(v):

	886 if isinstance(vv,ParseResults):

	887 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '(depth) ),i,indent,(' '(depth+1)),vv.dump(indent,depth+1) ))

	888 else:

	889 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '(depth) ),i,indent,(' '(depth+1)),_ustr(vv)))

	890

	891 return "".join(out)

	892

	893 def pprint(self, args, *kwargs):

	894 """

	895 Pretty-printer for parsed results as a list, using the C{pprint} module.

	896 Accepts additional positional or keyword args as defined for the

	897 C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html #pprint.pprint})

	898

	899 Example::

	900 ident = Word(alphas, alphanums)

	901 num = Word(nums)

	902 func = Forward()

	903 term = ident \| num \| Group('(' + func + ')')

	904 func <<= ident + Group(Optional(delimitedList(term)))

	905 result = func.parseString("fna a,b,(fnb c,d,200),100")

	906 result.pprint(width=40)

	907 prints::

	908 ['fna',

	909 ['a',

	910 'b',

	911 ['(', 'fnb', ['c', 'd', '200'], ')'],

	912 '100']]

	913 """

	914 pprint.pprint(self.asList(), args, *kwargs)

	915

	916 # add support for pickle protocol

	917 def __getstate__(self):

	918 return ( self.__toklist,

	919 ( self.__tokdict.copy(),

	920 self.__parent is not None and self.__parent() or None,

	921 self.__accumNames,

	922 self.__name ) )

	923

	924 def __setstate__(self,state):

	925 self.__toklist = state[0]

	926 (self.__tokdict,

	927 par,

	928 inAccumNames,

	929 self.__name) = state[1]

	930 self.__accumNames = {}

	931 self.__accumNames.update(inAccumNames)

	932 if par is not None:

	933 self.__parent = wkref(par)

	934 else:

	935 self.__parent = None

	936

	937 def __getnewargs__(self):

	938 return self.__toklist, self.__name, self.__asList, self.__modal

	939

	940 def __dir__(self):

	941 return (dir(type(self)) + list(self.keys()))

	942

	943 collections.MutableMapping.register(ParseResults)

	944

	945 def col (loc,strg):

	946 """Returns current column within a string, counting newlines as line separat ors.

	947 The first column is number 1.

	948

	949 Note: the default parsing behavior is to expand tabs in the input string

	950 before starting the parsing process. See L{I{ParserElement.parseString}<Pars erElement.parseString>} for more information

	951 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a

	952 consistent view of the parsed string, the parse location, and line and column

	953 positions within the parsed string.

	954 """

	955 s = strg

	956 return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, lo c)

	957

	958 def lineno(loc,strg):

	959 """Returns current line number within a string, counting newlines as line se parators.

	960 The first line is number 1.

	961

	962 Note: the default parsing behavior is to expand tabs in the input string

	963 before starting the parsing process. See L{I{ParserElement.parseString}<Pars erElement.parseString>} for more information

	964 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a

	965 consistent view of the parsed string, the parse location, and line and column

	966 positions within the parsed string.

	967 """

	968 return strg.count("\n",0,loc) + 1

	969

	970 def line( loc, strg ):

	971 """Returns the line of text containing loc within a string, counting newline s as line separators.

	972 """

	973 lastCR = strg.rfind("\n", 0, loc)

	974 nextCR = strg.find("\n", loc)

	975 if nextCR >= 0:

	976 return strg[lastCR+1:nextCR]

	977 else:

	978 return strg[lastCR+1:]

	979

	980 def _defaultStartDebugAction( instring, loc, expr ):

	981 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lin eno(loc,instring), col(loc,instring) )))

	982

	983 def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):

	984 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))

	985

	986 def _defaultExceptionDebugAction( instring, loc, expr, exc ):

	987 print ("Exception raised:" + _ustr(exc))

	988

	989 def nullDebugAction(*args):

	990 """'Do-nothing' debug action, to suppress debugging output during parsing."" "

	991 pass

	992

	993 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs

	994 #~ 'decorator to trim function calls to match the arity of the target'

	995 #~ def _trim_arity(func, maxargs=3):

	996 #~ if func in singleArgBuiltins:

	997 #~ return lambda s,l,t: func(t)

	998 #~ limit = 0

	999 #~ foundArity = False

	1000 #~ def wrapper(*args):

	1001 #~ nonlocal limit,foundArity

	1002 #~ while 1:

	1003 #~ try:

	1004 #~ ret = func(*args[limit:])

	1005 #~ foundArity = True

	1006 #~ return ret

	1007 #~ except TypeError:

	1008 #~ if limit == maxargs or foundArity:

	1009 #~ raise

	1010 #~ limit += 1

	1011 #~ continue

	1012 #~ return wrapper

	1013

	1014 # this version is Python 2.x-3.x cross-compatible

	1015 'decorator to trim function calls to match the arity of the target'

	1016 def _trim_arity(func, maxargs=2):

	1017 if func in singleArgBuiltins:

	1018 return lambda s,l,t: func(t)

	1019 limit = [0]

	1020 foundArity = [False]

	1021

	1022 # traceback return data structure changed in Py3.5 - normalize back to plain tuples

	1023 if system_version[:2] >= (3,5):

	1024 def extract_stack(limit=0):

	1025 # special handling for Python 3.5.0 - extra deep call stack by 1

	1026 offset = -3 if system_version == (3,5,0) else -2

	1027 frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offse t]

	1028 return [(frame_summary.filename, frame_summary.lineno)]

	1029 def extract_tb(tb, limit=0):

	1030 frames = traceback.extract_tb(tb, limit=limit)

	1031 frame_summary = frames[-1]

	1032 return [(frame_summary.filename, frame_summary.lineno)]

	1033 else:

	1034 extract_stack = traceback.extract_stack

	1035 extract_tb = traceback.extract_tb

	1036

	1037 # synthesize what would be returned by traceback.extract_stack at the call t o

	1038 # user's parse action 'func', so that we don't incur call penalty at parse t ime

	1039

	1040 LINE_DIFF = 6

	1041 # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT L INE AND

	1042 # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!

	1043 this_line = extract_stack(limit=2)[-1]

	1044 pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

	1045

	1046 def wrapper(*args):

	1047 while 1:

	1048 try:

	1049 ret = func(*args[limit[0]:])

	1050 foundArity[0] = True

	1051 return ret

	1052 except TypeError:

	1053 # re-raise TypeErrors if they did not come from our arity testin g

	1054 if foundArity[0]:

	1055 raise

	1056 else:

	1057 try:

	1058 tb = sys.exc_info()[-1]

	1059 if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_s ynth:

	1060 raise

	1061 finally:

	1062 del tb

	1063

	1064 if limit[0] <= maxargs:

	1065 limit[0] += 1

	1066 continue

	1067 raise

	1068

	1069 # copy func name to wrapper for sensible debug output

	1070 func_name = "<parse action>"

	1071 try:

	1072 func_name = getattr(func, '__name__',

	1073 getattr(func, '__class__').__name__)

	1074 except Exception:

	1075 func_name = str(func)

	1076 wrapper.__name__ = func_name

	1077

	1078 return wrapper

	1079

	1080 class ParserElement(object):

	1081 """Abstract base level parser element class."""

	1082 DEFAULT_WHITE_CHARS = " \n\t\r"

	1083 verbose_stacktrace = False

	1084

	1085 @staticmethod

	1086 def setDefaultWhitespaceChars( chars ):

	1087 r"""

	1088 Overrides the default whitespace chars

	1089

	1090 Example::

	1091 # default whitespace chars are space, <TAB> and newline

	1092 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc' , 'def', 'ghi', 'jkl']

	1093

	1094 # change to just treat newline as significant

	1095 ParserElement.setDefaultWhitespaceChars(" \t")

	1096 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc' , 'def']

	1097 """

	1098 ParserElement.DEFAULT_WHITE_CHARS = chars

	1099

	1100 @staticmethod

	1101 def inlineLiteralsUsing(cls):

	1102 """

	1103 Set class to be used for inclusion of string literals into a parser.

	1104

	1105 Example::

	1106 # default literal class used is Literal

	1107 integer = Word(nums)

	1108 date_str = integer("year") + '/' + integer("month") + '/' + integer( "day")

	1109

	1110 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '3 1']

	1111

	1112

	1113 # change to Suppress

	1114 ParserElement.inlineLiteralsUsing(Suppress)

	1115 date_str = integer("year") + '/' + integer("month") + '/' + integer( "day")

	1116

	1117 date_str.parseString("1999/12/31") # -> ['1999', '12', '31']

	1118 """

	1119 ParserElement._literalStringClass = cls

	1120

	1121 def __init__( self, savelist=False ):

	1122 self.parseAction = list()

	1123 self.failAction = None

	1124 #~ self.name = "<unknown>" # don't define self.name, let subclasses try /except upcall

	1125 self.strRepr = None

	1126 self.resultsName = None

	1127 self.saveAsList = savelist

	1128 self.skipWhitespace = True

	1129 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS

	1130 self.copyDefaultWhiteChars = True

	1131 self.mayReturnEmpty = False # used when checking for left-recursion

	1132 self.keepTabs = False

	1133 self.ignoreExprs = list()

	1134 self.debug = False

	1135 self.streamlined = False

	1136 self.mayIndexError = True # used to optimize exception handling for subc lasses that don't advance parse index

	1137 self.errmsg = ""

	1138 self.modalResults = True # used to mark results names as modal (report o nly last) or cumulative (list all)

	1139 self.debugActions = ( None, None, None ) #custom debug actions

	1140 self.re = None

	1141 self.callPreparse = True # used to avoid redundant calls to preParse

	1142 self.callDuringTry = False

	1143

	1144 def copy( self ):

	1145 """

	1146 Make a copy of this C{ParserElement}. Useful for defining different par se actions

	1147 for the same parsing pattern, using copies of the original parse element .

	1148

	1149 Example::

	1150 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))

	1151 integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")

	1152 integerM = integer.copy().addParseAction(lambda toks: toks[0]10241 024) + Suppress("M")

	1153

	1154 print(OneOrMore(integerK \| integerM \| integer).parseString("5K 100 6 40K 256M"))

	1155 prints::

	1156 [5120, 100, 655360, 268435456]

	1157 Equivalent form of C{expr.copy()} is just C{expr()}::

	1158 integerM = integer().addParseAction(lambda toks: toks[0]10241024) + Suppress("M")

	1159 """

	1160 cpy = copy.copy( self )

	1161 cpy.parseAction = self.parseAction[:]

	1162 cpy.ignoreExprs = self.ignoreExprs[:]

	1163 if self.copyDefaultWhiteChars:

	1164 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS

	1165 return cpy

	1166

	1167 def setName( self, name ):

	1168 """

	1169 Define name for this expression, makes debugging and exception messages clearer.

	1170

	1171 Example::

	1172 Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)

	1173 Word(nums).setName("integer").parseString("ABC") # -> Exception: Ex pected integer (at char 0), (line:1, col:1)

	1174 """

	1175 self.name = name

	1176 self.errmsg = "Expected " + self.name

	1177 if hasattr(self,"exception"):

	1178 self.exception.msg = self.errmsg

	1179 return self

	1180

	1181 def setResultsName( self, name, listAllMatches=False ):

	1182 """

	1183 Define name for referencing matching tokens as a nested attribute

	1184 of the returned parse results.

	1185 NOTE: this returns a copy of the original C{ParserElement} object;

	1186 this is so that the client can define a basic element, such as an

	1187 integer, and reference it in multiple places with different names.

	1188

	1189 You can also set results names using the abbreviated syntax,

	1190 C{expr("name")} in place of C{expr.setResultsName("name")} -

	1191 see L{I{__call__}<__call__>}.

	1192

	1193 Example::

	1194 date_str = (integer.setResultsName("year") + '/'

	1195 + integer.setResultsName("month") + '/'

	1196 + integer.setResultsName("day"))

	1197

	1198 # equivalent form:

	1199 date_str = integer("year") + '/' + integer("month") + '/' + integer( "day")

	1200 """

	1201 newself = self.copy()

	1202 if name.endswith("*"):

	1203 name = name[:-1]

	1204 listAllMatches=True

	1205 newself.resultsName = name

	1206 newself.modalResults = not listAllMatches

	1207 return newself

	1208

	1209 def setBreak(self,breakFlag = True):

	1210 """Method to invoke the Python pdb debugger when this element is

	1211 about to be parsed. Set C{breakFlag} to True to enable, False to

	1212 disable.

	1213 """

	1214 if breakFlag:

	1215 _parseMethod = self._parse

	1216 def breaker(instring, loc, doActions=True, callPreParse=True):

	1217 import pdb

	1218 pdb.set_trace()

	1219 return _parseMethod( instring, loc, doActions, callPreParse )

	1220 breaker._originalParseMethod = _parseMethod

	1221 self._parse = breaker

	1222 else:

	1223 if hasattr(self._parse,"_originalParseMethod"):

	1224 self._parse = self._parse._originalParseMethod

	1225 return self

	1226

	1227 def setParseAction( self, fns, *kwargs ):

	1228 """

	1229 Define action to perform when successfully matching parse element defini tion.

	1230 Parse action fn is a callable method with 0-3 arguments, called as C{fn( s,loc,toks)},

	1231 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:

	1232 - s = the original string being parsed (see note below)

	1233 - loc = the location of the matching substring

	1234 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object

	1235 If the functions in fns modify the tokens, they can return them as the r eturn

	1236 value from fn, and the modified list of tokens will replace the original .

	1237 Otherwise, fn does not need to return any value.

	1238

	1239 Optional keyword arguments:

	1240 - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

	1241

	1242 Note: the default parsing behavior is to expand tabs in the input string

	1243 before starting the parsing process. See L{I{parseString}<parseString>} for more information

	1244 on parsing strings containing C{<TAB>}s, and suggested methods to mainta in a

	1245 consistent view of the parsed string, the parse location, and line and c olumn

	1246 positions within the parsed string.

	1247

	1248 Example::

	1249 integer = Word(nums)

	1250 date_str = integer + '/' + integer + '/' + integer

	1251

	1252 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '3 1']

	1253

	1254 # use parse action to convert to ints at parse time

	1255 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))

	1256 date_str = integer + '/' + integer + '/' + integer

	1257

	1258 # note that integer fields are now ints, not strings

	1259 date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]

	1260 """

	1261 self.parseAction = list(map(_trim_arity, list(fns)))

	1262 self.callDuringTry = kwargs.get("callDuringTry", False)

	1263 return self

	1264

	1265 def addParseAction( self, fns, *kwargs ):

	1266 """

	1267 Add parse action to expression's list of parse actions. See L{I{setParse Action}<setParseAction>}.

	1268

	1269 See examples in L{I{copy}<copy>}.

	1270 """

	1271 self.parseAction += list(map(_trim_arity, list(fns)))

	1272 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", F alse)

	1273 return self

	1274

	1275 def addCondition(self, fns, *kwargs):

	1276 """Add a boolean predicate function to expression's list of parse action s. See

	1277 L{I{setParseAction}<setParseAction>} for function call signatures. Unlik e C{setParseAction},

	1278 functions passed to C{addCondition} need to return boolean success/fail of the condition.

	1279

	1280 Optional keyword arguments:

	1281 - message = define a custom message to be used in the raised exception

	1282 - fatal = if True, will raise ParseFatalException to stop parsing imm ediately; otherwise will raise ParseException

	1283

	1284 Example::

	1285 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))

	1286 year_int = integer.copy()

	1287 year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only su pport years 2000 and later")

	1288 date_str = year_int + '/' + integer + '/' + integer

	1289

	1290 result = date_str.parseString("1999/12/31") # -> Exception: Only su pport years 2000 and later (at char 0), (line:1, col:1)

	1291 """

	1292 msg = kwargs.get("message", "failed user-defined condition")

	1293 exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseE xception

	1294 for fn in fns:

	1295 def pa(s,l,t):

	1296 if not bool(_trim_arity(fn)(s,l,t)):

	1297 raise exc_type(s,l,msg)

	1298 self.parseAction.append(pa)

	1299 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", F alse)

	1300 return self

	1301

	1302 def setFailAction( self, fn ):

	1303 """Define action to perform if parsing fails at this expression.

	1304 Fail acton fn is a callable function that takes the arguments

	1305 C{fn(s,loc,expr,err)} where:

	1306 - s = string being parsed

	1307 - loc = location where expression match was attempted and failed

	1308 - expr = the parse expression that failed

	1309 - err = the exception thrown

	1310 The function returns no value. It may throw C{L{ParseFatalException} }

	1311 if it is desired to stop parsing immediately."""

	1312 self.failAction = fn

	1313 return self

	1314

	1315 def _skipIgnorables( self, instring, loc ):

	1316 exprsFound = True

	1317 while exprsFound:

	1318 exprsFound = False

	1319 for e in self.ignoreExprs:

	1320 try:

	1321 while 1:

	1322 loc,dummy = e._parse( instring, loc )

	1323 exprsFound = True

	1324 except ParseException:

	1325 pass

	1326 return loc

	1327

	1328 def preParse( self, instring, loc ):

	1329 if self.ignoreExprs:

	1330 loc = self._skipIgnorables( instring, loc )

	1331

	1332 if self.skipWhitespace:

	1333 wt = self.whiteChars

	1334 instrlen = len(instring)

	1335 while loc < instrlen and instring[loc] in wt:

	1336 loc += 1

	1337

	1338 return loc

	1339

	1340 def parseImpl( self, instring, loc, doActions=True ):

	1341 return loc, []

	1342

	1343 def postParse( self, instring, loc, tokenlist ):

	1344 return tokenlist

	1345

	1346 #~ @profile

	1347 def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):

	1348 debugging = ( self.debug ) #and doActions )

	1349

	1350 if debugging or self.failAction:

	1351 #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instrin g), col(loc,instring) ))

	1352 if (self.debugActions[0] ):

	1353 self.debugActions[0]( instring, loc, self )

	1354 if callPreParse and self.callPreparse:

	1355 preloc = self.preParse( instring, loc )

	1356 else:

	1357 preloc = loc

	1358 tokensStart = preloc

	1359 try:

	1360 try:

	1361 loc,tokens = self.parseImpl( instring, preloc, doActions )

	1362 except IndexError:

	1363 raise ParseException( instring, len(instring), self.errmsg, self )

	1364 except ParseBaseException as err:

	1365 #~ print ("Exception raised:", err)

	1366 if self.debugActions[2]:

	1367 self.debugActions[2]( instring, tokensStart, self, err )

	1368 if self.failAction:

	1369 self.failAction( instring, tokensStart, self, err )

	1370 raise

	1371 else:

	1372 if callPreParse and self.callPreparse:

	1373 preloc = self.preParse( instring, loc )

	1374 else:

	1375 preloc = loc

	1376 tokensStart = preloc

	1377 if self.mayIndexError or loc >= len(instring):

	1378 try:

	1379 loc,tokens = self.parseImpl( instring, preloc, doActions )

	1380 except IndexError:

	1381 raise ParseException( instring, len(instring), self.errmsg, self )

	1382 else:

	1383 loc,tokens = self.parseImpl( instring, preloc, doActions )

	1384

	1385 tokens = self.postParse( instring, loc, tokens )

	1386

	1387 retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsLi st, modal=self.modalResults )

	1388 if self.parseAction and (doActions or self.callDuringTry):

	1389 if debugging:

	1390 try:

	1391 for fn in self.parseAction:

	1392 tokens = fn( instring, tokensStart, retTokens )

	1393 if tokens is not None:

	1394 retTokens = ParseResults( tokens,

	1395 self.resultsName,

	1396 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),

	1397 modal=self.modalResults )

	1398 except ParseBaseException as err:

	1399 #~ print "Exception raised in user parse action:", err

	1400 if (self.debugActions[2] ):

	1401 self.debugActions[2]( instring, tokensStart, self, err )

	1402 raise

	1403 else:

	1404 for fn in self.parseAction:

	1405 tokens = fn( instring, tokensStart, retTokens )

	1406 if tokens is not None:

	1407 retTokens = ParseResults( tokens,

	1408 self.resultsName,

	1409 asList=self.saveAsList and isi nstance(tokens,(ParseResults,list)),

	1410 modal=self.modalResults )

	1411

	1412 if debugging:

	1413 #~ print ("Matched",self,"->",retTokens.asList())

	1414 if (self.debugActions[1] ):

	1415 self.debugActions[1]( instring, tokensStart, loc, self, retToken s )

	1416

	1417 return loc, retTokens

	1418

	1419 def tryParse( self, instring, loc ):

	1420 try:

	1421 return self._parse( instring, loc, doActions=False )[0]

	1422 except ParseFatalException:

	1423 raise ParseException( instring, loc, self.errmsg, self)

	1424

	1425 def canParseNext(self, instring, loc):

	1426 try:

	1427 self.tryParse(instring, loc)

	1428 except (ParseException, IndexError):

	1429 return False

	1430 else:

	1431 return True

	1432

	1433 class _UnboundedCache(object):

	1434 def __init__(self):

	1435 cache = {}

	1436 self.not_in_cache = not_in_cache = object()

	1437

	1438 def get(self, key):

	1439 return cache.get(key, not_in_cache)

	1440

	1441 def set(self, key, value):

	1442 cache[key] = value

	1443

	1444 def clear(self):

	1445 cache.clear()

	1446

	1447 self.get = types.MethodType(get, self)

	1448 self.set = types.MethodType(set, self)

	1449 self.clear = types.MethodType(clear, self)

	1450

	1451 if _OrderedDict is not None:

	1452 class _FifoCache(object):

	1453 def __init__(self, size):

	1454 self.not_in_cache = not_in_cache = object()

	1455

	1456 cache = _OrderedDict()

	1457

	1458 def get(self, key):

	1459 return cache.get(key, not_in_cache)

	1460

	1461 def set(self, key, value):

	1462 cache[key] = value

	1463 if len(cache) > size:

	1464 cache.popitem(False)

	1465

	1466 def clear(self):

	1467 cache.clear()

	1468

	1469 self.get = types.MethodType(get, self)

	1470 self.set = types.MethodType(set, self)

	1471 self.clear = types.MethodType(clear, self)

	1472

	1473 else:

	1474 class _FifoCache(object):

	1475 def __init__(self, size):

	1476 self.not_in_cache = not_in_cache = object()

	1477

	1478 cache = {}

	1479 key_fifo = collections.deque([], size)

	1480

	1481 def get(self, key):

	1482 return cache.get(key, not_in_cache)

	1483

	1484 def set(self, key, value):

	1485 cache[key] = value

	1486 if len(cache) > size:

	1487 cache.pop(key_fifo.popleft(), None)

	1488 key_fifo.append(key)

	1489

	1490 def clear(self):

	1491 cache.clear()

	1492 key_fifo.clear()

	1493

	1494 self.get = types.MethodType(get, self)

	1495 self.set = types.MethodType(set, self)

	1496 self.clear = types.MethodType(clear, self)

	1497

    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
    # lock held by _parseCache while it reads or updates the cache
    packrat_cache_lock = RLock()
    # counters updated by _parseCache: index 0 = cache hits, index 1 = cache misses
    packrat_cache_stats = [0, 0]

	1502

	1503 # this method gets repeatedly called during backtracking with the same argum ents -

	1504 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression

	1505 def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):

	1506 HIT, MISS = 0, 1

	1507 lookup = (self, instring, loc, callPreParse, doActions)

	1508 with ParserElement.packrat_cache_lock:

	1509 cache = ParserElement.packrat_cache

	1510 value = cache.get(lookup)

	1511 if value is cache.not_in_cache:

	1512 ParserElement.packrat_cache_stats[MISS] += 1

	1513 try:

	1514 value = self._parseNoCache(instring, loc, doActions, callPre Parse)

	1515 except ParseBaseException as pe:

	1516 # cache a copy of the exception, without the traceback

	1517 cache.set(lookup, pe.__class__(*pe.args))

	1518 raise

	1519 else:

	1520 cache.set(lookup, (value[0], value[1].copy()))

	1521 return value

	1522 else:

	1523 ParserElement.packrat_cache_stats[HIT] += 1

	1524 if isinstance(value, Exception):

	1525 raise value

	1526 return (value[0], value[1].copy())

	1527

	1528 _parse = _parseNoCache

	1529

	1530 @staticmethod

	1531 def resetCache():

	1532 ParserElement.packrat_cache.clear()

	1533 ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_c ache_stats)

	1534

	1535 _packratEnabled = False

	1536 @staticmethod

	1537 def enablePackrat(cache_size_limit=128):

	1538 """Enables "packrat" parsing, which adds memoizing to the parsing logic.

	1539 Repeated parse attempts at the same string location (which happens

	1540 often in many complex grammars) can immediately return a cached value ,

	1541 instead of re-executing parsing/validating code. Memoizing is done o f

	1542 both valid results and parsing exceptions.

	1543

	1544 Parameters:

	1545 - cache_size_limit - (default=C{128}) - if an integer value is provi ded

	1546 will limit the size of the packrat cache; if None is passed, then

	1547 the cache size will be unbounded; if 0 is passed, the cache will

	1548 be effectively disabled.

	1549

	1550 This speedup may break existing programs that use parse actions that

	1551 have side-effects. For this reason, packrat parsing is disabled when

	1552 you first import pyparsing. To activate the packrat feature, your

	1553 program must call the class method C{ParserElement.enablePackrat()}. If

	1554 your program uses C{psyco} to "compile as you go", you must call

	1555 C{enablePackrat} before calling C{psyco.full()}. If you do not do th is,

	1556 Python will crash. For best results, call C{enablePackrat()} immedia tely

	1557 after importing pyparsing.

	1558

	1559 Example::

	1560 import pyparsing

	1561 pyparsing.ParserElement.enablePackrat()

	1562 """

	1563 if not ParserElement._packratEnabled:

	1564 ParserElement._packratEnabled = True

	1565 if cache_size_limit is None:

	1566 ParserElement.packrat_cache = ParserElement._UnboundedCache()

	1567 else:

	1568 ParserElement.packrat_cache = ParserElement._FifoCache(cache_siz e_limit)

	1569 ParserElement._parse = ParserElement._parseCache

	1570

	1571 def parseString( self, instring, parseAll=False ):

	1572 """

	1573 Execute the parse expression with the given string.

	1574 This is the main interface to the client code, once the complete

	1575 expression has been built.

	1576

	1577 If you want the grammar to require that the entire input string be

	1578 successfully parsed, then set C{parseAll} to True (equivalent to ending

	1579 the grammar with C{L{StringEnd()}}).

	1580

	1581 Note: C{parseString} implicitly calls C{expandtabs()} on the input strin g,

	1582 in order to report proper column numbers in parse actions.

	1583 If the input string contains tabs and

	1584 the grammar uses parse actions that use the C{loc} argument to index int o the

	1585 string being parsed, you can ensure you have a consistent view of the in put

	1586 string by:

	1587 - calling C{parseWithTabs} on your grammar before calling C{parseString }

	1588 (see L{I{parseWithTabs}<parseWithTabs>})

	1589 - define your parse action using the full C{(s,loc,toks)} signature, an d

	1590 reference the input string using the parse action's C{s} argument

	1591 - explictly expand the tabs in your input string before calling

	1592 C{parseString}

	1593

	1594 Example::

	1595 Word('a').parseString('aaaaabaaa') # -> ['aaaaa']

	1596 Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: E xpected end of text

	1597 """

	1598 ParserElement.resetCache()

	1599 if not self.streamlined:

	1600 self.streamline()

	1601 #~ self.saveAsList = True

	1602 for e in self.ignoreExprs:

	1603 e.streamline()

	1604 if not self.keepTabs:

	1605 instring = instring.expandtabs()

	1606 try:

	1607 loc, tokens = self._parse( instring, 0 )

	1608 if parseAll:

	1609 loc = self.preParse( instring, loc )

	1610 se = Empty() + StringEnd()

	1611 se._parse( instring, loc )

	1612 except ParseBaseException as exc:

	1613 if ParserElement.verbose_stacktrace:

	1614 raise

	1615 else:

	1616 # catch and re-raise exception from here, clears out pyparsing i nternal stack trace

	1617 raise exc

	1618 else:

	1619 return tokens

	1620

	1621 def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):

	1622 """

	1623 Scan the input string for expression matches. Each match will return th e

	1624 matching tokens, start location, and end location. May be called with o ptional

	1625 C{maxMatches} argument, to clip scanning after 'n' matches are found. I f

	1626 C{overlap} is specified, then overlapping matches will be reported.

	1627

	1628 Note that the start and end locations are reported relative to the strin g

	1629 being parsed. See L{I{parseString}<parseString>} for more information o n parsing

	1630 strings with embedded tabs.

	1631

	1632 Example::

	1633 source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"

	1634 print(source)

	1635 for tokens,start,end in Word(alphas).scanString(source):

	1636 print(' 'start + '^'(end-start))

	1637 print(' '*start + tokens[0])

	1638

	1639 prints::

	1640

	1641 sldjf123lsdjjkf345sldkjf879lkjsfd987

	1642 ^^^^^

	1643 sldjf

	1644 ^^^^^^^

	1645 lsdjjkf

	1646 ^^^^^^

	1647 sldkjf

	1648 ^^^^^^

	1649 lkjsfd

	1650 """

	1651 if not self.streamlined:

	1652 self.streamline()

	1653 for e in self.ignoreExprs:

	1654 e.streamline()

	1655

	1656 if not self.keepTabs:

	1657 instring = _ustr(instring).expandtabs()

	1658 instrlen = len(instring)

	1659 loc = 0

	1660 preparseFn = self.preParse

	1661 parseFn = self._parse

	1662 ParserElement.resetCache()

	1663 matches = 0

	1664 try:

	1665 while loc <= instrlen and matches < maxMatches:

	1666 try:

	1667 preloc = preparseFn( instring, loc )

	1668 nextLoc,tokens = parseFn( instring, preloc, callPreParse=Fal se )

	1669 except ParseException:

	1670 loc = preloc+1

	1671 else:

	1672 if nextLoc > loc:

	1673 matches += 1

	1674 yield tokens, preloc, nextLoc

	1675 if overlap:

	1676 nextloc = preparseFn( instring, loc )

	1677 if nextloc > loc:

	1678 loc = nextLoc

	1679 else:

	1680 loc += 1

	1681 else:

	1682 loc = nextLoc

	1683 else:

	1684 loc = preloc+1

	1685 except ParseBaseException as exc:

	1686 if ParserElement.verbose_stacktrace:

	1687 raise

	1688 else:

	1689 # catch and re-raise exception from here, clears out pyparsing i nternal stack trace

	1690 raise exc

	1691

	1692 def transformString( self, instring ):

	1693 """

	1694 Extension to C{L{scanString}}, to modify matching text with modified tok ens that may

	1695 be returned from a parse action. To use C{transformString}, define a gr ammar and

	1696 attach a parse action to it that modifies the returned token list.

	1697 Invoking C{transformString()} on a target string will then scan for matc hes,

	1698 and replace the matched text patterns according to the logic in the pars e

	1699 action. C{transformString()} returns the resulting transformed string.

	1700

	1701 Example::

	1702 wd = Word(alphas)

	1703 wd.setParseAction(lambda toks: toks[0].title())

	1704

	1705 print(wd.transformString("now is the winter of our discontent made g lorious summer by this sun of york."))

	1706 Prints::

	1707 Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.

	1708 """

	1709 out = []

	1710 lastE = 0

	1711 # force preservation of <TAB>s, to minimize unwanted transformation of s tring, and to

	1712 # keep string locs straight between transformString and scanString

	1713 self.keepTabs = True

	1714 try:

	1715 for t,s,e in self.scanString( instring ):

	1716 out.append( instring[lastE:s] )

	1717 if t:

	1718 if isinstance(t,ParseResults):

	1719 out += t.asList()

	1720 elif isinstance(t,list):

	1721 out += t

	1722 else:

	1723 out.append(t)

	1724 lastE = e

	1725 out.append(instring[lastE:])

	1726 out = [o for o in out if o]

	1727 return "".join(map(_ustr,_flatten(out)))

	1728 except ParseBaseException as exc:

	1729 if ParserElement.verbose_stacktrace:

	1730 raise

	1731 else:

	1732 # catch and re-raise exception from here, clears out pyparsing i nternal stack trace

	1733 raise exc

	1734

	1735 def searchString( self, instring, maxMatches=_MAX_INT ):

	1736 """

	1737 Another extension to C{L{scanString}}, simplifying the access to the tok ens found

	1738 to match the given parse expression. May be called with optional

	1739 C{maxMatches} argument, to clip searching after 'n' matches are found.

	1740

	1741 Example::

	1742 # a capitalized word starts with an uppercase letter, followed by ze ro or more lowercase letters

	1743 cap_word = Word(alphas.upper(), alphas.lower())

	1744

	1745 print(cap_word.searchString("More than Iron, more than Lead, more th an Gold I need Electricity"))

	1746 prints::

	1747 ['More', 'Iron', 'Lead', 'Gold', 'I']

	1748 """

	1749 try:

	1750 return ParseResults([ t for t,s,e in self.scanString( instring, maxM atches ) ])

	1751 except ParseBaseException as exc:

	1752 if ParserElement.verbose_stacktrace:

	1753 raise

	1754 else:

	1755 # catch and re-raise exception from here, clears out pyparsing i nternal stack trace

	1756 raise exc

	1757

	1758 def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):

	1759 """

	1760 Generator method to split a string using the given expression as a separ ator.

	1761 May be called with optional C{maxsplit} argument, to limit the number of splits;

	1762 and the optional C{includeSeparators} argument (default=C{False}), if th e separating

	1763 matching text should be included in the split results.

	1764

	1765 Example::

	1766 punc = oneOf(list(".,;:/-!?"))

	1767 print(list(punc.split("This, this?, this sentence, is badly punctuat ed!")))

	1768 prints::

	1769 ['This', ' this', '', ' this sentence', ' is badly punctuated', '']

	1770 """

	1771 splits = 0

	1772 last = 0

	1773 for t,s,e in self.scanString(instring, maxMatches=maxsplit):

	1774 yield instring[last:s]

	1775 if includeSeparators:

	1776 yield t[0]

	1777 last = e

	1778 yield instring[last:]

	1779

	1780 def __add__(self, other ):

	1781 """

	1782 Implementation of + operator - returns C{L{And}}. Adding strings to a Pa rserElement

	1783 converts them to L{Literal}s by default.

	1784

	1785 Example::

	1786 greet = Word(alphas) + "," + Word(alphas) + "!"

	1787 hello = "Hello, World!"

	1788 print (hello, "->", greet.parseString(hello))

	1789 Prints::

	1790 Hello, World! -> ['Hello', ',', 'World', '!']

	1791 """

	1792 if isinstance( other, basestring ):

	1793 other = ParserElement._literalStringClass( other )

	1794 if not isinstance( other, ParserElement ):

	1795 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),

	1796 SyntaxWarning, stacklevel=2)

	1797 return None

	1798 return And( [ self, other ] )

	1799

	1800 def __radd__(self, other ):

	1801 """

	1802 Implementation of + operator when left operand is not a C{L{ParserElemen t}}

	1803 """

	1804 if isinstance( other, basestring ):

	1805 other = ParserElement._literalStringClass( other )

	1806 if not isinstance( other, ParserElement ):

	1807 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),

	1808 SyntaxWarning, stacklevel=2)

	1809 return None

	1810 return other + self

	1811

	1812 def __sub__(self, other):

	1813 """

	1814 Implementation of - operator, returns C{L{And}} with error stop

	1815 """

	1816 if isinstance( other, basestring ):

	1817 other = ParserElement._literalStringClass( other )

	1818 if not isinstance( other, ParserElement ):

	1819 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),

	1820 SyntaxWarning, stacklevel=2)

	1821 return None

	1822 return And( [ self, And._ErrorStop(), other ] )

	1823

	1824 def __rsub__(self, other ):

	1825 """

	1826 Implementation of - operator when left operand is not a C{L{ParserElemen t}}

	1827 """

	1828 if isinstance( other, basestring ):

	1829 other = ParserElement._literalStringClass( other )

	1830 if not isinstance( other, ParserElement ):

	1831 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),

	1832 SyntaxWarning, stacklevel=2)

	1833 return None

	1834 return other - self

	1835

	1836 def __mul__(self,other):

	1837 """

	1838 Implementation of * operator, allows use of C{expr * 3} in place of

	1839 C{expr + expr + expr}. Expressions may also me multiplied by a 2-intege r

	1840 tuple, similar to C{{min,max}} multipliers in regular expressions. Tupl es

	1841 may also include C{None} as in:

	1842 - C{expr(n,None)} or C{expr(n,)} is equivalent

	1843 to C{expr*n + L{ZeroOrMore}(expr)}

	1844 (read as "at least n instances of C{expr}")

	1845 - C{expr(None,n)} is equivalent to C{expr(0,n)}

	1846 (read as "0 to n instances of C{expr}")

	1847 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}

	1848 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

	1849

	1850 Note that C{expr*(None,n)} does not raise an exception if

	1851 more than n exprs exist in the input stream; that is,

	1852 C{expr*(None,n)} does not enforce a maximum number of expr

	1853 occurrences. If this behavior is desired, then write

	1854 C{expr*(None,n) + ~expr}

	1855 """

	1856 if isinstance(other,int):

	1857 minElements, optElements = other,0

	1858 elif isinstance(other,tuple):

	1859 other = (other + (None, None))[:2]

	1860 if other[0] is None:

	1861 other = (0, other[1])

	1862 if isinstance(other[0],int) and other[1] is None:

	1863 if other[0] == 0:

	1864 return ZeroOrMore(self)

	1865 if other[0] == 1:

	1866 return OneOrMore(self)

	1867 else:

	1868 return self*other[0] + ZeroOrMore(self)

	1869 elif isinstance(other[0],int) and isinstance(other[1],int):

	1870 minElements, optElements = other

	1871 optElements -= minElements

	1872 else:

	1873 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))

	1874 else:

	1875 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))

	1876

	1877 if minElements < 0:

	1878 raise ValueError("cannot multiply ParserElement by negative value")

	1879 if optElements < 0:

	1880 raise ValueError("second tuple value must be greater or equal to fir st tuple value")

	1881 if minElements == optElements == 0:

	1882 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

	1883

	1884 if (optElements):

	1885 def makeOptionalList(n):

	1886 if n>1:

	1887 return Optional(self + makeOptionalList(n-1))

	1888 else:

	1889 return Optional(self)

	1890 if minElements:

	1891 if minElements == 1:

	1892 ret = self + makeOptionalList(optElements)

	1893 else:

	1894 ret = And([self]*minElements) + makeOptionalList(optElements )

	1895 else:

	1896 ret = makeOptionalList(optElements)

	1897 else:

	1898 if minElements == 1:

	1899 ret = self

	1900 else:

	1901 ret = And([self]*minElements)

	1902 return ret

	1903

	1904 def __rmul__(self, other):

	1905 return self.__mul__(other)

	1906

	1907 def __or__(self, other ):

	1908 """

	1909 Implementation of \| operator - returns C{L{MatchFirst}}

	1910 """

	1911 if isinstance( other, basestring ):

	1912 other = ParserElement._literalStringClass( other )

	1913 if not isinstance( other, ParserElement ):

	1914 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),

	1915 SyntaxWarning, stacklevel=2)

	1916 return None

	1917 return MatchFirst( [ self, other ] )

	1918

	1919 def __ror__(self, other ):

	1920 """

	1921 Implementation of \| operator when left operand is not a C{L{ParserElemen t}}

	1922 """

	1923 if isinstance( other, basestring ):

	1924 other = ParserElement._literalStringClass( other )

	1925 if not isinstance( other, ParserElement ):

	1926 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),

	1927 SyntaxWarning, stacklevel=2)

	1928 return None

	1929 return other \| self

	1930

	1931 def __xor__(self, other ):

	1932 """

	1933 Implementation of ^ operator - returns C{L{Or}}

	1934 """

	1935 if isinstance( other, basestring ):

	1936 other = ParserElement._literalStringClass( other )

	1937 if not isinstance( other, ParserElement ):

	1938 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),

	1939 SyntaxWarning, stacklevel=2)

	1940 return None

	1941 return Or( [ self, other ] )

	1942

	1943 def __rxor__(self, other ):

	1944 """

	1945 Implementation of ^ operator when left operand is not a C{L{ParserElemen t}}

	1946 """

	1947 if isinstance( other, basestring ):

	1948 other = ParserElement._literalStringClass( other )

	1949 if not isinstance( other, ParserElement ):

	1950 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),

	1951 SyntaxWarning, stacklevel=2)

	1952 return None

	1953 return other ^ self

	1954

	1955 def __and__(self, other ):

	1956 """

	1957 Implementation of & operator - returns C{L{Each}}

	1958 """

	1959 if isinstance( other, basestring ):

	1960 other = ParserElement._literalStringClass( other )

	1961 if not isinstance( other, ParserElement ):

	1962 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),

	1963 SyntaxWarning, stacklevel=2)

	1964 return None

	1965 return Each( [ self, other ] )

	1966

	1967 def __rand__(self, other ):

	1968 """

	1969 Implementation of & operator when left operand is not a C{L{ParserElemen t}}

	1970 """

	1971 if isinstance( other, basestring ):

	1972 other = ParserElement._literalStringClass( other )

	1973 if not isinstance( other, ParserElement ):

	1974 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),

	1975 SyntaxWarning, stacklevel=2)

	1976 return None

	1977 return other & self

	1978

	1979 def __invert__( self ):

	1980 """

	1981 Implementation of ~ operator - returns C{L{NotAny}}

	1982 """

	1983 return NotAny( self )

	1984

	1985 def __call__(self, name=None):

	1986 """

	1987 Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

	1988

	1989 If C{name} is given with a trailing C{'*'} character, then C{listAllMatc hes} will be

	1990 passed as C{True}.

	1991

	1992 If C{name} is omitted, same as calling C{L{copy}}.

	1993

	1994 Example::

	1995 # these are equivalent

	1996 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setR esultsName("socsecno")

	1997 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

	1998 """

	1999 if name is not None:

	2000 return self.setResultsName(name)

	2001 else:

	2002 return self.copy()

	2003

	2004 def suppress( self ):

	2005 """

	2006 Suppresses the output of this C{ParserElement}; useful to keep punctuati on from

	2007 cluttering up returned output.

	2008 """

	2009 return Suppress( self )

	2010

	2011 def leaveWhitespace( self ):

	2012 """

	2013 Disables the skipping of whitespace before matching the characters in th e

	2014 C{ParserElement}'s defined pattern. This is normally only used internal ly by

	2015 the pyparsing module, but may be needed in some whitespace-sensitive gra mmars.

	2016 """

	2017 self.skipWhitespace = False

	2018 return self

	2019

	2020 def setWhitespaceChars( self, chars ):

	2021 """

	2022 Overrides the default whitespace chars

	2023 """

	2024 self.skipWhitespace = True

	2025 self.whiteChars = chars

	2026 self.copyDefaultWhiteChars = False

	2027 return self

	2028

	2029 def parseWithTabs( self ):

	2030 """

	2031 Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.

	2032 Must be called before C{parseString} when the input grammar contains ele ments that

	2033 match C{<TAB>} characters.

	2034 """

	2035 self.keepTabs = True

	2036 return self

	2037

	2038 def ignore( self, other ):

	2039 """

	2040 Define expression to be ignored (e.g., comments) while doing pattern

	2041 matching; may be called repeatedly, to define multiple comment or other

	2042 ignorable patterns.

	2043

	2044 Example::

	2045 patt = OneOrMore(Word(alphas))

	2046 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

	2047

	2048 patt.ignore(cStyleComment)

	2049 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd' ]

	2050 """

	2051 if isinstance(other, basestring):

	2052 other = Suppress(other)

	2053

	2054 if isinstance( other, Suppress ):

	2055 if other not in self.ignoreExprs:

	2056 self.ignoreExprs.append(other)

	2057 else:

	2058 self.ignoreExprs.append( Suppress( other.copy() ) )

	2059 return self

	2060

	2061 def setDebugActions( self, startAction, successAction, exceptionAction ):

	2062 """

	2063 Enable display of debugging messages while doing pattern matching.

	2064 """

	2065 self.debugActions = (startAction or _defaultStartDebugAction,

	2066 successAction or _defaultSuccessDebugAction,

	2067 exceptionAction or _defaultExceptionDebugAction)

	2068 self.debug = True

	2069 return self

	2070

	2071 def setDebug( self, flag=True ):

	2072 """

	2073 Enable display of debugging messages while doing pattern matching.

	2074 Set C{flag} to True to enable, False to disable.

	2075

	2076 Example::

	2077 wd = Word(alphas).setName("alphaword")

	2078 integer = Word(nums).setName("numword")

	2079 term = wd \| integer

	2080

	2081 # turn on debugging for wd

	2082 wd.setDebug()

	2083

	2084 OneOrMore(term).parseString("abc 123 xyz 890")

	2085

	2086 prints::

	2087 Match alphaword at loc 0(1,1)

	2088 Matched alphaword -> ['abc']

	2089 Match alphaword at loc 3(1,4)

	2090 Exception raised:Expected alphaword (at char 4), (line:1, col:5)

	2091 Match alphaword at loc 7(1,8)

	2092 Matched alphaword -> ['xyz']

	2093 Match alphaword at loc 11(1,12)

	2094 Exception raised:Expected alphaword (at char 12), (line:1, col:13)

	2095 Match alphaword at loc 15(1,16)

	2096 Exception raised:Expected alphaword (at char 15), (line:1, col:16)

	2097

	2098 The output shown is that produced by the default debug actions - custom debug actions can be

	2099 specified using L{setDebugActions}. Prior to attempting

	2100 to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}

	2101 is shown. Then if the parse succeeds, a C{"Matched"} message is shown, o r an C{"Exception raised"}

	2102 message is shown. Also note the use of L{setName} to assign a human-read able name to the expression,

	2103 which makes debugging and exception messages easier to understand - for instance, the default

	2104 name created for the C{Word} expression without calling C{setName} is C{ "W:(ABCD...)"}.

	2105 """

	2106 if flag:

	2107 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebug Action, _defaultExceptionDebugAction )

	2108 else:

	2109 self.debug = False

	2110 return self

	2111

	2112 def __str__( self ):

	2113 return self.name

	2114

	2115 def __repr__( self ):

	2116 return _ustr(self)

	2117

	2118 def streamline( self ):

	2119 self.streamlined = True

	2120 self.strRepr = None

	2121 return self

	2122

	2123 def checkRecursion( self, parseElementList ):

	2124 pass

	2125

	2126 def validate( self, validateTrace=[] ):

	2127 """

	2128 Check defined expressions for valid structure, check for infinite recurs ive definitions.

	2129 """

	2130 self.checkRecursion( [] )

	2131

	2132 def parseFile( self, file_or_filename, parseAll=False ):

	2133 """

	2134 Execute the parse expression on the given file or filename.

	2135 If a filename is specified (instead of a file object),

	2136 the entire file is opened, read, and closed before parsing.

	2137 """

	2138 try:

	2139 file_contents = file_or_filename.read()

	2140 except AttributeError:

	2141 with open(file_or_filename, "r") as f:

	2142 file_contents = f.read()

	2143 try:

	2144 return self.parseString(file_contents, parseAll)

	2145 except ParseBaseException as exc:

	2146 if ParserElement.verbose_stacktrace:

	2147 raise

	2148 else:

	2149 # catch and re-raise exception from here, clears out pyparsing i nternal stack trace

	2150 raise exc

	2151

	2152 def __eq__(self,other):

	2153 if isinstance(other, ParserElement):

	2154 return self is other or vars(self) == vars(other)

	2155 elif isinstance(other, basestring):

	2156 return self.matches(other)

	2157 else:

	2158 return super(ParserElement,self)==other

	2159

	2160 def __ne__(self,other):

	2161 return not (self == other)

	2162

	2163 def __hash__(self):

	2164 return hash(id(self))

	2165

	2166 def __req__(self,other):

	2167 return self == other

	2168

	2169 def __rne__(self,other):

	2170 return not (self == other)

	2171

	2172 def matches(self, testString, parseAll=True):

	2173 """

	2174 Method for quick testing of a parser against a test string. Good for sim ple

	2175 inline microtests of sub expressions while building up larger parser.

	2176

	2177 Parameters:

	2178 - testString - to test against this expression for a match

	2179 - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

	2180

	2181 Example::

	2182 expr = Word(nums)

	2183 assert expr.matches("100")

	2184 """

	2185 try:

	2186 self.parseString(_ustr(testString), parseAll=parseAll)

	2187 return True

	2188 except ParseBaseException:

	2189 return False

	2190

	2191 def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printRe sults=True, failureTests=False):

	2192 """

	2193 Execute the parse expression on a series of test strings, showing each

	2194 test, the parsed results or where the parse failed. Quick and easy way t o

	2195 run a parse expression against a list of sample strings.

	2196

	2197 Parameters:

	2198 - tests - a list of separate test strings, or a multiline string of tes t strings

	2199 - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

	2200 - comment - (default=C{'#'}) - expression for indicating embedded comme nts in the test

	2201 string; pass None to disable comment filtering

	2202 - fullDump - (default=C{True}) - dump results as list followed by resul ts names in nested outline;

	2203 if False, only dump nested list

	2204 - printResults - (default=C{True}) prints test output to stdout

	2205 - failureTests - (default=C{False}) indicates if these tests are expect ed to fail parsing

	2206

	2207 Returns: a (success, results) tuple, where success indicates that all te sts succeeded

	2208 (or failed if C{failureTests} is True), and the results contain a list o f lines of each

	2209 test's output

	2210

	2211 Example::

	2212 number_expr = pyparsing_common.number.copy()

	2213

	2214 result = number_expr.runTests('''

	2215 # unsigned integer

	2216 100

	2217 # negative integer

	2218 -100

	2219 # float with scientific notation

	2220 6.02e23

	2221 # integer with scientific notation

	2222 1e-12

	2223 ''')

	2224 print("Success" if result[0] else "Failed!")

	2225

	2226 result = number_expr.runTests('''

	2227 # stray character

	2228 100Z

	2229 # missing leading digit before '.'

	2230 -.100

	2231 # too many '.'

	2232 3.14.159

	2233 ''', failureTests=True)

	2234 print("Success" if result[0] else "Failed!")

	2235 prints::

	2236 # unsigned integer

	2237 100

	2238 [100]

	2239

	2240 # negative integer

	2241 -100

	2242 [-100]

	2243

	2244 # float with scientific notation

	2245 6.02e23

	2246 [6.02e+23]

	2247

	2248 # integer with scientific notation

	2249 1e-12

	2250 [1e-12]

	2251

	2252 Success

	2253

	2254 # stray character

	2255 100Z

	2256 ^

	2257 FAIL: Expected end of text (at char 3), (line:1, col:4)

	2258

	2259 # missing leading digit before '.'

	2260 -.100

	2261 ^

	2262 FAIL: Expected {real number with scientific notation \| real number \| signed integer} (at char 0), (line:1, col:1)

	2263

	2264 # too many '.'

	2265 3.14.159

	2266 ^

	2267 FAIL: Expected end of text (at char 4), (line:1, col:5)

	2268

	2269 Success

	2270

	2271 Each test string must be on a single line. If you want to test a string that spans multiple

	2272 lines, create a test like this::

	2273

	2274 expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")

	2275

	2276 (Note that this is a raw string literal, you must include the leading 'r '.)

	2277 """

	2278 if isinstance(tests, basestring):

	2279 tests = list(map(str.strip, tests.rstrip().splitlines()))

	2280 if isinstance(comment, basestring):

	2281 comment = Literal(comment)

	2282 allResults = []

	2283 comments = []

	2284 success = True

	2285 for t in tests:

	2286 if comment is not None and comment.matches(t, False) or comments and not t:

	2287 comments.append(t)

	2288 continue

	2289 if not t:

	2290 continue

	2291 out = ['\n'.join(comments), t]

	2292 comments = []

	2293 try:

	2294 t = t.replace(r'\n','\n')

	2295 result = self.parseString(t, parseAll=parseAll)

	2296 out.append(result.dump(full=fullDump))

	2297 success = success and not failureTests

	2298 except ParseBaseException as pe:

	2299 fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""

	2300 if '\n' in t:

	2301 out.append(line(pe.loc, t))

	2302 out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)

	2303 else:

	2304 out.append(' '*pe.loc + '^' + fatal)

	2305 out.append("FAIL: " + str(pe))

	2306 success = success and failureTests

	2307 result = pe

	2308 except Exception as exc:

	2309 out.append("FAIL-EXCEPTION: " + str(exc))

	2310 success = success and failureTests

	2311 result = exc

	2312

	2313 if printResults:

	2314 if fullDump:

	2315 out.append('')

	2316 print('\n'.join(out))

	2317

	2318 allResults.append((t, result))

	2319

	2320 return success, allResults

	2321

	2322

	2323 class Token(ParserElement):

	2324 """

	2325 Abstract C{ParserElement} subclass, for defining atomic matching patterns.

	2326 """

	2327 def __init__( self ):

	2328 super(Token,self).__init__( savelist=False )

	2329

	2330

	2331 class Empty(Token):

	2332 """

	2333 An empty token, will always match.

	2334 """

	2335 def __init__( self ):

	2336 super(Empty,self).__init__()

	2337 self.name = "Empty"

	2338 self.mayReturnEmpty = True

	2339 self.mayIndexError = False

	2340

	2341

	2342 class NoMatch(Token):

	2343 """

	2344 A token that will never match.

	2345 """

	2346 def __init__( self ):

	2347 super(NoMatch,self).__init__()

	2348 self.name = "NoMatch"

	2349 self.mayReturnEmpty = True

	2350 self.mayIndexError = False

	2351 self.errmsg = "Unmatchable token"

	2352

	2353 def parseImpl( self, instring, loc, doActions=True ):

	2354 raise ParseException(instring, loc, self.errmsg, self)

	2355

	2356

	2357 class Literal(Token):

	2358 """

	2359 Token to exactly match a specified string.

	2360

	2361 Example::

	2362 Literal('blah').parseString('blah') # -> ['blah']

	2363 Literal('blah').parseString('blahfooblah') # -> ['blah']

	2364 Literal('blah').parseString('bla') # -> Exception: Expected "blah"

	2365

	2366 For case-insensitive matching, use L{CaselessLiteral}.

	2367

	2368 For keyword matching (force word break before and after the matched string),

	2369 use L{Keyword} or L{CaselessKeyword}.

	2370 """

	2371 def __init__( self, matchString ):

	2372 super(Literal,self).__init__()

	2373 self.match = matchString

	2374 self.matchLen = len(matchString)

	2375 try:

	2376 self.firstMatchChar = matchString[0]

	2377 except IndexError:

	2378 warnings.warn("null string passed to Literal; use Empty() instead",

	2379 SyntaxWarning, stacklevel=2)

	2380 self.__class__ = Empty

	2381 self.name = '"%s"' % _ustr(self.match)

	2382 self.errmsg = "Expected " + self.name

	2383 self.mayReturnEmpty = False

	2384 self.mayIndexError = False

	2385

	2386 # Performance tuning: this routine gets called a lot

	2387 # if this is a single character match string and the first character matche s,

	2388 # short-circuit as quickly as possible, and avoid calling startswith

	2389 #~ @profile

	2390 def parseImpl( self, instring, loc, doActions=True ):

	2391 if (instring[loc] == self.firstMatchChar and

	2392 (self.matchLen==1 or instring.startswith(self.match,loc)) ):

	2393 return loc+self.matchLen, self.match

	2394 raise ParseException(instring, loc, self.errmsg, self)

# short module-level alias for the Literal class
_L = Literal
# class used by the ParserElement operator methods (__add__, __or__, ...) to
# wrap plain string operands before combining them
ParserElement._literalStringClass = Literal

	2397

	2398 class Keyword(Token):

	2399 """

	2400 Token to exactly match a specified string as a keyword, that is, it must be

	2401 immediately followed by a non-keyword character. Compare with C{L{Literal}} :

	2402 - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.

	2403 - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'i f x=1'}, or C{'if(y==2)'}

	2404 Accepts two optional constructor arguments in addition to the keyword string :

	2405 - C{identChars} is a string of characters that would be valid identifier ch aracters,

	2406 defaulting to all alphanumerics + "_" and "$"

	2407 - C{caseless} allows case-insensitive matching, default is C{False}.

	2408

	2409 Example::

	2410 Keyword("start").parseString("start") # -> ['start']

	2411 Keyword("start").parseString("starting") # -> Exception

	2412

	2413 For case-insensitive matching, use L{CaselessKeyword}.

	2414 """

	2415 DEFAULT_KEYWORD_CHARS = alphanums+"_$"

	2416

	2417 def __init__( self, matchString, identChars=None, caseless=False ):

	2418 super(Keyword,self).__init__()

	2419 if identChars is None:

	2420 identChars = Keyword.DEFAULT_KEYWORD_CHARS

	2421 self.match = matchString

	2422 self.matchLen = len(matchString)

	2423 try:

	2424 self.firstMatchChar = matchString[0]

	2425 except IndexError:

	2426 warnings.warn("null string passed to Keyword; use Empty() instead",

	2427 SyntaxWarning, stacklevel=2)

	2428 self.name = '"%s"' % self.match

	2429 self.errmsg = "Expected " + self.name

	2430 self.mayReturnEmpty = False

	2431 self.mayIndexError = False

	2432 self.caseless = caseless

	2433 if caseless:

	2434 self.caselessmatch = matchString.upper()

	2435 identChars = identChars.upper()

	2436 self.identChars = set(identChars)

	2437

	2438 def parseImpl( self, instring, loc, doActions=True ):

	2439 if self.caseless:

	2440 if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatc h) and

	2441 (loc >= len(instring)-self.matchLen or instring[loc+self.matchL en].upper() not in self.identChars) and

	2442 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):

	2443 return loc+self.matchLen, self.match

	2444 else:

	2445 if (instring[loc] == self.firstMatchChar and

	2446 (self.matchLen==1 or instring.startswith(self.match,loc)) and

	2447 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLe n] not in self.identChars) and

	2448 (loc == 0 or instring[loc-1] not in self.identChars) ):

	2449 return loc+self.matchLen, self.match

	2450 raise ParseException(instring, loc, self.errmsg, self)

	2451

	2452 def copy(self):

	2453 c = super(Keyword,self).copy()

	2454 c.identChars = Keyword.DEFAULT_KEYWORD_CHARS

	2455 return c

	2456

	2457 @staticmethod

	2458 def setDefaultKeywordChars( chars ):

	2459 """Overrides the default Keyword chars

	2460 """

	2461 Keyword.DEFAULT_KEYWORD_CHARS = chars

	2462

	2463 class CaselessLiteral(Literal):

	2464 """

	2465 Token to match a specified string, ignoring case of letters.

	2466 Note: the matched results will always be in the case of the given

	2467 match string, NOT the case of the input text.

	2468

	2469 Example::

	2470 OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CM D', 'CMD', 'CMD']

	2471

	2472 (Contrast with example for L{CaselessKeyword}.)

	2473 """

	2474 def __init__( self, matchString ):

	2475 super(CaselessLiteral,self).__init__( matchString.upper() )

	2476 # Preserve the defining literal.

	2477 self.returnString = matchString

	2478 self.name = "'%s'" % self.returnString

	2479 self.errmsg = "Expected " + self.name

	2480

	2481 def parseImpl( self, instring, loc, doActions=True ):

	2482 if instring[ loc:loc+self.matchLen ].upper() == self.match:

	2483 return loc+self.matchLen, self.returnString

	2484 raise ParseException(instring, loc, self.errmsg, self)

	2485

	2486 class CaselessKeyword(Keyword):

	2487 """

	2488 Caseless version of L{Keyword}.

	2489

	2490 Example::

	2491 OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CM D', 'CMD']

	2492

	2493 (Contrast with example for L{CaselessLiteral}.)

	2494 """

	2495 def __init__( self, matchString, identChars=None ):

	2496 super(CaselessKeyword,self).__init__( matchString, identChars, caseless= True )

	2497

	2498 def parseImpl( self, instring, loc, doActions=True ):

	2499 if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) a nd

	2500 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen]. upper() not in self.identChars) ):

	2501 return loc+self.matchLen, self.match

	2502 raise ParseException(instring, loc, self.errmsg, self)

	2503

	2504 class CloseMatch(Token):

	2505 """

	2506 A variation on L{Literal} which matches "close" matches, that is,

	2507 strings with at most 'n' mismatching characters. C{CloseMatch} takes paramet ers:

	2508 - C{match_string} - string to be matched

	2509 - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

	2510

	2511 The results from a successful parse will contain the matched text from the i nput string and the following named results:

	2512 - C{mismatches} - a list of the positions within the match_string where mis matches were found

	2513 - C{original} - the original match_string used to compare against the input string

	2514

	2515 If C{mismatches} is an empty list, then the match was an exact match.

	2516

	2517 Example::

	2518 patt = CloseMatch("ATCATCGAATGGA")

	2519 patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches' : [[9]], 'original': ['ATCATCGAATGGA']})

	2520 patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGG A' (with up to 1 mismatches) (at char 0), (line:1, col:1)

	2521

	2522 # exact match

	2523 patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches' : [[]], 'original': ['ATCATCGAATGGA']})

	2524

	2525 # close match allowing up to 2 mismatches

	2526 patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)

	2527 patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches' : [[4, 9]], 'original': ['ATCATCGAATGGA']})

	2528 """

	2529 def __init__(self, match_string, maxMismatches=1):

	2530 super(CloseMatch,self).__init__()

	2531 self.name = match_string

	2532 self.match_string = match_string

	2533 self.maxMismatches = maxMismatches

	2534 self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_str ing, self.maxMismatches)

	2535 self.mayIndexError = False

	2536 self.mayReturnEmpty = False

	2537

	2538 def parseImpl( self, instring, loc, doActions=True ):

	2539 start = loc

	2540 instrlen = len(instring)

	2541 maxloc = start + len(self.match_string)

	2542

	2543 if maxloc <= instrlen:

	2544 match_string = self.match_string

	2545 match_stringloc = 0

	2546 mismatches = []

	2547 maxMismatches = self.maxMismatches

	2548

	2549 for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self. match_string)):

	2550 src,mat = s_m

	2551 if src != mat:

	2552 mismatches.append(match_stringloc)

	2553 if len(mismatches) > maxMismatches:

	2554 break

	2555 else:

	2556 loc = match_stringloc + 1

	2557 results = ParseResults([instring[start:loc]])

	2558 results['original'] = self.match_string

	2559 results['mismatches'] = mismatches

	2560 return loc, results

	2561

	2562 raise ParseException(instring, loc, self.errmsg, self)

	2563

	2564

	2565 class Word(Token):

	2566 """

	2567 Token for matching words composed of allowed character sets.

	2568 Defined with string containing all allowed initial characters,

	2569 an optional string containing allowed body characters (if omitted,

	2570 defaults to the initial character set), and an optional minimum,

	2571 maximum, and/or exact length. The default value for C{min} is 1 (a

	2572 minimum value < 1 is not valid); the default values for C{max} and C{exact}

	2573 are 0, meaning no maximum or exact length restriction. An optional

	2574 C{excludeChars} parameter can list characters that might be found in

	2575 the input C{bodyChars} string; useful to define a word of all printables

	2576 except for one or two characters, for instance.

	2577

	2578 L{srange} is useful for defining custom character set strings for defining

	2579 C{Word} expressions, using range notation from regular expression character sets.

	2580

	2581 A common mistake is to use C{Word} to match a specific literal string, as in

	2582 C{Word("Address")}. Remember that C{Word} uses the string argument to define

	2583 I{sets} of matchable characters. This expression would match "Add", "AAA",

	2584 "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.

	2585 To match an exact literal string, use L{Literal} or L{Keyword}.

	2586

	2587 pyparsing includes helper strings for building Words:

	2588 - L{alphas}

	2589 - L{nums}

	2590 - L{alphanums}

	2591 - L{hexnums}

	2592 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, t ilded, umlauted, etc.)

	2593 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)

	2594 - L{printables} (any non-whitespace character)

	2595

	2596 Example::

	2597 # a word composed of digits

	2598 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange(" 0-9"))

	2599

	2600 # a word with a leading capital, and zero or more lowercase

	2601 capital_word = Word(alphas.upper(), alphas.lower())

	2602

	2603 # hostnames are alphanumeric, with leading alpha, and '-'

	2604 hostname = Word(alphas, alphanums+'-')

	2605

	2606 # roman numeral (not a strict parser, accepts invalid mix of characters)

	2607 roman = Word("IVXLCDM")

	2608

	2609 # any string of non-whitespace characters, except for ','

	2610 csv_value = Word(printables, excludeChars=",")

	2611 """

	2612 def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyw ord=False, excludeChars=None ):

	2613 super(Word,self).__init__()

	2614 if excludeChars:

	2615 initChars = ''.join(c for c in initChars if c not in excludeChars)

	2616 if bodyChars:

	2617 bodyChars = ''.join(c for c in bodyChars if c not in excludeChar s)

	2618 self.initCharsOrig = initChars

	2619 self.initChars = set(initChars)

	2620 if bodyChars :

	2621 self.bodyCharsOrig = bodyChars

	2622 self.bodyChars = set(bodyChars)

	2623 else:

	2624 self.bodyCharsOrig = initChars

	2625 self.bodyChars = set(initChars)

	2626

	2627 self.maxSpecified = max > 0

	2628

	2629 if min < 1:

	2630 raise ValueError("cannot specify a minimum length < 1; use Optional( Word()) if zero-length word is permitted")

	2631

	2632 self.minLen = min

	2633

	2634 if max > 0:

	2635 self.maxLen = max

	2636 else:

	2637 self.maxLen = _MAX_INT

	2638

	2639 if exact > 0:

	2640 self.maxLen = exact

	2641 self.minLen = exact

	2642

	2643 self.name = _ustr(self)

	2644 self.errmsg = "Expected " + self.name

	2645 self.mayIndexError = False

	2646 self.asKeyword = asKeyword

	2647

	2648 if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max= =0 and exact==0):

	2649 if self.bodyCharsOrig == self.initCharsOrig:

	2650 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsO rig)

	2651 elif len(self.initCharsOrig) == 1:

	2652 self.reString = "%s[%s]*" % \

	2653 (re.escape(self.initCharsOrig),

	2654 _escapeRegexRangeChars(self.bodyCharsOrig) ,)

	2655 else:

	2656 self.reString = "[%s][%s]*" % \

	2657 (_escapeRegexRangeChars(self.initCharsOrig ),

	2658 _escapeRegexRangeChars(self.bodyCharsOrig) ,)

	2659 if self.asKeyword:

	2660 self.reString = r"\b"+self.reString+r"\b"

	2661 try:

	2662 self.re = re.compile( self.reString )

	2663 except Exception:

	2664 self.re = None

	2665

	2666 def parseImpl( self, instring, loc, doActions=True ):

	2667 if self.re:

	2668 result = self.re.match(instring,loc)

	2669 if not result:

	2670 raise ParseException(instring, loc, self.errmsg, self)

	2671

	2672 loc = result.end()

	2673 return loc, result.group()

	2674

	2675 if not(instring[ loc ] in self.initChars):

	2676 raise ParseException(instring, loc, self.errmsg, self)

	2677

	2678 start = loc

	2679 loc += 1

	2680 instrlen = len(instring)

	2681 bodychars = self.bodyChars

	2682 maxloc = start + self.maxLen

	2683 maxloc = min( maxloc, instrlen )

	2684 while loc < maxloc and instring[loc] in bodychars:

	2685 loc += 1

	2686

	2687 throwException = False

	2688 if loc - start < self.minLen:

	2689 throwException = True

	2690 if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:

	2691 throwException = True

	2692 if self.asKeyword:

	2693 if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):

	2694 throwException = True

	2695

	2696 if throwException:

	2697 raise ParseException(instring, loc, self.errmsg, self)

	2698

	2699 return loc, instring[start:loc]

	2700

	2701 def __str__( self ):

	2702 try:

	2703 return super(Word,self).__str__()

	2704 except Exception:

	2705 pass

	2706

	2707

	2708 if self.strRepr is None:

	2709

	2710 def charsAsStr(s):

	2711 if len(s)>4:

	2712 return s[:4]+"..."

	2713 else:

	2714 return s

	2715

	2716 if ( self.initCharsOrig != self.bodyCharsOrig ):

	2717 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), c harsAsStr(self.bodyCharsOrig) )

	2718 else:

	2719 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

	2720

	2721 return self.strRepr

	2722

	2723

	2724 class Regex(Token):

	2725 """

	2726 Token for matching strings that match a given regular expression.

	2727 Defined with string specifying the regular expression in a form recognized b y the inbuilt Python re module.

	2728 If the given regex contains named groups (defined using C{(?P<name>...)}), t hese will be preserved as

	2729 named parse results.

	2730

	2731 Example::

	2732 realnum = Regex(r"[+-]?\d+\.\d*")

	2733 date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

	2734 # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-v alid-roman-numerals-with-a-regular-expression

	2735 roman = Regex(r"M{0,4}(CM\|CD\|D?C{0,3})(XC\|XL\|L?X{0,3})(IX\|IV\|V?I{0,3})")

	2736 """

	2737 compiledREtype = type(re.compile("[A-Z]"))

	2738 def __init__( self, pattern, flags=0):

	2739 """The parameters C{pattern} and C{flags} are passed to the C{re.compile ()} function as-is. See the Python C{re} module for an explanation of the accept able patterns and flags."""

	2740 super(Regex,self).__init__()

	2741

	2742 if isinstance(pattern, basestring):

	2743 if not pattern:

	2744 warnings.warn("null string passed to Regex; use Empty() instead" ,

	2745 SyntaxWarning, stacklevel=2)

	2746

	2747 self.pattern = pattern

	2748 self.flags = flags

	2749

	2750 try:

	2751 self.re = re.compile(self.pattern, self.flags)

	2752 self.reString = self.pattern

	2753 except sre_constants.error:

	2754 warnings.warn("invalid pattern (%s) passed to Regex" % pattern,

	2755 SyntaxWarning, stacklevel=2)

	2756 raise

	2757

	2758 elif isinstance(pattern, Regex.compiledREtype):

	2759 self.re = pattern

	2760 self.pattern = \

	2761 self.reString = str(pattern)

	2762 self.flags = flags

	2763

	2764 else:

	2765 raise ValueError("Regex may only be constructed with a string or a c ompiled RE object")

	2766

	2767 self.name = _ustr(self)

	2768 self.errmsg = "Expected " + self.name

	2769 self.mayIndexError = False

	2770 self.mayReturnEmpty = True

	2771

	2772 def parseImpl( self, instring, loc, doActions=True ):

	2773 result = self.re.match(instring,loc)

	2774 if not result:

	2775 raise ParseException(instring, loc, self.errmsg, self)

	2776

	2777 loc = result.end()

	2778 d = result.groupdict()

	2779 ret = ParseResults(result.group())

	2780 if d:

	2781 for k in d:

	2782 ret[k] = d[k]

	2783 return loc,ret

	2784

	2785 def __str__( self ):

	2786 try:

	2787 return super(Regex,self).__str__()

	2788 except Exception:

	2789 pass

	2790

	2791 if self.strRepr is None:

	2792 self.strRepr = "Re:(%s)" % repr(self.pattern)

	2793

	2794 return self.strRepr

	2795

	2796

	2797 class QuotedString(Token):

	2798 r"""

	2799 Token for matching strings that are delimited by quoting characters.

	2800

	2801 Defined with the following parameters:

	2802 - quoteChar - string of one or more characters defining the quote delimi ting string

	2803 - escChar - character to escape quotes, typically backslash (default=C{N one})

	2804 - escQuote - special quote sequence to escape an embedded quote string ( such as SQL's "" to escape an embedded ") (default=C{None})

	2805 - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})

	2806 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})

	2807 - endQuoteChar - string of one or more characters defining the end of th e quote delimited string (default=C{None} => same as quoteChar)

	2808 - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n' }, etc.) to actual whitespace (default=C{True})

	2809

	2810 Example::

	2811 qs = QuotedString('"')

	2812 print(qs.searchString('lsjdf "This is the quote" sldjf'))

	2813 complex_qs = QuotedString('{{', endQuoteChar='}}')

	2814 print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))

	2815 sql_qs = QuotedString('"', escQuote='""')

	2816 print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" qu otes" sldjf'))

	2817 prints::

	2818 [['This is the quote']]

	2819 [['This is the "quote"']]

	2820 [['This is the quote with "embedded" quotes']]

	2821 """

	2822 def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):

	2823 super(QuotedString,self).__init__()

	2824

	2825 # remove white space from quote chars - wont work anyway

	2826 quoteChar = quoteChar.strip()

	2827 if not quoteChar:

	2828 warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,s tacklevel=2)

	2829 raise SyntaxError()

	2830

	2831 if endQuoteChar is None:

	2832 endQuoteChar = quoteChar

	2833 else:

	2834 endQuoteChar = endQuoteChar.strip()

	2835 if not endQuoteChar:

	2836 warnings.warn("endQuoteChar cannot be the empty string",SyntaxWa rning,stacklevel=2)

	2837 raise SyntaxError()

	2838

	2839 self.quoteChar = quoteChar

	2840 self.quoteCharLen = len(quoteChar)

	2841 self.firstQuoteChar = quoteChar[0]

	2842 self.endQuoteChar = endQuoteChar

	2843 self.endQuoteCharLen = len(endQuoteChar)

	2844 self.escChar = escChar

	2845 self.escQuote = escQuote

	2846 self.unquoteResults = unquoteResults

	2847 self.convertWhitespaceEscapes = convertWhitespaceEscapes

	2848

	2849 if multiline:

	2850 self.flags = re.MULTILINE \| re.DOTALL

	2851 self.pattern = r'%s(?:[^%s%s]' % \

	2852 ( re.escape(self.quoteChar),

	2853 _escapeRegexRangeChars(self.endQuoteChar[0]),

	2854 (escChar is not None and _escapeRegexRangeChars(escChar) or '' ) )

	2855 else:

	2856 self.flags = 0

	2857 self.pattern = r'%s(?:[^%s\n\r%s]' % \

	2858 ( re.escape(self.quoteChar),

	2859 _escapeRegexRangeChars(self.endQuoteChar[0]),

	2860 (escChar is not None and _escapeRegexRangeChars(escChar) or '' ) )

	2861 if len(self.endQuoteChar) > 1:

	2862 self.pattern += (

	2863 '\|(?:' + ')\|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[: i]),

	2864 _escapeRegexRangeChars(self.endQu oteChar[i]))

	2865 for i in range(len(self.endQuoteChar)-1,0,-1 )) + ')'

	2866 )

	2867 if escQuote:

	2868 self.pattern += (r'\|(?:%s)' % re.escape(escQuote))

	2869 if escChar:

	2870 self.pattern += (r'\|(?:%s.)' % re.escape(escChar))

	2871 self.escCharReplacePattern = re.escape(self.escChar)+"(.)"

	2872 self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

	2873

	2874 try:

	2875 self.re = re.compile(self.pattern, self.flags)

	2876 self.reString = self.pattern

	2877 except sre_constants.error:

	2878 warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,

	2879 SyntaxWarning, stacklevel=2)

	2880 raise

	2881

	2882 self.name = _ustr(self)

	2883 self.errmsg = "Expected " + self.name

	2884 self.mayIndexError = False

	2885 self.mayReturnEmpty = True

	2886

	2887 def parseImpl( self, instring, loc, doActions=True ):

	2888 result = instring[loc] == self.firstQuoteChar and self.re.match(instring ,loc) or None

	2889 if not result:

	2890 raise ParseException(instring, loc, self.errmsg, self)

	2891

	2892 loc = result.end()

	2893 ret = result.group()

	2894

	2895 if self.unquoteResults:

	2896

	2897 # strip off quotes

	2898 ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

	2899

	2900 if isinstance(ret,basestring):

	2901 # replace escaped whitespace

	2902 if '\\' in ret and self.convertWhitespaceEscapes:

	2903 ws_map = {

	2904 r'\t' : '\t',

	2905 r'\n' : '\n',

	2906 r'\f' : '\f',

	2907 r'\r' : '\r',

	2908 }

	2909 for wslit,wschar in ws_map.items():

	2910 ret = ret.replace(wslit, wschar)

	2911

	2912 # replace escaped characters

	2913 if self.escChar:

	2914 ret = re.sub(self.escCharReplacePattern,"\g<1>",ret)

	2915

	2916 # replace escaped quotes

	2917 if self.escQuote:

	2918 ret = ret.replace(self.escQuote, self.endQuoteChar)

	2919

	2920 return loc, ret

	2921

	2922 def __str__( self ):

	2923 try:

	2924 return super(QuotedString,self).__str__()

	2925 except Exception:

	2926 pass

	2927

	2928 if self.strRepr is None:

	2929 self.strRepr = "quoted string, starting with %s ending with %s" % (s elf.quoteChar, self.endQuoteChar)

	2930

	2931 return self.strRepr

	2932

	2933

	2934 class CharsNotIn(Token):

	2935 """

	2936 Token for matching words composed of characters I{not} in a given set (will

	2937 include whitespace in matched characters if not listed in the provided exclu sion set - see example).

	2938 Defined with string containing all disallowed characters, and an optional

	2939 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a

	2940 minimum value < 1 is not valid); the default values for C{max} and C{exact}

	2941 are 0, meaning no maximum or exact length restriction.

	2942

	2943 Example::

	2944 # define a comma-separated-value as anything that is not a ','

	2945 csv_value = CharsNotIn(',')

	2946 print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213") )

	2947 prints::

	2948 ['dkls', 'lsdkjf', 's12 34', '@!#', '213']

	2949 """

	2950 def __init__( self, notChars, min=1, max=0, exact=0 ):

	2951 super(CharsNotIn,self).__init__()

	2952 self.skipWhitespace = False

	2953 self.notChars = notChars

	2954

	2955 if min < 1:

	2956 raise ValueError("cannot specify a minimum length < 1; use Optional( CharsNotIn()) if zero-length char group is permitted")

	2957

	2958 self.minLen = min

	2959

	2960 if max > 0:

	2961 self.maxLen = max

	2962 else:

	2963 self.maxLen = _MAX_INT

	2964

	2965 if exact > 0:

	2966 self.maxLen = exact

	2967 self.minLen = exact

	2968

	2969 self.name = _ustr(self)

	2970 self.errmsg = "Expected " + self.name

	2971 self.mayReturnEmpty = ( self.minLen == 0 )

	2972 self.mayIndexError = False

	2973

	2974 def parseImpl( self, instring, loc, doActions=True ):

	2975 if instring[loc] in self.notChars:

	2976 raise ParseException(instring, loc, self.errmsg, self)

	2977

	2978 start = loc

	2979 loc += 1

	2980 notchars = self.notChars

	2981 maxlen = min( start+self.maxLen, len(instring) )

	2982 while loc < maxlen and \

	2983 (instring[loc] not in notchars):

	2984 loc += 1

	2985

	2986 if loc - start < self.minLen:

	2987 raise ParseException(instring, loc, self.errmsg, self)

	2988

	2989 return loc, instring[start:loc]

	2990

	2991 def __str__( self ):

	2992 try:

	2993 return super(CharsNotIn, self).__str__()

	2994 except Exception:

	2995 pass

	2996

	2997 if self.strRepr is None:

	2998 if len(self.notChars) > 4:

	2999 self.strRepr = "!W:(%s...)" % self.notChars[:4]

	3000 else:

	3001 self.strRepr = "!W:(%s)" % self.notChars

	3002

	3003 return self.strRepr

	3004

	3005 class White(Token):

	3006 """

	3007 Special matching class for matching whitespace. Normally, whitespace is ign ored

	3008 by pyparsing grammars. This class is included when some whitespace structur es

	3009 are significant. Define with a string containing the whitespace characters to be

	3010 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, an d C{exact} arguments,

	3011 as defined for the C{L{Word}} class.

	3012 """

	3013 whiteStrs = {

	3014 " " : "<SPC>",

	3015 "\t": "<TAB>",

	3016 "\n": "<LF>",

	3017 "\r": "<CR>",

	3018 "\f": "<FF>",

	3019 }

	3020 def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):

	3021 super(White,self).__init__()

	3022 self.matchWhite = ws

	3023 self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )

	3024 #~ self.leaveWhitespace()

	3025 self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))

	3026 self.mayReturnEmpty = True

	3027 self.errmsg = "Expected " + self.name

	3028

	3029 self.minLen = min

	3030

	3031 if max > 0:

	3032 self.maxLen = max

	3033 else:

	3034 self.maxLen = _MAX_INT

	3035

	3036 if exact > 0:

	3037 self.maxLen = exact

	3038 self.minLen = exact

	3039

	3040 def parseImpl( self, instring, loc, doActions=True ):

	3041 if not(instring[ loc ] in self.matchWhite):

	3042 raise ParseException(instring, loc, self.errmsg, self)

	3043 start = loc

	3044 loc += 1

	3045 maxloc = start + self.maxLen

	3046 maxloc = min( maxloc, len(instring) )

	3047 while loc < maxloc and instring[loc] in self.matchWhite:

	3048 loc += 1

	3049

	3050 if loc - start < self.minLen:

	3051 raise ParseException(instring, loc, self.errmsg, self)

	3052

	3053 return loc, instring[start:loc]

	3054

	3055

	3056 class _PositionToken(Token):

	3057 def __init__( self ):

	3058 super(_PositionToken,self).__init__()

	3059 self.name=self.__class__.__name__

	3060 self.mayReturnEmpty = True

	3061 self.mayIndexError = False

	3062

	3063 class GoToColumn(_PositionToken):

	3064 """

	3065 Token to advance to a specific column of input text; useful for tabular repo rt scraping.

	3066 """

	3067 def __init__( self, colno ):

	3068 super(GoToColumn,self).__init__()

	3069 self.col = colno

	3070

	3071 def preParse( self, instring, loc ):

	3072 if col(loc,instring) != self.col:

	3073 instrlen = len(instring)

	3074 if self.ignoreExprs:

	3075 loc = self._skipIgnorables( instring, loc )

	3076 while loc < instrlen and instring[loc].isspace() and col( loc, instr ing ) != self.col :

	3077 loc += 1

	3078 return loc

	3079

	3080 def parseImpl( self, instring, loc, doActions=True ):

	3081 thiscol = col( loc, instring )

	3082 if thiscol > self.col:

	3083 raise ParseException( instring, loc, "Text not in expected column", self )

	3084 newloc = loc + self.col - thiscol

	3085 ret = instring[ loc: newloc ]

	3086 return newloc, ret

	3087

	3088

	3089 class LineStart(_PositionToken):

	3090 """

	3091 Matches if current position is at the beginning of a line within the parse s tring

	3092

	3093 Example::

	3094

	3095 test = '''\

	3096 AAA this line

	3097 AAA and this line

	3098 AAA but not this one

	3099 B AAA and definitely not this one

	3100 '''

	3101

	3102 for t in (LineStart() + 'AAA' + restOfLine).searchString(test):

	3103 print(t)

	3104

	3105 Prints::

	3106 ['AAA', ' this line']

	3107 ['AAA', ' and this line']

	3108

	3109 """

	3110 def __init__( self ):

	3111 super(LineStart,self).__init__()

	3112 self.errmsg = "Expected start of line"

	3113

	3114 def parseImpl( self, instring, loc, doActions=True ):

	3115 if col(loc, instring) == 1:

	3116 return loc, []

	3117 raise ParseException(instring, loc, self.errmsg, self)

	3118

	3119 class LineEnd(_PositionToken):

	3120 """

	3121 Matches if current position is at the end of a line within the parse string

	3122 """

	3123 def __init__( self ):

	3124 super(LineEnd,self).__init__()

	3125 self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "") )

	3126 self.errmsg = "Expected end of line"

	3127

	3128 def parseImpl( self, instring, loc, doActions=True ):

	3129 if loc<len(instring):

	3130 if instring[loc] == "\n":

	3131 return loc+1, "\n"

	3132 else:

	3133 raise ParseException(instring, loc, self.errmsg, self)

	3134 elif loc == len(instring):

	3135 return loc+1, []

	3136 else:

	3137 raise ParseException(instring, loc, self.errmsg, self)

	3138

	3139 class StringStart(_PositionToken):

	3140 """

	3141 Matches if current position is at the beginning of the parse string

	3142 """

	3143 def __init__( self ):

	3144 super(StringStart,self).__init__()

	3145 self.errmsg = "Expected start of text"

	3146

	3147 def parseImpl( self, instring, loc, doActions=True ):

	3148 if loc != 0:

	3149 # see if entire string up to here is just whitespace and ignoreables

	3150 if loc != self.preParse( instring, 0 ):

	3151 raise ParseException(instring, loc, self.errmsg, self)

	3152 return loc, []

	3153

	3154 class StringEnd(_PositionToken):

	3155 """

	3156 Matches if current position is at the end of the parse string

	3157 """

	3158 def __init__( self ):

	3159 super(StringEnd,self).__init__()

	3160 self.errmsg = "Expected end of text"

	3161

	3162 def parseImpl( self, instring, loc, doActions=True ):

	3163 if loc < len(instring):

	3164 raise ParseException(instring, loc, self.errmsg, self)

	3165 elif loc == len(instring):

	3166 return loc+1, []

	3167 elif loc > len(instring):

	3168 return loc, []

	3169 else:

	3170 raise ParseException(instring, loc, self.errmsg, self)

	3171

	3172 class WordStart(_PositionToken):

	3173 """

	3174 Matches if the current position is at the beginning of a Word, and

	3175 is not preceded by any character in a given set of C{wordChars}

	3176 (default=C{printables}). To emulate the C{\b} behavior of regular expression s,

	3177 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning o f

	3178 the string being parsed, or at the beginning of a line.

	3179 """

	3180 def __init__(self, wordChars = printables):

	3181 super(WordStart,self).__init__()

	3182 self.wordChars = set(wordChars)

	3183 self.errmsg = "Not at the start of a word"

	3184

	3185 def parseImpl(self, instring, loc, doActions=True ):

	3186 if loc != 0:

	3187 if (instring[loc-1] in self.wordChars or

	3188 instring[loc] not in self.wordChars):

	3189 raise ParseException(instring, loc, self.errmsg, self)

	3190 return loc, []

	3191

	3192 class WordEnd(_PositionToken):

	3193 """

	3194 Matches if the current position is at the end of a Word, and

	3195 is not followed by any character in a given set of C{wordChars}

	3196 (default=C{printables}). To emulate the C{\b} behavior of regular expression s,

	3197 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of

	3198 the string being parsed, or at the end of a line.

	3199 """

	3200 def __init__(self, wordChars = printables):

	3201 super(WordEnd,self).__init__()

	3202 self.wordChars = set(wordChars)

	3203 self.skipWhitespace = False

	3204 self.errmsg = "Not at the end of a word"

	3205

	3206 def parseImpl(self, instring, loc, doActions=True ):

	3207 instrlen = len(instring)

	3208 if instrlen>0 and loc<instrlen:

	3209 if (instring[loc] in self.wordChars or

	3210 instring[loc-1] not in self.wordChars):

	3211 raise ParseException(instring, loc, self.errmsg, self)

	3212 return loc, []

	3213

	3214

	3215 class ParseExpression(ParserElement):

	3216 """

	3217 Abstract subclass of ParserElement, for combining and post-processing parsed tokens.

	3218 """

	3219 def __init__( self, exprs, savelist = False ):

	3220 super(ParseExpression,self).__init__(savelist)

	3221 if isinstance( exprs, _generatorType ):

	3222 exprs = list(exprs)

	3223

	3224 if isinstance( exprs, basestring ):

	3225 self.exprs = [ ParserElement._literalStringClass( exprs ) ]

	3226 elif isinstance( exprs, collections.Iterable ):

	3227 exprs = list(exprs)

	3228 # if sequence of strings provided, wrap with Literal

	3229 if all(isinstance(expr, basestring) for expr in exprs):

	3230 exprs = map(ParserElement._literalStringClass, exprs)

	3231 self.exprs = list(exprs)

	3232 else:

	3233 try:

	3234 self.exprs = list( exprs )

	3235 except TypeError:

	3236 self.exprs = [ exprs ]

	3237 self.callPreparse = False

	3238

	3239 def __getitem__( self, i ):

	3240 return self.exprs[i]

	3241

	3242 def append( self, other ):

	3243 self.exprs.append( other )

	3244 self.strRepr = None

	3245 return self

	3246

	3247 def leaveWhitespace( self ):

	3248 """Extends C{leaveWhitespace} defined in base class, and also invokes C{ leaveWhitespace} on

	3249 all contained expressions."""

	3250 self.skipWhitespace = False

	3251 self.exprs = [ e.copy() for e in self.exprs ]

	3252 for e in self.exprs:

	3253 e.leaveWhitespace()

	3254 return self

	3255

	3256 def ignore( self, other ):

	3257 if isinstance( other, Suppress ):

	3258 if other not in self.ignoreExprs:

	3259 super( ParseExpression, self).ignore( other )

	3260 for e in self.exprs:

	3261 e.ignore( self.ignoreExprs[-1] )

	3262 else:

	3263 super( ParseExpression, self).ignore( other )

	3264 for e in self.exprs:

	3265 e.ignore( self.ignoreExprs[-1] )

	3266 return self

	3267

	3268 def __str__( self ):

	3269 try:

	3270 return super(ParseExpression,self).__str__()

	3271 except Exception:

	3272 pass

	3273

	3274 if self.strRepr is None:

	3275 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exp rs) )

	3276 return self.strRepr

	3277

	3278 def streamline( self ):

	3279 super(ParseExpression,self).streamline()

	3280

	3281 for e in self.exprs:

	3282 e.streamline()

	3283

	3284 # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )

	3285 # but only if there are no parse actions or resultsNames on the nested A nd's

	3286 # (likewise for Or's and MatchFirst's)

	3287 if ( len(self.exprs) == 2 ):

	3288 other = self.exprs[0]

	3289 if ( isinstance( other, self.__class__ ) and

	3290 not(other.parseAction) and

	3291 other.resultsName is None and

	3292 not other.debug ):

	3293 self.exprs = other.exprs[:] + [ self.exprs[1] ]

	3294 self.strRepr = None

	3295 self.mayReturnEmpty \|= other.mayReturnEmpty

	3296 self.mayIndexError \|= other.mayIndexError

	3297

	3298 other = self.exprs[-1]

	3299 if ( isinstance( other, self.__class__ ) and

	3300 not(other.parseAction) and

	3301 other.resultsName is None and

	3302 not other.debug ):

	3303 self.exprs = self.exprs[:-1] + other.exprs[:]

	3304 self.strRepr = None

	3305 self.mayReturnEmpty \|= other.mayReturnEmpty

	3306 self.mayIndexError \|= other.mayIndexError

	3307

	3308 self.errmsg = "Expected " + _ustr(self)

	3309

	3310 return self

	3311

	3312 def setResultsName( self, name, listAllMatches=False ):

	3313 ret = super(ParseExpression,self).setResultsName(name,listAllMatches)

	3314 return ret

	3315

	3316 def validate( self, validateTrace=[] ):

	3317 tmp = validateTrace[:]+[self]

	3318 for e in self.exprs:

	3319 e.validate(tmp)

	3320 self.checkRecursion( [] )

	3321

	3322 def copy(self):

	3323 ret = super(ParseExpression,self).copy()

	3324 ret.exprs = [e.copy() for e in self.exprs]

	3325 return ret

	3326

	3327 class And(ParseExpression):

	3328 """

	3329 Requires all given C{ParseExpression}s to be found in the given order.

	3330 Expressions may be separated by whitespace.

	3331 May be constructed using the C{'+'} operator.

	3332 May also be constructed using the C{'-'} operator, which will suppress backt racking.

	3333

	3334 Example::

	3335 integer = Word(nums)

	3336 name_expr = OneOrMore(Word(alphas))

	3337

	3338 expr = And([integer("id"),name_expr("name"),integer("age")])

	3339 # more easily written as:

	3340 expr = integer("id") + name_expr("name") + integer("age")

	3341 """

	3342

	3343 class _ErrorStop(Empty):

	3344 def __init__(self, args, *kwargs):

	3345 super(And._ErrorStop,self).__init__(args, *kwargs)

	3346 self.name = '-'

	3347 self.leaveWhitespace()

	3348

	3349 def __init__( self, exprs, savelist = True ):

	3350 super(And,self).__init__(exprs, savelist)

	3351 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)

	3352 self.setWhitespaceChars( self.exprs[0].whiteChars )

	3353 self.skipWhitespace = self.exprs[0].skipWhitespace

	3354 self.callPreparse = True

	3355

	3356 def parseImpl( self, instring, loc, doActions=True ):

	3357 # pass False as last arg to _parse for first element, since we already

	3358 # pre-parsed the string as part of our And pre-parsing

	3359 loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPr eParse=False )

	3360 errorStop = False

	3361 for e in self.exprs[1:]:

	3362 if isinstance(e, And._ErrorStop):

	3363 errorStop = True

	3364 continue

	3365 if errorStop:

	3366 try:

	3367 loc, exprtokens = e._parse( instring, loc, doActions )

	3368 except ParseSyntaxException:

	3369 raise

	3370 except ParseBaseException as pe:

	3371 pe.__traceback__ = None

	3372 raise ParseSyntaxException._from_exception(pe)

	3373 except IndexError:

	3374 raise ParseSyntaxException(instring, len(instring), self.err msg, self)

	3375 else:

	3376 loc, exprtokens = e._parse( instring, loc, doActions )

	3377 if exprtokens or exprtokens.haskeys():

	3378 resultlist += exprtokens

	3379 return loc, resultlist

	3380

	3381 def __iadd__(self, other ):

	3382 if isinstance( other, basestring ):

	3383 other = ParserElement._literalStringClass( other )

	3384 return self.append( other ) #And( [ self, other ] )

	3385

	3386 def checkRecursion( self, parseElementList ):

	3387 subRecCheckList = parseElementList[:] + [ self ]

	3388 for e in self.exprs:

	3389 e.checkRecursion( subRecCheckList )

	3390 if not e.mayReturnEmpty:

	3391 break

	3392

	3393 def __str__( self ):

	3394 if hasattr(self,"name"):

	3395 return self.name

	3396

	3397 if self.strRepr is None:

	3398 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

	3399

	3400 return self.strRepr

	3401

	3402

	3403 class Or(ParseExpression):

	3404 """

	3405 Requires that at least one C{ParseExpression} is found.

	3406 If two expressions match, the expression that matches the longest string wil l be used.

	3407 May be constructed using the C{'^'} operator.

	3408

	3409 Example::

	3410 # construct Or using '^' operator

	3411

	3412 number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))

	3413 print(number.searchString("123 3.1416 789"))

	3414 prints::

	3415 [['123'], ['3.1416'], ['789']]

	3416 """

	3417 def __init__( self, exprs, savelist = False ):

	3418 super(Or,self).__init__(exprs, savelist)

	3419 if self.exprs:

	3420 self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

	3421 else:

	3422 self.mayReturnEmpty = True

	3423

	3424 def parseImpl( self, instring, loc, doActions=True ):

	3425 maxExcLoc = -1

	3426 maxException = None

	3427 matches = []

	3428 for e in self.exprs:

	3429 try:

	3430 loc2 = e.tryParse( instring, loc )

	3431 except ParseException as err:

	3432 err.__traceback__ = None

	3433 if err.loc > maxExcLoc:

	3434 maxException = err

	3435 maxExcLoc = err.loc

	3436 except IndexError:

	3437 if len(instring) > maxExcLoc:

	3438 maxException = ParseException(instring,len(instring),e.errms g,self)

	3439 maxExcLoc = len(instring)

	3440 else:

	3441 # save match among all matches, to retry longest to shortest

	3442 matches.append((loc2, e))

	3443

	3444 if matches:

	3445 matches.sort(key=lambda x: -x[0])

	3446 for _,e in matches:

	3447 try:

	3448 return e._parse( instring, loc, doActions )

	3449 except ParseException as err:

	3450 err.__traceback__ = None

	3451 if err.loc > maxExcLoc:

	3452 maxException = err

	3453 maxExcLoc = err.loc

	3454

	3455 if maxException is not None:

	3456 maxException.msg = self.errmsg

	3457 raise maxException

	3458 else:

	3459 raise ParseException(instring, loc, "no defined alternatives to matc h", self)

	3460

	3461

	3462 def __ixor__(self, other ):

	3463 if isinstance( other, basestring ):

	3464 other = ParserElement._literalStringClass( other )

	3465 return self.append( other ) #Or( [ self, other ] )

	3466

	3467 def __str__( self ):

	3468 if hasattr(self,"name"):

	3469 return self.name

	3470

	3471 if self.strRepr is None:

	3472 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"

	3473

	3474 return self.strRepr

	3475

	3476 def checkRecursion( self, parseElementList ):

	3477 subRecCheckList = parseElementList[:] + [ self ]

	3478 for e in self.exprs:

	3479 e.checkRecursion( subRecCheckList )

	3480

	3481

	3482 class MatchFirst(ParseExpression):

	3483 """

	3484 Requires that at least one C{ParseExpression} is found.

	3485 If two expressions match, the first one listed is the one that will match.

	3486 May be constructed using the C{'\|'} operator.

	3487

	3488 Example::

	3489 # construct MatchFirst using '\|' operator

	3490

	3491 # watch the order of expressions to match

	3492 number = Word(nums) \| Combine(Word(nums) + '.' + Word(nums))

	3493 print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'] , ['1416'], ['789']]

	3494

	3495 # put more selective expression first

	3496 number = Combine(Word(nums) + '.' + Word(nums)) \| Word(nums)

	3497 print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3. 1416'], ['789']]

	3498 """

	3499 def __init__( self, exprs, savelist = False ):

	3500 super(MatchFirst,self).__init__(exprs, savelist)

	3501 if self.exprs:

	3502 self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

	3503 else:

	3504 self.mayReturnEmpty = True

	3505

	3506 def parseImpl( self, instring, loc, doActions=True ):

	3507 maxExcLoc = -1

	3508 maxException = None

	3509 for e in self.exprs:

	3510 try:

	3511 ret = e._parse( instring, loc, doActions )

	3512 return ret

	3513 except ParseException as err:

	3514 if err.loc > maxExcLoc:

	3515 maxException = err

	3516 maxExcLoc = err.loc

	3517 except IndexError:

	3518 if len(instring) > maxExcLoc:

	3519 maxException = ParseException(instring,len(instring),e.errms g,self)

	3520 maxExcLoc = len(instring)

	3521

	3522 # only got here if no expression matched, raise exception for match that made it the furthest

	3523 else:

	3524 if maxException is not None:

	3525 maxException.msg = self.errmsg

	3526 raise maxException

	3527 else:

	3528 raise ParseException(instring, loc, "no defined alternatives to match", self)

	3529

	3530 def __ior__(self, other ):

	3531 if isinstance( other, basestring ):

	3532 other = ParserElement._literalStringClass( other )

	3533 return self.append( other ) #MatchFirst( [ self, other ] )

	3534

	3535 def __str__( self ):

	3536 if hasattr(self,"name"):

	3537 return self.name

	3538

	3539 if self.strRepr is None:

	3540 self.strRepr = "{" + " \| ".join(_ustr(e) for e in self.exprs) + "}"

	3541

	3542 return self.strRepr

	3543

	3544 def checkRecursion( self, parseElementList ):

	3545 subRecCheckList = parseElementList[:] + [ self ]

	3546 for e in self.exprs:

	3547 e.checkRecursion( subRecCheckList )

	3548

	3549

	3550 class Each(ParseExpression):

	3551 """

	3552 Requires all given C{ParseExpression}s to be found, but in any order.

	3553 Expressions may be separated by whitespace.

	3554 May be constructed using the C{'&'} operator.

	3555

	3556 Example::

	3557 color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")

	3558 shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")

	3559 integer = Word(nums)

	3560 shape_attr = "shape:" + shape_type("shape")

	3561 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")

	3562 color_attr = "color:" + color("color")

	3563 size_attr = "size:" + integer("size")

	3564

	3565 # use Each (using operator '&') to accept attributes in any order

	3566 # (shape and posn are required, color and size are optional)

	3567 shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(si ze_attr)

	3568

	3569 shape_spec.runTests('''

	3570 shape: SQUARE color: BLACK posn: 100, 120

	3571 shape: CIRCLE size: 50 color: BLUE posn: 50,80

	3572 color:GREEN size:20 shape:TRIANGLE posn:20,40

	3573 '''

	3574 )

	3575 prints::

	3576 shape: SQUARE color: BLACK posn: 100, 120

	3577 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]

	3578 - color: BLACK

	3579 - posn: ['100', ',', '120']

	3580 - x: 100

	3581 - y: 120

	3582 - shape: SQUARE

	3583

	3584

	3585 shape: CIRCLE size: 50 color: BLUE posn: 50,80

	3586 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ', ', '80']]

	3587 - color: BLUE

	3588 - posn: ['50', ',', '80']

	3589 - x: 50

	3590 - y: 80

	3591 - shape: CIRCLE

	3592 - size: 50

	3593

	3594

	3595 color: GREEN size: 20 shape: TRIANGLE posn: 20,40

	3596 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]

	3597 - color: GREEN

	3598 - posn: ['20', ',', '40']

	3599 - x: 20

	3600 - y: 40

	3601 - shape: TRIANGLE

	3602 - size: 20

	3603 """

	3604 def __init__( self, exprs, savelist = True ):

	3605 super(Each,self).__init__(exprs, savelist)

	3606 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)

	3607 self.skipWhitespace = True

	3608 self.initExprGroups = True

	3609

	3610 def parseImpl( self, instring, loc, doActions=True ):

	3611 if self.initExprGroups:

	3612 self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance (e,Optional))

	3613 opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]

	3614 opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstanc e(e,Optional)]

	3615 self.optionals = opt1 + opt2

	3616 self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,Z eroOrMore) ]

	3617 self.multirequired = [ e.expr for e in self.exprs if isinstance(e,On eOrMore) ]

	3618 self.required = [ e for e in self.exprs if not isinstance(e,(Optiona l,ZeroOrMore,OneOrMore)) ]

	3619 self.required += self.multirequired

	3620 self.initExprGroups = False

	3621 tmpLoc = loc

	3622 tmpReqd = self.required[:]

	3623 tmpOpt = self.optionals[:]

	3624 matchOrder = []

	3625

	3626 keepMatching = True

	3627 while keepMatching:

	3628 tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequir ed

	3629 failed = []

	3630 for e in tmpExprs:

	3631 try:

	3632 tmpLoc = e.tryParse( instring, tmpLoc )

	3633 except ParseException:

	3634 failed.append(e)

	3635 else:

	3636 matchOrder.append(self.opt1map.get(id(e),e))

	3637 if e in tmpReqd:

	3638 tmpReqd.remove(e)

	3639 elif e in tmpOpt:

	3640 tmpOpt.remove(e)

	3641 if len(failed) == len(tmpExprs):

	3642 keepMatching = False

	3643

	3644 if tmpReqd:

	3645 missing = ", ".join(_ustr(e) for e in tmpReqd)

	3646 raise ParseException(instring,loc,"Missing one or more required elem ents (%s)" % missing )

	3647

	3648 # add any unmatched Optionals, in case they have default values defined

	3649 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.exp r in tmpOpt]

	3650

	3651 resultlist = []

	3652 for e in matchOrder:

	3653 loc,results = e._parse(instring,loc,doActions)

	3654 resultlist.append(results)

	3655

	3656 finalResults = sum(resultlist, ParseResults([]))

	3657 return loc, finalResults

	3658

	3659 def __str__( self ):

	3660 if hasattr(self,"name"):

	3661 return self.name

	3662

	3663 if self.strRepr is None:

	3664 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

	3665

	3666 return self.strRepr

	3667

	3668 def checkRecursion( self, parseElementList ):

	3669 subRecCheckList = parseElementList[:] + [ self ]

	3670 for e in self.exprs:

	3671 e.checkRecursion( subRecCheckList )

	3672

	3673

	3674 class ParseElementEnhance(ParserElement):

	3675 """

	3676 Abstract subclass of C{ParserElement}, for combining and post-processing par sed tokens.

	3677 """

	3678 def __init__( self, expr, savelist=False ):

	3679 super(ParseElementEnhance,self).__init__(savelist)

	3680 if isinstance( expr, basestring ):

	3681 if issubclass(ParserElement._literalStringClass, Token):

	3682 expr = ParserElement._literalStringClass(expr)

	3683 else:

	3684 expr = ParserElement._literalStringClass(Literal(expr))

	3685 self.expr = expr

	3686 self.strRepr = None

	3687 if expr is not None:

	3688 self.mayIndexError = expr.mayIndexError

	3689 self.mayReturnEmpty = expr.mayReturnEmpty

	3690 self.setWhitespaceChars( expr.whiteChars )

	3691 self.skipWhitespace = expr.skipWhitespace

	3692 self.saveAsList = expr.saveAsList

	3693 self.callPreparse = expr.callPreparse

	3694 self.ignoreExprs.extend(expr.ignoreExprs)

	3695

	3696 def parseImpl( self, instring, loc, doActions=True ):

	3697 if self.expr is not None:

	3698 return self.expr._parse( instring, loc, doActions, callPreParse=Fals e )

	3699 else:

	3700 raise ParseException("",loc,self.errmsg,self)

	3701

	3702 def leaveWhitespace( self ):

	3703 self.skipWhitespace = False

	3704 self.expr = self.expr.copy()

	3705 if self.expr is not None:

	3706 self.expr.leaveWhitespace()

	3707 return self

	3708

	3709 def ignore( self, other ):

	3710 if isinstance( other, Suppress ):

	3711 if other not in self.ignoreExprs:

	3712 super( ParseElementEnhance, self).ignore( other )

	3713 if self.expr is not None:

	3714 self.expr.ignore( self.ignoreExprs[-1] )

	3715 else:

	3716 super( ParseElementEnhance, self).ignore( other )

	3717 if self.expr is not None:

	3718 self.expr.ignore( self.ignoreExprs[-1] )

	3719 return self

	3720

	3721 def streamline( self ):

	3722 super(ParseElementEnhance,self).streamline()

	3723 if self.expr is not None:

	3724 self.expr.streamline()

	3725 return self

	3726

	3727 def checkRecursion( self, parseElementList ):

	3728 if self in parseElementList:

	3729 raise RecursiveGrammarException( parseElementList+[self] )

	3730 subRecCheckList = parseElementList[:] + [ self ]

	3731 if self.expr is not None:

	3732 self.expr.checkRecursion( subRecCheckList )

	3733

	3734 def validate( self, validateTrace=[] ):

	3735 tmp = validateTrace[:]+[self]

	3736 if self.expr is not None:

	3737 self.expr.validate(tmp)

	3738 self.checkRecursion( [] )

	3739

	3740 def __str__( self ):

	3741 try:

	3742 return super(ParseElementEnhance,self).__str__()

	3743 except Exception:

	3744 pass

	3745

	3746 if self.strRepr is None and self.expr is not None:

	3747 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exp r) )

	3748 return self.strRepr

	3749

	3750

	3751 class FollowedBy(ParseElementEnhance):

	3752 """

	3753 Lookahead matching of the given parse expression. C{FollowedBy}

	3754 does I{not} advance the parsing position within the input string, it only

	3755 verifies that the specified parse expression matches at the current

	3756 position. C{FollowedBy} always returns a null token list.

	3757

	3758 Example::

	3759 # use FollowedBy to match a label only if it is followed by a ':'

	3760 data_word = Word(alphas)

	3761 label = data_word + FollowedBy(':')

	3762 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=la bel).setParseAction(' '.join))

	3763

	3764 OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()

	3765 prints::

	3766 [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]

	3767 """

	3768 def __init__( self, expr ):

	3769 super(FollowedBy,self).__init__(expr)

	3770 self.mayReturnEmpty = True

	3771

	3772 def parseImpl( self, instring, loc, doActions=True ):

	3773 self.expr.tryParse( instring, loc )

	3774 return loc, []

	3775

	3776

	3777 class NotAny(ParseElementEnhance):

	3778 """

	3779 Lookahead to disallow matching with the given parse expression. C{NotAny}

	3780 does I{not} advance the parsing position within the input string, it only

	3781 verifies that the specified parse expression does I{not} match at the curren t

	3782 position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAn y}

	3783 always returns a null token list. May be constructed using the '~' operator .

	3784

	3785 Example::

	3786

	3787 """

	3788 def __init__( self, expr ):

	3789 super(NotAny,self).__init__(expr)

	3790 #~ self.leaveWhitespace()

	3791 self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs

	3792 self.mayReturnEmpty = True

	3793 self.errmsg = "Found unwanted token, "+_ustr(self.expr)

	3794

	3795 def parseImpl( self, instring, loc, doActions=True ):

	3796 if self.expr.canParseNext(instring, loc):

	3797 raise ParseException(instring, loc, self.errmsg, self)

	3798 return loc, []

	3799

	3800 def __str__( self ):

	3801 if hasattr(self,"name"):

	3802 return self.name

	3803

	3804 if self.strRepr is None:

	3805 self.strRepr = "~{" + _ustr(self.expr) + "}"

	3806

	3807 return self.strRepr

	3808

	3809 class _MultipleMatch(ParseElementEnhance):

	3810 def __init__( self, expr, stopOn=None):

	3811 super(_MultipleMatch, self).__init__(expr)

	3812 self.saveAsList = True

	3813 ender = stopOn

	3814 if isinstance(ender, basestring):

	3815 ender = ParserElement._literalStringClass(ender)

	3816 self.not_ender = ~ender if ender is not None else None

	3817

	3818 def parseImpl( self, instring, loc, doActions=True ):

	3819 self_expr_parse = self.expr._parse

	3820 self_skip_ignorables = self._skipIgnorables

	3821 check_ender = self.not_ender is not None

	3822 if check_ender:

	3823 try_not_ender = self.not_ender.tryParse

	3824

	3825 # must be at least one (but first see if we are the stopOn sentinel;

	3826 # if so, fail)

	3827 if check_ender:

	3828 try_not_ender(instring, loc)

	3829 loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=Fa lse )

	3830 try:

	3831 hasIgnoreExprs = (not not self.ignoreExprs)

	3832 while 1:

	3833 if check_ender:

	3834 try_not_ender(instring, loc)

	3835 if hasIgnoreExprs:

	3836 preloc = self_skip_ignorables( instring, loc )

	3837 else:

	3838 preloc = loc

	3839 loc, tmptokens = self_expr_parse( instring, preloc, doActions )

	3840 if tmptokens or tmptokens.haskeys():

	3841 tokens += tmptokens

	3842 except (ParseException,IndexError):

	3843 pass

	3844

	3845 return loc, tokens

	3846

	3847 class OneOrMore(_MultipleMatch):

	3848 """

	3849 Repetition of one or more of the given expression.

	3850

	3851 Parameters:

	3852 - expr - expression that must match one or more times

	3853 - stopOn - (default=C{None}) - expression for a terminating sentinel

	3854 (only required if the sentinel would ordinarily match the repetition

	3855 expression)

	3856

	3857 Example::

	3858 data_word = Word(alphas)

	3859 label = data_word + FollowedBy(':')

	3860 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseA ction(' '.join))

	3861

	3862 text = "shape: SQUARE posn: upper left color: BLACK"

	3863 OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

	3864

	3865 # use stopOn attribute for OneOrMore to avoid reading label string as pa rt of the data

	3866 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=la bel).setParseAction(' '.join))

	3867 OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', ' SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

	3868

	3869 # could also be written as

	3870 (attr_expr * (1,)).parseString(text).pprint()

	3871 """

	3872

	3873 def __str__( self ):

	3874 if hasattr(self,"name"):

	3875 return self.name

	3876

	3877 if self.strRepr is None:

	3878 self.strRepr = "{" + _ustr(self.expr) + "}..."

	3879

	3880 return self.strRepr

	3881

	3882 class ZeroOrMore(_MultipleMatch):

	3883 """

	3884 Optional repetition of zero or more of the given expression.

	3885

	3886 Parameters:

	3887 - expr - expression that must match zero or more times

	3888 - stopOn - (default=C{None}) - expression for a terminating sentinel

	3889 (only required if the sentinel would ordinarily match the repetition

	3890 expression)

	3891

	3892 Example: similar to L{OneOrMore}

	3893 """

	3894 def __init__( self, expr, stopOn=None):

	3895 super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)

	3896 self.mayReturnEmpty = True

	3897

	3898 def parseImpl( self, instring, loc, doActions=True ):

	3899 try:

	3900 return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)

	3901 except (ParseException,IndexError):

	3902 return loc, []

	3903

	3904 def __str__( self ):

	3905 if hasattr(self,"name"):

	3906 return self.name

	3907

	3908 if self.strRepr is None:

	3909 self.strRepr = "[" + _ustr(self.expr) + "]..."

	3910

	3911 return self.strRepr

	3912

	3913 class _NullToken(object):

	3914 def __bool__(self):

	3915 return False

	3916 __nonzero__ = __bool__

	3917 def __str__(self):

	3918 return ""

	3919

	3920 _optionalNotMatched = _NullToken()

	3921 class Optional(ParseElementEnhance):

	3922 """

	3923 Optional matching of the given expression.

	3924

	3925 Parameters:

	3926 - expr - expression that must match zero or more times

	3927 - default (optional) - value to be returned if the optional expression is n ot found.

	3928

	3929 Example::

	3930 # US postal code can be a 5-digit zip, plus optional 4-digit qualifier

	3931 zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))

	3932 zip.runTests('''

	3933 # traditional ZIP code

	3934 12345

	3935

	3936 # ZIP+4 form

	3937 12101-0001

	3938

	3939 # invalid ZIP

	3940 98765-

	3941 ''')

	3942 prints::

	3943 # traditional ZIP code

	3944 12345

	3945 ['12345']

	3946

	3947 # ZIP+4 form

	3948 12101-0001

	3949 ['12101-0001']

	3950

	3951 # invalid ZIP

	3952 98765-

	3953 ^

	3954 FAIL: Expected end of text (at char 5), (line:1, col:6)

	3955 """

	3956 def __init__( self, expr, default=_optionalNotMatched ):

	3957 super(Optional,self).__init__( expr, savelist=False )

	3958 self.saveAsList = self.expr.saveAsList

	3959 self.defaultValue = default

	3960 self.mayReturnEmpty = True

	3961

	3962 def parseImpl( self, instring, loc, doActions=True ):

	3963 try:

	3964 loc, tokens = self.expr._parse( instring, loc, doActions, callPrePar se=False )

	3965 except (ParseException,IndexError):

	3966 if self.defaultValue is not _optionalNotMatched:

	3967 if self.expr.resultsName:

	3968 tokens = ParseResults([ self.defaultValue ])

	3969 tokens[self.expr.resultsName] = self.defaultValue

	3970 else:

	3971 tokens = [ self.defaultValue ]

	3972 else:

	3973 tokens = []

	3974 return loc, tokens

	3975

	3976 def __str__( self ):

	3977 if hasattr(self,"name"):

	3978 return self.name

	3979

	3980 if self.strRepr is None:

	3981 self.strRepr = "[" + _ustr(self.expr) + "]"

	3982

	3983 return self.strRepr

	3984

	3985 class SkipTo(ParseElementEnhance):

	3986 """

	3987 Token for skipping over all undefined text until the matched expression is f ound.

	3988

	3989 Parameters:

	3990 - expr - target expression marking the end of the data to be skipped

	3991 - include - (default=C{False}) if True, the target expression is also parse d

	3992 (the skipped text and target expression are returned as a 2-element li st).

	3993 - ignore - (default=C{None}) used to define grammars (typically quoted stri ngs and

	3994 comments) that might contain false matches to the target expression

	3995 - failOn - (default=C{None}) define expressions that are not allowed to be

	3996 included in the skipped test; if found before the target expression is found,

	3997 the SkipTo is not a match

	3998

	3999 Example::

	4000 report = '''

	4001 Outstanding Issues Report - 1 Jan 2000

	4002

	4003 # \| Severity \| Description \| Days Open

	4004 -----+----------+-------------------------------------------+------- ----

	4005 101 \| Critical \| Intermittent system crash \| 6

	4006 94 \| Cosmetic \| Spelling error on Login ('log\|n') \| 14

	4007 79 \| Minor \| System slow when running too many reports \| 47

	4008 '''

	4009 integer = Word(nums)

	4010 SEP = Suppress('\|')

	4011 # use SkipTo to simply match everything up until the next SEP

	4012 # - ignore quoted strings, so that a '\|' character inside a quoted strin g does not match

	4013 # - parse action will call token.strip() for each matched token, i.e., t he description body

	4014 string_data = SkipTo(SEP, ignore=quotedString)

	4015 string_data.setParseAction(tokenMap(str.strip))

	4016 ticket_expr = (integer("issue_num") + SEP

	4017 + string_data("sev") + SEP

	4018 + string_data("desc") + SEP

	4019 + integer("days_open"))

	4020

	4021 for tkt in ticket_expr.searchString(report):

	4022 print tkt.dump()

	4023 prints::

	4024 ['101', 'Critical', 'Intermittent system crash', '6']

	4025 - days_open: 6

	4026 - desc: Intermittent system crash

	4027 - issue_num: 101

	4028 - sev: Critical

	4029 ['94', 'Cosmetic', "Spelling error on Login ('log\|n')", '14']

	4030 - days_open: 14

	4031 - desc: Spelling error on Login ('log\|n')

	4032 - issue_num: 94

	4033 - sev: Cosmetic

	4034 ['79', 'Minor', 'System slow when running too many reports', '47']

	4035 - days_open: 47

	4036 - desc: System slow when running too many reports

	4037 - issue_num: 79

	4038 - sev: Minor

	4039 """

	4040 def __init__( self, other, include=False, ignore=None, failOn=None ):

	4041 super( SkipTo, self ).__init__( other )

	4042 self.ignoreExpr = ignore

	4043 self.mayReturnEmpty = True

	4044 self.mayIndexError = False

	4045 self.includeMatch = include

	4046 self.asList = False

	4047 if isinstance(failOn, basestring):

	4048 self.failOn = ParserElement._literalStringClass(failOn)

	4049 else:

	4050 self.failOn = failOn

	4051 self.errmsg = "No match found for "+_ustr(self.expr)

	4052

	4053 def parseImpl( self, instring, loc, doActions=True ):

	4054 startloc = loc

	4055 instrlen = len(instring)

	4056 expr = self.expr

	4057 expr_parse = self.expr._parse

	4058 self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is no t None else None

	4059 self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr i s not None else None

	4060

	4061 tmploc = loc

	4062 while tmploc <= instrlen:

	4063 if self_failOn_canParseNext is not None:

	4064 # break if failOn expression matches

	4065 if self_failOn_canParseNext(instring, tmploc):

	4066 break

	4067

	4068 if self_ignoreExpr_tryParse is not None:

	4069 # advance past ignore expressions

	4070 while 1:

	4071 try:

	4072 tmploc = self_ignoreExpr_tryParse(instring, tmploc)

	4073 except ParseBaseException:

	4074 break

	4075

	4076 try:

	4077 expr_parse(instring, tmploc, doActions=False, callPreParse=False )

	4078 except (ParseException, IndexError):

	4079 # no match, advance loc in string

	4080 tmploc += 1

	4081 else:

	4082 # matched skipto expr, done

	4083 break

	4084

	4085 else:

	4086 # ran off the end of the input string without matching skipto expr, fail

	4087 raise ParseException(instring, loc, self.errmsg, self)

	4088

	4089 # build up return values

	4090 loc = tmploc

	4091 skiptext = instring[startloc:loc]

	4092 skipresult = ParseResults(skiptext)

	4093

	4094 if self.includeMatch:

	4095 loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)

	4096 skipresult += mat

	4097

	4098 return loc, skipresult

	4099

	4100 class Forward(ParseElementEnhance):

	4101 """

	4102 Forward declaration of an expression to be defined later -

	4103 used for recursive grammars, such as algebraic infix notation.

	4104 When the expression is known, it is assigned to the C{Forward} variable usin g the '<<' operator.

	4105

	4106 Note: take care when assigning to C{Forward} not to overlook precedence of o perators.

	4107 Specifically, '\|' has a lower precedence than '<<', so that::

	4108 fwdExpr << a \| b \| c

	4109 will actually be evaluated as::

	4110 (fwdExpr << a) \| b \| c

	4111 thereby leaving b and c out as parseable alternatives. It is recommended th at you

	4112 explicitly group the values inserted into the C{Forward}::

	4113 fwdExpr << (a \| b \| c)

	4114 Converting to use the '<<=' operator instead will avoid this problem.

	4115

	4116 See L{ParseResults.pprint} for an example of a recursive parser created usin g

	4117 C{Forward}.

	4118 """

	4119 def __init__( self, other=None ):

	4120 super(Forward,self).__init__( other, savelist=False )

	4121

	4122 def __lshift__( self, other ):

	4123 if isinstance( other, basestring ):

	4124 other = ParserElement._literalStringClass(other)

	4125 self.expr = other

	4126 self.strRepr = None

	4127 self.mayIndexError = self.expr.mayIndexError

	4128 self.mayReturnEmpty = self.expr.mayReturnEmpty

	4129 self.setWhitespaceChars( self.expr.whiteChars )

	4130 self.skipWhitespace = self.expr.skipWhitespace

	4131 self.saveAsList = self.expr.saveAsList

	4132 self.ignoreExprs.extend(self.expr.ignoreExprs)

	4133 return self

	4134

	4135 def __ilshift__(self, other):

	4136 return self << other

	4137

	4138 def leaveWhitespace( self ):

	4139 self.skipWhitespace = False

	4140 return self

	4141

	4142 def streamline( self ):

	4143 if not self.streamlined:

	4144 self.streamlined = True

	4145 if self.expr is not None:

	4146 self.expr.streamline()

	4147 return self

	4148

	4149 def validate( self, validateTrace=[] ):

	4150 if self not in validateTrace:

	4151 tmp = validateTrace[:]+[self]

	4152 if self.expr is not None:

	4153 self.expr.validate(tmp)

	4154 self.checkRecursion([])

	4155

	4156 def __str__( self ):

	4157 if hasattr(self,"name"):

	4158 return self.name

	4159 return self.__class__.__name__ + ": ..."

	4160

	4161 # stubbed out for now - creates awful memory and perf issues

	4162 self._revertClass = self.__class__

	4163 self.__class__ = _ForwardNoRecurse

	4164 try:

	4165 if self.expr is not None:

	4166 retString = _ustr(self.expr)

	4167 else:

	4168 retString = "None"

	4169 finally:

	4170 self.__class__ = self._revertClass

	4171 return self.__class__.__name__ + ": " + retString

	4172

	4173 def copy(self):

	4174 if self.expr is not None:

	4175 return super(Forward,self).copy()

	4176 else:

	4177 ret = Forward()

	4178 ret <<= self

	4179 return ret

	4180

	4181 class _ForwardNoRecurse(Forward):

	4182 def __str__( self ):

	4183 return "..."

	4184

	4185 class TokenConverter(ParseElementEnhance):

	4186 """

	4187 Abstract subclass of C{ParseExpression}, for converting parsed results.

	4188 """

	4189 def __init__( self, expr, savelist=False ):

	4190 super(TokenConverter,self).__init__( expr )#, savelist )

	4191 self.saveAsList = False

	4192

	4193 class Combine(TokenConverter):

	4194 """

	4195 Converter to concatenate all matching tokens to a single string.

	4196 By default, the matching patterns must also be contiguous in the input strin g;

	4197 this can be disabled by specifying C{'adjacent=False'} in the constructor.

	4198

	4199 Example::

	4200 real = Word(nums) + '.' + Word(nums)

	4201 print(real.parseString('3.1416')) # -> ['3', '.', '1416']

	4202 # will also erroneously match the following

	4203 print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

	4204

	4205 real = Combine(Word(nums) + '.' + Word(nums))

	4206 print(real.parseString('3.1416')) # -> ['3.1416']

	4207 # no match when there are internal spaces

	4208 print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)

	4209 """

	4210 def __init__( self, expr, joinString="", adjacent=True ):

	4211 super(Combine,self).__init__( expr )

	4212 # suppress whitespace-stripping in contained parse expressions, but re-e nable it on the Combine itself

	4213 if adjacent:

	4214 self.leaveWhitespace()

	4215 self.adjacent = adjacent

	4216 self.skipWhitespace = True

	4217 self.joinString = joinString

	4218 self.callPreparse = True

	4219

	4220 def ignore( self, other ):

	4221 if self.adjacent:

	4222 ParserElement.ignore(self, other)

	4223 else:

	4224 super( Combine, self).ignore( other )

	4225 return self

	4226

	4227 def postParse( self, instring, loc, tokenlist ):

	4228 retToks = tokenlist.copy()

	4229 del retToks[:]

	4230 retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinStrin g)) ], modal=self.modalResults)

	4231

	4232 if self.resultsName and retToks.haskeys():

	4233 return [ retToks ]

	4234 else:

	4235 return retToks

	4236

	4237 class Group(TokenConverter):

	4238 """

	4239 Converter to return the matched tokens as a list - useful for returning toke ns of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

	4240

	4241 Example::

	4242 ident = Word(alphas)

	4243 num = Word(nums)

	4244 term = ident \| num

	4245 func = ident + Optional(delimitedList(term))

	4246 print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']

	4247

	4248 func = ident + Group(Optional(delimitedList(term)))

	4249 print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]

	4250 """

	4251 def __init__( self, expr ):

	4252 super(Group,self).__init__( expr )

	4253 self.saveAsList = True

	4254

	4255 def postParse( self, instring, loc, tokenlist ):

	4256 return [ tokenlist ]

	4257

	4258 class Dict(TokenConverter):

	4259 """

	4260 Converter to return a repetitive expression as a list, but also as a diction ary.

	4261 Each element can also be referenced using the first token in the expression as its key.

	4262 Useful for tabular report scraping when the first column can be used as a it em key.

	4263

	4264 Example::

	4265 data_word = Word(alphas)

	4266 label = data_word + FollowedBy(':')

	4267 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseA ction(' '.join))

	4268

	4269 text = "shape: SQUARE posn: upper left color: light blue texture: burlap "

	4270 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label). setParseAction(' '.join))

	4271

	4272 # print attributes as plain groups

	4273 print(OneOrMore(attr_expr).parseString(text).dump())

	4274

	4275 # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names

	4276 result = Dict(OneOrMore(Group(attr_expr))).parseString(text)

	4277 print(result.dump())

	4278

	4279 # access named fields as dict entries, or output as dict

	4280 print(result['shape'])

	4281 print(result.asDict())

	4282 prints::

	4283 ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'textur e', 'burlap']

	4284

	4285 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], [ 'texture', 'burlap']]

	4286 - color: light blue

	4287 - posn: upper left

	4288 - shape: SQUARE

	4289 - texture: burlap

	4290 SQUARE

	4291 {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shap e': 'SQUARE'}

	4292 See more examples at L{ParseResults} of accessing fields by results name.

	4293 """

	4294 def __init__( self, expr ):

	4295 super(Dict,self).__init__( expr )

	4296 self.saveAsList = True

	4297

	4298 def postParse( self, instring, loc, tokenlist ):

	4299 for i,tok in enumerate(tokenlist):

	4300 if len(tok) == 0:

	4301 continue

	4302 ikey = tok[0]

	4303 if isinstance(ikey,int):

	4304 ikey = _ustr(tok[0]).strip()

	4305 if len(tok)==1:

	4306 tokenlist[ikey] = _ParseResultsWithOffset("",i)

	4307 elif len(tok)==2 and not isinstance(tok[1],ParseResults):

	4308 tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)

	4309 else:

	4310 dictvalue = tok.copy() #ParseResults(i)

	4311 del dictvalue[0]

	4312 if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):

	4313 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)

	4314 else:

	4315 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)

	4316

	4317 if self.resultsName:

	4318 return [ tokenlist ]

	4319 else:

	4320 return tokenlist

	4321

	4322

	4323 class Suppress(TokenConverter):

	4324 """

	4325 Converter for ignoring the results of a parsed expression.

	4326

	4327 Example::

	4328 source = "a, b, c,d"

	4329 wd = Word(alphas)

	4330 wd_list1 = wd + ZeroOrMore(',' + wd)

	4331 print(wd_list1.parseString(source))

	4332

	4333 # often, delimiters that are useful during parsing are just in the

	4334 # way afterward - use Suppress to keep them out of the parsed output

	4335 wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)

	4336 print(wd_list2.parseString(source))

	4337 prints::

	4338 ['a', ',', 'b', ',', 'c', ',', 'd']

	4339 ['a', 'b', 'c', 'd']

	4340 (See also L{delimitedList}.)

	4341 """

	4342 def postParse( self, instring, loc, tokenlist ):

	4343 return []

	4344

	4345 def suppress( self ):

	4346 return self

	4347

	4348

	4349 class OnlyOnce(object):

	4350 """

	4351 Wrapper for parse actions, to ensure they are only called once.

	4352 """

	4353 def __init__(self, methodCall):

	4354 self.callable = _trim_arity(methodCall)

	4355 self.called = False

	4356 def __call__(self,s,l,t):

	4357 if not self.called:

	4358 results = self.callable(s,l,t)

	4359 self.called = True

	4360 return results

	4361 raise ParseException(s,l,"")

	4362 def reset(self):

	4363 self.called = False

	4364

	4365 def traceParseAction(f):

	4366 """

	4367 Decorator for debugging parse actions.

	4368

	4369 When the parse action is called, this decorator will print C{">> entering I{ method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})" .}

	4370 When the parse action completes, the decorator will print C{"<<"} followed b y the returned value, or any exception that the parse action raised.

	4371

	4372 Example::

	4373 wd = Word(alphas)

	4374

	4375 @traceParseAction

	4376 def remove_duplicate_chars(tokens):

	4377 return ''.join(sorted(set(''.join(tokens)))

	4378

	4379 wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)

	4380 print(wds.parseString("slkdjs sld sldd sdlf sdljf"))

	4381 prints::

	4382 >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))

	4383 <<leaving remove_duplicate_chars (ret: 'dfjkls')

	4384 ['dfjkls']

	4385 """

	4386 f = _trim_arity(f)

	4387 def z(*paArgs):

	4388 thisFunc = f.__name__

	4389 s,l,t = paArgs[-3:]

	4390 if len(paArgs)>3:

	4391 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc

	4392 sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line (l,s),l,t) )

	4393 try:

	4394 ret = f(*paArgs)

	4395 except Exception as exc:

	4396 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )

	4397 raise

	4398 sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )

	4399 return ret

	4400 try:

	4401 z.__name__ = f.__name__

	4402 except AttributeError:

	4403 pass

	4404 return z

	4405

	4406 #

	4407 # global helpers

	4408 #

	4409 def delimitedList( expr, delim=",", combine=False ):

	4410 """

	4411 Helper to define a delimited list of expressions - the delimiter defaults to ','.

	4412 By default, the list elements and delimiters can have intervening whitespace , and

	4413 comments, but this can be overridden by passing C{combine=True} in the const ructor.

	4414 If C{combine} is set to C{True}, the matching tokens are returned as a singl e token

	4415 string, with the delimiters included; otherwise, the matching tokens are ret urned

	4416 as a list of tokens, with the delimiters suppressed.

	4417

	4418 Example::

	4419 delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'c c']

	4420 delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB :CC:DD:EE") # -> ['AA:BB:CC:DD:EE']

	4421 """

	4422 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."

	4423 if combine:

	4424 return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)

	4425 else:

	4426 return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)

	4427

	4428 def countedArray( expr, intExpr=None ):

	4429 """

	4430 Helper to define a counted list of expressions.

	4431 This helper defines a pattern of the form::

	4432 integer expr expr expr...

	4433 where the leading integer tells how many expr expressions follow.

	4434 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

	4435

	4436 If C{intExpr} is specified, it should be a pyparsing expression that produce s an integer value.

	4437

	4438 Example::

	4439 countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']

	4440

	4441 # in this parser, the leading integer value is given in binary,

	4442 # '10' indicating that 2 values are in the array

	4443 binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))

	4444 countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']

	4445 """

	4446 arrayExpr = Forward()

	4447 def countFieldParseAction(s,l,t):

	4448 n = t[0]

	4449 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))

	4450 return []

	4451 if intExpr is None:

	4452 intExpr = Word(nums).setParseAction(lambda t:int(t[0]))

	4453 else:

	4454 intExpr = intExpr.copy()

	4455 intExpr.setName("arrayLen")

	4456 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)

	4457 return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')

	4458

	4459 def _flatten(L):

	4460 ret = []

	4461 for i in L:

	4462 if isinstance(i,list):

	4463 ret.extend(_flatten(i))

	4464 else:

	4465 ret.append(i)

	4466 return ret

	4467

	4468 def matchPreviousLiteral(expr):

	4469 """

	4470 Helper to define an expression that is indirectly defined from

	4471 the tokens matched in a previous expression, that is, it looks

	4472 for a 'repeat' of a previous expression. For example::

	4473 first = Word(nums)

	4474 second = matchPreviousLiteral(first)

	4475 matchExpr = first + ":" + second

	4476 will match C{"1:1"}, but not C{"1:2"}. Because this matches a

	4477 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.

	4478 If this is not desired, use C{matchPreviousExpr}.

	4479 Do I{not} use with packrat parsing enabled.

	4480 """

	4481 rep = Forward()

	4482 def copyTokenToRepeater(s,l,t):

	4483 if t:

	4484 if len(t) == 1:

	4485 rep << t[0]

	4486 else:

	4487 # flatten t tokens

	4488 tflat = _flatten(t.asList())

	4489 rep << And(Literal(tt) for tt in tflat)

	4490 else:

	4491 rep << Empty()

	4492 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)

	4493 rep.setName('(prev) ' + _ustr(expr))

	4494 return rep

	4495

	4496 def matchPreviousExpr(expr):

	4497 """

	4498 Helper to define an expression that is indirectly defined from

	4499 the tokens matched in a previous expression, that is, it looks

	4500 for a 'repeat' of a previous expression. For example::

	4501 first = Word(nums)

	4502 second = matchPreviousExpr(first)

	4503 matchExpr = first + ":" + second

	4504 will match C{"1:1"}, but not C{"1:2"}. Because this matches by

	4505 expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};

	4506 the expressions are evaluated first, and then compared, so

	4507 C{"1"} is compared with C{"10"}.

	4508 Do I{not} use with packrat parsing enabled.

	4509 """

	4510 rep = Forward()

	4511 e2 = expr.copy()

	4512 rep <<= e2

	4513 def copyTokenToRepeater(s,l,t):

	4514 matchTokens = _flatten(t.asList())

	4515 def mustMatchTheseTokens(s,l,t):

	4516 theseTokens = _flatten(t.asList())

	4517 if theseTokens != matchTokens:

	4518 raise ParseException("",0,"")

	4519 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

	4520 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)

	4521 rep.setName('(prev) ' + _ustr(expr))

	4522 return rep

	4523

	4524 def _escapeRegexRangeChars(s):

	4525 #~ escape these chars: ^-]

	4526 for c in r"\^-]":

	4527 s = s.replace(c,_bslash+c)

	4528 s = s.replace("\n",r"\n")

	4529 s = s.replace("\t",r"\t")

	4530 return _ustr(s)

	4531

	4532 def oneOf( strs, caseless=False, useRegex=True ):

	4533 """

	4534 Helper to quickly define a set of alternative Literals, and makes sure to do

	4535 longest-first testing when there is a conflict, regardless of the input orde r,

	4536 but returns a C{L{MatchFirst}} for best performance.

	4537

	4538 Parameters:

	4539 - strs - a string of space-delimited literals, or a collection of string li terals

	4540 - caseless - (default=C{False}) - treat all literals as caseless

	4541 - useRegex - (default=C{True}) - as an optimization, will generate a Regex

	4542 object; otherwise, will generate a C{MatchFirst} object (if C{caseless =True}, or

	4543 if creating a C{Regex} raises an exception)

	4544

	4545 Example::

	4546 comp_oper = oneOf("< = > <= >= !=")

	4547 var = Word(alphas)

	4548 number = Word(nums)

	4549 term = var \| number

	4550 comparison_expr = term + comp_oper + term

	4551 print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12"))

	4552 prints::

	4553 [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12 ']]

	4554 """

	4555 if caseless:

	4556 isequal = ( lambda a,b: a.upper() == b.upper() )

	4557 masks = ( lambda a,b: b.upper().startswith(a.upper()) )

	4558 parseElementClass = CaselessLiteral

	4559 else:

	4560 isequal = ( lambda a,b: a == b )

	4561 masks = ( lambda a,b: b.startswith(a) )

	4562 parseElementClass = Literal

	4563

	4564 symbols = []

	4565 if isinstance(strs,basestring):

	4566 symbols = strs.split()

	4567 elif isinstance(strs, collections.Iterable):

	4568 symbols = list(strs)

	4569 else:

	4570 warnings.warn("Invalid argument to oneOf, expected string or iterable",

	4571 SyntaxWarning, stacklevel=2)

	4572 if not symbols:

	4573 return NoMatch()

	4574

	4575 i = 0

	4576 while i < len(symbols)-1:

	4577 cur = symbols[i]

	4578 for j,other in enumerate(symbols[i+1:]):

	4579 if ( isequal(other, cur) ):

	4580 del symbols[i+j+1]

	4581 break

	4582 elif ( masks(cur, other) ):

	4583 del symbols[i+j+1]

	4584 symbols.insert(i,other)

	4585 cur = other

	4586 break

	4587 else:

	4588 i += 1

	4589

	4590 if not caseless and useRegex:

	4591 #~ print (strs,"->", "\|".join( [ _escapeRegexChars(sym) for sym in symbo ls] ))

	4592 try:

	4593 if len(symbols)==len("".join(symbols)):

	4594 return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for s ym in symbols) ).setName(' \| '.join(symbols))

	4595 else:

	4596 return Regex( "\|".join(re.escape(sym) for sym in symbols) ).setN ame(' \| '.join(symbols))

	4597 except Exception:

	4598 warnings.warn("Exception creating Regex for oneOf, building MatchFir st",

	4599 SyntaxWarning, stacklevel=2)

	4600

	4601

	4602 # last resort, just use MatchFirst

	4603 return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' \| '.j oin(symbols))

	4604

	4605 def dictOf( key, value ):

	4606 """

	4607 Helper to easily and clearly define a dictionary by specifying the respectiv e patterns

	4608 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMor e}}, and C{L{Group}} tokens

	4609 in the proper order. The key pattern can include delimiting markers or punc tuation,

	4610 as long as they are suppressed, thereby leaving the significant key text. T he value

	4611 pattern can include named results, so that the C{Dict} results can include n amed token

	4612 fields.

	4613

	4614 Example::

	4615 text = "shape: SQUARE posn: upper left color: light blue texture: burlap "

	4616 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label). setParseAction(' '.join))

	4617 print(OneOrMore(attr_expr).parseString(text).dump())

	4618

	4619 attr_label = label

	4620 attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParse Action(' '.join)

	4621

	4622 # similar to Dict, but simpler call format

	4623 result = dictOf(attr_label, attr_value).parseString(text)

	4624 print(result.dump())

	4625 print(result['shape'])

	4626 print(result.shape) # object attribute access works too

	4627 print(result.asDict())

	4628 prints::

	4629 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], [ 'texture', 'burlap']]

	4630 - color: light blue

	4631 - posn: upper left

	4632 - shape: SQUARE

	4633 - texture: burlap

	4634 SQUARE

	4635 SQUARE

	4636 {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'textur e': 'burlap'}

	4637 """

	4638 return Dict( ZeroOrMore( Group ( key + value ) ) )

	4639

	4640 def originalTextFor(expr, asString=True):

	4641 """

	4642 Helper to return the original, untokenized text for a given expression. Use ful to

	4643 restore the parsed fields of an HTML start tag into the raw tag text itself, or to

	4644 revert separate tokens with intervening whitespace back to the original matc hing

	4645 input text. By default, returns astring containing the original parsed text.

	4646

	4647 If the optional C{asString} argument is passed as C{False}, then the return value is a

	4648 C{L{ParseResults}} containing any results names that were originally matched , and a

	4649 single token containing the original matched text from the input string. So if

	4650 the expression passed to C{L{originalTextFor}} contains expressions with def ined

	4651 results names, you must set C{asString} to C{False} if you want to preserve those

	4652 results name values.

	4653

	4654 Example::

	4655 src = "this is test <b> bold <i>text</i> </b> normal text "

	4656 for tag in ("b","i"):

	4657 opener,closer = makeHTMLTags(tag)

	4658 patt = originalTextFor(opener + SkipTo(closer) + closer)

	4659 print(patt.searchString(src)[0])

	4660 prints::

	4661 ['<b> bold <i>text</i> </b>']

	4662 ['<i>text</i>']

	4663 """

	4664 locMarker = Empty().setParseAction(lambda s,loc,t: loc)

	4665 endlocMarker = locMarker.copy()

	4666 endlocMarker.callPreparse = False

	4667 matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_en d")

	4668 if asString:

	4669 extractText = lambda s,l,t: s[t._original_start:t._original_end]

	4670 else:

	4671 def extractText(s,l,t):

	4672 t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

	4673 matchExpr.setParseAction(extractText)

	4674 matchExpr.ignoreExprs = expr.ignoreExprs

	4675 return matchExpr

	4676

	4677 def ungroup(expr):

	4678 """

	4679 Helper to undo pyparsing's default grouping of And expressions, even

	4680 if all but one are non-empty.

	4681 """

	4682 return TokenConverter(expr).setParseAction(lambda t:t[0])

	4683

	4684 def locatedExpr(expr):

	4685 """

	4686 Helper to decorate a returned token with its starting and ending locations i n the input string.

	4687 This helper adds the following results names:

	4688 - locn_start = location where matched expression begins

	4689 - locn_end = location where matched expression ends

	4690 - value = the actual parsed results

	4691

	4692 Be careful if the input text contains C{<TAB>} characters, you may want to c all

	4693 C{L{ParserElement.parseWithTabs}}

	4694

	4695 Example::

	4696 wd = Word(alphas)

	4697 for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222") :

	4698 print(match)

	4699 prints::

	4700 [[0, 'ljsdf', 5]]

	4701 [[8, 'lksdjjf', 15]]

	4702 [[18, 'lkkjj', 23]]

	4703 """

	4704 locator = Empty().setParseAction(lambda s,l,t: l)

	4705 return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhi tespace()("locn_end"))

	4706

	4707

	4708 # convenience constants for positional expressions

	4709 empty = Empty().setName("empty")

	4710 lineStart = LineStart().setName("lineStart")

	4711 lineEnd = LineEnd().setName("lineEnd")

	4712 stringStart = StringStart().setName("stringStart")

	4713 stringEnd = StringEnd().setName("stringEnd")

	4714

	4715 _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])

	4716 _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:uni chr(int(t[0].lstrip(r'\0x'),16)))

	4717 _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0 ][1:],8)))

	4718 _singleChar = _escapedPunc \| _escapedHexChar \| _escapedOctChar \| Word(printables , excludeChars=r'\]', exact=1) \| Regex(r"\w", re.UNICODE)

	4719 _charRange = Group(_singleChar + Suppress("-") + _singleChar)

	4720 _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange \| _singleChar ) ).setResultsName("body") + "]"

	4721

	4722 def srange(s):

	4723 r"""

	4724 Helper to easily define string ranges for use in Word construction. Borrows

	4725 syntax from regexp '[]' string range definitions::

	4726 srange("[0-9]") -> "0123456789"

	4727 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"

	4728 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

	4729 The input string must be enclosed in []'s, and the returned string is the ex panded

	4730 character set joined into a single string.

	4731 The values enclosed in the []'s may be:

	4732 - a single character

	4733 - an escaped character with a leading backslash (such as C{\-} or C{\]})

	4734 - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'! '} character)

	4735 (C{\0x##} is also supported for backwards compatibility)

	4736 - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{ '!'} character)

	4737 - a range of any of the above, separated by a dash (C{'a-z'}, etc.)

	4738 - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)

	4739 """

	4740 _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unich r(c) for c in range(ord(p[0]),ord(p[1])+1))

	4741 try:

	4742 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s) .body)

	4743 except Exception:

	4744 return ""

	4745

	4746 def matchOnlyAtCol(n):

	4747 """

	4748 Helper method for defining parse actions that require matching at a specific

	4749 column in the input text.

	4750 """

	4751 def verifyCol(strg,locn,toks):

	4752 if col(locn,strg) != n:

	4753 raise ParseException(strg,locn,"matched token not at column %d" % n)

	4754 return verifyCol

	4755

	4756 def replaceWith(replStr):

	4757 """

	4758 Helper method for common parse actions that simply return a literal value. Especially

	4759 useful when used with C{L{transformString<ParserElement.transformString>}()} .

	4760

	4761 Example::

	4762 num = Word(nums).setParseAction(lambda toks: int(toks[0]))

	4763 na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))

	4764 term = na \| num

	4765

	4766 OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]

	4767 """

	4768 return lambda s,l,t: [replStr]

	4769

	4770 def removeQuotes(s,l,t):

	4771 """

	4772 Helper parse action for removing quotation marks from parsed quoted strings.

	4773

	4774 Example::

	4775 # by default, quotation marks are included in parsed results

	4776 quotedString.parseString("'Now is the Winter of our Discontent'") # -> [ "'Now is the Winter of our Discontent'"]

	4777

	4778 # use removeQuotes to strip quotation marks from parsed results

	4779 quotedString.setParseAction(removeQuotes)

	4780 quotedString.parseString("'Now is the Winter of our Discontent'") # -> [ "Now is the Winter of our Discontent"]

	4781 """

	4782 return t[0][1:-1]

	4783

	4784 def tokenMap(func, *args):

	4785 """

	4786 Helper to define a parse action by mapping a function to all elements of a P arseResults list.If any additional

	4787 args are passed, they are forwarded to the given function as additional argu ments after

	4788 the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the

	4789 parsed data to an integer using base 16.

	4790

	4791 Example (compare the last to example in L{ParserElement.transformString}::

	4792 hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))

	4793 hex_ints.runTests('''

	4794 00 11 22 aa FF 0a 0d 1a

	4795 ''')

	4796

	4797 upperword = Word(alphas).setParseAction(tokenMap(str.upper))

	4798 OneOrMore(upperword).runTests('''

	4799 my kingdom for a horse

	4800 ''')

	4801

	4802 wd = Word(alphas).setParseAction(tokenMap(str.title))

	4803 OneOrMore(wd).setParseAction(' '.join).runTests('''

	4804 now is the winter of our discontent made glorious summer by this sun of york

	4805 ''')

	4806 prints::

	4807 00 11 22 aa FF 0a 0d 1a

	4808 [0, 17, 34, 170, 255, 10, 13, 26]

	4809

	4810 my kingdom for a horse

	4811 ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

	4812

	4813 now is the winter of our discontent made glorious summer by this sun of york

	4814 ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun O f York']

	4815 """

	4816 def pa(s,l,t):

	4817 return [func(tokn, *args) for tokn in t]

	4818

	4819 try:

	4820 func_name = getattr(func, '__name__',

	4821 getattr(func, '__class__').__name__)

	4822 except Exception:

	4823 func_name = str(func)

	4824 pa.__name__ = func_name

	4825

	4826 return pa

	4827

	4828 upcaseTokens = tokenMap(lambda t: _ustr(t).upper())

	4829 """(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

	4830

	4831 downcaseTokens = tokenMap(lambda t: _ustr(t).lower())

	4832 """(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""

	4833

	4834 def _makeTags(tagStr, xml):

	4835 """Internal helper to construct opening and closing tag expressions, given a tag name"""

	4836 if isinstance(tagStr,basestring):

	4837 resname = tagStr

	4838 tagStr = Keyword(tagStr, caseless=not xml)

	4839 else:

	4840 resname = tagStr.name

	4841

	4842 tagAttrName = Word(alphas,alphanums+"_-:")

	4843 if (xml):

	4844 tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )

	4845 openTag = Suppress("<") + tagStr("tag") + \

	4846 Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValu e ))) + \

	4847 Optional("/",default=[False]).setResultsName("empty").setParseAc tion(lambda s,l,t:t[0]=='/') + Suppress(">")

	4848 else:

	4849 printablesLessRAbrack = "".join(c for c in printables if c not in ">")

	4850 tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) \| Word (printablesLessRAbrack)

	4851 openTag = Suppress("<") + tagStr("tag") + \

	4852 Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens ) + \

	4853 Optional( Suppress("=") + tagAttrValue ) ))) + \

	4854 Optional("/",default=[False]).setResultsName("empty").setParseAc tion(lambda s,l,t:t[0]=='/') + Suppress(">")

	4855 closeTag = Combine(_L("</") + tagStr + ">")

	4856

	4857 openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").ti tle().split())).setName("<%s>" % resname)

	4858 closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").ti tle().split())).setName("</%s>" % resname)

	4859 openTag.tag = resname

	4860 closeTag.tag = resname

	4861 return openTag, closeTag

	4862

	4863 def makeHTMLTags(tagStr):

	4864 """

	4865 Helper to construct opening and closing tag expressions for HTML, given a ta g name. Matches

	4866 tags in either upper or lower case, attributes with namespaces and with quot ed or unquoted values.

	4867

	4868 Example::

	4869 text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">p yparsing</a> wiki page</td>'

	4870 # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple

	4871 a,a_end = makeHTMLTags("A")

	4872 link_expr = a + SkipTo(a_end)("link_text") + a_end

	4873

	4874 for link in link_expr.searchString(text):

	4875 # attributes in the <A> tag (like "href" shown here) are also access ible as named results

	4876 print(link.link_text, '->', link.href)

	4877 prints::

	4878 pyparsing -> http://pyparsing.wikispaces.com

	4879 """

	4880 return _makeTags( tagStr, False )

	4881

	4882 def makeXMLTags(tagStr):

	4883 """

	4884 Helper to construct opening and closing tag expressions for XML, given a tag name. Matches

	4885 tags only in the given upper/lower case.

	4886

	4887 Example: similar to L{makeHTMLTags}

	4888 """

	4889 return _makeTags( tagStr, True )

	4890

	4891 def withAttribute(args,*attrDict):

	4892 """

	4893 Helper to create a validating parse action to be used with start tags create d

	4894 with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualif y a starting tag

	4895 with a required attribute value, to avoid false matches on common tags such as

	4896 C{<TD>} or C{<DIV>}.

	4897

	4898 Call C{withAttribute} with a series of attribute names and values. Specify t he list

	4899 of filter attributes names and values as:

	4900 - keyword arguments, as in C{(align="right")}, or

	4901 - as an explicit dict with C{**} operator, when an attribute name is also a Python

	4902 reserved word, as in C{**{"class":"Customer", "align":"right"}}

	4903 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:ali gn","right") )

	4904 For attribute names with a namespace prefix, you must use the second form. Attribute

	4905 names are matched insensitive to upper/lower case.

	4906

	4907 If just testing for C{class} (with or without a namespace), use C{L{withClas s}}.

	4908

	4909 To verify that the attribute exists, but without specifying a value, pass

	4910 C{withAttribute.ANY_VALUE} as the value.

	4911

	4912 Example::

	4913 html = '''

	4914 <div>

	4915 Some text

	4916 <div type="grid">1 4 0 1 0</div>

	4917 <div type="graph">1,3 2,3 1,1</div>

	4918 <div>this has no type</div>

	4919 </div>

	4920

	4921 '''

	4922 div,div_end = makeHTMLTags("div")

	4923

	4924 # only match div tag having a type attribute with value "grid"

	4925 div_grid = div().setParseAction(withAttribute(type="grid"))

	4926 grid_expr = div_grid + SkipTo(div \| div_end)("body")

	4927 for grid_header in grid_expr.searchString(html):

	4928 print(grid_header.body)

	4929

	4930 # construct a match with any div tag having a type attribute, regardless of the value

	4931 div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY _VALUE))

	4932 div_expr = div_any_type + SkipTo(div \| div_end)("body")

	4933 for div_header in div_expr.searchString(html):

	4934 print(div_header.body)

	4935 prints::

	4936 1 4 0 1 0

	4937

	4938 1 4 0 1 0

	4939 1,3 2,3 1,1

	4940 """

	4941 if args:

	4942 attrs = args[:]

	4943 else:

	4944 attrs = attrDict.items()

	4945 attrs = [(k,v) for k,v in attrs]

	4946 def pa(s,l,tokens):

	4947 for attrName,attrValue in attrs:

	4948 if attrName not in tokens:

	4949 raise ParseException(s,l,"no matching attribute " + attrName)

	4950 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attr Value:

	4951 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %

	4952 (attrName, tokens[attrName], attrVal ue))

	4953 return pa

	4954 withAttribute.ANY_VALUE = object()

	4955

	4956 def withClass(classname, namespace=''):

	4957 """

	4958 Simplified version of C{L{withAttribute}} when matching on a div class - mad e

	4959 difficult because C{class} is a reserved word in Python.

	4960

	4961 Example::

	4962 html = '''

	4963 <div>

	4964 Some text

	4965 <div class="grid">1 4 0 1 0</div>

	4966 <div class="graph">1,3 2,3 1,1</div>

	4967 <div>this <div> has no class</div>

	4968 </div>

	4969

	4970 '''

	4971 div,div_end = makeHTMLTags("div")

	4972 div_grid = div().setParseAction(withClass("grid"))

	4973

	4974 grid_expr = div_grid + SkipTo(div \| div_end)("body")

	4975 for grid_header in grid_expr.searchString(html):

	4976 print(grid_header.body)

	4977

	4978 div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))

	4979 div_expr = div_any_type + SkipTo(div \| div_end)("body")

	4980 for div_header in div_expr.searchString(html):

	4981 print(div_header.body)

	4982 prints::

	4983 1 4 0 1 0

	4984

	4985 1 4 0 1 0

	4986 1,3 2,3 1,1

	4987 """

	4988 classattr = "%s:class" % namespace if namespace else "class"

	4989 return withAttribute(**{classattr : classname})

	4990

	4991 opAssoc = _Constants()

	4992 opAssoc.LEFT = object()

	4993 opAssoc.RIGHT = object()

	4994

	4995 def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):

	4996 """

	4997 Helper method for constructing grammars of expressions made up of

	4998 operators working in a precedence hierarchy. Operators may be unary or

	4999 binary, left- or right-associative. Parse actions can also be attached

	5000 to operator expressions. The generated parser will also recognize the use

	5001 of parentheses to override operator precedences (see example below).

	5002

	5003 Note: if you define a deep operator list, you may see performance issues

	5004 when using infixNotation. See L{ParserElement.enablePackrat} for a

	5005 mechanism to potentially improve your parser performance.

	5006

	5007 Parameters:

	5008 - baseExpr - expression representing the most basic element for the nested

	5009 - opList - list of tuples, one for each operator precedence level in the

	5010 expression grammar; each tuple is of the form

	5011 (opExpr, numTerms, rightLeftAssoc, parseAction), where:

	5012 - opExpr is the pyparsing expression for the operator;

	5013 may also be a string, which will be converted to a Literal;

	5014 if numTerms is 3, opExpr is a tuple of two expressions, for the

	5015 two operators separating the 3 terms

	5016 - numTerms is the number of terms for this operator (must

	5017 be 1, 2, or 3)

	5018 - rightLeftAssoc is the indicator whether the operator is

	5019 right or left associative, using the pyparsing-defined

	5020 constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.

	5021 - parseAction is the parse action to be associated with

	5022 expressions matching this operator expression (the

	5023 parse action tuple member may be omitted)

	5024 - lpar - expression for matching left-parentheses (default=C{Suppress('(')} )

	5025 - rpar - expression for matching right-parentheses (default=C{Suppress(')') })

	5026

	5027 Example::

	5028 # simple example of four-function arithmetic with ints and variable name s

	5029 integer = pyparsing_common.signed_integer

	5030 varname = pyparsing_common.identifier

	5031

	5032 arith_expr = infixNotation(integer \| varname,

	5033 [

	5034 ('-', 1, opAssoc.RIGHT),

	5035 (oneOf('* /'), 2, opAssoc.LEFT),

	5036 (oneOf('+ -'), 2, opAssoc.LEFT),

	5037 ])

	5038

	5039 arith_expr.runTests('''

	5040 5+3*6

	5041 (5+3)*6

	5042 -2--11

	5043 ''', fullDump=False)

	5044 prints::

	5045 5+3*6

	5046 [[5, '+', [3, '*', 6]]]

	5047

	5048 (5+3)*6

	5049 [[[5, '+', 3], '*', 6]]

	5050

	5051 -2--11

	5052 [[['-', 2], '-', ['-', 11]]]

	5053 """

	5054 ret = Forward()

	5055 lastExpr = baseExpr \| ( lpar + ret + rpar )

	5056 for i,operDef in enumerate(opList):

	5057 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]

	5058 termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr

	5059 if arity == 3:

	5060 if opExpr is None or len(opExpr) != 2:

	5061 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")

	5062 opExpr1, opExpr2 = opExpr

	5063 thisExpr = Forward().setName(termName)

	5064 if rightLeftAssoc == opAssoc.LEFT:

	5065 if arity == 1:

	5066 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + On eOrMore( opExpr ) )

	5067 elif arity == 2:

	5068 if opExpr is not None:

	5069 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group ( lastExpr + OneOrMore( opExpr + lastExpr ) )

	5070 else:

	5071 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )

	5072 elif arity == 3:

	5073 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \

	5074 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + las tExpr )

	5075 else:

	5076 raise ValueError("operator must be unary (1), binary (2), or ter nary (3)")

	5077 elif rightLeftAssoc == opAssoc.RIGHT:

	5078 if arity == 1:

	5079 # try to avoid LR with this extra test

	5080 if not isinstance(opExpr, Optional):

	5081 opExpr = Optional(opExpr)

	5082 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )

	5083 elif arity == 2:

	5084 if opExpr is not None:

	5085 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group ( lastExpr + OneOrMore( opExpr + thisExpr ) )

	5086 else:

	5087 matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExp r + OneOrMore( thisExpr ) )

	5088 elif arity == 3:

	5089 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \

	5090 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thi sExpr )

	5091 else:

	5092 raise ValueError("operator must be unary (1), binary (2), or ter nary (3)")

	5093 else:

	5094 raise ValueError("operator must indicate right or left associativity ")

	5095 if pa:

	5096 matchExpr.setParseAction( pa )

	5097 thisExpr <<= ( matchExpr.setName(termName) \| lastExpr )

	5098 lastExpr = thisExpr

	5099 ret <<= lastExpr

	5100 return ret

	5101

	5102 operatorPrecedence = infixNotation

	5103 """(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

	5104

	5105 dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]\|(?:"")\|(?:\\(?:[^x]\|x[0-9a-fA-F ]+)))*')+'"').setName("string enclosed in double quotes")

	5106 sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]\|(?:'')\|(?:\\(?:[^x]\|x[0-9a-fA-F ]+)))*")+"'").setName("string enclosed in single quotes")

	5107 quotedString = Combine(Regex(r'"(?:[^"\n\r\\]\|(?:"")\|(?:\\(?:[^x]\|x[0-9a-fA-F]+) ))*')+'"'\|

	5108 Regex(r"'(?:[^'\n\r\\]\|(?:'')\|(?:\\(?:[^x]\|x[0-9a-fA-F]+) ))*")+"'").setName("quotedString using single or double quotes")

	5109 unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string l iteral")

	5110

	5111 def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.cop y()):

	5112 """

	5113 Helper method for defining nested lists enclosed in opening and closing

	5114 delimiters ("(" and ")" are the default).

	5115

	5116 Parameters:

	5117 - opener - opening character for a nested list (default=C{"("}); can also b e a pyparsing expression

	5118 - closer - closing character for a nested list (default=C{")"}); can also b e a pyparsing expression

	5119 - content - expression for items within the nested lists (default=C{None})

	5120 - ignoreExpr - expression for ignoring opening and closing delimiters (defa ult=C{quotedString})

	5121

	5122 If an expression is not provided for the content argument, the nested

	5123 expression will capture all whitespace-delimited content between delimiters

	5124 as a list of separate values.

	5125

	5126 Use the C{ignoreExpr} argument to define expressions that may contain

	5127 opening or closing characters that should not be treated as opening

	5128 or closing characters for nesting, such as quotedString or a comment

	5129 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirs t}}.

	5130 The default is L{quotedString}, but if no expressions are to be ignored,

	5131 then pass C{None} for this argument.

	5132

	5133 Example::

	5134 data_type = oneOf("void int short long char float double")

	5135 decl_data_type = Combine(data_type + Optional(Word('*')))

	5136 ident = Word(alphas+'_', alphanums+'_')

	5137 number = pyparsing_common.number

	5138 arg = Group(decl_data_type + ident)

	5139 LPAR,RPAR = map(Suppress, "()")

	5140

	5141 code_body = nestedExpr('{', '}', ignoreExpr=(quotedString \| cStyleCommen t))

	5142

	5143 c_function = (decl_data_type("type")

	5144 + ident("name")

	5145 + LPAR + Optional(delimitedList(arg), [])("args") + RPAR

	5146 + code_body("body"))

	5147 c_function.ignore(cStyleComment)

	5148

	5149 source_code = '''

	5150 int is_odd(int x) {

	5151 return (x%2);

	5152 }

	5153

	5154 int dec_to_hex(char hchar) {

	5155 if (hchar >= '0' && hchar <= '9') {

	5156 return (ord(hchar)-ord('0'));

	5157 } else {

	5158 return (10+ord(hchar)-ord('A'));

	5159 }

	5160 }

	5161 '''

	5162 for func in c_function.searchString(source_code):

	5163 print("%(name)s (%(type)s) args: %(args)s" % func)

	5164

	5165 prints::

	5166 is_odd (int) args: [['int', 'x']]

	5167 dec_to_hex (int) args: [['char', 'hchar']]

	5168 """

	5169 if opener == closer:

	5170 raise ValueError("opening and closing strings cannot be the same")

	5171 if content is None:

	5172 if isinstance(opener,basestring) and isinstance(closer,basestring):

	5173 if len(opener) == 1 and len(closer)==1:

	5174 if ignoreExpr is not None:

	5175 content = (Combine(OneOrMore(~ignoreExpr +

	5176 CharsNotIn(opener+closer+ParserElement.DEFAU LT_WHITE_CHARS,exact=1))

	5177 ).setParseAction(lambda t:t[0].strip()))

	5178 else:

	5179 content = (empty.copy()+CharsNotIn(opener+closer+ParserEleme nt.DEFAULT_WHITE_CHARS

	5180 ).setParseAction(lambda t:t[0].strip()))

	5181 else:

	5182 if ignoreExpr is not None:

	5183 content = (Combine(OneOrMore(~ignoreExpr +

	5184 ~Literal(opener) + ~Literal(closer) +

	5185 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS ,exact=1))

	5186 ).setParseAction(lambda t:t[0].strip()))

	5187 else:

	5188 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(clo ser) +

	5189 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS ,exact=1))

	5190 ).setParseAction(lambda t:t[0].strip()))

	5191 else:

	5192 raise ValueError("opening and closing arguments must be strings if n o content expression is given")

	5193 ret = Forward()

	5194 if ignoreExpr is not None:

	5195 ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr \| ret \| content ) + Suppress(closer) )

	5196 else:

	5197 ret <<= Group( Suppress(opener) + ZeroOrMore( ret \| content ) + Suppres s(closer) )

	5198 ret.setName('nested %s%s expression' % (opener,closer))

	5199 return ret

	5200

	5201 def indentedBlock(blockStatementExpr, indentStack, indent=True):

	5202 """

	5203 Helper method for defining space-delimited indentation blocks, such as

	5204 those used to define block statements in Python source code.

	5205

	5206 Parameters:

	5207 - blockStatementExpr - expression defining syntax of statement that

	5208 is repeated within the indented block

	5209 - indentStack - list created by caller to manage indentation stack

	5210 (multiple statementWithIndentedBlock expressions within a single gra mmar

	5211 should share a common indentStack)

	5212 - indent - boolean indicating whether block must be indented beyond the

	5213 the current level; set to False for block of left-most statements

	5214 (default=C{True})

	5215

	5216 A valid block must contain at least one C{blockStatement}.

	5217

	5218 Example::

	5219 data = '''

	5220 def A(z):

	5221 A1

	5222 B = 100

	5223 G = A2

	5224 A2

	5225 A3

	5226 B

	5227 def BB(a,b,c):

	5228 BB1

	5229 def BBA():

	5230 bba1

	5231 bba2

	5232 bba3

	5233 C

	5234 D

	5235 def spam(x,y):

	5236 def eggs(z):

	5237 pass

	5238 '''

	5239

	5240

	5241 indentStack = [1]

	5242 stmt = Forward()

	5243

	5244 identifier = Word(alphas, alphanums)

	5245 funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(id entifier) ) + ")" ) + ":")

	5246 func_body = indentedBlock(stmt, indentStack)

	5247 funcDef = Group( funcDecl + func_body )

	5248

	5249 rvalue = Forward()

	5250 funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ") ")

	5251 rvalue << (funcCall \| identifier \| Word(nums))

	5252 assignment = Group(identifier + "=" + rvalue)

	5253 stmt << ( funcDef \| assignment \| identifier )

	5254

	5255 module_body = OneOrMore(stmt)

	5256

	5257 parseTree = module_body.parseString(data)

	5258 parseTree.pprint()

	5259 prints::

	5260 [['def',

	5261 'A',

	5262 ['(', 'z', ')'],

	5263 ':',

	5264 [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],

	5265 'B',

	5266 ['def',

	5267 'BB',

	5268 ['(', 'a', 'b', 'c', ')'],

	5269 ':',

	5270 [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3 ']]]]]],

	5271 'C',

	5272 'D',

	5273 ['def',

	5274 'spam',

	5275 ['(', 'x', 'y', ')'],

	5276 ':',

	5277 [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]

	5278 """

	5279 def checkPeerIndent(s,l,t):

	5280 if l >= len(s): return

	5281 curCol = col(l,s)

	5282 if curCol != indentStack[-1]:

	5283 if curCol > indentStack[-1]:

	5284 raise ParseFatalException(s,l,"illegal nesting")

	5285 raise ParseException(s,l,"not a peer entry")

	5286

	5287 def checkSubIndent(s,l,t):

	5288 curCol = col(l,s)

	5289 if curCol > indentStack[-1]:

	5290 indentStack.append( curCol )

	5291 else:

	5292 raise ParseException(s,l,"not a subentry")

	5293

	5294 def checkUnindent(s,l,t):

	5295 if l >= len(s): return

	5296 curCol = col(l,s)

	5297 if not(indentStack and curCol < indentStack[-1] and curCol <= indentStac k[-2]):

	5298 raise ParseException(s,l,"not an unindent")

	5299 indentStack.pop()

	5300

	5301 NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())

	5302 INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT' )

	5303 PEER = Empty().setParseAction(checkPeerIndent).setName('')

	5304 UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

	5305 if indent:

	5306 smExpr = Group( Optional(NL) +

	5307 #~ FollowedBy(blockStatementExpr) +

	5308 INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)

	5309 else:

	5310 smExpr = Group( Optional(NL) +

	5311 (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )

	5312 blockStatementExpr.ignore(_bslash + LineEnd())

	5313 return smExpr.setName('indented block')

	5314

	5315 alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")

	5316 punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

	5317

	5318 anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any t ag'))

	5319 _htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))

	5320 commonHTMLEntity = Regex('&(?P<entity>' + '\|'.join(_htmlEntityMap.keys()) +");") .setName("common HTML entity")

	5321 def replaceHTMLEntity(t):

	5322 """Helper parser action to replace common HTML entities with their special c haracters"""

	5323 return _htmlEntityMap.get(t.entity)

	5324

	5325 # it's easy to get these comment structures wrong - they're very common, so may as well make them available

	5326 cStyleComment = Combine(Regex(r"/\(?:[^]\|\(?!/))") + '*/').setName("C style comment")

	5327 "Comment of the form C{/* ... */}"

	5328

	5329 htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")

	5330 "Comment of the form C{<!-- ... -->}"

	5331

	5332 restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")

	5333 dblSlashComment = Regex(r"//(?:\\\n\|[^\n])*").setName("// comment")

	5334 "Comment of the form C{// ... (to end of line)}"

	5335

	5336 cppStyleComment = Combine(Regex(r"/\(?:[^]\|\(?!/))") + '*/'\| dblSlashComment ).setName("C++ style comment")

	5337 "Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

	5338

	5339 javaStyleComment = cppStyleComment

	5340 "Same as C{L{cppStyleComment}}"

	5341

	5342 pythonStyleComment = Regex(r"#.*").setName("Python style comment")

	5343 "Comment of the form C{# ... (to end of line)}"

	5344

	5345 _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +

	5346 Optional( Word(" \t") +

	5347 ~Literal(",") + ~LineEnd() ) ) ).str eamline().setName("commaItem")

	5348 commaSeparatedList = delimitedList( Optional( quotedString.copy() \| _commasepite m, default="") ).setName("commaSeparatedList")

	5349 """(Deprecated) Predefined expression of 1 or more printable words or quoted str ings, separated by commas.

	5350 This expression is deprecated in favor of L{pyparsing_common.comma_separated_ list}."""

	5351

	5352 # some other useful expressions - using lower-case class name since we are reall y using this as a namespace

	5353 class pyparsing_common:

	5354 """

	5355 Here are some common low-level expressions that may be useful in jump-starti ng parser development:

	5356 - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notatio n<sci_real>})

	5357 - common L{programming identifiers<identifier>}

	5358 - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv 6_address>})

	5359 - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}

	5360 - L{UUID<uuid>}

	5361 - L{comma-separated list<comma_separated_list>}

	5362 Parse actions:

	5363 - C{L{convertToInteger}}

	5364 - C{L{convertToFloat}}

	5365 - C{L{convertToDate}}

	5366 - C{L{convertToDatetime}}

	5367 - C{L{stripHTMLTags}}

	5368 - C{L{upcaseTokens}}

	5369 - C{L{downcaseTokens}}

	5370

	5371 Example::

	5372 pyparsing_common.number.runTests('''

	5373 # any int or real number, returned as the appropriate type

	5374 100

	5375 -100

	5376 +100

	5377 3.14159

	5378 6.02e23

	5379 1e-12

	5380 ''')

	5381

	5382 pyparsing_common.fnumber.runTests('''

	5383 # any int or real number, returned as float

	5384 100

	5385 -100

	5386 +100

	5387 3.14159

	5388 6.02e23

	5389 1e-12

	5390 ''')

	5391

	5392 pyparsing_common.hex_integer.runTests('''

	5393 # hex numbers

	5394 100

	5395 FF

	5396 ''')

	5397

	5398 pyparsing_common.fraction.runTests('''

	5399 # fractions

	5400 1/2

	5401 -3/4

	5402 ''')

	5403

	5404 pyparsing_common.mixed_integer.runTests('''

	5405 # mixed fractions

	5406 1

	5407 1/2

	5408 -3/4

	5409 1-3/4

	5410 ''')

	5411

	5412 import uuid

	5413 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))

	5414 pyparsing_common.uuid.runTests('''

	5415 # uuid

	5416 12345678-1234-5678-1234-567812345678

	5417 ''')

	5418 prints::

	5419 # any int or real number, returned as the appropriate type

	5420 100

	5421 [100]

	5422

	5423 -100

	5424 [-100]

	5425

	5426 +100

	5427 [100]

	5428

	5429 3.14159

	5430 [3.14159]

	5431

	5432 6.02e23

	5433 [6.02e+23]

	5434

	5435 1e-12

	5436 [1e-12]

	5437

	5438 # any int or real number, returned as float

	5439 100

	5440 [100.0]

	5441

	5442 -100

	5443 [-100.0]

	5444

	5445 +100

	5446 [100.0]

	5447

	5448 3.14159

	5449 [3.14159]

	5450

	5451 6.02e23

	5452 [6.02e+23]

	5453

	5454 1e-12

	5455 [1e-12]

	5456

	5457 # hex numbers

	5458 100

	5459 [256]

	5460

	5461 FF

	5462 [255]

	5463

	5464 # fractions

	5465 1/2

	5466 [0.5]

	5467

	5468 -3/4

	5469 [-0.75]

	5470

	5471 # mixed fractions

	5472 1

	5473 [1]

	5474

	5475 1/2

	5476 [0.5]

	5477

	5478 -3/4

	5479 [-0.75]

	5480

	5481 1-3/4

	5482 [1.75]

	5483

	5484 # uuid

	5485 12345678-1234-5678-1234-567812345678

	5486 [UUID('12345678-1234-5678-1234-567812345678')]

	5487 """

	5488

	5489 convertToInteger = tokenMap(int)

	5490 """

	5491 Parse action for converting parsed integers to Python int

	5492 """

	5493

	5494 convertToFloat = tokenMap(float)

	5495 """

	5496 Parse action for converting parsed numbers to Python float

	5497 """

	5498

	5499 integer = Word(nums).setName("integer").setParseAction(convertToInteger)

	5500 """expression that parses an unsigned integer, returns an int"""

	5501

	5502 hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(i nt,16))

	5503 """expression that parses a hexadecimal integer, returns an int"""

	5504

	5505 signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction (convertToInteger)

	5506 """expression that parses an integer with optional leading sign, returns an int"""

	5507

	5508 fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_i nteger().setParseAction(convertToFloat)).setName("fraction")

	5509 """fractional expression of an integer divided by an integer, returns a floa t"""

	5510 fraction.addParseAction(lambda t: t[0]/t[-1])

	5511

	5512 mixed_integer = (fraction \| signed_integer + Optional(Optional('-').suppress () + fraction)).setName("fraction or mixed integer-fraction")

	5513 """mixed integer of the form 'integer - fraction', with optional leading int eger, returns float"""

	5514 mixed_integer.addParseAction(sum)

	5515

	5516 real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convert ToFloat)

	5517 """expression that parses a floating point number and returns a float"""

	5518

	5519 sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+\|\.\d*([eE][+-]?\d+)?)').setName("re al number with scientific notation").setParseAction(convertToFloat)

	5520 """expression that parses a floating point number with optional scientific n otation and returns a float"""

	5521

	5522 # streamlining this expression makes the docs nicer-looking

	5523 number = (sci_real \| real \| signed_integer).streamline()

	5524 """any numeric expression, returns the corresponding Python type"""

	5525

	5526 fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setPars eAction(convertToFloat)

	5527 """any int or real number, returned as float"""

	5528

	5529 identifier = Word(alphas+'_', alphanums+'_').setName("identifier")

	5530 """typical code identifier (leading alpha or '_', followed by 0 or more alph as, nums, or '_')"""

	5531

	5532 ipv4_address = Regex(r'(25[0-5]\|2[0-4][0-9]\|1?[0-9]{1,2})(\.(25[0-5]\|2[0-4][ 0-9]\|1?[0-9]{1,2})){3}').setName("IPv4 address")

	5533 "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

	5534

	5535 _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")

	5536 _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")

	5537 _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)(0,6)) + ":: " + Optional(_ipv6_part + (':' + _ipv6_part)(0,6))).setName("short IPv6 address ")

	5538 _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_co mmon._ipv6_part.matches(tt)) < 8)

	5539 _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address ")

	5540 ipv6_address = Combine((_full_ipv6_address \| _mixed_ipv6_address \| _short_ip v6_address).setName("IPv6 address")).setName("IPv6 address")

	5541 "IPv6 address (long, short, or mixed form)"

	5542

	5543 mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2} ){4}').setName("MAC address")

	5544 "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

	5545

	5546 @staticmethod

	5547 def convertToDate(fmt="%Y-%m-%d"):

	5548 """

	5549 Helper to create a parse action for converting parsed date string to Pyt hon datetime.date

	5550

	5551 Params -

	5552 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"} )

	5553

	5554 Example::

	5555 date_expr = pyparsing_common.iso8601_date.copy()

	5556 date_expr.setParseAction(pyparsing_common.convertToDate())

	5557 print(date_expr.parseString("1999-12-31"))

	5558 prints::

	5559 [datetime.date(1999, 12, 31)]

	5560 """

	5561 def cvt_fn(s,l,t):

	5562 try:

	5563 return datetime.strptime(t[0], fmt).date()

	5564 except ValueError as ve:

	5565 raise ParseException(s, l, str(ve))

	5566 return cvt_fn

	5567

	5568 @staticmethod

	5569 def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):

	5570 """

	5571 Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

	5572

	5573 Params -

	5574 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT% H:%M:%S.%f"})

	5575

	5576 Example::

	5577 dt_expr = pyparsing_common.iso8601_datetime.copy()

	5578 dt_expr.setParseAction(pyparsing_common.convertToDatetime())

	5579 print(dt_expr.parseString("1999-12-31T23:59:59.999"))

	5580 prints::

	5581 [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]

	5582 """

	5583 def cvt_fn(s,l,t):

	5584 try:

	5585 return datetime.strptime(t[0], fmt)

	5586 except ValueError as ve:

	5587 raise ParseException(s, l, str(ve))

	5588 return cvt_fn

	5589

	5590 iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))? )?').setName("ISO8601 date")

	5591 "ISO8601 date (C{yyyy-mm-dd})"

	5592

	5593 iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ] (?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z\|[+-]\d\d:?\ d\d)?').setName("ISO8601 datetime")

	5594 "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z\|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

	5595

	5596 uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName( "UUID")

	5597 "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

	5598

	5599 _html_stripper = anyOpenTag.suppress() \| anyCloseTag.suppress()

	5600 @staticmethod

	5601 def stripHTMLTags(s, l, tokens):

	5602 """

	5603 Parse action to remove HTML tags from web page HTML source

	5604

	5605 Example::

	5606 # strip HTML links from normal text

	5607 text = '<td>More info at the <a href="http://pyparsing.wikispaces.co m">pyparsing</a> wiki page</td>'

	5608 td,td_end = makeHTMLTags("TD")

	5609 table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.str ipHTMLTags)("body") + td_end

	5610

	5611 print(table_text.parseString(text).body) # -> 'More info at the pypa rsing wiki page'

	5612 """

	5613 return pyparsing_common._html_stripper.transformString(tokens[0])

	5614

	5615 _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printabl es, excludeChars=',')

	5616 + Optional( White(" \t") ) ) ).streamlin e().setName("commaItem")

	5617 comma_separated_list = delimitedList( Optional( quotedString.copy() \| _comma sepitem, default="") ).setName("comma separated list")

	5618 """Predefined expression of 1 or more printable words or quoted strings, sep arated by commas."""

	5619

	5620 upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))

	5621 """Parse action to convert tokens to upper case."""

	5622

	5623 downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))

	5624 """Parse action to convert tokens to lower case."""

	5625

	5626

	5627 if __name__ == "__main__":

	5628

	5629 selectToken = CaselessLiteral("select")

	5630 fromToken = CaselessLiteral("from")

	5631

	5632 ident = Word(alphas, alphanums + "_$")

	5633

	5634 columnName = delimitedList(ident, ".", combine=True).setParseAction(upca seTokens)

	5635 columnNameList = Group(delimitedList(columnName)).setName("columns")

	5636 columnSpec = ('*' \| columnNameList)

	5637

	5638 tableName = delimitedList(ident, ".", combine=True).setParseAction(upca seTokens)

	5639 tableNameList = Group(delimitedList(tableName)).setName("tables")

	5640

	5641 simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

	5642

	5643 # demo runTests method, including embedded comments in test string

	5644 simpleSQL.runTests("""

	5645 # '*' as column list and dotted table name

	5646 select * from SYS.XYZZY

	5647

	5648 # caseless match on "SELECT", and casts back to "select"

	5649 SELECT * from XYZZY, ABC

	5650

	5651 # list of column names, and mixed case SELECT keyword

	5652 Select AA,BB,CC from Sys.dual

	5653

	5654 # multiple tables

	5655 Select A, B, C from Sys.dual, Table2

	5656

	5657 # invalid SELECT keyword - should fail

	5658 Xelect A, B, C from Sys.dual

	5659

	5660 # incomplete command - should fail

	5661 Select

	5662

	5663 # invalid column name - should fail

	5664 Select ^^^ frox Sys.dual

	5665

	5666 """)

	5667

	5668 pyparsing_common.number.runTests("""

	5669 100

	5670 -100

	5671 +100

	5672 3.14159

	5673 6.02e23

	5674 1e-12

	5675 """)

	5676

	5677 # any int or real number, returned as float

	5678 pyparsing_common.fnumber.runTests("""

	5679 100

	5680 -100

	5681 +100

	5682 3.14159

	5683 6.02e23

	5684 1e-12

	5685 """)

	5686

	5687 pyparsing_common.hex_integer.runTests("""

	5688 100

	5689 FF

	5690 """)

	5691

	5692 import uuid

	5693 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))

	5694 pyparsing_common.uuid.runTests("""

	5695 12345678-1234-5678-1234-567812345678

	5696 """)

OLD	NEW