OLD | NEW |
(Empty) | |
| 1 # module pyparsing.py |
| 2 # |
| 3 # Copyright (c) 2003-2016 Paul T. McGuire |
| 4 # |
| 5 # Permission is hereby granted, free of charge, to any person obtaining |
| 6 # a copy of this software and associated documentation files (the |
| 7 # "Software"), to deal in the Software without restriction, including |
| 8 # without limitation the rights to use, copy, modify, merge, publish, |
| 9 # distribute, sublicense, and/or sell copies of the Software, and to |
| 10 # permit persons to whom the Software is furnished to do so, subject to |
| 11 # the following conditions: |
| 12 # |
| 13 # The above copyright notice and this permission notice shall be |
| 14 # included in all copies or substantial portions of the Software. |
| 15 # |
| 16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| 19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
| 20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| 21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 23 # |
| 24 |
| 25 __doc__ = \ |
| 26 """ |
| 27 pyparsing module - Classes and methods to define and execute parsing grammars |
| 28 |
| 29 The pyparsing module is an alternative approach to creating and executing simple
grammars, |
| 30 vs. the traditional lex/yacc approach, or the use of regular expressions. With
pyparsing, you |
| 31 don't need to learn a new syntax for defining grammars or matching expressions -
the parsing module |
| 32 provides a library of classes that you use to construct the grammar directly in
Python. |
| 33 |
| 34 Here is a program to parse "Hello, World!" (or any greeting of the form |
| 35 C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And}
elements |
| 36 (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are au
to-converted to |
| 37 L{Literal} expressions):: |
| 38 |
| 39 from pyparsing import Word, alphas |
| 40 |
| 41 # define grammar of a greeting |
| 42 greet = Word(alphas) + "," + Word(alphas) + "!" |
| 43 |
| 44 hello = "Hello, World!" |
| 45 print (hello, "->", greet.parseString(hello)) |
| 46 |
| 47 The program outputs the following:: |
| 48 |
| 49 Hello, World! -> ['Hello', ',', 'World', '!'] |
| 50 |
| 51 The Python representation of the grammar is quite readable, owing to the self-ex
planatory |
| 52 class names, and the use of '+', '|' and '^' operators. |
| 53 |
| 54 The L{ParseResults} object returned from L{ParserElement.parseString<ParserEleme
nt.parseString>} can be accessed as a nested list, a dictionary, or an |
| 55 object with named attributes. |
| 56 |
| 57 The pyparsing module handles some of the problems that are typically vexing when
writing text parsers: |
| 58 - extra or missing whitespace (the above program will also handle "Hello,World!
", "Hello , World !", etc.) |
| 59 - quoted strings |
| 60 - embedded comments |
| 61 """ |
| 62 |
# Release metadata for this copy of the module: version string, the
# timestamp string recorded for that version, and the author contact.
__version__ = "2.1.10"
__versionTime__ = "07 Oct 2016 01:31 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
| 66 |
| 67 import string |
| 68 from weakref import ref as wkref |
| 69 import copy |
| 70 import sys |
| 71 import warnings |
| 72 import re |
| 73 import sre_constants |
| 74 import collections |
| 75 import pprint |
| 76 import traceback |
| 77 import types |
| 78 from datetime import datetime |
| 79 |
| 80 try: |
| 81 from _thread import RLock |
| 82 except ImportError: |
| 83 from threading import RLock |
| 84 |
| 85 try: |
| 86 from collections import OrderedDict as _OrderedDict |
| 87 except ImportError: |
| 88 try: |
| 89 from ordereddict import OrderedDict as _OrderedDict |
| 90 except ImportError: |
| 91 _OrderedDict = None |
| 92 |
| 93 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__
,__versionTime__ ) ) |
| 94 |
# Names exported by ``from pyparsing import *``: the expression classes and
# exception classes first, then the helper functions, character-set strings
# and prebuilt expressions.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common',
]
| 115 |
# Python 2 / Python 3 compatibility shims.  After this block the module can
# rely on _MAX_INT, _ustr, singleArgBuiltins and a lazy ``range`` on either
# major version; basestring and unichr are aliased only on Python 3.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
           str(obj). If that fails with a UnicodeEncodeError, then it encodes unicode(obj)
           with the default encoding, substituting XML character references for characters
           that cannot be encoded, and finally rewrites every C{&#NNN;} reference in the
           result as C{\\u} followed by the hexadecimal form of C{NNN}.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # Else encode it
            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # raw string: the pattern value is unchanged, but '\d' in a plain
            # literal is an invalid escape sequence that newer Pythons warn
            # about when compiling this file, even though this branch only
            # ever runs on Python 2
            xmlcharref = Regex(r'&#\d+;')
            # t[0] looks like '&#233;' - strip the '&#' and ';' and convert to hex
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(ret)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__,fname))
        except AttributeError:
            # this interpreter has no builtin of that name; leave it out
            continue

# the type of a generator-expression object, used for isinstance checks
_generatorType = type((y for y in range(1)))
| 161 |
def _xml_escape(data):
    """Return C{data} with &, >, <, " and ' replaced by their XML entities."""
    # '&' has to be handled first, otherwise the ampersands introduced by the
    # later substitutions would themselves get escaped
    substitutions = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    escaped = data
    for raw_char, entity in substitutions:
        escaped = escaped.replace(raw_char, entity)
    return escaped
| 171 |
class _Constants(object):
    """Deliberately empty class; it defines no attributes or methods of its own."""
| 174 |
# Basic character-set strings (used as e.g. C{Word(alphas)} or C{Word(nums)}).
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
nums = "0123456789"
hexnums = "".join((nums, "ABCDEFabcdef"))
alphanums = "".join((alphas, nums))
# character code 92 is the backslash
_bslash = chr(92)
# every character of string.printable that is not whitespace
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))
| 181 |
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            # single-argument form: the only string given is the message
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method: build an instance of C{cls} from the fields of
        another parse exception, so subclasses never need to agree on an
        __init__ signature
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """computed attributes, looked up by name:
            - lineno - returns the line number of the exception text
            - col (or column) - returns the column number of the exception text
            - line - returns the line containing the exception text
           any other missing name raises AttributeError
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        fields = (self.msg, self.loc, self.lineno, self.column)
        return "%s (at char %d), (line:%d, col:%d)" % fields

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Returns the line where the exception occurred, with C{markerString}
           spliced in at the exception column, stripped of surrounding
           whitespace.  An empty C{markerString} leaves the line unmarked.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes alongside the class attributes
        return ["lineno", "col", "line"] + dir(type(self))
| 237 |
class ParseException(ParseBaseException):
    """
    Exception raised when a parse expression does not match the input.
    In addition to the regular instance attributes, these are available by name:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
| 258 |
class ParseFatalException(ParseBaseException):
    """exception for user code to throw when inconsistent parse content
       is found; stops all parsing immediately"""
| 263 |
class ParseSyntaxException(ParseFatalException):
    """same as L{ParseFatalException}, except that it is thrown internally
       when an L{ErrorStop<And._ErrorStop>} ('-' operator) signals that an
       unbacktrackable syntax error has been found and parsing must stop
       immediately"""
| 269 |
| 270 #~ class ReparseException(ParseBaseException): |
| 271 #~ """Experimental class - parse actions can raise this exception to cause |
| 272 #~ pyparsing to reparse the input string: |
| 273 #~ - with a modified input string, and/or |
| 274 #~ - with a modified start location |
| 275 #~ Set the values of the ReparseException in the constructor, and raise t
he |
| 276 #~ exception in a parse action to cause pyparsing to use the new string/l
ocation. |
| 277 #~ Setting the values as None causes no change to be made. |
| 278 #~ """ |
| 279 #~ def __init_( self, newstring, restartLoc ): |
| 280 #~ self.newParseText = newstring |
| 281 #~ self.reparseLoc = restartLoc |
| 282 |
class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive

       The sequence of parser elements passed to the constructor is kept,
       unmodified, in the C{parseElementTrace} attribute.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of '%' is
        # unpacked as the argument list, which mis-formats a 1-tuple trace and
        # raises TypeError for longer ones.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
| 290 |
class _ParseResultsWithOffset(object):
    """Two-slot holder kept in C{tup}: slot 0 is the wrapped value, slot 1 its offset."""

    def __init__(self, p1, p2):
        self.tup = p1, p2

    def __getitem__(self, i):
        # indexing (and therefore tuple-style unpacking) is delegated to the pair
        pair = self.tup
        return pair[i]

    def __repr__(self):
        value, _offset = self.tup
        return repr(value)

    def setOffset(self, i):
        """Replace the stored offset with C{i}, keeping the value."""
        value, _previous = self.tup
        self.tup = (value, i)
| 300 |
class ParseResults(object):
    """
    Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})

    Example::
        integer = Word(nums)
        date_str = (integer.setResultsName("year") + '/'
                        + integer.setResultsName("month") + '/'
                        + integer.setResultsName("day"))
        # equivalent form:
        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        # parseString returns a ParseResults object
        result = date_str.parseString("1999/12/31")

        def test(s, fn=repr):
            print("%s -> %s" % (s, fn(eval(s))))
        test("list(result)")
        test("result[0]")
        test("result['month']")
        test("result.day")
        test("'month' in result")
        test("'minutes' in result")
        test("result.dump()", str)
    prints::
        list(result) -> ['1999', '/', '12', '/', '31']
        result[0] -> '1999'
        result['month'] -> '12'
        result.day -> '31'
        'month' in result -> True
        'minutes' in result -> False
        result.dump() -> ['1999', '/', '12', '/', '31']
        - day: '31'
        - month: '12'
        - year: '1999'
    """
    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
        # wrapping an existing ParseResults returns that same object; the
        # __doinit flag tells __init__ whether the internal state still has
        # to be created
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                # names registered here are returned by __getitem__ as the
                # collection of all their values rather than only the last one
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # skip the name registration when the token list is None, '' or []
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # int/slice -> positional token(s); anything else -> results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # value of the most recent occurrence of the name
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,(int,slice)):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so the child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        # membership tests look at results names, not at the token list
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return ( not not self.__toklist )
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def _iterkeys( self ):
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def _itervalues( self ):
        return (self[k] for k in self._iterkeys())

    def _iteritems( self ):
        return ((k, self[k]) for k in self._iterkeys())

    if PY_3:
        keys = _iterkeys
        """Returns an iterator of all named result keys (Python 3.x only)."""

        values = _itervalues
        """Returns an iterator of all named result values (Python 3.x only)."""

        items = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""

    else:
        iterkeys = _iterkeys
        """Returns an iterator of all named result keys (Python 2.x only)."""

        itervalues = _itervalues
        """Returns an iterator of all named result values (Python 2.x only)."""

        iteritems = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

        def keys( self ):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """
        Removes and returns item at specified index (default=C{last}).
        Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
        argument or an integer argument, it will use C{list} semantics
        and pop tokens from the list of parsed tokens. If passed a
        non-integer argument (most likely a string), it will use C{dict}
        semantics and pop the corresponding value from any defined
        results names. A second default return value argument is
        supported, just as in C{dict.pop()}.

        Example::
            def remove_first(tokens):
                tokens.pop(0)
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

            label = Word(alphas)
            patt = label("LABEL") + OneOrMore(Word(nums))
            print(patt.parseString("AAB 123 321").dump())

            # Use pop() in a parse action to remove named result (note that corresponding value is not
            # removed from list form of results)
            def remove_LABEL(tokens):
                tokens.pop("LABEL")
                return tokens
            patt.addParseAction(remove_LABEL)
            print(patt.parseString("AAB 123 321").dump())
        prints::
            ['AAB', '123', '321']
            - LABEL: 'AAB'

            ['AAB', '123', '321']
        """
        if not args:
            args = [-1]
        for k,v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            # unknown results name and a default was supplied
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """
        Returns named result matching the given key, or if there is no
        such name, then returns the given C{defaultValue} or C{None} if no
        C{defaultValue} is specified.

        Similar to C{dict.get()}.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString("1999/12/31")
            print(result.get("year")) # -> '1999'
            print(result.get("hour", "not specified")) # -> 'not specified'
            print(result.get("hour")) # -> None
        """
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """
        Inserts new element at location index in the list of parsed tokens.

        Similar to C{list.insert()}.

        Example::
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to insert the parse location in the front of the parsed results
            def insert_locn(locn, tokens):
                tokens.insert(0, locn)
            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
        """
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name,occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """
        Add single element to end of ParseResults list of elements.

        Example::
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to compute the sum of the parsed integers, and add it to the end
            def append_sum(tokens):
                tokens.append(sum(map(int, tokens)))
            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
        """
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """
        Add sequence of elements to end of ParseResults list of elements.

        Example::
            patt = OneOrMore(Word(alphas))

            # use a parse action to append the reverse of the matched strings, to make a palindrome
            def make_palindrome(tokens):
                tokens.extend(reversed([t[::-1] for t in tokens]))
                return ''.join(tokens)
            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
        """
        if isinstance(itemseq, ParseResults):
            # += also merges the other object's results names
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """
        Clear all elements and results names.
        """
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        """
        Attribute-style access to named results: C{results.name} returns the
        same value as C{results["name"]}.  A name that is not a defined
        results name yields C{""}, except for names starting with a double
        underscore, which raise C{AttributeError}.
        """
        try:
            return self[name]
        except KeyError:
            # Protocol probes for optional special methods (for instance the
            # __deepcopy__ lookup done by copy.deepcopy) must be told the
            # attribute is missing; handing back "" would make them try to
            # use a string as the hook.
            if name.startswith("__"):
                raise AttributeError(name)
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            # named results of `other` are re-registered here with their
            # positions shifted past the tokens already in this object
            offset = len(self.__toklist)
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        if isinstance(other,int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList( self, sep='' ):
        # flatten nested results into a single list of strings, placing
        # `sep` between the top-level items when it is non-empty
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """
        Returns the parse results as a nested list of matching tokens. Nested
        C{ParseResults} are converted to lists recursively; every other token
        is returned unchanged.

        Example::
            patt = OneOrMore(Word(alphas))
            result = patt.parseString("sldkj lsdkj sldkj")
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

            # Use asList() to create an actual list
            result_list = result.asList()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

    def asDict( self ):
        """
        Returns the named parse results as a nested dictionary.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

            result_dict = result.asDict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
        """
        if PY_3:
            item_fn = self.items
        else:
            item_fn = self.iteritems

        def toItem(obj):
            # nested results become a dict when they have names, a list otherwise
            if isinstance(obj, ParseResults):
                if obj.haskeys():
                    return obj.asDict()
                else:
                    return [toItem(v) for v in obj]
            else:
                return obj

        return dict((k,toItem(v)) for k,v in item_fn())

    def copy( self ):
        """
        Returns a new copy of a C{ParseResults} object. The token list and the
        results-name dictionary are each copied one level deep only.
        """
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """
        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
        """
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # reverse lookup: the results name under which the object `sub`
        # (compared by identity) is stored, or None
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """
        Returns the results name for this token expression. Useful when several
        different expressions might match at a particular location.

        Example::
            integer = Word(nums)
            ssn_expr = Regex(r"\\d\\d\\d-\\d\\d-\\d\\d\\d\\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number")
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = OneOrMore(user_data)

            result = user_info.parseString("22 111-22-3333 #221B")
            for item in result:
                print(item.getName(), ':', item[0])
        prints::
            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self.__name:
            return self.__name
        elif self.__parent:
            # __parent is a weak reference; calling it gives None once the
            # parent object is gone
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self, indent='', depth=0, full=True):
        """
        Diagnostic method for listing out the contents of a C{ParseResults}.
        Accepts an optional C{indent} argument so that this string can be embedded
        in a nested display of other data.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(result.dump())
        prints::
            ['12', '/', '31', '/', '1999']
            - day: '1999'
            - month: '31'
            - year: '12'
        """
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        if full:
            if self.haskeys():
                # one "- name: value" line per results name, sorted by name
                items = sorted((str(k), v) for k,v in self.items())
                for k,v in items:
                    if out:
                        out.append(NL)
                    out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
                    if isinstance(v,ParseResults):
                        if v:
                            out.append( v.dump(indent,depth+1) )
                        else:
                            out.append(_ustr(v))
                    else:
                        out.append(repr(v))
            elif any(isinstance(vv,ParseResults) for vv in self):
                # no names at this level, but nested results: list every
                # element under its index
                v = self
                for i,vv in enumerate(v):
                    if isinstance(vv,ParseResults):
                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
                    else:
                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """
        Pretty-printer for parsed results as a list, using the C{pprint} module.
        Accepts additional positional or keyword args as defined for the
        C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

        Example::
            ident = Word(alphas, alphanums)
            num = Word(nums)
            func = Forward()
            term = ident | num | Group('(' + func + ')')
            func <<= ident + Group(Optional(delimitedList(term)))
            result = func.parseString("fna a,b,(fnb c,d,200),100")
            result.pprint(width=40)
        prints::
            ['fna',
             ['a',
              'b',
              ['(', 'fnb', ['c', 'd', '200'], ')'],
              '100']]
        """
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        # the parent weak reference is dereferenced here, since the state
        # must hold the parent object itself (or None)
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __getnewargs__(self):
        return self.__toklist, self.__name, self.__asList, self.__modal

    def __dir__(self):
        # class attributes plus the currently defined results names
        return (dir(type(self)) + list(self.keys()))
| 942 |
# Register ParseResults as a virtual MutableMapping subclass.  The ABCs live in
# collections.abc on Python 3.3+, and the old aliases in the collections module
# were removed in Python 3.10, so prefer collections.abc and fall back to the
# legacy location only where collections.abc does not exist (Python 2).
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
| 944 |
def col (loc,strg):
    """Return the column of position C{loc} within C{strg}, treating newlines as
    line separators. Columns are numbered starting at 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>}
    for more information on parsing strings containing C{<TAB>}s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # a position just past a newline (but not at end of string) is column 1
    if 0 < loc < len(strg) and strg[loc-1] == '\n':
        return 1
    # otherwise measure the distance back to the previous newline (-1 if none)
    return loc - strg.rfind("\n", 0, loc)
| 957 |
def lineno(loc,strg):
    """Return the line number of position C{loc} within C{strg}, treating newlines
    as line separators. Lines are numbered starting at 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>}
    for more information on parsing strings containing C{<TAB>}s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
| 969 |
def line( loc, strg ):
    """Return the text of the line containing position C{loc} within C{strg},
    treating newlines as line separators. The newline itself is not included.
    """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no further newline: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
| 979 |
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: print the expression and
    the location (with line and column) at which it is about to be tried."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print (message)
| 982 |
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: print the expression
    and the matched tokens as a list."""
    matched = str(toks.asList())
    print ("Matched " + _ustr(expr) + " -> " + matched)
| 985 |
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt fails: print the exception."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
| 988 |
def nullDebugAction(*args):
    """Debug action that accepts any arguments and does nothing; use it to
    suppress debugging output during parsing."""
    return None
| 992 |
| 993 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs |
| 994 #~ 'decorator to trim function calls to match the arity of the target' |
| 995 #~ def _trim_arity(func, maxargs=3): |
| 996 #~ if func in singleArgBuiltins: |
| 997 #~ return lambda s,l,t: func(t) |
| 998 #~ limit = 0 |
| 999 #~ foundArity = False |
| 1000 #~ def wrapper(*args): |
| 1001 #~ nonlocal limit,foundArity |
| 1002 #~ while 1: |
| 1003 #~ try: |
| 1004 #~ ret = func(*args[limit:]) |
| 1005 #~ foundArity = True |
| 1006 #~ return ret |
| 1007 #~ except TypeError: |
| 1008 #~ if limit == maxargs or foundArity: |
| 1009 #~ raise |
| 1010 #~ limit += 1 |
| 1011 #~ continue |
| 1012 #~ return wrapper |
| 1013 |
# this version is Python 2.x-3.x cross-compatible
'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap C{func} so that it can always be called with the full argument list,
    whatever number of arguments C{func} itself accepts.

    If C{func} is one of C{singleArgBuiltins}, the result is a three-argument
    lambda that passes only its last argument. Otherwise the returned wrapper
    calls C{func(*args[limit:])}, starting with C{limit == 0}; each time that
    call itself raises C{TypeError} the wrapper drops one more leading
    argument and retries, until C{limit} has gone past C{maxargs}. Once a
    call has succeeded, later C{TypeError}s are re-raised unchanged.

    A C{TypeError} is attributed to the arity probe only when the innermost
    traceback frame inspected is the C{func(...)} call line inside the
    wrapper; errors raised deeper, inside C{func}, propagate to the caller.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists serve as mutable cells that wrapper() can update
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # number of source lines from the 'this_line = ...' statement below to the
    # 'ret = func(...)' call inside wrapper
    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # compare (filename, lineno) of the last extracted frame
                        # with the synthesized location of the func call above
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local reference to the traceback object
                        del tb

                # the call itself failed: drop one more leading argument and retry
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
| 1079 |
| 1080 class ParserElement(object): |
| 1081 """Abstract base level parser element class.""" |
| 1082 DEFAULT_WHITE_CHARS = " \n\t\r" |
| 1083 verbose_stacktrace = False |
| 1084 |
| 1085 @staticmethod |
| 1086 def setDefaultWhitespaceChars( chars ): |
| 1087 r""" |
| 1088 Overrides the default whitespace chars |
| 1089 |
| 1090 Example:: |
| 1091 # default whitespace chars are space, <TAB> and newline |
| 1092 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc'
, 'def', 'ghi', 'jkl'] |
| 1093 |
| 1094 # change to just treat newline as significant |
| 1095 ParserElement.setDefaultWhitespaceChars(" \t") |
| 1096 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc'
, 'def'] |
| 1097 """ |
| 1098 ParserElement.DEFAULT_WHITE_CHARS = chars |
| 1099 |
| 1100 @staticmethod |
| 1101 def inlineLiteralsUsing(cls): |
| 1102 """ |
| 1103 Set class to be used for inclusion of string literals into a parser. |
| 1104 |
| 1105 Example:: |
| 1106 # default literal class used is Literal |
| 1107 integer = Word(nums) |
| 1108 date_str = integer("year") + '/' + integer("month") + '/' + integer(
"day") |
| 1109 |
| 1110 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '3
1'] |
| 1111 |
| 1112 |
| 1113 # change to Suppress |
| 1114 ParserElement.inlineLiteralsUsing(Suppress) |
| 1115 date_str = integer("year") + '/' + integer("month") + '/' + integer(
"day") |
| 1116 |
| 1117 date_str.parseString("1999/12/31") # -> ['1999', '12', '31'] |
| 1118 """ |
| 1119 ParserElement._literalStringClass = cls |
| 1120 |
| 1121 def __init__( self, savelist=False ): |
| 1122 self.parseAction = list() |
| 1123 self.failAction = None |
| 1124 #~ self.name = "<unknown>" # don't define self.name, let subclasses try
/except upcall |
| 1125 self.strRepr = None |
| 1126 self.resultsName = None |
| 1127 self.saveAsList = savelist |
| 1128 self.skipWhitespace = True |
| 1129 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS |
| 1130 self.copyDefaultWhiteChars = True |
| 1131 self.mayReturnEmpty = False # used when checking for left-recursion |
| 1132 self.keepTabs = False |
| 1133 self.ignoreExprs = list() |
| 1134 self.debug = False |
| 1135 self.streamlined = False |
| 1136 self.mayIndexError = True # used to optimize exception handling for subc
lasses that don't advance parse index |
| 1137 self.errmsg = "" |
| 1138 self.modalResults = True # used to mark results names as modal (report o
nly last) or cumulative (list all) |
| 1139 self.debugActions = ( None, None, None ) #custom debug actions |
| 1140 self.re = None |
| 1141 self.callPreparse = True # used to avoid redundant calls to preParse |
| 1142 self.callDuringTry = False |
| 1143 |
| 1144 def copy( self ): |
| 1145 """ |
| 1146 Make a copy of this C{ParserElement}. Useful for defining different par
se actions |
| 1147 for the same parsing pattern, using copies of the original parse element
. |
| 1148 |
| 1149 Example:: |
| 1150 integer = Word(nums).setParseAction(lambda toks: int(toks[0])) |
| 1151 integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024)
+ Suppress("K") |
| 1152 integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1
024) + Suppress("M") |
| 1153 |
| 1154 print(OneOrMore(integerK | integerM | integer).parseString("5K 100 6
40K 256M")) |
| 1155 prints:: |
| 1156 [5120, 100, 655360, 268435456] |
| 1157 Equivalent form of C{expr.copy()} is just C{expr()}:: |
| 1158 integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024)
+ Suppress("M") |
| 1159 """ |
| 1160 cpy = copy.copy( self ) |
| 1161 cpy.parseAction = self.parseAction[:] |
| 1162 cpy.ignoreExprs = self.ignoreExprs[:] |
| 1163 if self.copyDefaultWhiteChars: |
| 1164 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS |
| 1165 return cpy |
| 1166 |
| 1167 def setName( self, name ): |
| 1168 """ |
| 1169 Define name for this expression, makes debugging and exception messages
clearer. |
| 1170 |
| 1171 Example:: |
| 1172 Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...)
(at char 0), (line:1, col:1) |
| 1173 Word(nums).setName("integer").parseString("ABC") # -> Exception: Ex
pected integer (at char 0), (line:1, col:1) |
| 1174 """ |
| 1175 self.name = name |
| 1176 self.errmsg = "Expected " + self.name |
| 1177 if hasattr(self,"exception"): |
| 1178 self.exception.msg = self.errmsg |
| 1179 return self |
| 1180 |
| 1181 def setResultsName( self, name, listAllMatches=False ): |
| 1182 """ |
| 1183 Define name for referencing matching tokens as a nested attribute |
| 1184 of the returned parse results. |
| 1185 NOTE: this returns a *copy* of the original C{ParserElement} object; |
| 1186 this is so that the client can define a basic element, such as an |
| 1187 integer, and reference it in multiple places with different names. |
| 1188 |
| 1189 You can also set results names using the abbreviated syntax, |
| 1190 C{expr("name")} in place of C{expr.setResultsName("name")} - |
| 1191 see L{I{__call__}<__call__>}. |
| 1192 |
| 1193 Example:: |
| 1194 date_str = (integer.setResultsName("year") + '/' |
| 1195 + integer.setResultsName("month") + '/' |
| 1196 + integer.setResultsName("day")) |
| 1197 |
| 1198 # equivalent form: |
| 1199 date_str = integer("year") + '/' + integer("month") + '/' + integer(
"day") |
| 1200 """ |
| 1201 newself = self.copy() |
| 1202 if name.endswith("*"): |
| 1203 name = name[:-1] |
| 1204 listAllMatches=True |
| 1205 newself.resultsName = name |
| 1206 newself.modalResults = not listAllMatches |
| 1207 return newself |
| 1208 |
| 1209 def setBreak(self,breakFlag = True): |
| 1210 """Method to invoke the Python pdb debugger when this element is |
| 1211 about to be parsed. Set C{breakFlag} to True to enable, False to |
| 1212 disable. |
| 1213 """ |
| 1214 if breakFlag: |
| 1215 _parseMethod = self._parse |
| 1216 def breaker(instring, loc, doActions=True, callPreParse=True): |
| 1217 import pdb |
| 1218 pdb.set_trace() |
| 1219 return _parseMethod( instring, loc, doActions, callPreParse ) |
| 1220 breaker._originalParseMethod = _parseMethod |
| 1221 self._parse = breaker |
| 1222 else: |
| 1223 if hasattr(self._parse,"_originalParseMethod"): |
| 1224 self._parse = self._parse._originalParseMethod |
| 1225 return self |
| 1226 |
def setParseAction( self, *fns, **kwargs ):
    """
    Replace this element's parse actions with C{fns}; they run when the
    element matches successfully.
    Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # normalize every callable to the full (s, loc, toks) calling convention
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
| 1264 |
def addParseAction( self, *fns, **kwargs ):
    """
    Append C{fns} to this expression's existing list of parse actions.
    C{callDuringTry} stays set once any call has requested it.
    See L{I{setParseAction}<setParseAction>}.

    See examples in L{I{copy}<copy>}.
    """
    self.parseAction.extend(_trim_arity(fn) for fn in fns)
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
| 1274 |
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Each predicate in C{fns} becomes its own parse action, so when several
    are given every one of them must hold.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if True, the conditions are also evaluated during lookaheads and alternate testing

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition(fn):
        # Bind this particular predicate now.  Defining 'pa' directly in the
        # loop below would make every closure look up the loop variable when
        # called, so all conditions would test only the last predicate.
        # The arity wrapper is also built once here instead of on every parse.
        predicate = _trim_arity(fn)
        def pa(s,l,t):
            if not bool(predicate(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
| 1301 |
| 1302 def setFailAction( self, fn ): |
| 1303 """Define action to perform if parsing fails at this expression. |
| 1304 Fail acton fn is a callable function that takes the arguments |
| 1305 C{fn(s,loc,expr,err)} where: |
| 1306 - s = string being parsed |
| 1307 - loc = location where expression match was attempted and failed |
| 1308 - expr = the parse expression that failed |
| 1309 - err = the exception thrown |
| 1310 The function returns no value. It may throw C{L{ParseFatalException}
} |
| 1311 if it is desired to stop parsing immediately.""" |
| 1312 self.failAction = fn |
| 1313 return self |
| 1314 |
| 1315 def _skipIgnorables( self, instring, loc ): |
| 1316 exprsFound = True |
| 1317 while exprsFound: |
| 1318 exprsFound = False |
| 1319 for e in self.ignoreExprs: |
| 1320 try: |
| 1321 while 1: |
| 1322 loc,dummy = e._parse( instring, loc ) |
| 1323 exprsFound = True |
| 1324 except ParseException: |
| 1325 pass |
| 1326 return loc |
| 1327 |
| 1328 def preParse( self, instring, loc ): |
| 1329 if self.ignoreExprs: |
| 1330 loc = self._skipIgnorables( instring, loc ) |
| 1331 |
| 1332 if self.skipWhitespace: |
| 1333 wt = self.whiteChars |
| 1334 instrlen = len(instring) |
| 1335 while loc < instrlen and instring[loc] in wt: |
| 1336 loc += 1 |
| 1337 |
| 1338 return loc |
| 1339 |
def parseImpl( self, instring, loc, doActions=True ):
    """Base matching step: consume nothing and produce no tokens.
    Returns C{(loc, [])} with C{loc} unchanged."""
    no_tokens = []
    return loc, no_tokens
| 1342 |
def postParse( self, instring, loc, tokenlist ):
    """Hook applied to the tokens returned by C{parseImpl}; the base version
    returns C{tokenlist} unchanged."""
    return tokenlist
| 1345 |
#~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Core, uncached parse step for this element.

    Optionally pre-parses from C{loc} (skipping ignorables/whitespace),
    calls C{parseImpl} and C{postParse}, wraps the tokens in a
    C{ParseResults}, and then runs the parse actions when C{doActions} is
    true or the element has C{callDuringTry} set. A parse action that
    returns a non-C{None} value replaces the current results.

    Parameters:
     - instring - the string being parsed
     - loc - location at which to attempt the match
     - doActions - if false, parse actions run only when C{callDuringTry} is set
     - callPreParse - if false, C{preParse} is skipped and matching starts at C{loc}

    Returns a C{(loc, tokens)} tuple: the location after the match and the
    resulting C{ParseResults}. An C{IndexError} caught around C{parseImpl}
    is converted to a C{ParseException} located at the end of the string.
    """
    debugging = ( self.debug ) #and doActions )

    # slower path: debug actions and/or the fail action must be notified
    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no debug or fail callbacks to notify
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only guard against IndexError when this element may raise it, or
        # when the (pre-preParse) location is already at/after the end of the string
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            # same loop as the non-debugging branch below, plus notification
            # of the exception debug action when a parse action fails
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a parse action returning None leaves the results as they are
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
| 1418 |
| 1419 def tryParse( self, instring, loc ): |
| 1420 try: |
| 1421 return self._parse( instring, loc, doActions=False )[0] |
| 1422 except ParseFatalException: |
| 1423 raise ParseException( instring, loc, self.errmsg, self) |
| 1424 |
| 1425 def canParseNext(self, instring, loc): |
| 1426 try: |
| 1427 self.tryParse(instring, loc) |
| 1428 except (ParseException, IndexError): |
| 1429 return False |
| 1430 else: |
| 1431 return True |
| 1432 |
| 1433 class _UnboundedCache(object): |
| 1434 def __init__(self): |
| 1435 cache = {} |
| 1436 self.not_in_cache = not_in_cache = object() |
| 1437 |
| 1438 def get(self, key): |
| 1439 return cache.get(key, not_in_cache) |
| 1440 |
| 1441 def set(self, key, value): |
| 1442 cache[key] = value |
| 1443 |
| 1444 def clear(self): |
| 1445 cache.clear() |
| 1446 |
| 1447 self.get = types.MethodType(get, self) |
| 1448 self.set = types.MethodType(set, self) |
| 1449 self.clear = types.MethodType(clear, self) |
| 1450 |
if _OrderedDict is not None:
    class _FifoCache(object):
        """Cache holding at most C{size} entries; when a new entry pushes it
        over the limit, the oldest entry is discarded. C{get} returns the
        C{not_in_cache} sentinel for unknown keys."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                if len(cache) > size:
                    # popitem(False) removes the first (oldest) item
                    cache.popitem(False)

            def clear(self):
                cache.clear()

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)

else:
    class _FifoCache(object):
        """Fallback used when no ordered dict is available: a plain dict plus
        a deque recording insertion order. Holds at most C{size} entries;
        C{get} returns the C{not_in_cache} sentinel for unknown keys."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            key_fifo = collections.deque([], size)

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                if len(cache) > size:
                    # Evict the oldest recorded key.  The fifo can be empty
                    # here (always, when size == 0); popleft() would then raise
                    # IndexError, so drop the entry that was just stored,
                    # which keeps a zero-size cache permanently empty.
                    oldest = key_fifo.popleft() if key_fifo else key
                    cache.pop(oldest, None)
                key_fifo.append(key)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
| 1497 |
# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
packrat_cache_lock = RLock()
# hit and miss counters, indexed by the HIT (0) and MISS (1) constants in _parseCache
packrat_cache_stats = [0, 0]
| 1502 |
| 1503 # this method gets repeatedly called during backtracking with the same argum
ents - |
| 1504 # we can cache these arguments and save ourselves the trouble of re-parsing
the contained expression |
| 1505 def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): |
| 1506 HIT, MISS = 0, 1 |
| 1507 lookup = (self, instring, loc, callPreParse, doActions) |
| 1508 with ParserElement.packrat_cache_lock: |
| 1509 cache = ParserElement.packrat_cache |
| 1510 value = cache.get(lookup) |
| 1511 if value is cache.not_in_cache: |
| 1512 ParserElement.packrat_cache_stats[MISS] += 1 |
| 1513 try: |
| 1514 value = self._parseNoCache(instring, loc, doActions, callPre
Parse) |
| 1515 except ParseBaseException as pe: |
| 1516 # cache a copy of the exception, without the traceback |
| 1517 cache.set(lookup, pe.__class__(*pe.args)) |
| 1518 raise |
| 1519 else: |
| 1520 cache.set(lookup, (value[0], value[1].copy())) |
| 1521 return value |
| 1522 else: |
| 1523 ParserElement.packrat_cache_stats[HIT] += 1 |
| 1524 if isinstance(value, Exception): |
| 1525 raise value |
| 1526 return (value[0], value[1].copy()) |
| 1527 |
# default parse entry point is the uncached implementation
_parse = _parseNoCache
| 1529 |
| 1530 @staticmethod |
| 1531 def resetCache(): |
| 1532 ParserElement.packrat_cache.clear() |
| 1533 ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_c
ache_stats) |
| 1534 |
| 1535 _packratEnabled = False |
| 1536 @staticmethod |
| 1537 def enablePackrat(cache_size_limit=128): |
| 1538 """Enables "packrat" parsing, which adds memoizing to the parsing logic. |
| 1539 Repeated parse attempts at the same string location (which happens |
| 1540 often in many complex grammars) can immediately return a cached value
, |
| 1541 instead of re-executing parsing/validating code. Memoizing is done o
f |
| 1542 both valid results and parsing exceptions. |
| 1543 |
| 1544 Parameters: |
| 1545 - cache_size_limit - (default=C{128}) - if an integer value is provi
ded |
| 1546 will limit the size of the packrat cache; if None is passed, then |
| 1547 the cache size will be unbounded; if 0 is passed, the cache will |
| 1548 be effectively disabled. |
| 1549 |
| 1550 This speedup may break existing programs that use parse actions that |
| 1551 have side-effects. For this reason, packrat parsing is disabled when |
| 1552 you first import pyparsing. To activate the packrat feature, your |
| 1553 program must call the class method C{ParserElement.enablePackrat()}.
If |
| 1554 your program uses C{psyco} to "compile as you go", you must call |
| 1555 C{enablePackrat} before calling C{psyco.full()}. If you do not do th
is, |
| 1556 Python will crash. For best results, call C{enablePackrat()} immedia
tely |
| 1557 after importing pyparsing. |
| 1558 |
| 1559 Example:: |
| 1560 import pyparsing |
| 1561 pyparsing.ParserElement.enablePackrat() |
| 1562 """ |
| 1563 if not ParserElement._packratEnabled: |
| 1564 ParserElement._packratEnabled = True |
| 1565 if cache_size_limit is None: |
| 1566 ParserElement.packrat_cache = ParserElement._UnboundedCache() |
| 1567 else: |
| 1568 ParserElement.packrat_cache = ParserElement._FifoCache(cache_siz
e_limit) |
| 1569 ParserElement._parse = ParserElement._parseCache |
| 1570 |
| 1571 def parseString( self, instring, parseAll=False ): |
| 1572 """ |
| 1573 Execute the parse expression with the given string. |
| 1574 This is the main interface to the client code, once the complete |
| 1575 expression has been built. |
| 1576 |
| 1577 If you want the grammar to require that the entire input string be |
| 1578 successfully parsed, then set C{parseAll} to True (equivalent to ending |
| 1579 the grammar with C{L{StringEnd()}}). |
| 1580 |
| 1581 Note: C{parseString} implicitly calls C{expandtabs()} on the input strin
g, |
| 1582 in order to report proper column numbers in parse actions. |
| 1583 If the input string contains tabs and |
| 1584 the grammar uses parse actions that use the C{loc} argument to index int
o the |
| 1585 string being parsed, you can ensure you have a consistent view of the in
put |
| 1586 string by: |
| 1587 - calling C{parseWithTabs} on your grammar before calling C{parseString
} |
| 1588 (see L{I{parseWithTabs}<parseWithTabs>}) |
| 1589 - define your parse action using the full C{(s,loc,toks)} signature, an
d |
| 1590 reference the input string using the parse action's C{s} argument |
| 1591 - explictly expand the tabs in your input string before calling |
| 1592 C{parseString} |
| 1593 |
| 1594 Example:: |
| 1595 Word('a').parseString('aaaaabaaa') # -> ['aaaaa'] |
| 1596 Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: E
xpected end of text |
| 1597 """ |
| 1598 ParserElement.resetCache() |
| 1599 if not self.streamlined: |
| 1600 self.streamline() |
| 1601 #~ self.saveAsList = True |
| 1602 for e in self.ignoreExprs: |
| 1603 e.streamline() |
| 1604 if not self.keepTabs: |
| 1605 instring = instring.expandtabs() |
| 1606 try: |
| 1607 loc, tokens = self._parse( instring, 0 ) |
| 1608 if parseAll: |
| 1609 loc = self.preParse( instring, loc ) |
| 1610 se = Empty() + StringEnd() |
| 1611 se._parse( instring, loc ) |
| 1612 except ParseBaseException as exc: |
| 1613 if ParserElement.verbose_stacktrace: |
| 1614 raise |
| 1615 else: |
| 1616 # catch and re-raise exception from here, clears out pyparsing i
nternal stack trace |
| 1617 raise exc |
| 1618 else: |
| 1619 return tokens |
| 1620 |
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator: each match is yielded as a
    C{(tokens, start, end)} tuple.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character past the pre-parsed location
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lower-case) is a separate variable
                        # from 'nextLoc'; it holds the pre-parsed position of the
                        # current loc, while the assignment below uses the match
                        # end 'nextLoc' - confirm this is intended
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc; step forward to avoid looping in place
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
| 1691 |
def transformString(self, instring):
    """
    Extension to C{L{scanString}} that rewrites the matched portions of a string.
    Attach a parse action that modifies the returned token list, then call
    C{transformString()} on the target text: each match reported by C{scanString}
    is replaced by its tokens, the text between matches is copied through
    unchanged, and the pieces are joined into the returned string.

    Note that calling this method sets C{keepTabs} to C{True} on this expression.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    pieces = []
    prevEnd = 0
    try:
        for toks, start, end in self.scanString(instring):
            # untouched text between the previous match and this one
            pieces.append(instring[prevEnd:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prevEnd = end
        # trailing text after the last match
        pieces.append(instring[prevEnd:])
        # drop empty fragments before flattening and joining
        nonEmpty = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(nonEmpty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
| 1734 |
def searchString(self, instring, maxMatches=_MAX_INT):
    """
    Another extension to C{L{scanString}}: collects just the tokens of every match
    found in C{instring} (discarding the start/end locations) and returns them
    wrapped in a single C{ParseResults}. The optional C{maxMatches} argument is
    passed through to C{scanString} to stop searching after 'n' matches.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
    prints::
        ['More', 'Iron', 'Lead', 'Gold', 'I']
    """
    try:
        found = []
        for toks, _start, _end in self.scanString(instring, maxMatches):
            found.append(toks)
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
| 1757 |
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the text preceding each separator match, optionally followed by the first
    token of that match, and finally the text remaining after the last match.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # end location of the previous separator match
    last = 0
    for t, s, e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    yield instring[last:]
| 1779 |
def __add__(self, other):
    """
    Implementation of + operator - returns C{L{And}}. A string operand is first
    converted using C{ParserElement._literalStringClass} (L{Literal} by default).
    If the operand is neither a string nor a C{ParserElement}, a C{SyntaxWarning}
    is issued and C{None} is returned.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None
| 1799 |
def __radd__(self, other):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None
| 1811 |
def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop, i.e. an
    C{And} of this expression, an C{And._ErrorStop()} marker and C{other}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return And([self, And._ErrorStop(), rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None
| 1823 |
def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None
| 1835 |
def __mul__(self, other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is not an int or a tuple of ints/C{None},
    and C{ValueError} for a negative count, a maximum smaller than the minimum,
    or a multiplier of 0 / (0,0).
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements is the number of optional repetitions beyond the minimum
            optElements -= minElements
        else:
            # apply the format operator so the offending types appear in the message
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
| 1903 |
def __rmul__(self, other):
    """
    Implementation of * operator when the multiplier is the left operand;
    delegates to C{__mul__} with the same argument.
    """
    product = self.__mul__(other)
    return product
| 1906 |
def __or__(self, other):
    """
    Implementation of | operator - returns C{L{MatchFirst}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return MatchFirst([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None
| 1918 |
def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None
| 1930 |
def __xor__(self, other):
    """
    Implementation of ^ operator - returns C{L{Or}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None
| 1942 |
def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None
| 1954 |
def __and__(self, other):
    """
    Implementation of & operator - returns C{L{Each}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None
| 1966 |
def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None
| 1978 |
def __invert__(self):
    """
    Implementation of ~ operator - wraps this expression in a C{L{NotAny}}
    and returns it.
    """
    negated = NotAny(self)
    return negated
| 1984 |
def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted (C{None}), the call is the same as calling C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
| 2003 |
def suppress(self):
    """
    Returns this C{ParserElement} wrapped in a C{Suppress}, so that its output is
    suppressed; useful to keep punctuation from cluttering up returned output.
    """
    wrapped = Suppress(self)
    return wrapped
| 2010 |
def leaveWhitespace(self):
    """
    Turns off whitespace skipping (sets C{skipWhitespace} to C{False}) before matching
    the characters in the C{ParserElement}'s defined pattern, and returns C{self}.
    This is normally only used internally by the pyparsing module, but may be
    needed in some whitespace-sensitive grammars.
    """
    setattr(self, "skipWhitespace", False)
    return self
| 2019 |
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars: stores C{chars} as this element's
    C{whiteChars}, enables whitespace skipping, clears C{copyDefaultWhiteChars},
    and returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
| 2028 |
def parseWithTabs(self):
    """
    Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string,
    by setting C{keepTabs} to C{True}; returns C{self}.
    Must be called before C{parseString} when the input grammar contains elements that
    match C{<TAB>} characters.
    """
    setattr(self, "keepTabs", True)
    return self
| 2037 |
def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns. Strings are wrapped in C{Suppress}; a C{Suppress}
    already present in C{ignoreExprs} is not added again; any other expression
    is copied and wrapped in C{Suppress}. Returns C{self}.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
| 2060 |
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching, using the
    given callbacks. Any action passed as a false value is replaced by the
    corresponding default debug action. Sets C{debug} to C{True} and returns C{self}.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
| 2070 |
def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable (installs the default debug actions via
    L{setDebugActions}), False to disable. Returns C{self}.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using L{setDebugActions}. Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        self.debug = False
    else:
        self.setDebugActions(_defaultStartDebugAction,
                             _defaultSuccessDebugAction,
                             _defaultExceptionDebugAction)
    return self
| 2111 |
def __str__(self):
    """Return this element's C{name} attribute as its string form."""
    label = self.name
    return label
| 2114 |
def __repr__(self):
    """Return the C{_ustr} conversion of this element as its repr."""
    text = _ustr(self)
    return text
| 2117 |
def streamline(self):
    """
    Mark this element as streamlined and clear its cached C{strRepr};
    returns C{self}.
    """
    self.strRepr = None
    self.streamlined = True
    return self
| 2122 |
def checkRecursion(self, parseElementList):
    """
    Recursion-check hook; this base implementation does nothing and
    returns C{None}.
    """
    return None
| 2125 |
def validate(self, validateTrace=[]):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.
    This implementation ignores C{validateTrace} and calls C{checkRecursion}
    with a fresh empty list.
    """
    seen = []
    self.checkRecursion(seen)
| 2131 |
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.
    The contents are handed to C{parseString} together with C{parseAll}.
    """
    try:
        # file-like objects provide read(); anything else is treated as a filename
        text = file_or_filename.read()
    except AttributeError:
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
| 2151 |
def __eq__(self, other):
    """
    Equality test. Against another C{ParserElement}: true when it is the same
    object or both have equal instance attributes. Against a string: true when
    this expression matches the string (see C{matches}). Otherwise the comparison
    is made between C{super(ParserElement, self)} and C{other}.
    """
    if isinstance(other, ParserElement):
        if self is other:
            return True
        return vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement, self) == other
| 2159 |
def __ne__(self, other):
    """Inequality test; the logical negation of C{self == other}."""
    are_equal = (self == other)
    return not are_equal
| 2162 |
def __hash__(self):
    """Hash based on the identity (C{id}) of this object."""
    identity = id(self)
    return hash(identity)
| 2165 |
def __req__(self, other):
    """Reflected equality helper; returns the result of C{self == other}."""
    outcome = (self == other)
    return outcome
| 2168 |
def __rne__(self, other):
    """Reflected inequality helper; the logical negation of C{self == other}."""
    are_equal = (self == other)
    return not are_equal
| 2171 |
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.
    Returns C{True} if C{parseString} succeeds, C{False} if it raises a
    C{ParseBaseException}.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
| 2190 |
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and results is a list of (test string, result)
    pairs, where each result is either the value returned by C{parseString} or the
    exception that was raised for that test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    # a multiline string is split into one stripped test per line
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)
    allResults = []
    comments = []
    success = True
    for t in tests:
        # collect comment lines (and blank lines that follow a comment) so they are
        # emitted together with the next real test
        if comment is not None and comment.matches(t, False) or comments and not t:
            comments.append(t)
            continue
        if not t:
            continue
        out = ['\n'.join(comments), t]
        comments = []
        try:
            # a literal backslash-n in the test text stands for an embedded newline
            t = t.replace(r'\n','\n')
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            # a successful parse counts as a failure when failures are expected
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multiline test: show the offending line and a caret under the error column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            # any non-parse exception is also recorded as a failed test
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
| 2321 |
| 2322 |
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
    Initializes the base class with C{savelist=False}.
    """
    def __init__(self):
        super(Token, self).__init__(savelist=False)
| 2329 |
| 2330 |
class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__(self):
        super(Empty, self).__init__()
        # flags: may produce an empty result, does not expect IndexError while matching
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
| 2340 |
| 2341 |
class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise a C{ParseException} carrying this token's C{errmsg}."""
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
| 2355 |
| 2356 |
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn, and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        """
        Return C{(loc + matchLen, match)} when C{instring} contains the match string
        at C{loc}; otherwise raise C{ParseException}.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
_L = Literal
ParserElement._literalStringClass = Literal
| 2397 |
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        chars = Keyword.DEFAULT_KEYWORD_CHARS if identChars is None else identChars
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text
            self.caselessmatch = matchString.upper()
            chars = chars.upper()
        self.identChars = set(chars)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the keyword at C{loc}, requiring that the characters immediately before
        and after it (when present) are not in C{identChars}. Returns
        C{(loc + matchLen, match)} or raises C{ParseException}.
        """
        stop = loc + self.matchLen
        if self.caseless:
            if instring[loc:stop].upper() == self.caselessmatch:
                if loc >= len(instring)-self.matchLen or instring[stop].upper() not in self.identChars:
                    if loc == 0 or instring[loc-1].upper() not in self.identChars:
                        return stop, self.match
        else:
            if instring[loc] == self.firstMatchChar:
                if self.matchLen == 1 or instring.startswith(self.match, loc):
                    if loc >= len(instring)-self.matchLen or instring[stop] not in self.identChars:
                        if loc == 0 or instring[loc-1] not in self.identChars:
                            return stop, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Copy this element; the copy's C{identChars} is set to C{Keyword.DEFAULT_KEYWORD_CHARS}."""
        duplicate = super(Keyword, self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
| 2462 |
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the base Literal stores the upper-cased form, used for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """
        Compare the upper-cased input slice at C{loc} with the stored match string;
        return C{(loc + matchLen, returnString)} on success, else raise C{ParseException}.
        """
        candidate = instring[loc:loc+self.matchLen]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc+self.matchLen, self.returnString
| 2485 |
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__(self, matchString, identChars=None):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    # NOTE: parseImpl is intentionally not overridden here. The inherited
    # Keyword.parseImpl handles the caseless case and, unlike the override this
    # class used to carry, also rejects a match that is immediately preceded by
    # an identifier character, so CaselessKeyword(...) behaves the same as
    # Keyword(..., caseless=True).
| 2503 |
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch,self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Compare the input at C{loc} with C{match_string}, character by character.

        Returns C{(end location, ParseResults)} when no more than
        C{maxMismatches} positions differ; raises L{ParseException} otherwise,
        or when fewer than C{len(match_string)} characters remain.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        if maxloc <= len(instring):
            maxMismatches = self.maxMismatches
            mismatches = []

            for pos, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(pos)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # every character was compared, so the match ends at maxloc;
                # the end must be relative to start, not to the beginning of
                # the input (the loop index alone is only an offset)
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
| 2563 |
| 2564 |
class Word(Token):
    """
    Token for matching words built from sets of allowed characters.
    A Word is defined by a string of all allowed initial characters,
    an optional string of allowed body characters (when omitted, the
    initial character set is used), and an optional minimum, maximum,
    and/or exact length. C{min} defaults to 1 (a minimum value < 1 is
    not valid); C{max} and C{exact} default to 0, meaning no maximum or
    exact length restriction. An optional C{excludeChars} parameter lists
    characters to be removed from the given character sets; useful to
    define a word of all printables except for one or two characters,
    for instance.

    L{srange} is useful for defining custom character set strings for defining
    C{Word} expressions, using range notation from regular expression character sets.

    A common mistake is to use C{Word} to match a specific literal string, as in
    C{Word("Address")}. Remember that C{Word} uses the string argument to define
    I{sets} of matchable characters. This expression would match "Add", "AAA",
    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
    To match an exact literal string, use L{Literal} or L{Keyword}.

    pyparsing includes helper strings for building Words:
     - L{alphas}
     - L{nums}
     - L{alphanums}
     - L{hexnums}
     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - L{printables} (any non-whitespace character)

    Example::
        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums+'-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            def _without_excluded(chars):
                return ''.join(ch for ch in chars if ch not in excludeChars)
            initChars = _without_excluded(initChars)
            if bodyChars:
                bodyChars = _without_excluded(bodyChars)

        # an empty/omitted body set falls back to the initial set
        body = bodyChars if bodyChars else initChars
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        self.bodyCharsOrig = body
        self.bodyChars = set(body)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # with default length limits and no space in either set, the word
        # is matched through a compiled regular expression
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.initCharsOrig == self.bodyCharsOrig:
                pattern = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            else:
                if len(self.initCharsOrig) == 1:
                    lead = re.escape(self.initCharsOrig)
                else:
                    lead = "[%s]" % _escapeRegexRangeChars(self.initCharsOrig)
                pattern = "%s[%s]*" % (lead, _escapeRegexRangeChars(self.bodyCharsOrig))
            if self.asKeyword:
                pattern = r"\b" + pattern + r"\b"
            self.reString = pattern
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character scan in parseImpl
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        # fast path: compiled regular expression
        if self.re:
            found = self.re.match(instring,loc)
            if not found:
                raise ParseException(instring, loc, self.errmsg, self)
            return found.end(), found.group()

        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        body = self.bodyChars
        total = len(instring)
        limit = min( begin + self.maxLen, total )
        loc += 1
        while loc < limit and instring[loc] in body:
            loc += 1

        # True when another body character directly follows the scanned text
        runs_on = loc < total and instring[loc] in body
        failed = (loc - begin < self.minLen) or (self.maxSpecified and runs_on)
        if not failed and self.asKeyword:
            failed = runs_on or (begin > 0 and instring[begin-1] in body)

        if failed:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def __str__( self ):
        try:
            return super(Word,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            def _abbrev(chars):
                # show at most the first four characters of a set
                return chars[:4]+"..." if len(chars)>4 else chars

            if self.initCharsOrig == self.bodyCharsOrig:
                self.strRepr = "W:(%s)" % _abbrev(self.initCharsOrig)
            else:
                self.strRepr = "W:(%s,%s)" % ( _abbrev(self.initCharsOrig), _abbrev(self.bodyCharsOrig) )

        return self.strRepr
| 2722 |
| 2723 |
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """
        The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is.
        See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled RE object, in which case it is used
        directly and C{flags} is only stored, not applied.

        Raises C{ValueError} if C{pattern} is neither a string nor a compiled RE object;
        re-raises the C{re} compile error (after a C{SyntaxWarning}) for an invalid pattern string.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the pattern source text; str() of a compiled RE object
            # is its repr, not the expression it was compiled from
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the regular expression at C{loc}.

        Returns C{(end location, ParseResults)} holding the matched text plus one
        named result per named group; raises L{ParseException} on no match.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        ret = ParseResults(result.group())
        for group_name, group_text in result.groupdict().items():
            ret[group_name] = group_text
        return result.end(), ret

    def __str__( self ):
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
| 2795 |
| 2796 |
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=C{None})
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """
        Build the regular expression that recognizes the quoted string.

        Raises C{SyntaxError} (after a C{SyntaxWarning}) when C{quoteChar} or
        C{endQuoteChar} is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError("quoteChar cannot be the empty string")

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError("endQuoteChar cannot be the empty string")

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # pieces shared by the single-line and multi-line patterns
        open_quote = re.escape(self.quoteChar)
        end_first = _escapeRegexRangeChars(self.endQuoteChar[0])
        excluded_esc = _escapeRegexRangeChars(escChar) if escChar is not None else ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % (open_quote, end_first, excluded_esc)
        else:
            # single-line strings may not contain raw line breaks
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % (open_quote, end_first, excluded_esc)
        if len(self.endQuoteChar) > 1:
            # allow any proper prefix of a multi-character end quote inside the
            # body, provided the following character breaks the end quote
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                                   _escapeRegexRangeChars(self.endQuoteChar[i]))
                                      for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a quoted string at C{loc}.

        Returns C{(end location, text)}; when C{unquoteResults} is set the
        quotes are removed and escapes are resolved. Raises L{ParseException}
        if no quoted string starts at C{loc}.
        """
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit,wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
| 2932 |
| 2933 |
class CharsNotIn(Token):
    """
    Token for matching words made of characters that are I{not} in a given set
    (whitespace is included in the match unless it is listed in the exclusion
    set - see example).
    Defined with a string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length. C{min} defaults to 1 (a minimum
    value < 1 is not valid); C{max} and C{exact} default to 0, meaning no
    maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min( begin+self.maxLen, len(instring) )
        loc += 1
        # extend the match until an excluded character or the length limit
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def __str__( self ):
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            # show at most the first four excluded characters
            if len(self.notChars) > 4:
                template, shown = "!W:(%s...)", self.notChars[:4]
            else:
                template, shown = "!W:(%s)", self.notChars
            self.strRepr = template % shown

        return self.strRepr
| 3004 |
class White(Token):
    """
    Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # printable labels used to build the expression name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # characters this token must match can no longer be skipped as
        # leading whitespace
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min( begin + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
| 3054 |
| 3055 |
class _PositionToken(Token):
    """
    Common base class for the position-testing tokens defined below it;
    names the expression after its own class.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = type(self).__name__
| 3062 |
class GoToColumn(_PositionToken):
    """
    Token that advances to a given column of the input text; useful for
    scraping tabular reports.
    """
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        # already at the requested column: nothing to skip
        if col(loc,instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        total = len(instring)
        # step over whitespace until the column is reached
        while loc < total and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        # the text between here and the target column becomes the token
        target = loc + self.col - current
        return target, instring[loc:target]
| 3087 |
| 3088 |
class LineStart(_PositionToken):
    r"""
    Matches only when the current position is the first column of a line
    within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__( self ):
        super(LineStart,self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl( self, instring, loc, doActions=True ):
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
| 3118 |
class LineEnd(_PositionToken):
    """
    Matches only when the current position is at the end of a line within
    the parse string
    """
    def __init__( self ):
        super(LineEnd,self).__init__()
        # a newline must stay visible to parseImpl, so it is removed from
        # the characters skipped as leading whitespace
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        total = len(instring)
        # the end of the input counts as an end of line
        if loc == total:
            return loc+1, []
        if loc < total and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
| 3138 |
class StringStart(_PositionToken):
    """
    Matches only when the current position is the beginning of the parse string
    """
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        # a non-zero location still counts as the start when it is exactly
        # where pre-parsing from position 0 ends up
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
| 3153 |
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed only at or past the end of C{instring}.

        Returns C{(loc+1, [])} exactly at the end and C{(loc, [])} beyond it;
        raises L{ParseException} while input remains.
        """
        total = len(instring)
        if loc < total:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == total:
            return loc+1, []
        # loc > total is the only remaining case
        return loc, []
| 3171 |
class WordStart(_PositionToken):
    r"""
    Matches when the current position is the beginning of a Word, i.e. the
    character at this position is in the given set of C{wordChars}
    (default=C{printables}) and the preceding character is not. To emulate
    the C{\b} behavior of regular expressions, use C{WordStart(alphanums)}.
    C{WordStart} will also match at the beginning of the string being
    parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        # position 0 is accepted without inspecting any character
        if loc == 0:
            return loc, []
        chars = self.wordChars
        if instring[loc-1] in chars or instring[loc] not in chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
| 3191 |
class WordEnd(_PositionToken):
    r"""
    Matches when the current position is the end of a Word, i.e. the
    preceding character is in the given set of C{wordChars}
    (default=C{printables}) and the character at this position is not.
    To emulate the C{\b} behavior of regular expressions, use
    C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        total = len(instring)
        # at or past the end of the input there is nothing left to inspect
        if total > 0 and loc < total:
            chars = self.wordChars
            if instring[loc] in chars or instring[loc-1] not in chars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
| 3213 |
| 3214 |
| 3215 class ParseExpression(ParserElement): |
| 3216 """ |
| 3217 Abstract subclass of ParserElement, for combining and post-processing parsed
tokens. |
| 3218 """ |
| 3219 def __init__( self, exprs, savelist = False ): |
| 3220 super(ParseExpression,self).__init__(savelist) |
| 3221 if isinstance( exprs, _generatorType ): |
| 3222 exprs = list(exprs) |
| 3223 |
| 3224 if isinstance( exprs, basestring ): |
| 3225 self.exprs = [ ParserElement._literalStringClass( exprs ) ] |
| 3226 elif isinstance( exprs, collections.Iterable ): |
| 3227 exprs = list(exprs) |
| 3228 # if sequence of strings provided, wrap with Literal |
| 3229 if all(isinstance(expr, basestring) for expr in exprs): |
| 3230 exprs = map(ParserElement._literalStringClass, exprs) |
| 3231 self.exprs = list(exprs) |
| 3232 else: |
| 3233 try: |
| 3234 self.exprs = list( exprs ) |
| 3235 except TypeError: |
| 3236 self.exprs = [ exprs ] |
| 3237 self.callPreparse = False |
| 3238 |
| 3239 def __getitem__( self, i ): |
| 3240 return self.exprs[i] |
| 3241 |
| 3242 def append( self, other ): |
| 3243 self.exprs.append( other ) |
| 3244 self.strRepr = None |
| 3245 return self |
| 3246 |
| 3247 def leaveWhitespace( self ): |
| 3248 """Extends C{leaveWhitespace} defined in base class, and also invokes C{
leaveWhitespace} on |
| 3249 all contained expressions.""" |
| 3250 self.skipWhitespace = False |
| 3251 self.exprs = [ e.copy() for e in self.exprs ] |
| 3252 for e in self.exprs: |
| 3253 e.leaveWhitespace() |
| 3254 return self |
| 3255 |
| 3256 def ignore( self, other ): |
| 3257 if isinstance( other, Suppress ): |
| 3258 if other not in self.ignoreExprs: |
| 3259 super( ParseExpression, self).ignore( other ) |
| 3260 for e in self.exprs: |
| 3261 e.ignore( self.ignoreExprs[-1] ) |
| 3262 else: |
| 3263 super( ParseExpression, self).ignore( other ) |
| 3264 for e in self.exprs: |
| 3265 e.ignore( self.ignoreExprs[-1] ) |
| 3266 return self |
| 3267 |
| 3268 def __str__( self ): |
| 3269 try: |
| 3270 return super(ParseExpression,self).__str__() |
| 3271 except Exception: |
| 3272 pass |
| 3273 |
| 3274 if self.strRepr is None: |
| 3275 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exp
rs) ) |
| 3276 return self.strRepr |
| 3277 |
| 3278 def streamline( self ): |
| 3279 super(ParseExpression,self).streamline() |
| 3280 |
| 3281 for e in self.exprs: |
| 3282 e.streamline() |
| 3283 |
| 3284 # collapse nested And's of the form And( And( And( a,b), c), d) to And(
a,b,c,d ) |
| 3285 # but only if there are no parse actions or resultsNames on the nested A
nd's |
| 3286 # (likewise for Or's and MatchFirst's) |
| 3287 if ( len(self.exprs) == 2 ): |
| 3288 other = self.exprs[0] |
| 3289 if ( isinstance( other, self.__class__ ) and |
| 3290 not(other.parseAction) and |
| 3291 other.resultsName is None and |
| 3292 not other.debug ): |
| 3293 self.exprs = other.exprs[:] + [ self.exprs[1] ] |
| 3294 self.strRepr = None |
| 3295 self.mayReturnEmpty |= other.mayReturnEmpty |
| 3296 self.mayIndexError |= other.mayIndexError |
| 3297 |
| 3298 other = self.exprs[-1] |
| 3299 if ( isinstance( other, self.__class__ ) and |
| 3300 not(other.parseAction) and |
| 3301 other.resultsName is None and |
| 3302 not other.debug ): |
| 3303 self.exprs = self.exprs[:-1] + other.exprs[:] |
| 3304 self.strRepr = None |
| 3305 self.mayReturnEmpty |= other.mayReturnEmpty |
| 3306 self.mayIndexError |= other.mayIndexError |
| 3307 |
| 3308 self.errmsg = "Expected " + _ustr(self) |
| 3309 |
| 3310 return self |
| 3311 |
def setResultsName(self, name, listAllMatches=False):
    """Delegate to the base-class C{setResultsName} and return its result unchanged."""
    return super(ParseExpression, self).setResultsName(name, listAllMatches)
| 3315 |
def validate(self, validateTrace=None):
    """
    Validate every contained expression, then check this expression for
    recursion.

    Parameters:
     - validateTrace - (default=C{None}) list of expressions already being
          validated; it is copied, never modified, and C{self} is appended
          to the copy that is passed on to each contained expression.
          C{None} is treated as an empty trace.
    """
    # None sentinel instead of a shared mutable default list
    seen = [] if validateTrace is None else validateTrace[:]
    tmp = seen + [self]
    for e in self.exprs:
        e.validate(tmp)
    self.checkRecursion([])
| 3321 |
def copy(self):
    """Return a base-class copy whose C{exprs} list holds copies of each contained expression."""
    duplicate = super(ParseExpression, self).copy()
    duplicate.exprs = [sub.copy() for sub in self.exprs]
    return duplicate
| 3326 |
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}; once seen by C{And.parseImpl}, later failures become C{ParseSyntaxException}s."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist=True):
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the first expression in the sequence
        leader = self.exprs[0]
        self.setWhitespaceChars(leader.whiteChars)
        self.skipWhitespace = leader.skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        # the first element is parsed with callPreParse=False, since the
        # string was already pre-parsed as part of this And's own pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        pastErrorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                pastErrorStop = True
                continue
            if not pastErrorStop:
                loc, exprtokens = e._parse(instring, loc, doActions)
            else:
                # after an error stop, failures are reported as syntax errors
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        # plain strings are wrapped in the configured literal class first
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)

    def checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
            # stop at the first element that cannot return empty
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
            return self.strRepr
        return self.name
| 3401 |
| 3402 |
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        furthestLoc = -1
        furthestExc = None
        candidates = []
        # first pass: trial-parse every alternative, remembering where each
        # one ends, or the failure that got furthest into the input
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring, len(instring), alt.errmsg, self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        if candidates:
            # second pass: real parse, longest trial match first
            candidates.sort(key=lambda m: m[0], reverse=True)
            for _, alt in candidates:
                try:
                    return alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestExc = err
                        furthestLoc = err.loc

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ixor__(self, other):
        # plain strings are wrapped in the configured literal class first
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
            return self.strRepr
        return self.name

    def checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
| 3480 |
| 3481 |
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        furthestLoc = -1
        furthestExc = None
        for alt in self.exprs:
            try:
                # the first alternative that parses wins
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring, len(instring), alt.errmsg, self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ior__(self, other):
        # plain strings are wrapped in the configured literal class first
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
            return self.strRepr
        return self.name

    def checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
| 3548 |
| 3549 |
class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the expression groups used by parseImpl are built lazily, on its first call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Trial-parse the contained expressions repeatedly, in whatever order
        they match, then re-parse them for real in the order they matched.

        Raises C{ParseException} if any required expression never matched.
        Returns C{(loc, results)} where results is the sum of the individual
        parse results.
        """
        if self.initExprGroups:
            # one-time classification of self.exprs by wrapper type
            # maps id() of each Optional's inner expression back to the Optional itself
            self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
            self.optionals = opt1 + opt2
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False
        tmpLoc = loc
        # working copies, so the cached groups survive across calls
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    # record the original Optional wrapper when e is an Optional's inner expression
                    matchOrder.append(self.opt1map.get(id(e),e))
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop once a full pass over the candidates matches nothing
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join(_ustr(e) for e in tmpReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

        # second pass: parse for real (honoring doActions), starting again from the original loc
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = sum(resultlist, ParseResults([]))
        return loc, finalResults

    def __str__( self ):
        # an explicitly assigned name takes precedence over the generated form
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        # every contained expression is checked against the trail extended with self
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
| 3672 |
| 3673 |
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored in C{self.expr}, which may
    be C{None}.
    """
    def __init__( self, expr, savelist=False ):
        """
        Parameters:
         - expr - the contained expression; a string is converted using
              C{ParserElement._literalStringClass}; may be C{None}
         - savelist - (default=C{False}) passed through to the base-class constructor
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # inherit parsing characteristics from the contained expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse using the contained expression.

        Raises C{ParseException} if no contained expression is defined.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        # report against the actual input string (previously an empty string
        # was passed), matching the other raise sites in this module
        raise ParseException(instring,loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """
        Disable whitespace skipping on this element and on a private copy
        of the contained expression.  Returns C{self}.
        """
        self.skipWhitespace = False
        # check for None before copying; calling copy() on a missing
        # expression used to raise AttributeError
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression here and on the
        contained expression.  A C{Suppress} that is already registered
        is not added again.  Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and its contained expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if C{self} already appears in C{parseElementList}."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=None ):
        """
        Validate the contained expression, then check this element for recursion.

        Parameters:
         - validateTrace - (default=C{None}) list of expressions already
              being validated; copied, never modified.  C{None} is treated
              as an empty trace.
        """
        # None sentinel instead of a shared mutable default list
        seen = [] if validateTrace is None else validateTrace[:]
        tmp = seen + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """
        Return the base class's string form when available; otherwise a
        C{"ClassName:(expr)"} description, cached in C{strRepr} once a
        contained expression exists.
        """
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            if self.expr is None:
                # never return None from __str__; do not cache, since the
                # expression may still be assigned later
                return "%s:(None)" % self.__class__.__name__
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
| 3749 |
| 3750 |
class FollowedBy(ParseElementEnhance):
    """
    Lookahead matching of the given parse expression.  C{FollowedBy}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # trial parse only: any failure propagates, success leaves loc untouched
        self.expr.tryParse(instring, loc)
        no_tokens = []
        return loc, no_tokens
| 3775 |
| 3776 |
class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression does I{not} match at the current
    position.  Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator.

    Example::
        # accept a word only if the literal 'end' does not match at that position
        not_end_word = ~Literal("end") + Word(alphas)
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        unwanted_present = self.expr.canParseNext(instring, loc)
        if unwanted_present:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr
        return self.name
| 3808 |
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation of repeated matching of one expression, with an
    optional C{stopOn} sentinel that ends the repetition.
    """
    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # store the negated sentinel (or None when no sentinel was given)
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl(self, instring, loc, doActions=True):
        # bind frequently used callables to locals for the loop below
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        if self.not_ender is None:
            reject_sentinel = None
        else:
            reject_sentinel = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_one(instring, loc, doActions, callPreParse=False)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more = parse_one(instring, preloc, doActions)
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failed repetition simply ends the loop
            pass

        return loc, tokens
| 3846 |
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr
        return self.name
| 3881 |
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            # zero matches is a success: same location, no tokens
            outcome = loc, []
        return outcome

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr
        return self.name
| 3912 |
| 3913 class _NullToken(object): |
| 3914 def __bool__(self): |
| 3915 return False |
| 3916 __nonzero__ = __bool__ |
| 3917 def __str__(self): |
| 3918 return "" |
| 3919 |
| 3920 _optionalNotMatched = _NullToken() |
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match once, or not at all
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            # no match: substitute the default value, if one was supplied
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # keep the default reachable under the expression's results name
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name
| 3984 |
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a string failOn is converted with the configured literal class
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward one character at a time from C{loc} until the target
        expression parses, then return the skipped text (plus the target's
        tokens when C{includeMatch} is set).

        Raises C{ParseException} if the end of C{instring} is passed
        without the target expression matching.
        """
        startloc = loc
        instrlen = len(instring)
        expr = self.expr
        # bind the callables used inside the scan loop to local names
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break also bypasses the while/else below, so a
                # failOn match does not raise here; execution continues to the
                # result-building code with tmploc at the failOn position.  The class
                # docstring says SkipTo is "not a match" in this case - confirm intent.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # trial parse only (doActions=False); the real parse happens below when includeMatch is set
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real (honoring doActions) and append its tokens
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
| 4099 |
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Assign C{other} as the contained expression and adopt its parsing attributes.  Returns C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # drop any cached string representation of the previous expression
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of C{<<}; delegates to L{__lshift__}."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the contained expression is not touched."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; the C{streamlined} flag is set first so repeat calls do nothing."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=None ):
        """
        Validate the contained expression unless C{self} is already in the
        trace, then check for recursion.

        Parameters:
         - validateTrace - (default=C{None}) list of expressions already
              being validated; copied, never modified.  C{None} is treated
              as an empty trace.
        """
        # None sentinel instead of a shared mutable default list
        if validateTrace is None:
            validateTrace = []
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name if there is one, else C{"<ClassName>: ..."}."""
        if hasattr(self,"name"):
            return self.name
        # Rendering the contained expression is deliberately not attempted:
        # the earlier implementation was stubbed out for memory and
        # performance reasons, and its unreachable remains are removed.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.  While no expression has been
        assigned yet, return a new C{Forward} that wraps C{self} instead.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
| 4180 |
class _ForwardNoRecurse(Forward):
    """Internal L{Forward} variant whose string form is always the placeholder C{"..."}."""
    def __str__( self ):
        return "..."
| 4184 |
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, used as the base for classes
    that convert parsed results.
    """
    def __init__( self, expr, savelist=False ):
        # C{savelist} is accepted for signature compatibility but is not
        # forwarded; converters start out with saveAsList switched off.
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
| 4192 |
class Combine(TokenConverter):
    """
    Converter that joins all matched tokens into one string.
    By default the matched pieces must be contiguous in the input string;
    pass C{adjacent=False} to the constructor to lift that restriction.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True
        # turn off whitespace skipping in the contained expressions, then
        # switch it back on for the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True

    def ignore( self, other ):
        """Register an ignorable expression; returns C{self}."""
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            # when adjacency is required, use the base ParserElement
            # implementation directly instead of the inherited override
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with a single joined string token."""
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [ combined ]
        return combined
| 4236 |
class Group(TokenConverter):
    """
    Converter that returns the matched tokens wrapped in a list - handy for
    keeping the tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions together.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Nest the matched tokens one level deeper."""
        grouped = [ tokenlist ]
        return grouped
| 4257 |
class Dict(TokenConverter):
    """
    Converter that returns a repetitive expression as a list, and also makes
    each element reachable by key, using the element's first token as the key.
    Useful for tabular report scraping when the first column can serve as an item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__( self, expr ):
        super(Dict,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """
        Register every non-empty entry of C{tokenlist} under a key taken from
        the entry's first token, then return the token list (wrapped in a list
        when this element has a results name).
        """
        for position, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue

            key = entry[0]
            if isinstance(key,int):
                # integer keys are stored in their stripped string form
                key = _ustr(entry[0]).strip()

            if size == 1:
                # key with no value
                value = ""
            elif size == 2 and not isinstance(entry[1],ParseResults):
                # key with a single plain value
                value = entry[1]
            else:
                # everything after the key becomes the value
                remainder = entry.copy()
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder,ParseResults) and remainder.haskeys()):
                    value = remainder
                else:
                    value = remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value,position)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
| 4321 |
| 4322 |
class Suppress(TokenConverter):
    """
    Converter that discards the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # delimiters are often needed while parsing but only clutter the
        # output - wrap them in Suppress to drop them from the results
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse( self, instring, loc, tokenlist ):
        """Drop whatever was matched."""
        return list()

    def suppress( self ):
        """Already suppressed, so simply return C{self}."""
        return self
| 4347 |
| 4348 |
class OnlyOnce(object):
    """
    Wrapper for a parse action that lets it run a single time; any further
    call raises C{ParseException} until L{reset} is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # only mark as used after the wrapped action returned normally
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to be called once more."""
        self.called = False
| 4364 |
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    Before the parse action runs, C{">>entering I{method-name}(line: I{current_source_line}, I{parse_location}, I{matched_tokens})"}
    is written to stderr.  After it finishes, C{"<<leaving"} is written together with the returned
    value, or with the exception that the parse action raised.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)
    def z(*paArgs):
        # the last three positional arguments are always (string, location, tokens)
        s,l,t = paArgs[-3:]
        label = f.__name__
        if len(paArgs)>3:
            # an extra leading argument is the owning instance of a bound action
            label = paArgs[0].__class__.__name__ + '.' + label
        sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (label,line(l,s),l,t) )
        try:
            result = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (label,exc) )
            raise
        else:
            sys.stderr.write( "<<leaving %s (ret: %r)\n" % (label,result) )
            return result
    try:
        z.__name__ = f.__name__
    except AttributeError:
        # the wrapped callable has no usable __name__; keep the default
        pass
    return z
| 4405 |
| 4406 # |
| 4407 # global helpers |
| 4408 # |
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default the list elements and delimiters may be separated by whitespace and
    comments; pass C{combine=True} to override this.
    With C{combine=True} the matched tokens come back as one token string with the
    delimiters included; otherwise they come back as a list of tokens with the
    delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    listName = "".join([_ustr(expr), " [", _ustr(delim), " ", _ustr(expr), "]..."])
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(listName)
| 4427 |
def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    The resulting pattern has the form::
        integer expr expr expr...
    where the leading integer says how many C{expr} items follow.
    The C{expr} tokens are returned together in a group; the leading count
    token itself is suppressed.

    If C{intExpr} is given, it should be a pyparsing expression that produces an integer value.

    Example::
        # matches 'ab' and 'cd' as the array items, leaving 'ef' unparsed
        countedArray(Word(alphas)).parseString('2 ab cd ef')

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # rebuild the array expression for the count that was just parsed
        n = t[0]
        body = Group(And([expr]*n)) if n else Group(empty)
        arrayExpr << body
        return []
    if intExpr is not None:
        # work on a copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')
| 4458 |
| 4459 def _flatten(L): |
| 4460 ret = [] |
| 4461 for i in L: |
| 4462 if isinstance(i,list): |
| 4463 ret.extend(_flatten(i)) |
| 4464 else: |
| 4465 ret.append(i) |
| 4466 return ret |
| 4467 |
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched by an earlier expression, that is, it looks
    for a 'repeat' of what that expression matched.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because the repeat is matched as
    a literal, it will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        if not t:
            # nothing was matched, so the repeat matches nothing either
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: repeat them as a sequence of literals
            flatTokens = _flatten(t.asList())
            rep << And(Literal(tok) for tok in flatTokens)
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
| 4495 |
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched by an earlier expression, that is, it looks
    for a 'repeat' of what that expression matched.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because the comparison is made on
    parsed expressions, it will I{not} match the leading C{"1:1"} in C{"1:10"};
    both expressions are evaluated first and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()
    def copyTokenToRepeater(s,l,t):
        # remember what the first expression matched ...
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # ... and reject the repeat unless it yields the same tokens
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
| 4523 |
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regex character class ('[...]')."""
    # the backslash must be handled first, so the backslashes added for the
    # other characters are not escaped a second time
    substitutions = [(c, _bslash+c) for c in r"\^-]"]
    substitutions.append(("\n", r"\n"))
    substitutions.append(("\t", r"\t"))
    for old, new in substitutions:
        s = s.replace(old, new)
    return _ustr(s)
| 4531 |
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    If C{strs} is neither a string nor an iterable, a C{SyntaxWarning} is issued.
    If no symbols result, a C{NoMatch} expression is returned.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # The ABC aliases in the top-level collections module were removed in
    # Python 3.10; fall back to the old location only on Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix so the longer one is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no conflict found for symbols[i]; advance to the next symbol
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: use a character class
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
| 4604 |
def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary by giving the patterns
    for the key and for the value.  Takes care of nesting the C{L{Dict}}, C{L{ZeroOrMore}}, and
    C{L{Group}} tokens in the proper order.  The key pattern may include delimiting markers or
    punctuation, as long as they are suppressed, so that only the significant key text remains.
    The value pattern may include named results, so that the C{Dict} results can include named
    token fields.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one grouped key/value pair, repeated zero or more times, exposed as a dict
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
| 4639 |
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    turn separate tokens with intervening whitespace back into the original matching
    input text.  By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers that record the parse location before and after expr
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")
    if not asString:
        def extractText(s,l,t):
            # remove the two marker entries and keep only the sliced source text
            begin = t.pop('_original_start')
            end = t.pop('_original_end')
            t[:] = [s[begin:end]]
    else:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
| 4676 |
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.  The wrapped expression's results are
    replaced by their first element.
    """
    converter = TokenConverter(expr)
    return converter.setParseAction(lambda t:t[0])
| 4683 |
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    The returned group carries these results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose only token is the current parse location
    locator = Empty().setParseAction(lambda s,l,t: l)
    startLocn = locator("locn_start")
    value = expr("value")
    # the end marker must not skip whitespace that follows the match
    endLocn = locator.copy().leaveWhitespace()("locn_end")
    return Group(startLocn + value + endLocn)
| 4706 |
| 4707 |
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar used by srange() to parse regex-style '[...]' character sets
# backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x##, \X## or \0x## -> the character with that hex code.  The prefix is
# removed with an anchored regex; str.lstrip(r'\0x') treats its argument as a
# set of characters, so it also removed leading '0' hex digits (making
# "\x00" fail) and did not remove an upper-case 'X'.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(re.sub(r"^\\0?[xX]", "", t[0]),16)))
# \0## -> the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
| 4721 |
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.  If the input cannot be parsed or
    expanded, an empty string is returned.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    """
    def _expanded(part):
        # a ParseResults holds the two end points of a range; anything else
        # is already a single character
        if isinstance(part,ParseResults):
            return ''.join(unichr(c) for c in range(ord(part[0]),ord(part[1])+1))
        return part
    try:
        pieces = [_expanded(part) for part in _reBracketExpr.parseString(s).body]
        return "".join(pieces)
    except Exception:
        return ""
| 4745 |
def matchOnlyAtCol(n):
    """
    Helper method for building a parse action that accepts a match only when
    it begins at column C{n} of the input text; otherwise the action raises
    C{ParseException}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
| 4755 |
def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.
    The returned parse action ignores its arguments and yields C{[replStr]}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    replacement = lambda s,l,t: [replStr]
    return replacement
| 4769 |
def removeQuotes(s,l,t):
    """
    Helper parse action that strips the first and last character (the quotation
    marks) from the first parsed token.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
| 4783 |
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list.
    If any additional args are passed, they are forwarded to the given function as additional
    arguments after the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))},
    which will convert the parsed data to an integer using base 16.

    Example (compare the last to example in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    # name the parse action after the mapped function: prefer its __name__,
    # then its class name, and finally its string form
    try:
        fallback = getattr(func, '__class__').__name__
        func_name = getattr(func, '__name__', fallback)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
| 4827 |
upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
"""(Deprecated) Helper parse action that converts tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
"""(Deprecated) Helper parse action that converts tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
| 4833 |
def _makeTags(tagStr, xml):
    """
    Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} is either a tag name string or an expression with a C{name}
    attribute; C{xml} selects XML rules (case-sensitive tag, double-quoted
    attribute values) over HTML rules.  Returns the pair C{(openTag, closeTag)}.
    """
    if not isinstance(tagStr,basestring):
        resname = tagStr.name
    else:
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagAttr = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # unquoted HTML attribute values run up to the closing '>'
        printablesLessRAbrack = "".join(c for c in printables if c != ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        # HTML attribute names are lower-cased and the value is optional
        tagAttr = Group( tagAttrName.setParseAction(downcaseTokens) + Optional( Suppress("=") + tagAttrValue ) )
    tagAttrs = Dict(ZeroOrMore(tagAttr))
    # True when the tag is self-closing ('/>'), False otherwise
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + tagAttrs + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are 'start'/'end' plus the tag name in title case, with ':' removed
    titledName = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+titledName).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+titledName).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
| 4862 |
def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags( tagStr, xml=False )
| 4881 |
def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
    tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags( tagStr, xml=True )
| 4890 |
def withAttribute(*args,**attrDict):
    """
    Create a validating parse action for start tags built with
    C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Attaching the returned parse
    action to a start tag restricts it to tags carrying the required
    attribute values, which avoids false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    The required attribute names and values can be given as:
     - keyword arguments, as in C{(align="right")}, or
     - an explicit dict with the C{**} operator, when an attribute name is also a
       Python reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    Attribute names with a namespace prefix must use the second form. Attribute
    names are matched insensitive to upper/lower case.

    Positional name-value tuples take precedence: when any are given, the
    keyword arguments are not consulted.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To require only that an attribute is present, whatever its value, pass
    C{withAttribute.ANY_VALUE} as the value.

    The returned parse action raises C{ParseException} when a required
    attribute is missing or has a different value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # normalise either input form into a list of (name, value) pairs
    pairs = args if args else attrDict.items()
    required = [(name, value) for name, value in pairs]

    def pa(s,l,tokens):
        for name, expected in required:
            if name not in tokens:
                raise ParseException(s,l,"no matching attribute " + name)
            if expected != withAttribute.ANY_VALUE:
                actual = tokens[name]
                if actual != expected:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (name, actual, expected))
    return pa
# unique sentinel meaning "attribute must exist, any value is acceptable"
withAttribute.ANY_VALUE = object()
| 4955 |
def withClass(classname, namespace=''):
    """
    Shortcut for C{L{withAttribute}} when filtering on a tag's C{class}
    attribute, which is awkward to pass as a keyword because C{class} is a
    reserved word in Python.

    When C{namespace} is non-empty the attribute tested is
    C{"<namespace>:class"}, otherwise it is plain C{"class"}.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attr_key = "%s:class" % namespace
    else:
        attr_key = "class"
    # the key is not a valid keyword identifier, so pass it through a dict
    return withAttribute(**{attr_key : classname})
| 4990 |
# Namespace object holding the associativity markers accepted by infixNotation.
opAssoc = _Constants()
# Unique sentinels; infixNotation compares each operator's rightLeftAssoc
# member against these two objects.
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
| 4994 |
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the
       nested expression grammar
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple or list of two expressions,
          for the two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted)
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Returns a C{Forward} expression matching the complete grammar.

    Raises C{ValueError} if numTerms is not 1, 2 or 3, if numTerms is 3 and
    opExpr is not a pair of expressions, or if rightLeftAssoc is neither
    C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    # the innermost operand is either a base element or a parenthesized sub-expression
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the optional parse action member may be omitted;
        # tuple() lets a precedence level be given as a list as well
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity not in (1, 2, 3):
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        if arity == 3:
            # validate *before* building the name, so that a bad operand gives
            # the intended ValueError and not a string-formatting TypeError
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            # format from the unpacked pair so a two-item list works like a tuple
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % opExpr
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: terms are combined by juxtaposition
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            else:
                # arity == 3
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    # no operator expression: terms are combined by juxtaposition
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            else:
                # arity == 3
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator form, else falls through to the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
| 5101 |
# backward-compatible alias: both names refer to the same function object
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions. Within each regex the body may contain: any
# character other than the quote, CR, LF or backslash; a doubled quote; or a
# backslash escape (a backslash followed by any non-'x' character, or by 'x'
# and one or more hex digits). The closing quote is matched separately and
# joined to the body by Combine.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# either of the two forms above, written out again here as one alternation
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quoted string immediately preceded by a literal 'u' prefix
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
| 5110 |
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Build an expression matching nested lists enclosed in opening and closing
    delimiters (C{"("} and C{")"} by default). Each nesting level is returned
    as its own group, with the delimiters suppressed.

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    When no C{content} expression is given, the nested expression captures
    all whitespace-delimited content between the delimiters as a list of
    separate values; in that case both C{opener} and C{closer} must be
    strings.

    Use the C{ignoreExpr} argument to name expressions that may themselves
    contain opening or closing characters which must not be treated as
    nesting delimiters, such as quotedString or a comment expression.
    Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}; pass C{None} if nothing is to be ignored.

    Raises C{ValueError} if C{opener} equals C{closer}, or if C{content} is
    omitted and either delimiter is not a string.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    have_ignore = ignoreExpr is not None

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        strip_token = lambda t:t[0].strip()
        white_chars = ParserElement.DEFAULT_WHITE_CHARS
        one_char_delims = len(opener) == 1 and len(closer)==1

        if one_char_delims and have_ignore:
            # single-character delimiters can simply be excluded from the character set
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+white_chars,exact=1))
                        ).setParseAction(strip_token))
        elif one_char_delims:
            content = (empty.copy()+CharsNotIn(opener+closer+white_chars
                        ).setParseAction(strip_token))
        elif have_ignore:
            # multi-character delimiters need negative lookahead at every position
            content = (Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(white_chars,exact=1))
                        ).setParseAction(strip_token))
        else:
            content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(white_chars,exact=1))
                        ).setParseAction(strip_token))

    ret = Forward()
    if have_ignore:
        body = ZeroOrMore( ignoreExpr | ret | content )
    else:
        body = ZeroOrMore( ret | content )
    ret <<= Group( Suppress(opener) + body + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
| 5200 |
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Side effects: C{indentStack} is pushed and popped by the parse actions
    defined here while parsing, and C{blockStatementExpr} itself is modified
    by a call to its C{ignore} method.

    Returns a C{Group} expression named C{'indented block'}.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # Require the current column to equal the top of the indent stack.
        # Nothing is checked at end of input.
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                # deeper than the current block: raised as a fatal exception
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # Entering a block: the column must be deeper than the current top of
        # the stack, and is pushed as the new current indentation level.
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # Leaving a block: the column must be shallower than the top of the
        # stack and no deeper than the entry beneath it; on success the top
        # entry is popped. Nothing is checked (or popped) at end of input.
        if l >= len(s): return
        curCol = col(l,s)
        # NOTE(review): indentStack[-2] assumes at least two entries whenever
        # the first two conditions hold - confirm against callers
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more suppressed line ends; only tab and space are skipped as
    # whitespace, so the newline itself is what gets matched
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    # zero-width markers whose parse actions perform the indentation checks
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # left-most block: peers only, with no INDENT/UNDENT bracketing
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # modifies the caller's expression: _bslash followed by a line end is ignored
    # (presumably a backslash line continuation - _bslash is defined elsewhere)
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
| 5314 |
# character sets built by srange from the 8-bit code ranges listed in each spec
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# open/close tag expressions for any tag whose name is a Word of alphas
# followed by alphanumerics, '_' or ':'
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character, paired positionally by zip;
# note that 'nbsp' maps to an ordinary space character
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# matches '&<name>;' for the names above, capturing the name in the group 'entity'
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """
    Parse action that swaps a matched common HTML entity for the character it
    stands for.

    Looks up C{t.entity} in C{_htmlEntityMap}; returns the mapped character,
    or C{None} when the name is not in the map.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
| 5324 |
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# the regex consumes '/*' and the body (any non-'*', or a '*' not followed by '/');
# the closing '*/' is matched as a separate literal and joined by Combine
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

# non-greedy match, so the comment ends at the first '-->'
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# a backslash immediately followed by a newline is consumed, so such a comment continues onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

# same object as cppStyleComment, not a copy
javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one list item: runs of printable non-comma characters; spaces/tabs between
# runs are kept only when not followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# an item may be empty (default=""), so consecutive commas yield empty strings
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
| 5351 |
| 5352 # some other useful expressions - using lower-case class name since we are reall
y using this as a namespace |
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    The class is used purely as a namespace: every member is a class
    attribute or a static method, so nothing here needs an instance.

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    # int is applied with base 16, so "100" parses to 256
    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # numerator and denominator are both converted with convertToFloat
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # first token (numerator) divided by last token (denominator)
    fraction.addParseAction(lambda t: t[0]/t[-1])

    # the optional '-' between the integer and the fraction is suppressed, so it
    # acts as a separator, not a sign
    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the resulting tokens are added together with the builtin sum
    mixed_integer.addParseAction(sum)

    # the decimal point is required; digits after it are optional
    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    # requires either an exponent, or a decimal point optionally followed by an exponent
    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    # four dot-separated fields, each matching 250-255, 200-249, or an optional
    # leading '1' followed by one or two digits
    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    # one group of 1 to 4 hex digits
    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    # exactly eight colon-separated groups
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    # '::' with up to seven groups allowed by the grammar on either side ...
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # ... but the match is rejected unless fewer than eight groups were matched in total
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the first delimiter is captured and back-referenced (\1), so every
    # delimiter in one address must be the same character
    # NOTE(review): the regex requires six hex pairs (two, then four more),
    # while the description string below shows only five groups
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        The returned parse action converts the first token and raises
        C{ParseException} (carrying the C{ValueError} message) when the
        token does not match C{fmt}.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                # report a conversion failure as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        The returned parse action converts the first token and raises
        C{ParseException} (carrying the C{ValueError} message) when the
        token does not match C{fmt}.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # report a conversion failure as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    # month and day are optional in the regex; the parts are exposed as the
    # named groups year, month and day
    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # matches any opening or closing tag and suppresses it from the output
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Only the first token is processed; it is passed through
        C{_html_stripper.transformString} and the result is returned.

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    # one list item: runs of printable non-comma characters, each optionally
    # followed by spaces/tabs, stopping at a comma or the end of the line
    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
                                        + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
    # an item may be empty (default=""), so consecutive commas yield empty strings
    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    # each token is converted with _ustr before its case is changed
    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
| 5626 |
# Self-test / demo: runs only when the module is executed as a script.
if __name__ == "__main__":

    # keywords are matched without regard to case
    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into a single token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    # minimal "SELECT <columns> FROM <tables>" grammar with named results
    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # any int or real number, returned as the appropriate type
    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # hexadecimal integers, converted with base 16
    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # replace the uuid expression's parse action so matches become uuid.UUID objects
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
OLD | NEW |