| OLD | NEW |
| (Empty) |
| 1 # Copyright (c) 2001-2007 Twisted Matrix Laboratories. | |
| 2 # See LICENSE for details. | |
| 3 | |
| 4 # DO NOT EDIT xpathparser.py! | |
| 5 # | |
| 6 # It is generated from xpathparser.g using Yapps. Make needed changes there. | |
| 7 # This also means that the generated Python may not conform to Twisted's coding | |
| 8 # standards. | |
| 9 | |
| 10 # HOWTO Generate me: | |
| 11 # | |
| 12 # 1.) Grab a copy of yapps2, version 2.1.1: | |
| 13 # http://theory.stanford.edu/~amitp/Yapps/ | |
| 14 # | |
| 15 # Note: Do NOT use the package in debian/ubuntu as it has incompatible | |
| 16 # modifications. | |
| 17 # | |
| 18 # 2.) Generate the grammar: | |
| 19 # | |
| 20 # yapps2 xpathparser.g xpathparser.py.proto | |
| 21 # | |
| 22 # 3.) Edit the output to depend on the embedded runtime, not yappsrt. | |
| 23 # | |
| 24 # sed -e '/^import yapps/d' -e '/^[^#]/s/yappsrt\.//g' \ | |
| 25 # xpathparser.py.proto > xpathparser.py | |
| 26 | |
| 27 """ | |
| 28 XPath Parser. | |
| 29 | |
| 30 Besides the parser code produced by Yapps, this module also defines the | |
| 31 parse-time exception classes, a scanner class, a base class for parsers | |
| 32 produced by Yapps, and a context class that keeps track of the parse stack. | |
| 33 These have been copied from the Yapps runtime. | |
| 34 """ | |
| 35 | |
| 36 import sys, re | |
| 37 | |
class SyntaxError(Exception):
    """Raised when the scanner or parser meets a token it did not expect.

    Note that this class deliberately carries the same name as the builtin;
    within this module the name refers to this class.

    @ivar charpos: character offset of the offending token, or a negative
        number when the position is unknown.
    @ivar msg: human readable description of the problem.
    @ivar context: the L{Context} active when the error occurred, or C{None}.
    """

    def __init__(self, charpos=-1, msg="Bad Token", context=None):
        Exception.__init__(self)
        self.charpos, self.msg, self.context = charpos, msg, context

    def __str__(self):
        # A negative position means "unknown": there is nothing useful to add.
        if self.charpos < 0:
            return 'SyntaxError'
        return 'SyntaxError@char%s(%s)' % (repr(self.charpos), self.msg)
| 49 | |
class NoMoreTokens(Exception):
    """Raised when a token is requested but the scanner cannot supply one."""
| 53 | |
class Scanner:
    """Yapps scanner.

    The Yapps scanner can work in context sensitive or context
    insensitive modes.  The token(i) method is used to retrieve the
    i-th token.  It takes a restrict set that limits the set of tokens
    it is allowed to return.  In context sensitive mode, this restrict
    set guides the scanner.  In context insensitive mode, there is no
    restriction (the set is always the full set of tokens).

    """

    def __init__(self, patterns, ignore, input):
        """Initialize the scanner.

        @param patterns: [(terminal, uncompiled regex), ...] or C{None}
        @param ignore: [terminal,...]
        @param input: string

        If patterns is C{None}, we assume that the subclass has defined
        C{self.patterns} : [(terminal, compiled regex), ...]. Note that the
        patterns parameter expects uncompiled regexes, whereas the
        C{self.patterns} field expects compiled regexes.
        """
        # [(begin char pos, end char pos, token name, matched text), ...]
        self.tokens = []
        # restrictions[i] is the restrict set that was in force when
        # tokens[i] was scanned.
        self.restrictions = []
        self.input = input
        self.pos = 0
        self.ignore = ignore
        self.first_line_number = 1

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = [(terminal, re.compile(regex))
                             for terminal, regex in patterns]

    def get_token_pos(self):
        """Get the current token position in the input text."""
        return len(self.tokens)

    def get_char_pos(self):
        """Get the current char position in the input text."""
        return self.pos

    def get_prev_char_pos(self, i=None):
        """Get the previous position (one token back) in the input text.

        @param i: index of the token whose start offset is wanted; C{None}
            means the most recently scanned token.
        @return: the start offset of token C{i}.  If nothing has been
            consumed yet, C{0}.  If no token is recorded at C{i} (for
            example only ignored text has been consumed so far, or C{i} is
            one past the last scanned token), the current char position.
        """
        if self.pos == 0:
            return 0
        if i is None:
            i = -1
        try:
            return self.tokens[i][0]
        except IndexError:
            # This is called while reporting errors; failing here would
            # hide the original problem.  The current scan position is the
            # best available approximation.
            return self.pos

    def get_line_number(self):
        """Get the line number of the current position in the input text."""
        # TODO: make this work at any token/char position
        return self.first_line_number + self.get_input_scanned().count('\n')

    def get_column_number(self):
        """Get the column number of the current position in the input text."""
        s = self.get_input_scanned()
        i = s.rfind('\n')  # may be -1, but that's okay in this case
        return len(s) - (i + 1)

    def get_input_scanned(self):
        """Get the portion of the input that has been tokenized."""
        return self.input[:self.pos]

    def get_input_unscanned(self):
        """Get the portion of the input that has not yet been tokenized."""
        return self.input[self.pos:]

    def token(self, i, restrict=None):
        """Get the i'th token in the input.

        If C{i} is one past the end, then scan for another token.

        @param i: token index

        @param restrict: [token, ...] or C{None}; if restrict is
                         C{None}, then any token is allowed.  You may call
                         token(i) more than once.  However, the restrict set
                         may never be larger than what was passed in on the
                         first call to token(i).

        @raise NotImplementedError: if C{restrict} contains a terminal that
            was not allowed when token C{i} was first scanned.
        @raise NoMoreTokens: if no token is available at index C{i}.
        """
        if i == len(self.tokens):
            self.scan(restrict)
        if i < len(self.tokens):
            # Make sure the restriction is more restricted.  This
            # invariant is needed to avoid ruining tokenization at
            # position i+1 and higher.
            if restrict and self.restrictions[i]:
                for r in restrict:
                    if r not in self.restrictions[i]:
                        raise NotImplementedError("Unimplemented: restriction set changed")
            return self.tokens[i]
        raise NoMoreTokens()

    def __repr__(self):
        """Print the last 10 tokens that have been scanned in"""
        return ''.join(['\n (@%s) %s = %s' % (t[0], t[2], repr(t[3]))
                        for t in self.tokens[-10:]])

    def scan(self, restrict):
        """Should scan another token and add it to the list, self.tokens,
        and add the restriction to self.restrictions

        @raise SyntaxError: if no allowed pattern matches at the current
            position.
        """
        # Keep looking for a token, ignoring any in self.ignore
        while True:
            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            for p, regexp in self.patterns:
                # First check to see if we're ignoring this token
                if restrict and p not in restrict and p not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if m and len(m.group(0)) > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = len(m.group(0))

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of ' + ', '.join(restrict)
                raise SyntaxError(self.pos, msg)

            # If we found something that isn't to be ignored, return it
            if best_pat not in self.ignore:
                # Create a token with this data
                token = (self.pos, self.pos + best_match, best_pat,
                         self.input[self.pos:self.pos + best_match])
                self.pos = self.pos + best_match
                # Only add this token if it's not in the list
                # (to prevent looping)
                if not self.tokens or token != self.tokens[-1]:
                    self.tokens.append(token)
                    self.restrictions.append(restrict)
                return
            else:
                # This token should be ignored ..
                self.pos = self.pos + best_match
| 198 | |
class Parser:
    """Base class for Yapps-generated parsers.

    Keeps a reference to the scanner and the index of the next token to be
    consumed.
    """

    def __init__(self, scanner):
        """
        @param scanner: object providing C{token(i, restrict)} and a
            C{restrictions} list, e.g. a L{Scanner}.
        """
        self._scanner = scanner
        self._pos = 0

    def _peek(self, *types):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        tok = self._scanner.token(self._pos, types)
        return tok[2]

    def _scan(self, type):
        """Returns the matched text, and moves to the next token

        @raise SyntaxError: if the token at the current position is not of
            the requested C{type}.
        """
        tok = self._scanner.token(self._pos, [type])
        if tok[2] != type:
            # The stored restriction may be None when the token was first
            # scanned without a restrict set; treat that as "no list".
            allowed = self._scanner.restrictions[self._pos] or ()
            raise SyntaxError(tok[0], 'Trying to find ' + type + ': ' + ', '.join(allowed))
        self._pos = 1 + self._pos
        return tok[3]
| 221 | |
class Context:
    """Class to represent the parser's call stack.

    Each rule invocation gets its own Context pointing back at the Context
    of the rule that invoked it, which makes the chain usable for debugging
    output.

    """

    def __init__(self, parent, scanner, tokenpos, rule, args=()):
        """Create a new context.

        @param parent: Context object or C{None}
        @param scanner: Scanner object
        @param tokenpos: scanner token position
        @type tokenpos: L{int}
        @param rule: name of the rule
        @type rule: L{str}
        @param args: tuple listing parameters to the rule

        """
        self.parent, self.scanner = parent, scanner
        self.tokenpos, self.rule = tokenpos, rule
        self.args = args

    def __str__(self):
        """Render the rule chain, outermost first, separated by ' > '."""
        prefix = ''
        if self.parent:
            prefix = str(self.parent) + ' > '
        return prefix + self.rule
| 253 | |
def print_line_with_pointer(text, p):
    """Print the line of 'text' that includes position 'p',
    along with a second line with a single caret (^) at position p

    Output goes to C{sys.stderr}.

    @param text: the full input text.
    @param p: character offset into C{text} to point at.
    """

    # TODO: separate out the logic for determining the line/character
    # location from the logic for determining how to display an
    # 80-column line to stderr.

    # Now try printing part of the line
    text = text[max(p-80, 0):p+80]
    p = p - max(p-80, 0)

    # Strip to the left: cut just after the LAST line break before p,
    # whichever of '\n' or '\r' it is.  Taking the later of the two matters
    # for '\r\n' input, where cutting at the '\r' would leave the '\n' at
    # the start of the displayed line.
    i = max(text[:p].rfind('\n'), text[:p].rfind('\r'))
    if 0 <= i < p:
        p = p - i - 1
        text = text[i+1:]

    # Strip to the right: cut at the FIRST line break at or after p.
    i = text.find('\n', p)
    j = text.find('\r', p)
    if i < 0 or (0 <= j < i):
        i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text
    while len(text) > 70 and p > 60:
        # Cut off 10 chars, put back a 3 char ellipsis: net shift of 7
        text = "..." + text[10:]
        p = p - 7

    # Now print the string, along with an indicator.  Written via write()
    # rather than a print statement so it runs on both Python 2 and 3; the
    # single space between the items mirrors what "print a, b" emits.
    sys.stderr.write('%s %s\n' % ('> ', text))
    sys.stderr.write('%s %s\n' % ('> ', ' '*p + '^'))
| 290 | |
def print_error(input, err, scanner):
    """Print error messages, the parser stack, and the input text -- for human-readable error messages.

    Everything is written to C{sys.stderr}.

    @param input: the text that was being parsed.
    @param err: the error to report; its C{msg}, C{charpos} and C{context}
        attributes are used.
    @param scanner: scanner used to obtain the current line and column.
    """
    # NOTE: this function assumes 80 columns :-(
    # Figure out the line number
    line_number = scanner.get_line_number()
    column_number = scanner.get_column_number()
    # write() instead of a print statement so this runs on Python 2 and 3.
    sys.stderr.write('%d:%d: %s\n' % (line_number, column_number, err.msg))

    context = err.context
    if not context:
        # No rule stack available: just point at the failing character.
        print_line_with_pointer(input, err.charpos)

    # Walk the rule stack from the innermost rule outwards.
    while context:
        # TODO: add line number
        sys.stderr.write('while parsing %s%s:\n' % (context.rule, tuple(context.args)))
        print_line_with_pointer(input, context.scanner.get_prev_char_pos(context.tokenpos))
        context = context.parent
| 308 | |
def wrap_error_reporter(parser, rule):
    """Run a parser rule, reporting parse failures on stderr.

    @param parser: parser object; C{parser._scanner} is used for reporting.
    @param rule: name of the rule method to invoke on C{parser}.
    @return: whatever the rule returns.  If a L{SyntaxError} or
        L{NoMoreTokens} is raised, a report is written to C{sys.stderr}
        and C{None} is returned implicitly.
    """
    try:
        return getattr(parser, rule)()
    except SyntaxError:
        # sys.exc_info() rather than "except X, e" / "except X as e" so the
        # same source is valid on both old Python 2 releases and Python 3.
        e = sys.exc_info()[1]
        input = parser._scanner.input
        print_error(input, e, parser._scanner)
    except NoMoreTokens:
        sys.stderr.write('Could not complete parsing; stopped around here:\n')
        sys.stderr.write('%s\n' % (parser._scanner,))
| 318 | |
| 319 | |
| 320 from twisted.words.xish.xpath import AttribValue, BooleanValue, CompareValue | |
| 321 from twisted.words.xish.xpath import Function, IndexValue, LiteralValue | |
| 322 from twisted.words.xish.xpath import _AnyLocation, _Location | |
| 323 | |
| 324 %% | |
# Grammar for the XPath subset handled by this module.  Each rule builds and
# returns objects imported above (_Location, _AnyLocation, IndexValue, ...).
parser XPathParser:
        # Whitespace between tokens is skipped.
        ignore:             "\\s+"
        token INDEX:        "[0-9]+"
        token WILDCARD:     "\*"
        token IDENTIFIER:   "[a-zA-Z][a-zA-Z0-9_\-]*"
        # NOTE(review): no rule below references ATTRIBUTE; VALUE matches a
        # literal "@" followed by IDENTIFIER instead.
        token ATTRIBUTE:    "\@[a-zA-Z][a-zA-Z0-9_\-]*"
        # Same as IDENTIFIER except that "-" is not allowed.
        token FUNCNAME:     "[a-zA-Z][a-zA-Z0-9_]*"
        token CMP_EQ:       "\="
        token CMP_NE:       "\!\="
        # Quoted strings; the quotes are part of the matched text.
        # NOTE(review): the non-greedy repeat looks like it would stop at the
        # first quote even when it is backslash-escaped -- confirm before
        # relying on escapes.
        token STR_DQ:       '"([^"]|(\\"))*?"'
        token STR_SQ:       "'([^']|(\\'))*?'"
        token OP_AND:       "and"
        token OP_OR:        "or"
        token END:          "$"

        # One or more PATH steps up to the end of input.  Each further step
        # is attached as childLocation of the previous one; the first step
        # is returned.
        rule XPATH:      PATH {{ result = PATH; current = result }}
                           ( PATH {{ current.childLocation = PATH; current = current.childLocation }} ) * END
                           {{ return result }}

        # A single step: "/" creates a _Location, "//" an _AnyLocation.  The
        # step names an element, or uses the wildcard (elementName None),
        # followed by any number of bracketed predicates.
        rule PATH:       ("/" {{ result = _Location() }} | "//" {{ result = _AnyLocation() }} )
                           ( IDENTIFIER {{ result.elementName = IDENTIFIER }} | WILDCARD {{ result.elementName = None }} )
                           ( "\[" PREDICATE {{ result.predicates.append(PREDICATE) }} "\]")*
                           {{ return result }}

        # A predicate is either an expression or a bare number (IndexValue).
        rule PREDICATE:  EXPR  {{ return EXPR }} |
                         INDEX {{ return IndexValue(INDEX) }}

        # FACTOR (BOOLOP FACTOR)*, folded left to right into BooleanValue.
        rule EXPR:       FACTOR {{ e = FACTOR }}
                           ( BOOLOP FACTOR {{ e = BooleanValue(e, BOOLOP, FACTOR) }} )*
                           {{ return e }}

        rule BOOLOP:     ( OP_AND {{ return OP_AND }} | OP_OR {{ return OP_OR }} )

        # A term, or a parenthesised expression.
        rule FACTOR:     TERM {{ return TERM }}
                           | "\(" EXPR "\)" {{ return EXPR }}

        # A value, optionally compared (= or !=) with a second value.
        rule TERM:       VALUE            {{ t = VALUE }}
                           [ CMP VALUE  {{ t = CompareValue(t, CMP, VALUE) }} ]
                                          {{ return t }}

        # An attribute reference, a function call with comma separated
        # arguments, or a string literal with its first and last character
        # (the quotes) removed.
        rule VALUE:      "@" IDENTIFIER   {{ return AttribValue(IDENTIFIER) }} |
                         FUNCNAME         {{ f = Function(FUNCNAME); args = [] }}
                           "\(" [ VALUE   {{ args.append(VALUE) }}
                             (
                               "," VALUE  {{ args.append(VALUE) }}
                             )*
                           ] "\)"         {{ f.setParams(*args); return f }} |
                         STR              {{ return LiteralValue(STR[1:len(STR)-1]) }}

        rule CMP: (CMP_EQ  {{ return CMP_EQ }} | CMP_NE {{ return CMP_NE }})
        rule STR: (STR_DQ  {{ return STR_DQ }} | STR_SQ {{ return STR_SQ }})
| OLD | NEW |