Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(12)

Side by Side Diff: third_party/twisted_8_1/twisted/words/xish/xpathparser.g

Issue 12261012: Remove third_party/twisted_8_1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build
Patch Set: Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 # Copyright (c) 2001-2007 Twisted Matrix Laboratories.
2 # See LICENSE for details.
3
4 # DO NOT EDIT xpathparser.py!
5 #
6 # It is generated from xpathparser.g using Yapps. Make needed changes there.
7 # This also means that the generated Python may not conform to Twisted's coding
8 # standards.
9
10 # HOWTO Generate me:
11 #
12 # 1.) Grab a copy of yapps2, version 2.1.1:
13 # http://theory.stanford.edu/~amitp/Yapps/
14 #
15 # Note: Do NOT use the package in debian/ubuntu as it has incompatible
16 # modifications.
17 #
18 # 2.) Generate the grammar:
19 #
20 # yapps2 xpathparser.g xpathparser.py.proto
21 #
22 # 3.) Edit the output to depend on the embedded runtime, not yappsrt.
23 #
24 # sed -e '/^import yapps/d' -e '/^[^#]/s/yappsrt\.//g' \
25 # xpathparser.py.proto > xpathparser.py
26
27 """
28 XPath Parser.
29
30 Besides the parser code produced by Yapps, this module also defines the
31 parse-time exception classes, a scanner class, a base class for parsers
32 produced by Yapps, and a context class that keeps track of the parse stack.
33 These have been copied from the Yapps runtime.
34 """
35
36 import sys, re
37
class SyntaxError(Exception):
    """Raised when the scanner or parser meets an unexpected token.

    NOTE: deliberately shadows the builtin SyntaxError, as in the
    original Yapps runtime this was copied from.
    """
    def __init__(self, charpos=-1, msg="Bad Token", context=None):
        Exception.__init__(self)
        # Character offset of the error in the input; -1 means unknown.
        self.charpos = charpos
        self.msg = msg
        # Parser Context (rule stack) active at the time of the error.
        self.context = context

    def __str__(self):
        if self.charpos < 0:
            return 'SyntaxError'
        return 'SyntaxError@char%s(%s)' % (repr(self.charpos), self.msg)
49
class NoMoreTokens(Exception):
    """Raised when the scanner is asked for a token past the end of input."""
53
54 class Scanner:
55 """Yapps scanner.
56
57 The Yapps scanner can work in context sensitive or context
58 insensitive modes. The token(i) method is used to retrieve the
59 i-th token. It takes a restrict set that limits the set of tokens
60 it is allowed to return. In context sensitive mode, this restrict
61 set guides the scanner. In context insensitive mode, there is no
62 restriction (the set is always the full set of tokens).
63
64 """
65
66 def __init__(self, patterns, ignore, input):
67 """Initialize the scanner.
68
69 @param patterns: [(terminal, uncompiled regex), ...] or C{None}
70 @param ignore: [terminal,...]
71 @param input: string
72
73 If patterns is C{None}, we assume that the subclass has defined
74 C{self.patterns} : [(terminal, compiled regex), ...]. Note that the
75 patterns parameter expects uncompiled regexes, whereas the
76 C{self.patterns} field expects compiled regexes.
77 """
78 self.tokens = [] # [(begin char pos, end char pos, token name, matched t ext), ...]
79 self.restrictions = []
80 self.input = input
81 self.pos = 0
82 self.ignore = ignore
83 self.first_line_number = 1
84
85 if patterns is not None:
86 # Compile the regex strings into regex objects
87 self.patterns = []
88 for terminal, regex in patterns:
89 self.patterns.append( (terminal, re.compile(regex)) )
90
91 def get_token_pos(self):
92 """Get the current token position in the input text."""
93 return len(self.tokens)
94
95 def get_char_pos(self):
96 """Get the current char position in the input text."""
97 return self.pos
98
99 def get_prev_char_pos(self, i=None):
100 """Get the previous position (one token back) in the input text."""
101 if self.pos == 0: return 0
102 if i is None: i = -1
103 return self.tokens[i][0]
104
105 def get_line_number(self):
106 """Get the line number of the current position in the input text."""
107 # TODO: make this work at any token/char position
108 return self.first_line_number + self.get_input_scanned().count('\n')
109
110 def get_column_number(self):
111 """Get the column number of the current position in the input text."""
112 s = self.get_input_scanned()
113 i = s.rfind('\n') # may be -1, but that's okay in this case
114 return len(s) - (i+1)
115
116 def get_input_scanned(self):
117 """Get the portion of the input that has been tokenized."""
118 return self.input[:self.pos]
119
120 def get_input_unscanned(self):
121 """Get the portion of the input that has not yet been tokenized."""
122 return self.input[self.pos:]
123
124 def token(self, i, restrict=None):
125 """Get the i'th token in the input.
126
127 If C{i} is one past the end, then scan for another token.
128
129 @param i: token index
130
131 @param restrict: [token, ...] or C{None}; if restrict is
132 C{None}, then any token is allowed. You may call
133 token(i) more than once. However, the restrict set
134 may never be larger than what was passed in on the
135 first call to token(i).
136 """
137 if i == len(self.tokens):
138 self.scan(restrict)
139 if i < len(self.tokens):
140 # Make sure the restriction is more restricted. This
141 # invariant is needed to avoid ruining tokenization at
142 # position i+1 and higher.
143 if restrict and self.restrictions[i]:
144 for r in restrict:
145 if r not in self.restrictions[i]:
146 raise NotImplementedError("Unimplemented: restriction se t changed")
147 return self.tokens[i]
148 raise NoMoreTokens()
149
150 def __repr__(self):
151 """Print the last 10 tokens that have been scanned in"""
152 output = ''
153 for t in self.tokens[-10:]:
154 output = '%s\n (@%s) %s = %s' % (output,t[0],t[2],repr(t[3]))
155 return output
156
157 def scan(self, restrict):
158 """Should scan another token and add it to the list, self.tokens,
159 and add the restriction to self.restrictions"""
160 # Keep looking for a token, ignoring any in self.ignore
161 while 1:
162 # Search the patterns for the longest match, with earlier
163 # tokens in the list having preference
164 best_match = -1
165 best_pat = '(error)'
166 for p, regexp in self.patterns:
167 # First check to see if we're ignoring this token
168 if restrict and p not in restrict and p not in self.ignore:
169 continue
170 m = regexp.match(self.input, self.pos)
171 if m and len(m.group(0)) > best_match:
172 # We got a match that's better than the previous one
173 best_pat = p
174 best_match = len(m.group(0))
175
176 # If we didn't find anything, raise an error
177 if best_pat == '(error)' and best_match < 0:
178 msg = 'Bad Token'
179 if restrict:
180 msg = 'Trying to find one of '+', '.join(restrict)
181 raise SyntaxError(self.pos, msg)
182
183 # If we found something that isn't to be ignored, return it
184 if best_pat not in self.ignore:
185 # Create a token with this data
186 token = (self.pos, self.pos+best_match, best_pat,
187 self.input[self.pos:self.pos+best_match])
188 self.pos = self.pos + best_match
189 # Only add this token if it's not in the list
190 # (to prevent looping)
191 if not self.tokens or token != self.tokens[-1]:
192 self.tokens.append(token)
193 self.restrictions.append(restrict)
194 return
195 else:
196 # This token should be ignored ..
197 self.pos = self.pos + best_match
198
class Parser:
    """Base class for Yapps-generated parsers."""

    def __init__(self, scanner):
        self._scanner = scanner
        self._pos = 0  # index of the next token to consume

    def _peek(self, *types):
        """Return the type of the lookahead token.

        Any positional arguments form the set of token types the
        scanner is allowed to return at this position.
        """
        return self._scanner.token(self._pos, types)[2]

    def _scan(self, type):
        """Consume one token of the given type and return its matched text."""
        tok = self._scanner.token(self._pos, [type])
        if tok[2] != type:
            raise SyntaxError(
                tok[0],
                'Trying to find ' + type + ' :' +
                ' ,'.join(self._scanner.restrictions[self._pos]))
        self._pos += 1
        return tok[3]
221
class Context:
    """One frame of the parser's rule call stack.

    Every rule creates a Context linked to the context of its parent
    rule.  The resulting chain can be walked for debugging and for
    error reporting.
    """

    def __init__(self, parent, scanner, tokenpos, rule, args=()):
        """Create a new context.

        @param parent: Context object or C{None}
        @param scanner: Scanner object
        @param tokenpos: scanner token position
        @type tokenpos: L{int}
        @param rule: name of the rule
        @type rule: L{str}
        @param args: tuple listing parameters to the rule
        """
        self.parent = parent
        self.scanner = scanner
        self.tokenpos = tokenpos
        self.rule = rule
        self.args = args

    def __str__(self):
        # Render the chain outermost-first, e.g. "XPATH > PATH".
        prefix = str(self.parent) + ' > ' if self.parent else ''
        return prefix + self.rule
253
def print_line_with_pointer(text, p):
    """Print the line of 'text' that includes position 'p',
    along with a second line with a single caret (^) at position p"""

    # TODO: separate out the logic for determining the line/character
    # location from the logic for determining how to display an
    # 80-column line to stderr.

    # Now try printing part of the line
    # Clip to a window of at most 80 chars on either side of p, and
    # rebase p to be an index into that window.
    text = text[max(p-80, 0):p+80]
    p = p - max(p-80, 0)

    # Strip to the left
    # Drop everything before the last newline/CR that precedes p.
    i = text[:p].rfind('\n')
    j = text[:p].rfind('\r')
    if i < 0 or (0 <= j < i): i = j
    if 0 <= i < p:
        p = p - i - 1
        text = text[i+1:]

    # Strip to the right
    # Drop everything from the first newline/CR at or after p.
    i = text.find('\n', p)
    j = text.find('\r', p)
    if i < 0 or (0 <= j < i): i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text
    while len(text) > 70 and p > 60:
        # Cut off 10 chars (replaced by "...", so p shifts by a net 7)
        text = "..." + text[10:]
        p = p - 7

    # Now print the string, along with an indicator
    # (Python 2 print-to-stderr syntax, consistent with this module.)
    print >>sys.stderr, '> ',text
    print >>sys.stderr, '> ',' '*p + '^'
290
def print_error(input, err, scanner):
    """Print error messages, the parser stack, and the input text -- for human-readable error messages.

    @param input: the full input string being parsed
    @param err: a SyntaxError instance (carries msg, charpos, context)
    @param scanner: the Scanner, used for line/column information
    """
    # NOTE: this function assumes 80 columns :-(
    # Figure out the line number
    line_number = scanner.get_line_number()
    column_number = scanner.get_column_number()
    print >>sys.stderr, '%d:%d: %s' % (line_number, column_number, err.msg)

    context = err.context
    # No rule-stack context available: just point at the error position.
    if not context:
        print_line_with_pointer(input, err.charpos)

    # Walk the rule stack from the innermost rule outwards, showing where
    # each enclosing rule was positioned in the input.
    while context:
        # TODO: add line number
        print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args))
        print_line_with_pointer(input, context.scanner.get_prev_char_pos(context.tokenpos))
        context = context.parent
308
def wrap_error_reporter(parser, rule):
    """Invoke a parser rule, reporting parse errors to stderr instead of
    letting them propagate.

    @param parser: Parser instance whose rule method will be called
    @param rule: name of the rule method to invoke
    @return: the rule's result, or None if a parse error was reported
    """
    try:
        return getattr(parser, rule)()
    except SyntaxError, e:
        input = parser._scanner.input
        print_error(input, e, parser._scanner)
    except NoMoreTokens:
        # Input ended before the rule could be completed; dump the last
        # tokens the scanner saw to help locate the problem.
        print >>sys.stderr, 'Could not complete parsing; stopped around here:'
        print >>sys.stderr, parser._scanner
318
319
320 from twisted.words.xish.xpath import AttribValue, BooleanValue, CompareValue
321 from twisted.words.xish.xpath import Function, IndexValue, LiteralValue
322 from twisted.words.xish.xpath import _AnyLocation, _Location
323
%%
# Yapps 2 grammar for XPath expressions.  Each {{ ... }} block is inline
# Python run as the rule matches; the value classes (_Location,
# AttribValue, Function, ...) come from twisted.words.xish.xpath,
# imported in the Python preamble above.
parser XPathParser:
        ignore:             "\\s+"
        token INDEX:        "[0-9]+"
        token WILDCARD:     "\*"
        token IDENTIFIER:   "[a-zA-Z][a-zA-Z0-9_\-]*"
        token ATTRIBUTE:    "\@[a-zA-Z][a-zA-Z0-9_\-]*"
        token FUNCNAME:     "[a-zA-Z][a-zA-Z0-9_]*"
        token CMP_EQ:       "\="
        token CMP_NE:       "\!\="
        token STR_DQ:       '"([^"]|(\\"))*?"'
        token STR_SQ:       "'([^']|(\\'))*?'"
        token OP_AND:       "and"
        token OP_OR:        "or"
        token END:          "$"

        # An XPath is a chain of PATH segments; each later segment is
        # attached to the previous one via childLocation.
        rule XPATH:     PATH {{ result = PATH; current = result }}
                        ( PATH {{ current.childLocation = PATH; current = current.childLocation }} ) * END
                        {{ return result }}

        # "/" starts a direct-child location, "//" an any-descendant
        # location; then an element name (or wildcard) and optional
        # [PREDICATE] filters.
        rule PATH:      ("/" {{ result = _Location() }} | "//" {{ result = _AnyLocation() }} )
                        ( IDENTIFIER {{ result.elementName = IDENTIFIER }} | WILDCARD {{ result.elementName = None }} )
                        ( "\[" PREDICATE {{ result.predicates.append(PREDICATE) }} "\]")*
                        {{ return result }}

        rule PREDICATE: EXPR  {{ return EXPR }} |
                        INDEX {{ return IndexValue(INDEX) }}

        # Boolean expression: FACTORs joined left-to-right by and/or.
        rule EXPR:      FACTOR {{ e = FACTOR }}
                        ( BOOLOP FACTOR {{ e = BooleanValue(e, BOOLOP, FACTOR) }} )*
                        {{ return e }}

        rule BOOLOP:    ( OP_AND {{ return OP_AND }} | OP_OR {{ return OP_OR }} )

        rule FACTOR:    TERM {{ return TERM }}
                        | "\(" EXPR "\)" {{ return EXPR }}

        # A term is a value, optionally compared (=, !=) with another.
        rule TERM:      VALUE {{ t = VALUE }}
                        [ CMP VALUE {{ t = CompareValue(t, CMP, VALUE) }} ]
                        {{ return t }}

        # A value is an attribute reference, a function call with
        # parenthesized arguments, or a quoted string literal (with the
        # surrounding quotes stripped).
        rule VALUE:     "@" IDENTIFIER {{ return AttribValue(IDENTIFIER) }} |
                        FUNCNAME       {{ f = Function(FUNCNAME); args = [] }}
                            "\(" [ VALUE      {{ args.append(VALUE) }}
                                   (
                                    "," VALUE {{ args.append(VALUE) }}
                                   )*
                                 ] "\)"       {{ f.setParams(*args); return f }} |
                        STR            {{ return LiteralValue(STR[1:len(STR)-1]) }}

        rule CMP: (CMP_EQ {{ return CMP_EQ }} | CMP_NE {{ return CMP_NE }})
        rule STR: (STR_DQ {{ return STR_DQ }} | STR_SQ {{ return STR_SQ }})
OLDNEW
« no previous file with comments | « third_party/twisted_8_1/twisted/words/xish/xpath.py ('k') | third_party/twisted_8_1/twisted/words/xish/xpathparser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698