# Copyright (c) 2001-2007 Twisted Matrix Laboratories.
# See LICENSE for details.

# DO NOT EDIT xpathparser.py!
#
# It is generated from xpathparser.g using Yapps. Make needed changes there.
# This also means that the generated Python may not conform to Twisted's coding
# standards.

# HOWTO Generate me:
#
# 1.) Grab a copy of yapps2, version 2.1.1:
#         http://theory.stanford.edu/~amitp/Yapps/
#
#     Note: Do NOT use the package in debian/ubuntu as it has incompatible
#     modifications.
#
# 2.) Generate the grammar:
#
#         yapps2 xpathparser.g xpathparser.py.proto
#
# 3.) Edit the output to depend on the embedded runtime, not yappsrt:
#
#         sed -e '/^import yapps/d' -e '/^[^#]/s/yappsrt\.//g' \
#             xpathparser.py.proto > xpathparser.py

"""
XPath Parser.

Besides the parser code produced by Yapps, this module also defines the
parse-time exception classes, a scanner class, a base class for parsers
produced by Yapps, and a context class that keeps track of the parse stack.
These have been copied from the Yapps runtime.
"""

import sys, re
class SyntaxError(Exception):
    """When we run into an unexpected token, this is the exception to use"""
    def __init__(self, charpos=-1, msg="Bad Token", context=None):
        Exception.__init__(self)
        self.charpos = charpos
        self.msg = msg
        self.context = context

    def __str__(self):
        if self.charpos < 0: return 'SyntaxError'
        else: return 'SyntaxError@char%s(%s)' % (repr(self.charpos), self.msg)

class NoMoreTokens(Exception):
    """Another exception object, for when we run out of tokens"""
    pass
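# For example (illustrative only): str(SyntaxError(5, 'Bad Token')) yields
# 'SyntaxError@char5(Bad Token)', while the default charpos of -1 yields
# just 'SyntaxError'.
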
class Scanner:
    """Yapps scanner.

    The Yapps scanner can work in context sensitive or context
    insensitive modes. The token(i) method is used to retrieve the
    i-th token. It takes a restrict set that limits the set of tokens
    it is allowed to return. In context sensitive mode, this restrict
    set guides the scanner. In context insensitive mode, there is no
    restriction (the set is always the full set of tokens).

    """

    def __init__(self, patterns, ignore, input):
        """Initialize the scanner.

        @param patterns: [(terminal, uncompiled regex), ...] or C{None}
        @param ignore: [terminal,...]
        @param input: string

        If patterns is C{None}, we assume that the subclass has defined
        C{self.patterns} : [(terminal, compiled regex), ...]. Note that the
        patterns parameter expects uncompiled regexes, whereas the
        C{self.patterns} field expects compiled regexes.
        """
        self.tokens = [] # [(begin char pos, end char pos, token name, matched text), ...]
        self.restrictions = []
        self.input = input
        self.pos = 0
        self.ignore = ignore
        self.first_line_number = 1

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = []
            for terminal, regex in patterns:
                self.patterns.append( (terminal, re.compile(regex)) )

    def get_token_pos(self):
        """Get the current token position in the input text."""
        return len(self.tokens)

    def get_char_pos(self):
        """Get the current char position in the input text."""
        return self.pos

    def get_prev_char_pos(self, i=None):
        """Get the previous position (one token back) in the input text."""
        if self.pos == 0: return 0
        if i is None: i = -1
        return self.tokens[i][0]

    def get_line_number(self):
        """Get the line number of the current position in the input text."""
        # TODO: make this work at any token/char position
        return self.first_line_number + self.get_input_scanned().count('\n')

    def get_column_number(self):
        """Get the column number of the current position in the input text."""
        s = self.get_input_scanned()
        i = s.rfind('\n') # may be -1, but that's okay in this case
        return len(s) - (i+1)

    def get_input_scanned(self):
        """Get the portion of the input that has been tokenized."""
        return self.input[:self.pos]

    def get_input_unscanned(self):
        """Get the portion of the input that has not yet been tokenized."""
        return self.input[self.pos:]

    def token(self, i, restrict=None):
        """Get the i'th token in the input.

        If C{i} is one past the end, then scan for another token.

        @param i: token index

        @param restrict: [token, ...] or C{None}; if restrict is
            C{None}, then any token is allowed. You may call
            token(i) more than once. However, the restrict set
            may never be larger than what was passed in on the
            first call to token(i).
        """
        if i == len(self.tokens):
            self.scan(restrict)
        if i < len(self.tokens):
            # Make sure the restriction is more restricted. This
            # invariant is needed to avoid ruining tokenization at
            # position i+1 and higher.
            if restrict and self.restrictions[i]:
                for r in restrict:
                    if r not in self.restrictions[i]:
                        raise NotImplementedError("Unimplemented: restriction set changed")
            return self.tokens[i]
        raise NoMoreTokens()

    def __repr__(self):
        """Print the last 10 tokens that have been scanned in"""
        output = ''
        for t in self.tokens[-10:]:
            output = '%s\n (@%s) %s = %s' % (output, t[0], t[2], repr(t[3]))
        return output

    def scan(self, restrict):
        """Should scan another token and add it to the list, self.tokens,
        and add the restriction to self.restrictions"""
        # Keep looking for a token, ignoring any in self.ignore
        while 1:
            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            for p, regexp in self.patterns:
                # First check to see if we're ignoring this token
                if restrict and p not in restrict and p not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if m and len(m.group(0)) > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = len(m.group(0))

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of ' + ', '.join(restrict)
                raise SyntaxError(self.pos, msg)

            # If we found something that isn't to be ignored, return it
            if best_pat not in self.ignore:
                # Create a token with this data
                token = (self.pos, self.pos + best_match, best_pat,
                         self.input[self.pos:self.pos + best_match])
                self.pos = self.pos + best_match
                # Only add this token if it's not in the list
                # (to prevent looping)
                if not self.tokens or token != self.tokens[-1]:
                    self.tokens.append(token)
                    self.restrictions.append(restrict)
                return
            else:
                # This token should be ignored ..
                self.pos = self.pos + best_match

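# A minimal usage sketch of the Scanner (illustrative only; the token names,
# patterns, and input below are hypothetical, not part of this module).
# Whitespace is declared as a pattern and listed in ignore, so scan() skips it:
#
#     patterns = [('space', r'\s+'), ('NUM', '[0-9]+'), ('PLUS', r'\+')]
#     s = Scanner(patterns, ['space'], '1 + 2')
#     s.token(0)            # -> (0, 1, 'NUM', '1')
#     s.token(1, ['PLUS'])  # restrict set: only PLUS (plus ignored tokens) may match
#     s.token(2, ['NUM'])   # -> (4, 5, 'NUM', '2')
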
class Parser:
    """Base class for Yapps-generated parsers.

    """

    def __init__(self, scanner):
        self._scanner = scanner
        self._pos = 0

    def _peek(self, *types):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        tok = self._scanner.token(self._pos, types)
        return tok[2]

    def _scan(self, type):
        """Returns the matched text, and moves to the next token"""
        tok = self._scanner.token(self._pos, [type])
        if tok[2] != type:
            raise SyntaxError(tok[0], 'Trying to find '+type+' :'+ ' ,'.join(self._scanner.restrictions[self._pos]))
        self._pos = 1 + self._pos
        return tok[3]

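# In the generated parser, each grammar rule below becomes a method built on
# _peek and _scan. A hypothetical sketch of what Yapps emits for a simple
# alternation rule (names are illustrative; see the real output in
# xpathparser.py):
#
#     def BOOLOP(self):
#         _token = self._peek('OP_AND', 'OP_OR')
#         if _token == 'OP_AND':
#             OP_AND = self._scan('OP_AND')
#             return OP_AND
#         else:  # == 'OP_OR'
#             OP_OR = self._scan('OP_OR')
#             return OP_OR
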
class Context:
    """Class to represent the parser's call stack.

    Every rule creates a Context that links to its parent rule. The
    contexts can be used for debugging.

    """

    def __init__(self, parent, scanner, tokenpos, rule, args=()):
        """Create a new context.

        @param parent: Context object or C{None}
        @param scanner: Scanner object
        @param tokenpos: scanner token position
        @type tokenpos: L{int}
        @param rule: name of the rule
        @type rule: L{str}
        @param args: tuple listing parameters to the rule

        """
        self.parent = parent
        self.scanner = scanner
        self.tokenpos = tokenpos
        self.rule = rule
        self.args = args

    def __str__(self):
        output = ''
        if self.parent: output = str(self.parent) + ' > '
        output += self.rule
        return output

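# __str__ renders the stack as a chain of rule names, so a context created
# while parsing a predicate would print roughly as 'XPATH > PATH > PREDICATE'
# (rule names from the grammar below; the exact chain depends on the input).
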
def print_line_with_pointer(text, p):
    """Print the line of 'text' that includes position 'p',
    along with a second line with a single caret (^) at position p"""

    # TODO: separate out the logic for determining the line/character
    # location from the logic for determining how to display an
    # 80-column line to stderr.

    # Now try printing part of the line
    text = text[max(p-80, 0):p+80]
    p = p - max(p-80, 0)

    # Strip to the left
    i = text[:p].rfind('\n')
    j = text[:p].rfind('\r')
    if i < 0 or (0 <= j < i): i = j
    if 0 <= i < p:
        p = p - i - 1
        text = text[i+1:]

    # Strip to the right
    i = text.find('\n', p)
    j = text.find('\r', p)
    if i < 0 or (0 <= j < i): i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text
    while len(text) > 70 and p > 60:
        # Cut off 10 chars
        text = "..." + text[10:]
        p = p - 7

    # Now print the string, along with an indicator
    print >>sys.stderr, '> ', text
    print >>sys.stderr, '> ', ' '*p + '^'

def print_error(input, err, scanner):
    """Print the error message, the parser stack, and the input text --
    for human-readable error reporting."""
    # NOTE: this function assumes 80 columns :-(
    # Figure out the line number
    line_number = scanner.get_line_number()
    column_number = scanner.get_column_number()
    print >>sys.stderr, '%d:%d: %s' % (line_number, column_number, err.msg)

    context = err.context
    if not context:
        print_line_with_pointer(input, err.charpos)

    while context:
        # TODO: add line number
        print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args))
        print_line_with_pointer(input, context.scanner.get_prev_char_pos(context.tokenpos))
        context = context.parent

def wrap_error_reporter(parser, rule):
    try:
        return getattr(parser, rule)()
    except SyntaxError, e:
        input = parser._scanner.input
        print_error(input, e, parser._scanner)
    except NoMoreTokens:
        print >>sys.stderr, 'Could not complete parsing; stopped around here:'
        print >>sys.stderr, parser._scanner

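# Hypothetical usage of the error-reporting wrapper (a sketch, assuming the
# generated scanner class is named XPathParserScanner, which Yapps derives
# from the 'parser XPathParser' declaration below):
#
#     parser = XPathParser(XPathParserScanner("/foo[@bar]"))
#     result = wrap_error_reporter(parser, 'XPATH')
#
# On a syntax error this prints a 'line:column: message' diagnostic plus the
# parse stack to stderr instead of propagating the exception.
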
from twisted.words.xish.xpath import AttribValue, BooleanValue, CompareValue
from twisted.words.xish.xpath import Function, IndexValue, LiteralValue
from twisted.words.xish.xpath import _AnyLocation, _Location

%%
parser XPathParser:
    ignore: "\\s+"
    token INDEX: "[0-9]+"
    token WILDCARD: "\*"
    token IDENTIFIER: "[a-zA-Z][a-zA-Z0-9_\-]*"
    token ATTRIBUTE: "\@[a-zA-Z][a-zA-Z0-9_\-]*"
    token FUNCNAME: "[a-zA-Z][a-zA-Z0-9_]*"
    token CMP_EQ: "\="
    token CMP_NE: "\!\="
    token STR_DQ: '"([^"]|(\\"))*?"'
    token STR_SQ: "'([^']|(\\'))*?'"
    token OP_AND: "and"
    token OP_OR: "or"
    token END: "$"

    rule XPATH: PATH {{ result = PATH; current = result }}
                ( PATH {{ current.childLocation = PATH; current = current.childLocation }} )* END
                {{ return result }}

    rule PATH: ("/" {{ result = _Location() }} | "//" {{ result = _AnyLocation() }} )
               ( IDENTIFIER {{ result.elementName = IDENTIFIER }} | WILDCARD {{ result.elementName = None }} )
               ( "\[" PREDICATE {{ result.predicates.append(PREDICATE) }} "\]")*
               {{ return result }}

    rule PREDICATE: EXPR {{ return EXPR }} |
                    INDEX {{ return IndexValue(INDEX) }}

    rule EXPR: FACTOR {{ e = FACTOR }}
               ( BOOLOP FACTOR {{ e = BooleanValue(e, BOOLOP, FACTOR) }} )*
               {{ return e }}

    rule BOOLOP: ( OP_AND {{ return OP_AND }} | OP_OR {{ return OP_OR }} )

    rule FACTOR: TERM {{ return TERM }}
                 | "\(" EXPR "\)" {{ return EXPR }}

    rule TERM: VALUE {{ t = VALUE }}
               [ CMP VALUE {{ t = CompareValue(t, CMP, VALUE) }} ]
               {{ return t }}

    rule VALUE: "@" IDENTIFIER {{ return AttribValue(IDENTIFIER) }} |
                FUNCNAME {{ f = Function(FUNCNAME); args = [] }}
                "\(" [ VALUE {{ args.append(VALUE) }}
                       (
                           "," VALUE {{ args.append(VALUE) }}
                       )*
                     ] "\)" {{ f.setParams(*args); return f }} |
                STR {{ return LiteralValue(STR[1:len(STR)-1]) }}

    rule CMP: (CMP_EQ {{ return CMP_EQ }} | CMP_NE {{ return CMP_NE }})
    rule STR: (STR_DQ {{ return STR_DQ }} | STR_SQ {{ return STR_SQ }})
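
# Some expressions this grammar should accept (illustrative, not exhaustive):
#
#     /foo
#     //bar/*
#     /foo/bar[3]
#     /foo[@attrib="value"]/bar
#     /foo[@a="1" and @b="2"]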