third_party/twisted_8_1/twisted/words/xish/xpathparser.py - Issue 12261012: Remove third_party/twisted_8_1

Side by Side Diff: third_party/twisted_8_1/twisted/words/xish/xpathparser.py

Issue 12261012: Remove third_party/twisted_8_1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build

Patch Set: Created 7 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 # Copyright (c) 2001-2007 Twisted Matrix Laboratories.

2 # See LICENSE for details.

3

4 # DO NOT EDIT xpathparser.py!

5 #

6 # It is generated from xpathparser.g using Yapps. Make needed changes there.

7 # This also means that the generated Python may not conform to Twisted's coding

8 # standards.

9

10 # HOWTO Generate me:

11 #

12 # 1.) Grab a copy of yapps2, version 2.1.1:

13 # http://theory.stanford.edu/~amitp/Yapps/

14 #

15 # Note: Do NOT use the package in debian/ubuntu as it has incompatible

16 # modifications.

17 #

18 # 2.) Generate the grammar:

19 #

20 # yapps2 xpathparser.g xpathparser.py.proto

21 #

22 # 3.) Edit the output to depend on the embedded runtime, not yappsrt.

23 #

24 # sed -e '/^import yapps/d' -e '/^[^#]/s/yappsrt\.//g' \

25 # xpathparser.py.proto > xpathparser.py

26

27 """

28 XPath Parser.

29

30 Besides the parser code produced by Yapps, this module also defines the

31 parse-time exception classes, a scanner class, a base class for parsers

32 produced by Yapps, and a context class that keeps track of the parse stack.

33 These have been copied from the Yapps runtime.

34 """

35

36 import sys, re

37

class SyntaxError(Exception):
    """
    Parse-time error raised when an unexpected token is encountered.

    Note that this class deliberately carries the same name as the builtin
    exception; within this module the name refers to this class.

    @ivar charpos: character offset of the offending token, or a negative
        number when the position is unknown.
    @ivar msg: human-readable description of the problem.
    @ivar context: the L{Context} of the rule being parsed, or C{None}.
    """

    def __init__(self, charpos=-1, msg="Bad Token", context=None):
        Exception.__init__(self)
        self.context = context
        self.msg = msg
        self.charpos = charpos

    def __str__(self):
        # A known position is rendered together with the message; an unknown
        # (negative) position collapses to the bare class name.
        if self.charpos >= 0:
            return 'SyntaxError@char%r(%s)' % (self.charpos, self.msg)
        return 'SyntaxError'

49

class NoMoreTokens(Exception):
    """
    Raised by the scanner when a token is requested past the end of the
    tokens that are available.
    """

53

class Scanner:
    """
    Yapps scanner.

    The scanner works in either a context sensitive or a context insensitive
    mode. Tokens are fetched by index through L{token}, which accepts a
    restrict set limiting which terminals may be returned. In context
    sensitive mode the restrict set steers tokenization; in context
    insensitive mode no restriction applies (every terminal is allowed).
    """

    def __init__(self, patterns, ignore, input):
        """
        Initialize the scanner.

        @param patterns: [(terminal, uncompiled regex), ...] or C{None}
        @param ignore: [terminal, ...] -- terminals that are matched and then
            silently skipped
        @param input: string to tokenize

        When C{patterns} is C{None} the subclass is expected to provide
        C{self.patterns} itself as [(terminal, compiled regex), ...]. The
        parameter takes uncompiled regexes; the attribute holds compiled
        ones.
        """
        # [(begin char pos, end char pos, token name, matched text), ...]
        self.tokens = []
        # restrictions[i] is the restrict set that was in force when
        # tokens[i] was scanned.
        self.restrictions = []
        self.input = input
        self.pos = 0
        self.ignore = ignore
        self.first_line_number = 1

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = [(terminal, re.compile(regex))
                             for terminal, regex in patterns]

    def get_token_pos(self):
        """Return the number of tokens scanned so far."""
        return len(self.tokens)

    def get_char_pos(self):
        """Return the current character offset into the input text."""
        return self.pos

    def get_prev_char_pos(self, i=None):
        """
        Return the start offset of token C{i} (the most recent token when
        C{i} is C{None}), or 0 when nothing has been consumed yet.
        """
        if self.pos == 0:
            return 0
        index = i
        if index is None:
            index = -1
        return self.tokens[index][0]

    def get_line_number(self):
        """Return the line number of the current position in the input."""
        # TODO: make this work at any token/char position
        newlines_seen = self.get_input_scanned().count('\n')
        return self.first_line_number + newlines_seen

    def get_column_number(self):
        """Return the column number of the current position in the input."""
        scanned = self.get_input_scanned()
        # rfind() gives -1 when there is no newline, which makes the
        # arithmetic below yield the full scanned length -- as intended.
        return len(scanned) - scanned.rfind('\n') - 1

    def get_input_scanned(self):
        """Return the part of the input that has already been tokenized."""
        return self.input[:self.pos]

    def get_input_unscanned(self):
        """Return the part of the input that has not been tokenized yet."""
        return self.input[self.pos:]

    def token(self, i, restrict=None):
        """
        Get the i'th token in the input.

        If C{i} is one past the end, scan for another token first.

        @param i: token index

        @param restrict: [token, ...] or C{None}; C{None} allows any token.
            token(i) may be called repeatedly, but the restrict set may
            never be larger than the one passed on the first call for that
            index.

        @raise NotImplementedError: if C{restrict} names a terminal that was
            not in the restrict set used when token C{i} was scanned.
        @raise NoMoreTokens: if C{i} is beyond the available tokens.
        """
        if i == len(self.tokens):
            self.scan(restrict)
        if i >= len(self.tokens):
            raise NoMoreTokens()

        # Make sure the restriction is more restricted. This invariant is
        # needed to avoid ruining tokenization at position i+1 and higher.
        earlier = self.restrictions[i]
        if restrict and earlier:
            for name in restrict:
                if name not in earlier:
                    raise NotImplementedError("Unimplemented: restriction set changed")
        return self.tokens[i]

    def __repr__(self):
        """Render the last 10 tokens that have been scanned in."""
        pieces = ['\n (@%s) %s = %s' % (t[0], t[2], repr(t[3]))
                  for t in self.tokens[-10:]]
        return ''.join(pieces)

    def scan(self, restrict):
        """
        Scan one more token, appending it to C{self.tokens} and recording
        C{restrict} in C{self.restrictions}.

        @raise SyntaxError: if no permitted pattern matches at the current
            position.
        """
        # Keep looking for a token, skipping over any in self.ignore
        while True:
            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            longest = -1
            winner = '(error)'
            for name, regexp in self.patterns:
                # A terminal is a candidate when there is no restriction,
                # when it is in the restrict set, or when it is ignorable.
                permitted = (not restrict) or name in restrict or name in self.ignore
                if not permitted:
                    continue
                found = regexp.match(self.input, self.pos)
                if found:
                    width = len(found.group(0))
                    if width > longest:
                        # Strictly longer only, so ties keep the earlier one
                        winner = name
                        longest = width

            # If we didn't find anything, raise an error
            if winner == '(error)' and longest < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of '+', '.join(restrict)
                raise SyntaxError(self.pos, msg)

            start = self.pos
            stop = start + longest
            self.pos = stop

            if winner in self.ignore:
                # This token should be ignored; go round again
                continue

            token = (start, stop, winner, self.input[start:stop])
            # Only add this token if it's not already the last one
            # (to prevent looping)
            if not self.tokens or token != self.tokens[-1]:
                self.tokens.append(token)
                self.restrictions.append(restrict)
            return

198

class Parser:
    """
    Base class for Yapps-generated parsers.

    Holds the scanner and the index of the next token to consume.
    """

    def __init__(self, scanner):
        self._pos = 0
        self._scanner = scanner

    def _peek(self, *types):
        """
        Return the type of the lookahead token without consuming it. Any
        positional arguments form the set of token types the scanner is
        allowed to produce.
        """
        lookahead = self._scanner.token(self._pos, types)
        return lookahead[2]

    def _scan(self, type):
        """
        Consume the next token, which must be of the given type, and return
        its matched text.

        @raise SyntaxError: if the token at the current position has a
            different type.
        """
        tok = self._scanner.token(self._pos, [type])
        if tok[2] == type:
            self._pos = self._pos + 1
            return tok[3]
        allowed = ' ,'.join(self._scanner.restrictions[self._pos])
        raise SyntaxError(tok[0], 'Trying to find '+type+' :'+ allowed)

221

class Context:
    """
    One frame of the parser's call stack.

    Each rule invocation builds a Context pointing at the Context of the
    rule that called it, so the chain can be walked for debugging output.
    """

    def __init__(self, parent, scanner, tokenpos, rule, args=()):
        """
        Create a new context.

        @param parent: Context object or C{None}
        @param scanner: Scanner object
        @param tokenpos: scanner token position
        @type tokenpos: L{int}
        @param rule: name of the rule
        @type rule: L{str}
        @param args: tuple listing parameters to the rule
        """
        self.args = args
        self.rule = rule
        self.tokenpos = tokenpos
        self.scanner = scanner
        self.parent = parent

    def __str__(self):
        # Outermost rule first, each level separated by ' > '.
        trail = ''
        if self.parent:
            trail = str(self.parent) + ' > '
        return trail + self.rule

253

def print_line_with_pointer(text, p):
    """
    Write to stderr the line of C{text} that includes position C{p},
    followed by a second line with a single caret (^) under position C{p}.

    @param text: the full input text
    @param p: character offset into C{text} to point at
    """

    # TODO: separate out the logic for determining the line/character
    # location from the logic for determining how to display an
    # 80-column line to stderr.

    # Keep at most 80 characters on either side of p, and re-base p so it
    # indexes into the shortened text.
    window_start = max(p - 80, 0)
    text = text[window_start:p + 80]
    p = p - window_start

    # Strip to the left: drop everything up to and including the LAST line
    # break before p. Taking the later of the two kinds of break matters for
    # CRLF and mixed line endings; choosing the earlier one would leave a
    # line break inside the displayed line and misplace the caret.
    head = text[:p]
    i = max(head.rfind('\n'), head.rfind('\r'))
    if 0 <= i < p:
        p = p - i - 1
        text = text[i+1:]

    # Strip to the right: cut at the FIRST line break at or after p.
    i = text.find('\n', p)
    j = text.find('\r', p)
    if i < 0 or (0 <= j < i):
        i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text: replace 10 leading characters with a 3-character
    # ellipsis, which moves the caret 7 columns to the left each time.
    while len(text) > 70 and p > 60:
        text = "..." + text[10:]
        p = p - 7

    # Emit the line and the indicator. The two spaces after '>' reproduce
    # what a comma-separated print of '> ' and the text produces.
    sys.stderr.write('>  %s\n' % (text,))
    sys.stderr.write('>  %s^\n' % (' ' * p,))

290

def print_error(input, err, scanner):
    """
    Write a human-readable error report to stderr: the message with its
    line and column, then the parser stack, each with the relevant line of
    input text.

    @param input: the full text that was being parsed
    @param err: the L{SyntaxError} to report; its C{msg}, C{context} and
        C{charpos} attributes are read
    @param scanner: the scanner, used for the current line/column numbers
    """
    # NOTE: this function assumes 80 columns :-(
    # Figure out the line number
    line_number = scanner.get_line_number()
    column_number = scanner.get_column_number()
    # sys.stderr.write is used rather than the print statement so that the
    # function is valid on both Python 2 and Python 3.
    sys.stderr.write('%d:%d: %s\n' % (line_number, column_number, err.msg))

    context = err.context
    if not context:
        # No rule stack available: just point at the failing character.
        print_line_with_pointer(input, err.charpos)

    # Walk outwards from the innermost rule to the start rule.
    while context:
        # TODO: add line number
        sys.stderr.write('while parsing %s%s:\n' % (context.rule, tuple(context.args)))
        print_line_with_pointer(input, context.scanner.get_prev_char_pos(context.tokenpos))
        context = context.parent

308

def wrap_error_reporter(parser, rule):
    """
    Invoke the method named C{rule} on C{parser} and return its result,
    reporting parse failures on stderr instead of propagating them.

    @param parser: a parser object exposing C{_scanner} and a method per rule
    @param rule: name of the rule method to call
    @return: whatever the rule method returns; C{None} (implicitly) when a
        L{SyntaxError} or L{NoMoreTokens} was caught and reported
    """
    try:
        return getattr(parser, rule)()
    except SyntaxError:
        # sys.exc_info() fetches the active exception in a way that is valid
        # syntax on both Python 2 and Python 3.
        e = sys.exc_info()[1]
        input = parser._scanner.input
        print_error(input, e, parser._scanner)
    except NoMoreTokens:
        sys.stderr.write('Could not complete parsing; stopped around here:\n')
        sys.stderr.write('%s\n' % (parser._scanner,))

318

319

320 from twisted.words.xish.xpath import AttribValue, BooleanValue, CompareValue

321 from twisted.words.xish.xpath import Function, IndexValue, LiteralValue

322 from twisted.words.xish.xpath import _AnyLocation, _Location

323

324

325 # Begin -- grammar generated by Yapps

326 import sys, re

327

class XPathParserScanner(Scanner):
    """
    Scanner for the XPath grammar (generated by Yapps; make grammar changes
    in xpathparser.g rather than here).

    C{patterns} is supplied precompiled at class level, so C{None} is passed
    to L{Scanner.__init__} for its C{patterns} argument. Whitespace is the
    only ignored terminal.
    """
    # (terminal name, compiled regex). Order matters: when two patterns match
    # the same length, the scanner keeps the one listed first.
    patterns = [
        ('","', re.compile(',')),
        ('"@"', re.compile('@')),
        ('"\\)"', re.compile('\\)')),
        ('"\\("', re.compile('\\(')),
        ('"\\]"', re.compile('\\]')),
        ('"\\["', re.compile('\\[')),
        ('"//"', re.compile('//')),
        ('"/"', re.compile('/')),
        ('\\s+', re.compile('\\s+')),
        ('INDEX', re.compile('[0-9]+')),
        ('WILDCARD', re.compile('\\*')),
        ('IDENTIFIER', re.compile('[a-zA-Z][a-zA-Z0-9_\\-]*')),
        ('ATTRIBUTE', re.compile('\\@[a-zA-Z][a-zA-Z0-9_\\-]*')),
        ('FUNCNAME', re.compile('[a-zA-Z][a-zA-Z0-9_]*')),
        ('CMP_EQ', re.compile('\\=')),
        ('CMP_NE', re.compile('\\!\\=')),
        # The '|' must be an unescaped alternation; written as '\|' it is a
        # literal pipe and no non-empty quoted string can match.
        ('STR_DQ', re.compile('"([^"]|(\\"))*?"')),
        ('STR_SQ', re.compile("'([^']|(\\'))*?'")),
        ('OP_AND', re.compile('and')),
        ('OP_OR', re.compile('or')),
        ('END', re.compile('$')),
    ]
    def __init__(self, str):
        """
        @param str: the XPath expression text to tokenize
        """
        Scanner.__init__(self,None,['\\s+'],str)

354

class XPathParser(Parser):
    """
    Recursive-descent parser for XPath expressions (generated by Yapps; make
    grammar changes in xpathparser.g rather than here).

    Each method implements one grammar rule. Every rule builds a L{Context}
    linked to its caller's so that syntax errors can report the rule stack.
    """
    Context = Context
    def XPATH(self, _parent=None):
        """
        XPATH: PATH PATH* END

        Chains each subsequent PATH onto the previous one through
        C{childLocation} and returns the first location.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'XPATH', [])
        PATH = self.PATH(_context)
        result = PATH; current = result
        while self._peek('END', '"/"', '"//"') != 'END':
            PATH = self.PATH(_context)
            current.childLocation = PATH; current = current.childLocation
        if self._peek() not in ['END', '"/"', '"//"']:
            raise SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', '"/"', '"//"']))
        END = self._scan('END')
        return result

    def PATH(self, _parent=None):
        """
        PATH: ("/" | "//") (IDENTIFIER | WILDCARD) ("[" PREDICATE "]")*

        "/" yields a C{_Location}, "//" an C{_AnyLocation}. A wildcard sets
        C{elementName} to C{None}.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'PATH', [])
        _token = self._peek('"/"', '"//"')
        if _token == '"/"':
            self._scan('"/"')
            result = _Location()
        else: # == '"//"'
            self._scan('"//"')
            result = _AnyLocation()
        _token = self._peek('IDENTIFIER', 'WILDCARD')
        if _token == 'IDENTIFIER':
            IDENTIFIER = self._scan('IDENTIFIER')
            result.elementName = IDENTIFIER
        else: # == 'WILDCARD'
            WILDCARD = self._scan('WILDCARD')
            result.elementName = None
        while self._peek('"\\["', 'END', '"/"', '"//"') == '"\\["':
            self._scan('"\\["')
            PREDICATE = self.PREDICATE(_context)
            result.predicates.append(PREDICATE)
            self._scan('"\\]"')
        if self._peek() not in ['"\\["', 'END', '"/"', '"//"']:
            raise SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"\\["', 'END', '"/"', '"//"']))
        return result

    def PREDICATE(self, _parent=None):
        """
        PREDICATE: EXPR | INDEX

        An INDEX is wrapped in C{IndexValue}.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'PREDICATE', [])
        _token = self._peek('INDEX', '"\\("', '"@"', 'FUNCNAME', 'STR_DQ', 'STR_SQ')
        if _token != 'INDEX':
            EXPR = self.EXPR(_context)
            return EXPR
        else: # == 'INDEX'
            INDEX = self._scan('INDEX')
            return IndexValue(INDEX)

    def EXPR(self, _parent=None):
        """
        EXPR: FACTOR (BOOLOP FACTOR)*

        Folds left to right into nested C{BooleanValue} objects.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'EXPR', [])
        FACTOR = self.FACTOR(_context)
        e = FACTOR
        while self._peek('OP_AND', 'OP_OR', '"\\)"', '"\\]"') in ['OP_AND', 'OP_OR']:
            BOOLOP = self.BOOLOP(_context)
            FACTOR = self.FACTOR(_context)
            e = BooleanValue(e, BOOLOP, FACTOR)
        if self._peek() not in ['OP_AND', 'OP_OR', '"\\)"', '"\\]"']:
            raise SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['OP_AND', 'OP_OR', '"\\)"', '"\\]"']))
        return e

    def BOOLOP(self, _parent=None):
        """
        BOOLOP: OP_AND | OP_OR

        Returns the matched operator text.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'BOOLOP', [])
        _token = self._peek('OP_AND', 'OP_OR')
        if _token == 'OP_AND':
            OP_AND = self._scan('OP_AND')
            return OP_AND
        else: # == 'OP_OR'
            OP_OR = self._scan('OP_OR')
            return OP_OR

    def FACTOR(self, _parent=None):
        """
        FACTOR: TERM | "(" EXPR ")"
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'FACTOR', [])
        _token = self._peek('"\\("', '"@"', 'FUNCNAME', 'STR_DQ', 'STR_SQ')
        if _token != '"\\("':
            TERM = self.TERM(_context)
            return TERM
        else: # == '"\\("'
            self._scan('"\\("')
            EXPR = self.EXPR(_context)
            self._scan('"\\)"')
            return EXPR

    def TERM(self, _parent=None):
        """
        TERM: VALUE [CMP VALUE]

        A comparison is wrapped in C{CompareValue}; a lone value is returned
        as is.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'TERM', [])
        VALUE = self.VALUE(_context)
        t = VALUE
        if self._peek('CMP_EQ', 'CMP_NE', 'OP_AND', 'OP_OR', '"\\)"', '"\\]"') in ['CMP_EQ', 'CMP_NE']:
            CMP = self.CMP(_context)
            VALUE = self.VALUE(_context)
            t = CompareValue(t, CMP, VALUE)
        return t

    def VALUE(self, _parent=None):
        """
        VALUE: "@" IDENTIFIER
             | FUNCNAME "(" [VALUE ("," VALUE)*] ")"
             | STR

        Produces an C{AttribValue}, a C{Function} with its parameters set,
        or a C{LiteralValue} holding the string without its quotes.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'VALUE', [])
        _token = self._peek('"@"', 'FUNCNAME', 'STR_DQ', 'STR_SQ')
        if _token == '"@"':
            self._scan('"@"')
            IDENTIFIER = self._scan('IDENTIFIER')
            return AttribValue(IDENTIFIER)
        elif _token == 'FUNCNAME':
            FUNCNAME = self._scan('FUNCNAME')
            f = Function(FUNCNAME); args = []
            self._scan('"\\("')
            if self._peek('"\\)"', '"@"', 'FUNCNAME', '","', 'STR_DQ', 'STR_SQ') not in ['"\\)"', '","']:
                VALUE = self.VALUE(_context)
                args.append(VALUE)
            while self._peek('","', '"\\)"') == '","':
                self._scan('","')
                VALUE = self.VALUE(_context)
                args.append(VALUE)
            if self._peek() not in ['","', '"\\)"']:
                raise SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', '"\\)"']))
            self._scan('"\\)"')
            f.setParams(*args); return f
        else: # in ['STR_DQ', 'STR_SQ']
            STR = self.STR(_context)
            # Drop the surrounding quote characters.
            return LiteralValue(STR[1:len(STR)-1])

    def CMP(self, _parent=None):
        """
        CMP: CMP_EQ | CMP_NE

        Returns the matched operator text.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'CMP', [])
        _token = self._peek('CMP_EQ', 'CMP_NE')
        if _token == 'CMP_EQ':
            CMP_EQ = self._scan('CMP_EQ')
            return CMP_EQ
        else: # == 'CMP_NE'
            CMP_NE = self._scan('CMP_NE')
            return CMP_NE

    def STR(self, _parent=None):
        """
        STR: STR_DQ | STR_SQ

        Returns the matched text, quotes included.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'STR', [])
        _token = self._peek('STR_DQ', 'STR_SQ')
        if _token == 'STR_DQ':
            STR_DQ = self._scan('STR_DQ')
            return STR_DQ
        else: # == 'STR_SQ'
            STR_SQ = self._scan('STR_SQ')
            return STR_SQ

493

494

def parse(rule, text):
    """
    Parse C{text} starting from the grammar rule named C{rule}.

    @param rule: name of the L{XPathParser} rule method to start from
    @param text: the input to parse
    @return: the result of L{wrap_error_reporter} for that rule
    """
    scanner = XPathParserScanner(text)
    parser = XPathParser(scanner)
    return wrap_error_reporter(parser, rule)

498

if __name__ == '__main__':
    # Command-line entry point: parse stdin, or the named file, starting
    # from the given rule, and print the result.
    from sys import argv, stdin
    if len(argv) >= 2:
        if len(argv) >= 3:
            f = open(argv[2],'r')
            # Close the file even if reading fails, instead of leaking the
            # handle.
            try:
                text = f.read()
            finally:
                f.close()
        else:
            text = stdin.read()
        # write() rather than the print statement so this is valid on both
        # Python 2 and Python 3.
        sys.stdout.write('%s\n' % (parse(argv[1], text),))
    else:
        sys.stderr.write('Args: <rule> [<filename>]\n')

508 # End -- grammar generated by Yapps

OLD	NEW

« no previous file with comments | « third_party/twisted_8_1/twisted/words/xish/xpathparser.g ('k') | no next file » | no next file with comments »