mojo/public/third_party/ply/lex.py - Issue 1530433002: Remove directories in mojo/public/third_party that just mirrored third_party.

Side by Side Diff: mojo/public/third_party/ply/lex.py

Issue 1530433002: Remove directories in mojo/public/third_party that just mirrored third_party. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: with fixes Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 # -----------------------------------------------------------------------------

2 # ply: lex.py

3 #

4 # Copyright (C) 2001-2011,

5 # David M. Beazley (Dabeaz LLC)

6 # All rights reserved.

7 #

8 # Redistribution and use in source and binary forms, with or without

9 # modification, are permitted provided that the following conditions are

10 # met:

11 #

12 # * Redistributions of source code must retain the above copyright notice,

13 # this list of conditions and the following disclaimer.

14 # * Redistributions in binary form must reproduce the above copyright notice,

15 # this list of conditions and the following disclaimer in the documentation

16 # and/or other materials provided with the distribution.

17 # * Neither the name of the David Beazley or Dabeaz LLC may be used to

18 # endorse or promote products derived from this software without

19 # specific prior written permission.

20 #

21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

24 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

25 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

26 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

27 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

28 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

29 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

30 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

32 # -----------------------------------------------------------------------------

33

# Release version of this PLY lexer module.
__version__ = "3.4"
__tabversion__ = "3.2"       # Version of table file used

36

37 import re, sys, types, copy, os

38

39 # This tuple contains known string types

try:
    # Python 2.6: the types module still exposes the two string classes.
    StringTypes = (types.StringType, types.UnicodeType)
except AttributeError:
    # Python 3.0: those attributes are gone, so fall back to str and bytes.
    StringTypes = (str, bytes)

46

47 # Extract the code attribute of a function. Different implementations

48 # are for Python 2/3 compatibility.

49

if sys.version_info[0] < 3:
    def func_code(f):
        """Return the code object of function f (Python 2 attribute name)."""
        return f.func_code
else:
    def func_code(f):
        """Return the code object of function f (Python 3 attribute name)."""
        return f.__code__

56

57 # This regular expression is used to match valid token names

_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')

59

60 # Exception thrown when invalid token encountered and no default error

61 # handler is defined.

62

class LexError(Exception):
    """Error raised by the lexer when input cannot be tokenized.

    Attributes:
        args: one-element tuple holding the error message.
        text: the value passed as ``s``; the lexer passes the remaining,
            unscanned input starting at the offending position.
    """

    def __init__(self, message, s):
        self.args = (message,)
        self.text = s

67

68 # Token class. This class is used to represent the tokens produced.

class LexToken(object):
    """Plain container for one token.

    The class defines no fields itself; the code that creates a token is
    expected to assign ``type``, ``value``, ``lineno`` and ``lexpos`` before
    the token is printed.
    """

    def __str__(self):
        return "LexToken(%s,%r,%d,%d)" % (self.type, self.value, self.lineno, self.lexpos)

    def __repr__(self):
        return str(self)

74

75 # This object is a stand-in for a logging object created by the

76 # logging module.

77

class PlyLogger(object):
    """Minimal stand-in for a ``logging`` logger that writes to a file object.

    Each method formats ``msg % args`` and writes one line to ``f``.
    Keyword arguments are accepted for call compatibility and ignored.
    """

    def __init__(self, f):
        # f only needs a write() method (e.g. sys.stderr).
        self.f = f

    def critical(self, msg, *args, **kwargs):
        # Variadic so callers can pass zero or several format arguments.
        self.f.write((msg % args) + "\n")

    def warning(self, msg, *args, **kwargs):
        self.f.write("WARNING: " + (msg % args) + "\n")

    def error(self, msg, *args, **kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    # info and debug produce the same unprefixed output as critical.
    info = critical
    debug = critical

92

93 # Null logger is used when no output is generated. Does nothing.

class NullLogger(object):
    """Logger that discards everything.

    Every attribute lookup and every call returns the logger itself, so any
    chain such as ``log.info("x", 1)`` is a no-op.
    """

    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        # Accept any call signature; nothing is written anywhere.
        return self

99

100 # -----------------------------------------------------------------------------

101 # === Lexing Engine ===

102 #

103 # The following Lexer class implements the lexer runtime. There are only

104 # a few public methods and attributes:

105 #

106 # input() - Store a new string in the lexer

107 # token() - Get the next token

108 # clone() - Clone the lexer

109 #

110 # lineno - Current line number

111 # lexpos - Current position in the input string

112 # -----------------------------------------------------------------------------

113

class Lexer:
    """Lexer runtime.

    Public methods: input() stores a new string, token() returns the next
    token (or None at end of input), clone() copies the lexer.  State handling
    is done with begin(), push_state(), pop_state() and current_state().
    Public attributes: lineno (current line number) and lexpos (current
    position in the input string).
    """

    def __init__(self):
        self.lexre = None             # Master regular expression. This is a list of
                                      # tuples (re,findex) where re is a compiled
                                      # regular expression and findex is a list
                                      # mapping regex group numbers to rules
        self.lexretext = None         # Current regular expression strings
        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
        self.lexstaterenames = {}     # Dictionary mapping lexer states to symbol names
        self.lexstate = "INITIAL"     # Current lexer state
        self.lexstatestack = []       # Stack of lexer states
        self.lexstateinfo = None      # State information
        self.lexstateignore = {}      # Dictionary of ignored characters for each state
        self.lexstateerrorf = {}      # Dictionary of error functions for each state
        self.lexreflags = 0           # Optional re compile flags
        self.lexdata = None           # Actual input data (as a string)
        self.lexpos = 0               # Current position in input text
        self.lexlen = 0               # Length of the input text
        self.lexerrorf = None         # Error rule (if any)
        self.lextokens = None         # List of valid tokens
        self.lexignore = ""           # Ignored characters
        self.lexliterals = ""         # Literal characters that can be passed through
        self.lexmodule = None         # Module
        self.lineno = 1               # Current line number
        self.lexoptimize = 0          # Optimized mode

    def clone(self, object=None):
        """Return a shallow copy of this lexer.

        If ``object`` is supplied the copy is attached to it: every rule
        function in the lexstatere and lexstateerrorf tables is replaced by
        the attribute of the same name looked up on ``object``.
        """
        c = copy.copy(self)

        if object:
            newtab = {}
            for key, ritem in self.lexstatere.items():
                newre = []
                for cre, findex in ritem:
                    newfindex = []
                    for f in findex:
                        if not f or not f[0]:
                            # Empty slot or string-defined rule: nothing to rebind.
                            newfindex.append(f)
                            continue
                        newfindex.append((getattr(object, f[0].__name__), f[1]))
                    newre.append((cre, newfindex))
                newtab[key] = newre
            c.lexstatere = newtab
            c.lexstateerrorf = {}
            for key, ef in self.lexstateerrorf.items():
                # A state may have no error function (writetab() handles the
                # same case); keep the empty entry instead of dereferencing it.
                if ef:
                    c.lexstateerrorf[key] = getattr(object, ef.__name__)
                else:
                    c.lexstateerrorf[key] = ef
            c.lexmodule = object
        return c

    # ------------------------------------------------------------
    # writetab() - Write lexer information to a table file
    # ------------------------------------------------------------
    def writetab(self, tabfile, outputdir=""):
        """Write the lexer tables to ``<outputdir>/<last component of tabfile>.py``.

        Does nothing when ``tabfile`` is a module object.
        """
        if isinstance(tabfile, types.ModuleType):
            return
        basetabfilename = tabfile.split(".")[-1]
        filename = os.path.join(outputdir, basetabfilename) + ".py"

        # Rule functions cannot be written out, so store rule names instead.
        tabre = {}
        for key, lre in self.lexstatere.items():
            titem = []
            for part, retext, renames in zip(lre, self.lexstateretext[key],
                                             self.lexstaterenames[key]):
                titem.append((retext, _funcs_to_names(part[1], renames)))
            tabre[key] = titem

        taberr = {}
        for key, ef in self.lexstateerrorf.items():
            if ef:
                taberr[key] = ef.__name__
            else:
                taberr[key] = None

        # The with-block closes the file even if a write fails.
        with open(filename, "w") as tf:
            tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile, __version__))
            tf.write("_tabversion = %s\n" % repr(__version__))
            tf.write("_lextokens = %s\n" % repr(self.lextokens))
            tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
            tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
            tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))
            tf.write("_lexstatere = %s\n" % repr(tabre))
            tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))
            tf.write("_lexstateerrorf = %s\n" % repr(taberr))

    # ------------------------------------------------------------
    # readtab() - Read lexer information from a tab file
    # ------------------------------------------------------------
    def readtab(self, tabfile, fdict):
        """Load lexer tables from ``tabfile`` (a module or a module name).

        ``fdict`` maps rule names to the rule objects; names stored in the
        table are resolved through it.  Raises ImportError when the table was
        written by a different PLY version (or the import itself fails).
        """
        if isinstance(tabfile, types.ModuleType):
            lextab = tabfile
        else:
            if sys.version_info[0] < 3:
                exec("import %s as lextab" % tabfile)
            else:
                # Python 3: exec cannot create function locals, so import
                # into an explicit namespace and fetch the module from it.
                env = {}
                exec("import %s as lextab" % tabfile, env, env)
                lextab = env['lextab']

        if getattr(lextab, "_tabversion", "0.0") != __version__:
            raise ImportError("Inconsistent PLY version")

        self.lextokens = lextab._lextokens
        self.lexreflags = lextab._lexreflags
        self.lexliterals = lextab._lexliterals
        self.lexstateinfo = lextab._lexstateinfo
        self.lexstateignore = lextab._lexstateignore
        self.lexstatere = {}
        self.lexstateretext = {}
        for key, lre in lextab._lexstatere.items():
            titem = []
            txtitem = []
            for entry in lre:
                retext = entry[0]
                titem.append((re.compile(retext, lextab._lexreflags | re.VERBOSE),
                              _names_to_funcs(entry[1], fdict)))
                txtitem.append(retext)
            self.lexstatere[key] = titem
            self.lexstateretext[key] = txtitem
        self.lexstateerrorf = {}
        for key, ef in lextab._lexstateerrorf.items():
            # writetab() stores None for a state without an error function.
            if ef:
                self.lexstateerrorf[key] = fdict[ef]
            else:
                self.lexstateerrorf[key] = None
        self.begin('INITIAL')

    # ------------------------------------------------------------
    # input() - Push a new string into the lexer
    # ------------------------------------------------------------
    def input(self, s):
        """Store ``s`` as the text to tokenize and reset the position to 0.

        Raises ValueError if ``s`` does not look like a string.
        """
        # Pull off the first character to see if s looks like a string
        c = s[:1]
        if not isinstance(c, StringTypes):
            raise ValueError("Expected a string")
        self.lexdata = s
        self.lexpos = 0
        self.lexlen = len(s)

    # ------------------------------------------------------------
    # begin() - Changes the lexing state
    # ------------------------------------------------------------
    def begin(self, state):
        """Switch to ``state``; raises ValueError if the state is undefined."""
        if state not in self.lexstatere:
            raise ValueError("Undefined state")
        self.lexre = self.lexstatere[state]
        self.lexretext = self.lexstateretext[state]
        self.lexignore = self.lexstateignore.get(state, "")
        self.lexerrorf = self.lexstateerrorf.get(state, None)
        self.lexstate = state

    # ------------------------------------------------------------
    # push_state() - Changes the lexing state and saves old on stack
    # ------------------------------------------------------------
    def push_state(self, state):
        """Save the current state on the stack, then switch to ``state``."""
        self.lexstatestack.append(self.lexstate)
        self.begin(state)

    # ------------------------------------------------------------
    # pop_state() - Restores the previous state
    # ------------------------------------------------------------
    def pop_state(self):
        """Switch back to the most recently pushed state."""
        self.begin(self.lexstatestack.pop())

    # ------------------------------------------------------------
    # current_state() - Returns the current lexing state
    # ------------------------------------------------------------
    def current_state(self):
        """Return the name of the current lexing state."""
        return self.lexstate

    # ------------------------------------------------------------
    # skip() - Skip ahead n characters
    # ------------------------------------------------------------
    def skip(self, n):
        """Advance the input position by ``n`` characters."""
        self.lexpos += n

    # ------------------------------------------------------------
    # token() - Return the next token from the Lexer
    #
    # Note: This function has been carefully implemented to be as fast
    # as possible. Don't make changes unless you really know what
    # you are doing
    # ------------------------------------------------------------
    def token(self):
        """Return the next token, or None when the input is exhausted.

        Raises LexError for input no rule, literal or error function handles,
        and RuntimeError if no input was supplied with input().
        """
        # Make local copies of frequently referenced attributes
        lexpos = self.lexpos
        lexlen = self.lexlen
        lexignore = self.lexignore
        lexdata = self.lexdata

        while lexpos < lexlen:
            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
            if lexdata[lexpos] in lexignore:
                lexpos += 1
                continue

            # Look for a regular expression match
            for lexre, lexindexfunc in self.lexre:
                m = lexre.match(lexdata, lexpos)
                if not m:
                    continue

                # Create a token for return
                tok = LexToken()
                tok.value = m.group()
                tok.lineno = self.lineno
                tok.lexpos = lexpos

                i = m.lastindex
                func, tok.type = lexindexfunc[i]

                if not func:
                    # If no token type was set, it's an ignored token
                    if tok.type:
                        self.lexpos = m.end()
                        return tok
                    else:
                        lexpos = m.end()
                        break

                lexpos = m.end()

                # If token is processed by a function, call it

                tok.lexer = self      # Set additional attributes useful in token rules
                self.lexmatch = m
                self.lexpos = lexpos

                newtok = func(tok)

                # Every function must return a token, if nothing, we just move to next token
                if not newtok:
                    lexpos = self.lexpos          # This is here in case user has updated lexpos.
                    lexignore = self.lexignore    # This is here in case there was a state change
                    break

                # Verify type of the token. If not in the token map, raise an error
                if not self.lexoptimize:
                    if newtok.type not in self.lextokens:
                        raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                            func_code(func).co_filename, func_code(func).co_firstlineno,
                            func.__name__, newtok.type), lexdata[lexpos:])

                return newtok
            else:
                # No match, see if in literals
                if lexdata[lexpos] in self.lexliterals:
                    tok = LexToken()
                    tok.value = lexdata[lexpos]
                    tok.lineno = self.lineno
                    tok.type = tok.value
                    tok.lexpos = lexpos
                    self.lexpos = lexpos + 1
                    return tok

                # No match. Call t_error() if defined.
                if self.lexerrorf:
                    tok = LexToken()
                    tok.value = self.lexdata[lexpos:]
                    tok.lineno = self.lineno
                    tok.type = "error"
                    tok.lexer = self
                    tok.lexpos = lexpos
                    self.lexpos = lexpos
                    newtok = self.lexerrorf(tok)
                    if lexpos == self.lexpos:
                        # Error method didn't change text position at all. This is an error.
                        raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                    lexpos = self.lexpos
                    if not newtok:
                        continue
                    return newtok

                self.lexpos = lexpos
                raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:])

        self.lexpos = lexpos + 1
        if self.lexdata is None:
            raise RuntimeError("No input string given with input()")
        return None

    # Iterator interface
    def __iter__(self):
        return self

    def next(self):
        """Return the next token; raise StopIteration at end of input."""
        t = self.token()
        if t is None:
            raise StopIteration
        return t

    __next__ = next

411

412 # -----------------------------------------------------------------------------

413 # ==== Lex Builder ===

414 #

415 # The functions and classes below are used to collect lexing information

416 # and build a Lexer object from it.

417 # -----------------------------------------------------------------------------

418

419 # -----------------------------------------------------------------------------

420 # get_caller_module_dict()

421 #

422 # This function returns a dictionary containing all of the symbols defined within

423 # a caller further down the call stack. This is used to get the environment

424 # associated with the yacc() call if none was provided.

425 # -----------------------------------------------------------------------------

426

def get_caller_module_dict(levels):
    """Return the symbols visible in a frame ``levels`` calls up the stack.

    Level 0 is this function's own frame.  The result is a copy of that
    frame's globals, overlaid with its locals when the two differ.
    """
    # Raising and catching an exception yields a traceback whose frame is
    # the current one.
    try:
        raise RuntimeError
    except RuntimeError:
        frame = sys.exc_info()[2].tb_frame
    while levels > 0:
        frame = frame.f_back
        levels -= 1
    ldict = frame.f_globals.copy()
    if frame.f_globals != frame.f_locals:
        ldict.update(frame.f_locals)

    return ldict

441

442 # -----------------------------------------------------------------------------

443 # _funcs_to_names()

444 #

445 # Given a list of regular expression functions, this converts it to a list

446 # suitable for output to a table file

447 # -----------------------------------------------------------------------------

448

def _funcs_to_names(funclist, namelist):
    """Replace rule functions by their names so the list can be written out.

    ``funclist`` holds entries that are either falsy or ``(func, tokname)``
    pairs; ``namelist`` holds the rule name for the same position.  Entries
    with a function become ``(name, tokname)``; all others pass through
    unchanged.
    """
    result = []
    for f, name in zip(funclist, namelist):
        if f and f[0]:
            result.append((name, f[1]))
        else:
            result.append(f)
    return result

457

458 # -----------------------------------------------------------------------------

459 # _names_to_funcs()

460 #

461 # Given a list of regular expression function names, this converts it back to

462 # functions.

463 # -----------------------------------------------------------------------------

464

def _names_to_funcs(namelist, fdict):
    """Inverse of _funcs_to_names(): resolve rule names back to functions.

    Entries of the form ``(name, tokname)`` with a truthy name become
    ``(fdict[name], tokname)``; all other entries pass through unchanged.
    Raises KeyError if a name is missing from ``fdict``.
    """
    result = []
    for n in namelist:
        if n and n[0]:
            result.append((fdict[n[0]], n[1]))
        else:
            result.append(n)
    return result

473

474 # -----------------------------------------------------------------------------

475 # _form_master_re()

476 #

477 # This function takes a list of all of the regex components and attempts to

478 # form the master regular expression. Given limitations in the Python re

479 # module, it may be necessary to break the master regex into separate expressions.

480 # -----------------------------------------------------------------------------

481

def _form_master_re(relist, reflags, ldict, toknames):
    """Combine rule regexes into as few master regexes as will compile.

    Args:
        relist: regex strings, each expected to define one named group.
        reflags: extra ``re`` flags, OR-ed with re.VERBOSE.
        ldict: maps group (rule) names to the rule objects.
        toknames: maps rule names to token names.

    Returns:
        A triple of parallel lists ``(res, texts, names)``: ``res`` holds
        ``(compiled_re, index_to_rule)`` pairs, ``texts`` the regex strings
        and ``names`` the per-group rule names.  All three are empty for an
        empty ``relist``.

    Raises:
        Whatever the compile/indexing step raised, when a single regex
        cannot be processed on its own.
    """
    if not relist:
        # Callers unpack three values, so keep the shape for the empty case.
        return [], [], []
    regex = "|".join(relist)
    try:
        lexre = re.compile(regex, re.VERBOSE | reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1)
        lexindexnames = lexindexfunc[:]

        for f, i in lexre.groupindex.items():
            handle = ldict.get(f, None)
            if type(handle) in (types.FunctionType, types.MethodType):
                lexindexfunc[i] = (handle, toknames[f])
                lexindexnames[i] = f
            elif handle is not None:
                lexindexnames[i] = f
                if f.find("ignore_") > 0:
                    lexindexfunc[i] = (None, None)
                else:
                    lexindexfunc[i] = (None, toknames[f])

        return [(lexre, lexindexfunc)], [regex], [lexindexnames]
    except Exception:
        # Splitting cannot help a single expression; re-raise instead of
        # recursing on the same list forever.
        if len(relist) == 1:
            raise
        m = len(relist) // 2
        llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames)
        rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames)
        return llist + rlist, lre + rre, lnames + rnames

511

512 # -----------------------------------------------------------------------------

513 # def _statetoken(s,names)

514 #

515 # Given a declaration name s of the form "t_" and a dictionary whose keys are

516 # state names, this function returns a tuple (states,tokenname) where states

517 # is a tuple of state names and tokenname is the name of the token. For example,

518 # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')

519 # -----------------------------------------------------------------------------

520

def _statetoken(s, names):
    """Split a rule name into ``(states, tokenname)``.

    The parts of ``s`` after the leading prefix are consumed as state names
    while they appear in ``names`` or equal 'ANY'; the rest (joined with
    '_') is the token name.  With no state parts the state is ('INITIAL',);
    if 'ANY' is among them the result covers every name in ``names``.
    """
    parts = s.split("_")
    i = 1    # keeps i defined when s contains no underscore
    for i in range(1, len(parts)):
        if parts[i] not in names and parts[i] != 'ANY':
            break
    if i > 1:
        states = tuple(parts[1:i])
    else:
        states = ('INITIAL',)

    if 'ANY' in states:
        states = tuple(names)

    tokenname = "_".join(parts[i:])
    return (states, tokenname)

536

537

538 # -----------------------------------------------------------------------------

539 # LexerReflect()

540 #

541 # This class represents information needed to build a lexer as extracted from a

542 # user's input file.

543 # -----------------------------------------------------------------------------

class LexerReflect(object):
    """Collects and validates the lexer specification found in a dictionary.

    ``ldict`` is the symbol dictionary to inspect (``tokens``, ``literals``,
    ``states`` and every ``t_`` rule).  Problems are reported through ``log``
    and recorded by setting ``self.error`` to 1.
    """

    def __init__(self, ldict, log=None, reflags=0):
        self.ldict = ldict
        self.error_func = None
        self.tokens = []
        self.reflags = reflags
        self.stateinfo = {'INITIAL': 'inclusive'}
        self.files = {}
        self.error = 0

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        self.get_tokens()
        self.get_literals()
        self.get_states()
        self.get_rules()

    # Validate all of the information
    def validate_all(self):
        """Run every validation step and return the error flag (0 or 1)."""
        self.validate_tokens()
        self.validate_literals()
        self.validate_rules()
        return self.error

    # Get the tokens map
    def get_tokens(self):
        tokens = self.ldict.get("tokens", None)
        # Test for absence first, so an empty list gets its own message below.
        if tokens is None:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        terminals = {}
        for n in self.tokens:
            if not _is_identifier.match(n):
                self.log.error("Bad token name '%s'", n)
                self.error = 1
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    # Get the literals specifier
    def get_literals(self):
        self.literals = self.ldict.get("literals", "")

    # Validate literals
    def validate_literals(self):
        try:
            for c in self.literals:
                if not isinstance(c, StringTypes) or len(c) > 1:
                    self.log.error("Invalid literal %s. Must be a single character", repr(c))
                    self.error = 1

        except TypeError:
            # Raised when self.literals cannot be iterated at all.
            self.log.error("Invalid literals specification. literals must be a sequence of characters")
            self.error = 1

    def get_states(self):
        """Read the optional ``states`` declaration into self.stateinfo."""
        self.states = self.ldict.get("states", None)
        # Build statemap
        if self.states:
            if not isinstance(self.states, (tuple, list)):
                self.log.error("states must be defined as a tuple or list")
                self.error = 1
            else:
                for s in self.states:
                    if not isinstance(s, tuple) or len(s) != 2:
                        self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')", repr(s))
                        self.error = 1
                        continue
                    name, statetype = s
                    if not isinstance(name, StringTypes):
                        self.log.error("State name %s must be a string", repr(name))
                        self.error = 1
                        continue
                    if not (statetype == 'inclusive' or statetype == 'exclusive'):
                        self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name)
                        self.error = 1
                        continue
                    if name in self.stateinfo:
                        self.log.error("State '%s' already defined", name)
                        self.error = 1
                        continue
                    self.stateinfo[name] = statetype

    # Get all of the symbols with a t_ prefix and sort them into various
    # categories (functions, strings, error functions, and ignore characters)

    def get_rules(self):
        tsymbols = [f for f in self.ldict if f[:2] == 't_']

        # Now build up a list of functions and a list of strings

        self.toknames = {}        # Mapping of symbols to token names
        self.funcsym = {}         # Symbols defined as functions
        self.strsym = {}          # Symbols defined as strings
        self.ignore = {}          # Ignore strings by state
        self.errorf = {}          # Error functions by state

        for s in self.stateinfo:
            self.funcsym[s] = []
            self.strsym[s] = []

        if len(tsymbols) == 0:
            self.log.error("No rules of the form t_rulename are defined")
            self.error = 1
            return

        for f in tsymbols:
            t = self.ldict[f]
            states, tokname = _statetoken(f, self.stateinfo)
            self.toknames[f] = tokname

            if hasattr(t, "__call__"):
                if tokname == 'error':
                    for s in states:
                        self.errorf[s] = t
                elif tokname == 'ignore':
                    line = func_code(t).co_firstlineno
                    filename = func_code(t).co_filename
                    self.log.error("%s:%d: Rule '%s' must be defined as a string", filename, line, t.__name__)
                    self.error = 1
                else:
                    for s in states:
                        self.funcsym[s].append((f, t))
            elif isinstance(t, StringTypes):
                if tokname == 'ignore':
                    for s in states:
                        self.ignore[s] = t
                    if "\\" in t:
                        self.log.warning("%s contains a literal backslash '\\'", f)

                elif tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", f)
                    self.error = 1
                else:
                    for s in states:
                        self.strsym[s].append((f, t))
            else:
                self.log.error("%s not defined as a function or string", f)
                self.error = 1

        # Sort the functions by line number
        for funcs in self.funcsym.values():
            funcs.sort(key=lambda x: func_code(x[1]).co_firstlineno)

        # Sort the strings by regular expression length (longest first)
        for strs in self.strsym.values():
            strs.sort(key=lambda x: len(x[1]), reverse=True)

    # Validate all of the t_rules collected
    def validate_rules(self):
        for state in self.stateinfo:
            # Validate all rules defined by functions

            for fname, f in self.funcsym[state]:
                line = func_code(f).co_firstlineno
                filename = func_code(f).co_filename
                self.files[filename] = 1

                # Bound methods also receive self, so they need two arguments.
                if isinstance(f, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = func_code(f).co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments", filename, line, f.__name__)
                    self.error = 1
                    continue

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", filename, line, f.__name__)
                    self.error = 1
                    continue

                if not f.__doc__:
                    self.log.error("%s:%d: No regular expression defined for rule '%s'", filename, line, f.__name__)
                    self.error = 1
                    continue

                try:
                    c = re.compile("(?P<%s>%s)" % (fname, f.__doc__), re.VERBOSE | self.reflags)
                    if c.match(""):
                        self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", filename, line, f.__name__)
                        self.error = 1
                except re.error:
                    _etype, e, _etrace = sys.exc_info()
                    self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", filename, line, f.__name__, e)
                    if '#' in f.__doc__:
                        self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", filename, line, f.__name__)
                    self.error = 1

            # Validate all rules defined by strings
            for name, r in self.strsym[state]:
                tokname = self.toknames[name]
                if tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", name)
                    self.error = 1
                    continue

                if tokname not in self.tokens and tokname.find("ignore_") < 0:
                    self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname)
                    self.error = 1
                    continue

                try:
                    c = re.compile("(?P<%s>%s)" % (name, r), re.VERBOSE | self.reflags)
                    if c.match(""):
                        self.log.error("Regular expression for rule '%s' matches empty string", name)
                        self.error = 1
                except re.error:
                    _etype, e, _etrace = sys.exc_info()
                    self.log.error("Invalid regular expression for rule '%s'. %s", name, e)
                    if '#' in r:
                        self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name)
                    self.error = 1

            if not self.funcsym[state] and not self.strsym[state]:
                self.log.error("No rules defined for state '%s'", state)
                self.error = 1

            # Validate the error function
            efunc = self.errorf.get(state, None)
            if efunc:
                f = efunc
                line = func_code(f).co_firstlineno
                filename = func_code(f).co_filename
                self.files[filename] = 1

                if isinstance(f, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = func_code(f).co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments", filename, line, f.__name__)
                    self.error = 1

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", filename, line, f.__name__)
                    self.error = 1

        for f in self.files:
            self.validate_file(f)

    # -----------------------------------------------------------------------------
    # validate_file()
    #
    # This checks to see if there are duplicated t_rulename() functions or strings
    # in the parser input file. This is done using a simple regular expression
    # match on each line in the given file.
    # -----------------------------------------------------------------------------

    def validate_file(self, filename):
        base, ext = os.path.splitext(filename)
        if ext != '.py':
            return         # No idea what the file is. Return OK

        try:
            with open(filename) as f:
                lines = f.readlines()
        except IOError:
            return         # Couldn't find the file. Don't worry about it

        # Optional leading whitespace, then a t_ rule defined as a function
        # or assigned as a string.
        fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
        sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')

        counthash = {}
        linen = 1
        for l in lines:
            m = fre.match(l)
            if not m:
                m = sre.match(l)
            if m:
                name = m.group(1)
                prev = counthash.get(name)
                if not prev:
                    counthash[name] = linen
                else:
                    self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d", filename, linen, name, prev)
                    self.error = 1
            linen += 1

859

860 # -----------------------------------------------------------------------------

861 # lex(module)

862 #

863 # Build all of the regular expression rules from definitions in the supplied module

864 # -----------------------------------------------------------------------------

def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None):
    """Build a Lexer from the token rules found in a module, object or caller.

    Parameters:
        module:    namespace whose attributes (via dir()/getattr()) supply the
                   lexer rules.  When neither module nor object is given, the
                   dictionary returned by get_caller_module_dict(2) is used.
        object:    if truthy, used in place of module.
        debug:     if true, log tokens, literals, states, every rule and the
                   master regular expressions to debuglog.
        optimize:  if true, rule validation is skipped; combined with lextab,
                   a previously written table is loaded with readtab() and,
                   if that import fails, the table is rebuilt and written
                   with writetab().
        lextab:    table name passed to readtab()/writetab().
        reflags:   regular expression flags handed to LexerReflect and
                   _form_master_re, and stored on the lexer as lexreflags.
        nowarn:    accepted for interface compatibility; not used here.
        outputdir: directory argument passed to writetab().
        debuglog:  logger for debug output; defaults to PlyLogger(sys.stderr)
                   when debug is true.
        errorlog:  logger for warnings/errors; defaults to
                   PlyLogger(sys.stderr).

    Returns the configured Lexer object.  As a side effect the module-level
    names lexer, token and input are rebound to the new lexer and its
    token()/input() methods.

    Raises SyntaxError("Can't build lexer") when optimize is false and
    LexerReflect.validate_all() reports a problem.
    """
    global lexer
    global token,input

    lexobj = Lexer()
    lexobj.lexoptimize = optimize

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    if debug and debuglog is None:
        debuglog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the lexer
    if object:
        module = object

    if module:
        ldict = dict([(k,getattr(module,k)) for k in dir(module)])
    else:
        # NOTE(review): the argument 2 presumably counts stack frames up to
        # the caller of lex(); keep this call directly inside lex().
        ldict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    linfo = LexerReflect(ldict,log=errorlog,reflags=reflags)
    linfo.get_all()
    if not optimize:
        if linfo.validate_all():
            raise SyntaxError("Can't build lexer")

    # In optimized mode, try to reuse a previously written table.  A missing
    # table (ImportError) simply falls through to a full build below.
    if optimize and lextab:
        try:
            lexobj.readtab(lextab,ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj
        except ImportError:
            pass

    # Dump some basic debugging information
    if debug:
        debuglog.info("lex: tokens = %r", linfo.tokens)
        debuglog.info("lex: literals = %r", linfo.literals)
        debuglog.info("lex: states = %r", linfo.stateinfo)

    # Build a dictionary of valid token names
    lexobj.lextokens = { }
    for n in linfo.tokens:
        lexobj.lextokens[n] = 1

    # Get literals specification.  A list/tuple of literals is joined into a
    # single string of the same string type as its first element.
    literals = linfo.literals
    if isinstance(literals,(list,tuple)):
        if literals:
            lexobj.lexliterals = type(literals[0])().join(literals)
        else:
            # An empty sequence has no first element to take the string type
            # from; treat it as "no literals" instead of raising IndexError.
            lexobj.lexliterals = ""
    else:
        lexobj.lexliterals = literals

    # Get the stateinfo dictionary
    stateinfo = linfo.stateinfo

    # Collect the named-group patterns for every state
    regexs = { }
    for state in stateinfo:
        regex_list = []

        # Add rules defined by functions first (pattern is the docstring)
        for fname, f in linfo.funcsym[state]:
            regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)

        # Now add all of the simple rules
        for name,r in linfo.strsym[state]:
            regex_list.append("(?P<%s>%s)" % (name,r))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)

        regexs[state] = regex_list

    # Build the master regular expressions
    if debug:
        debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")

    for state in regexs:
        lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
        lexobj.lexstatere[state] = lexre
        lexobj.lexstateretext[state] = re_text
        lexobj.lexstaterenames[state] = re_names
        if debug:
            for i, text in enumerate(re_text):
                debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, text)

    # For inclusive states, we need to add the regular expressions from the INITIAL state
    for state,stype in stateinfo.items():
        if state != "INITIAL" and stype == 'inclusive':
            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
            lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])

    lexobj.lexstateinfo = stateinfo
    lexobj.lexre = lexobj.lexstatere["INITIAL"]
    lexobj.lexretext = lexobj.lexstateretext["INITIAL"]
    lexobj.lexreflags = reflags

    # Set up ignore variables
    lexobj.lexstateignore = linfo.ignore
    lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")

    # Set up error functions
    lexobj.lexstateerrorf = linfo.errorf
    lexobj.lexerrorf = linfo.errorf.get("INITIAL",None)
    if not lexobj.lexerrorf:
        errorlog.warning("No t_error rule is defined")

    # Check state information for ignore and error rules.  Exclusive states
    # only get warnings; inclusive states inherit the INITIAL settings.
    for s,stype in stateinfo.items():
        if stype == 'exclusive':
            if s not in linfo.errorf:
                errorlog.warning("No error rule is defined for exclusive state '%s'", s)
            if s not in linfo.ignore and lexobj.lexignore:
                errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
        elif stype == 'inclusive':
            if s not in linfo.errorf:
                linfo.errorf[s] = linfo.errorf.get("INITIAL",None)
            if s not in linfo.ignore:
                linfo.ignore[s] = linfo.ignore.get("INITIAL","")

    # Create global versions of the token() and input() functions
    token = lexobj.token
    input = lexobj.input
    lexer = lexobj

    # If in optimize mode, we write the lextab
    if lextab and optimize:
        lexobj.writetab(lextab,outputdir)

    return lexobj

1007

1008 # -----------------------------------------------------------------------------

1009 # runmain()

1010 #

1011 # This runs the lexer as a main program

1012 # -----------------------------------------------------------------------------

1013

def runmain(lexer=None,data=None):
    """Tokenize some input and print one line per token to standard output.

    Parameters:
        lexer: object providing input() and token() methods.  When falsy,
               the module-level input() and token() functions are used.
        data:  text to tokenize.  When falsy, the text is read from the file
               named by sys.argv[1]; if there is no such argument, it is read
               from standard input after printing a prompt.

    Each token is written as "(type,value,lineno,lexpos)".  Tokenizing stops
    at the first falsy value returned by token().
    """
    if not data:
        try:
            filename = sys.argv[1]
            f = open(filename)
            # try/finally (rather than 'with') keeps the file closed even if
            # read() fails, without requiring newer Python syntax.
            try:
                data = f.read()
            finally:
                f.close()
        except IndexError:
            sys.stdout.write("Reading from standard input (type EOF to end):\n")
            data = sys.stdin.read()

    if lexer:
        _input = lexer.input
    else:
        _input = input
    _input(data)
    if lexer:
        _token = lexer.token
    else:
        _token = token

    while 1:
        tok = _token()
        if not tok: break
        sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos))

1039

1040 # -----------------------------------------------------------------------------

1041 # @TOKEN(regex)

1042 #

1043 # This decorator function can be used to set the regex expression on a function

1044 # when its docstring might need to be set in an alternative way

1045 # -----------------------------------------------------------------------------

1046

def TOKEN(r):
    """Return a decorator that stores a regular expression as a docstring.

    r is either the pattern itself or a callable whose own __doc__ holds the
    pattern.  The returned decorator assigns that pattern to the decorated
    function's __doc__ and hands the same function back.
    """
    def set_doc(f):
        pattern = r
        # A callable argument carries the pattern in its docstring.
        if hasattr(pattern,"__call__"):
            pattern = pattern.__doc__
        f.__doc__ = pattern
        return f
    return set_doc

# Alternative spelling of the TOKEN decorator
Token = TOKEN

1058

OLD	NEW

« no previous file with comments | « mojo/public/third_party/ply/__init__.py ('k') | mojo/public/third_party/ply/license.patch » ('j') | no next file with comments »