Chromium Code Reviews| Index: ppapi/generators/idl_lexer.py |
| diff --git a/ppapi/generators/idl_lexer.py b/ppapi/generators/idl_lexer.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..2509b8c4ff33a28cadfcb70259c764eb029969ef |
| --- /dev/null |
| +++ b/ppapi/generators/idl_lexer.py |
| @@ -0,0 +1,234 @@ |
| +#!/usr/bin/python |
| +# |
| +# Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +""" Lexer for PPAPI IDL """ |
| + |
| + |
| +import getopt |
| +import os.path |
| +import re |
| +import sys |
| + |
| +# |
| +# Try to load the ply module, if not, then assume it is in the third_party |
| +# directory, relative to ppapi |
| +# |
try:
  from ply import lex
  from ply import yacc
except ImportError:
  # ply is not importable from the default path: assume a copy lives in the
  # third_party directory two levels above this file and retry from there.
  # Only ImportError is handled so that any other failure raised while
  # importing ply is reported instead of being masked by the fallback.
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(module_path, '..', '..', 'third_party')
  sys.path.append(third_party)
  from ply import lex
  from ply import yacc
|
Nick Bray
2011/03/15 21:44:06
Unused?
noelallen1
2011/03/17 01:20:02
Done.
|
| + |
| + |
| +# |
| +# IDL Lexer |
| +# |
| +# Tokenizer for PPAPI IDL source text, built on the PLY lex module. |
| +# |
| + |
class IDLLexer(object):
  """Lexer for PPAPI IDL text, built on PLY's lex module.

  lex.lex(object=self) (see __init__) builds the tokenizer by inspecting this
  object, following the conventions described in the PLY documentation:
    * 'tokens' lists every token type a rule may return.
    * 'literals' lists single characters that are returned as themselves.
    * 't_ignore' lists characters silently skipped between tokens.
    * A 't_NAME' string attribute is the regular expression for token NAME.
    * A 't_NAME' method holds its regular expression in its docstring and is
      called with each matched token; the token it returns is emitted, and
      returning nothing discards the match.  For that reason the rule
      methods below are described with comments, never with docstrings.

  Line numbers stored on the lexer are zero based (see SetData); they are
  converted to one based numbers only when a message is formatted.
  """

  # Every token type a rule can produce.  Each value of 'keywords' must also
  # appear here, otherwise the keyword cannot be returned as a valid token.
  tokens = [
    # Symbol and keywords types
    'COMMENT',
    'DESCRIBE',
    'ENUM',
    'SYMBOL',
    'INTERFACE',
    'READONLY',
    'STRUCT',
    'TYPEDEF',

    # Data types
    'FLOAT',
    'INT',
    'HEX',
    'STRING',

    # Operators
    'LSHIFT',
  ]

  # Reserved words, mapped to the token type reported for them.
  keywords = {
    'describe' : 'DESCRIBE',
    'enum' : 'ENUM',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',
  }

  # Single character tokens, and the characters skipped between tokens.
  literals = '"*.(){}[],;:=+-'
  t_ignore = ' \t'

  # A line ending '\n', we use this to increment the line number.  Nothing is
  # returned, so no token is emitted for line endings.
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # Constant values.  Both FLOAT forms (with a decimal point, or with only an
  # exponent) accept an optional leading minus sign, as HEX-less INT does.
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|-?\d+[Ee][+-]?\d+'
  t_HEX = r'0x[a-fA-F0-9]+'
  t_INT = r'-?\d+'
  t_LSHIFT = r'<<'

  # A double quoted string.  Escaped quotes are not allowed in IDL strings,
  # so the string simply runs to the next '"'.  The surrounding quotes are
  # stripped from the token value, and any newlines inside the string are
  # counted so that line tracking stays correct.
  def t_STRING(self, t):
    r'"[^"]*"'
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment: /* xxx */ or //
  # The pattern carries no inline (?s)/(?m) flags: such flags would apply to
  # the whole combined expression, and Python 3.11+ rejects them when they
  # are not at the very start of a pattern.  '(.|\n)' lets a block comment
  # span lines, and '.*' stops a line comment at the end of its line.
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    self.AddLines(t.value.count('\n'))
    return t

  # A symbol or keyword.  Names start with a letter; leading underscores are
  # not allowed.
  def t_KEYWORD_SYMBOL(self, t):
    r'[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols
    t.type = self.keywords.get(t.value, 'SYMBOL')
    return t

  # Error rule: report the offending line with a caret under the column.
  # The column is the lexer position relative to the recorded start offset
  # of the current line.  No input is skipped here.
  # NOTE(review): presumably PLY then raises lex.LexError, which Main
  # catches -- confirm against the PLY version in use.
  def t_ANY_error(self, t):
    line = self.lexobj.lineno
    pos = self.lexobj.lexpos - self.index[line]
    filename = self.lexobj.filename
    out = self.ErrorMessage(filename, line, pos, "Unrecognized input")
    sys.stderr.write(out + '\n')

  def AddLines(self, count):
    """Advance the line counter by 'count' and record line start offsets.

    One entry is appended to self.index per new line so that the list always
    has lineno + 1 entries.  All entries added by one call share the lexer's
    current position; t_ANY_error only ever reads the entry for the current
    line, which is the last one appended.
    """
    self.lexobj.lineno += count
    self.index.extend([self.lexobj.lexpos] * count)

  def FileLineMsg(self, file, line, msg):
    """Return 'msg' prefixed with 'file(line)', using a one based line.

    When 'file' is empty or None the message is attributed to '<BuiltIn>'.
    """
    if file:
      return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  def SourceLine(self, file, line, pos):
    """Return source line 'line' followed by a caret line marking 'pos'.

    'line' is a zero based index into the lines stored by SetData.  'file'
    is accepted for symmetry with FileLineMsg but is not used.
    """
    # A single tab expanded to a width of 'pos' yields 'pos' leading spaces.
    caret = '\t^'.expandtabs(pos)
    return "%s\n%s" % (self.lines[line], caret)

  def ErrorMessage(self, file, line, pos, msg):
    """Return a multi line message: location and text, source line, caret."""
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  def SetData(self, filename, data):
    """Reset the lexer state and load 'data' as the text to tokenize.

    'filename' is only used when reporting errors.  Line numbering restarts
    at zero and the line start index is reset to the single offset 0.
    """
    self.lexobj.filename = filename
    self.lexobj.lineno = 0
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)

  def __init__(self):
    """Build the underlying PLY lexer from the rules defined on this class."""
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
| + |
| + |
| +# |
| +# FilesToTokens |
| +# |
| +# From a set of source file names, generate a list of tokens. |
| +# |
def FilesToTokens(filenames, verbose=False):
  """Tokenize a set of source files into one flat list of token values.

  Args:
    filenames: iterable of paths of the IDL files to read, in order.
    verbose: when true, print a line naming each file as it is loaded.

  Returns:
    A list holding the value of every token, from all files, in the order
    the tokens were produced.
  """
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    # Close each file as soon as it has been read instead of leaving the
    # handle open until it happens to be garbage collected.
    with open(filename) as source_file:
      data = source_file.read()
    lexer.SetData(filename, data)
    if verbose:
      # Report the file just loaded, not the whole list of files.
      print('\tLoaded %s...' % filename)
    while 1:
      t = lexer.lexobj.token()
      if t is None:
        break
      outlist.append(t.value)
  return outlist
| + |
| +# |
| +# TextToTokens |
| +# |
| +# From a block of text, generate a list of tokens |
| +# |
def TextToTokens(source):
  """Tokenize a block of text and return the list of token values.

  The text is lexed under the placeholder file name 'AUTO'.
  """
  idl_lexer = IDLLexer()
  idl_lexer.SetData('AUTO', source)
  # token() returns None once the input is exhausted, which ends the loop.
  next_token = idl_lexer.lexobj.token
  return [tok.value for tok in iter(next_token, None)]
| + |
| + |
| +# |
| +# Test |
| +# |
| +# From a set of filenames, generate a token list, which is then converted |
| +# to a text block by joining with a single space. Then re-tokenize the new |
| +# text block to verify it generates the same set. |
| +# |
def Test(tokens, output=False, verbose=False):
  """Check that a token list survives a join / re-tokenize round trip.

  The tokens are joined with single spaces, the resulting text is tokenized
  again, and the two space-joined texts are compared.

  Args:
    tokens: list of token value strings to verify.
    output: when true, write the first text to 'original.txt' and the
        re-tokenized text to 'tokized.txt' in the current directory.
    verbose: when true, announce that the test is running.

  Returns:
    0 if both texts are identical ("Pass" is printed), otherwise -1
    ("Failed" is printed).
  """
  if verbose:
    print("Testing lexer")
  src1 = ' '.join(tokens)
  src2 = ' '.join(TextToTokens(src1))

  if output:
    # Write through context managers so both files are flushed and closed
    # before the function returns.
    with open('original.txt', 'w') as original_file:
      original_file.write(src1)
    with open('tokized.txt', 'w') as retokenized_file:
      retokenized_file.write(src2)

  if src1 == src2:
    print("Pass")
    return 0

  print("Failed")
  return -1
| + |
| + |
def Main(args):
  """Command line entry point: tokenize the IDL files named in 'args'.

  Recognized options (all are plain flags that take no value):
    --output   print the tokens separated by spaces; also makes --test
               write its two comparison files.
    --test     run the round trip check implemented by Test.
    --verbose  print progress information.
  Every remaining argument is treated as the path of an IDL source file.

  Args:
    args: list of command line arguments, without the program name.

  Returns:
    1 if the options are invalid, -1 if lexing fails, the result of Test
    when --test is given, and 0 otherwise.
  """
  usage = 'Usage: idl_lexer.py --test --output --verbose [<src.idl> ...]'
  try:
    # All three options are flags.  Declaring 'output=' would make getopt
    # swallow the following argument (normally a file name) as its value,
    # and leaving 'verbose' out would reject the documented --verbose flag.
    long_opts = ['output', 'test', 'verbose']
    opts, filenames = getopt.getopt(args, '', long_opts)

  except getopt.error as e:
    sys.stderr.write('Illegal option: %s\n' % str(e))
    sys.stderr.write(usage + '\n')
    return 1

  given = [opt for opt, _ in opts]
  output = '--output' in given
  test = '--test' in given
  verbose = '--verbose' in given

  try:
    tokens = FilesToTokens(filenames, verbose)

    if output:
      print(' '.join(tokens))
    if test:
      return Test(tokens, output=output, verbose=verbose)
    return 0

  except lex.LexError as le:
    sys.stderr.write(str(le) + '\n')
    return -1


if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))
| + |