Index: ppapi/generators/idl_lexer.py |
diff --git a/ppapi/generators/idl_lexer.py b/ppapi/generators/idl_lexer.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..2509b8c4ff33a28cadfcb70259c764eb029969ef |
--- /dev/null |
+++ b/ppapi/generators/idl_lexer.py |
@@ -0,0 +1,234 @@ |
+#!/usr/bin/python |
+# |
+# Copyright (c) 2011 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+""" Lexer for PPAPI IDL """ |
+ |
+ |
+import getopt |
+import os.path |
+import re |
+import sys |
+ |
+# |
+# Try to load the ply module, if not, then assume it is in the third_party |
+# directory, relative to ppapi |
+# |
+try: |
+ from ply import lex |
+ from ply import yacc |
+except: |
Nick Bray
2011/03/15 21:44:06
Don't bother? Just modify the path, then import?
noelallen1
2011/03/17 01:20:02
I try instead of forcing it, we do know now if it'
Nick Bray
2011/03/21 20:25:29
You're trading one ugly for another. I personally
|
+ module_path, module_name = os.path.split(__file__) |
+ third_party = os.path.join(module_path, '..', '..', 'third_party') |
+ sys.path.append(third_party) |
+ from ply import lex |
+ from ply import yacc |
Nick Bray
2011/03/15 21:44:06
Unused?
noelallen1
2011/03/17 01:20:02
Done.
|
+ |
+ |
+# |
+# IDL Lexer |
+# |
+# There are only |
+# |
+ |
class IDLLexer(object):
  """Lexer for the PPAPI IDL language, built on top of ply.lex.

  ply.lex builds the lexer from class attributes by convention:
    - 'tokens' lists every token type this lexer can emit.
    - 't_<TYPE>' strings are regular expressions for simple tokens.
    - 't_<TYPE>' functions match via their docstring regex and may
      adjust the matched token before returning it.
  """

  # Token types produced by this lexer; ply requires every type a rule
  # can return to appear in this list ('READONLY' is produced via the
  # keywords table below, so it must be listed here too).
  tokens = [
    # Symbol and keywords types
    'COMMENT',
    'DESCRIBE',
    'ENUM',
    'SYMBOL',
    'INTERFACE',
    'READONLY',
    'STRUCT',
    'TYPEDEF',

    # Data types
    'FLOAT',
    'INT',
    'HEX',
    'STRING',

    # Operators
    'LSHIFT'
  ]

  # Map of keyword strings to token types; any identifier not found
  # here is emitted as a generic SYMBOL (see t_KEYWORD_SYMBOL).
  keywords = {
    'describe' : 'DESCRIBE',
    'enum' : 'ENUM',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',
  }

  # Single characters passed through as literal tokens.
  literals = '"*.(){}[],;:=+-'

  # Whitespace (other than newlines) is ignored between tokens.
  t_ignore = ' \t'

  # A line ending '\n', we use this to increment the line number
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # Constant values.  These are plain string rules; ply applies string
  # rules in order of decreasing regex length, so FLOAT is tried
  # before INT where both could match.
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|\d+[Ee][+-]?\d+'
  t_HEX = r'0x[a-fA-F0-9]+'
  t_INT = r'-?\d+'
  t_LSHIFT = r'<<'

  # A double-quoted string.  Escaped quotes are not allowed by the
  # IDL, so the regex simply scans for the next '"'.
  def t_STRING(self, t):
    r'"[^"]*"'
    # Strip the surrounding quotes from the token value.
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment: /* xxx */ or //
  # The pattern is equivalent to the original
  # (/\*(?s).*?\*/)|((?m)//.*?$) but avoids mid-pattern inline flags,
  # which modern 're' rejects.
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    self.AddLines(t.value.count('\n'))
    return t

  # A symbol or keyword.  Identifiers must start with a letter;
  # leading underscores are not allowed.
  def t_KEYWORD_SYMBOL(self, t):
    r'[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols
    t.type = self.keywords.get(t.value, 'SYMBOL')
    return t

  def t_ANY_error(self, t):
    """Report an unrecognized character at the current position."""
    line = self.lexobj.lineno
    pos = self.lexobj.lexpos - self.index[line]
    file = self.lexobj.filename
    out = self.ErrorMessage(file, line, pos, "Unrecognized input")
    sys.stderr.write(out + '\n')

  def AddLines(self, count):
    # Keep the lexer's line count and the line->offset index in sync.
    self.lexobj.lineno += count
    for i in range(count):
      # NOTE: when count > 1 every added line records the same lexpos;
      # that is sufficient for line-granularity error reporting.
      self.index.append(self.lexobj.lexpos)

  def FileLineMsg(self, file, line, msg):
    # Lines are tracked zero-based internally, so display line + 1.
    if file: return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  def SourceLine(self, file, line, pos):
    # Show the offending source line with a caret under column 'pos'.
    caret = '\t^'.expandtabs(pos)
    return "%s\n%s" % (self.lines[line], caret)

  def ErrorMessage(self, file, line, pos, msg):
    # Combine the file/line header with the annotated source line.
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  def SetData(self, filename, data):
    # Attach the source text to the lexer and reset position tracking.
    self.lexobj.filename = filename
    self.lexobj.lineno = 0
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)

  def __init__(self):
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
+ |
+ |
#
# FilesToTokens
#
# From a set of source file names, generate a list of tokens.
#
def FilesToTokens(filenames, verbose=False):
  """Lex each file in |filenames| and return all token values.

  Args:
    filenames: iterable of paths to IDL source files.
    verbose: when True, print a progress line per file loaded.

  Returns:
    A flat list of token values from all files, in order.
  """
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    # Read eagerly and close the handle before lexing.
    with open(filename) as src:
      data = src.read()
    lexer.SetData(filename, data)
    if verbose:
      # Report the file just loaded (not the whole filename list).
      print('\tLoaded %s...' % filename)
    while True:
      t = lexer.lexobj.token()
      if t is None: break
      outlist.append(t.value)
  return outlist
+ |
#
# TextToTokens
#
# From a block of text, generate a list of tokens
#
def TextToTokens(source):
  """Tokenize the string |source| and return its token values as a list."""
  lexer = IDLLexer()
  lexer.SetData('AUTO', source)
  results = []
  token = lexer.lexobj.token()
  while token is not None:
    results.append(token.value)
    token = lexer.lexobj.token()
  return results
+ |
+ |
#
# Test
#
# From a set of filenames, generate a token list, which is then converted
# to a text block by joining with a single space.  Then re-tokenize the new
# text block to verify it generates the same set.
#
def Test(tokens, output=False, verbose=False):
  """Round-trip check: joining tokens and re-lexing must reproduce them.

  Args:
    tokens: list of token values (as produced by FilesToTokens).
    output: when True, write both token streams to disk for inspection.
    verbose: when True, print a progress message.

  Returns:
    0 on success, -1 if the re-lexed stream differs.
  """
  if verbose:
    print("Testing lexer")
  src1 = ' '.join(tokens)
  src2 = ' '.join(TextToTokens(src1))

  if output:
    # Dump both streams so a failure can be diffed by hand.
    with open('original.txt', 'w') as f:
      f.write(src1)
    with open('tokized.txt', 'w') as f:
      f.write(src2)

  if src1 == src2:
    print("Pass")
    return 0

  print("Failed")
  return -1
+ |
+ |
def Main(args):
  """Command-line driver: lex the given IDL files, optionally self-test.

  Args:
    args: argv-style list of options and IDL filenames.

  Returns:
    0 on success, 1 for bad options, -1 on a lexing or test failure.
  """
  usage = 'Usage: idl_lexer.py --test --output --verbose [<src.idl> ...]'
  try:
    # All three options are plain flags.  The original list used
    # 'output=' (which demands an argument) and omitted 'verbose',
    # so the documented usage line could not actually be parsed.
    long_opts = ['output', 'test', 'verbose']
    opts, filenames = getopt.getopt(args, '', long_opts)

  except getopt.error as e:
    sys.stderr.write('Illegal option: %s\n%s\n' % (str(e), usage))
    return 1

  output = False
  test = False
  verbose = False

  for opt, val in opts:
    if opt == '--output':
      output = True

    if opt == '--test':
      test = True

    if opt == '--verbose':
      verbose = True

  try:
    tokens = FilesToTokens(filenames, verbose)

    if output:
      print(' '.join(tokens))
    if test:
      return Test(tokens, output=output, verbose=verbose)
    return 0

  except lex.LexError as le:
    sys.stderr.write('%s\n' % str(le))
    return -1
+ |
+ |
# When invoked directly, pass the command-line arguments (minus the
# program name) to Main and exit with its status code.
if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))
+ |