Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(194)

Unified Diff: ppapi/generators/idl_lexer.py

Issue 6697028: Add IDL Lexer (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: ppapi/generators/idl_lexer.py
diff --git a/ppapi/generators/idl_lexer.py b/ppapi/generators/idl_lexer.py
new file mode 100644
index 0000000000000000000000000000000000000000..2509b8c4ff33a28cadfcb70259c764eb029969ef
--- /dev/null
+++ b/ppapi/generators/idl_lexer.py
@@ -0,0 +1,234 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+""" Lexer for PPAPI IDL """
+
+
+import getopt
+import os.path
+import re
+import sys
+
+#
+# Try to load the ply module, if not, then assume it is in the third_party
+# directory, relative to ppapi
+#
+try:
+ from ply import lex
+ from ply import yacc
+except:
Nick Bray 2011/03/15 21:44:06 Don't bother? Just modify the path, then import?
noelallen1 2011/03/17 01:20:02 I try instead of forcing it, we do know now if it'
Nick Bray 2011/03/21 20:25:29 You're trading one ugly for another. I personally
+ module_path, module_name = os.path.split(__file__)
+ third_party = os.path.join(module_path, '..', '..', 'third_party')
+ sys.path.append(third_party)
+ from ply import lex
+ from ply import yacc
Nick Bray 2011/03/15 21:44:06 Unused?
noelallen1 2011/03/17 01:20:02 Done.
+
+
+#
+# IDL Lexer
+#
+# Tokenizes PPAPI IDL text with PLY (ply.lex). The token rules are the t_*
+# members of the IDLLexer class.
+#
+
class IDLLexer(object):
  """PLY-based lexer for PPAPI IDL source text.

  An instance hands itself to ply.lex (see __init__), which discovers the
  lexing rules by reflection on this class:
    * 'tokens' lists every token type a rule may return.
    * 'literals' are single characters returned as their own token type.
    * 't_ignore' lists characters skipped between tokens.
    * A 't_<NAME>' string is the regular expression for token NAME.
    * A 't_<NAME>' method carries its regular expression as its docstring,
      which is why the rule methods below are documented with comments and
      must never be given an ordinary docstring.

  Line numbers are zero-based internally: SetData resets the counter to 0
  and FileLineMsg adds one for display.  self.lines[n] is the text of line n
  and self.index[n] is the input offset recorded when line n began.
  """

  # Every token type the lexer can return, apart from 'literals'.  Each value
  # in 'keywords' has to appear here; when not running in optimized mode PLY
  # raises LexError if a rule returns a type that is not listed.
  tokens = [
    # Symbol and keywords types
    'COMMENT',
    'DESCRIBE',
    'ENUM',
    'SYMBOL',
    'INTERFACE',
    'READONLY',
    'STRUCT',
    'TYPEDEF',

    # Data types
    'FLOAT',
    'INT',
    'HEX',
    'STRING',

    # Operators
    'LSHIFT'
  ]

  # Reserved words and the token type returned for each of them.
  keywords = {
    'describe' : 'DESCRIBE',
    'enum' : 'ENUM',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',
  }

  # Single characters that are returned as themselves.
  literals = '"*.(){}[],;:=+-'

  # Characters skipped between tokens.
  t_ignore = ' \t'

  # One or more newlines.  No token is returned; the match is only used to
  # advance the line counter.
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # Constant values
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|\d+[Ee][+-]?\d+'
  t_HEX = r'0x[a-fA-F0-9]+'
  t_INT = r'-?\d+'
  t_LSHIFT = r'<<'

  # A double-quoted string.  The IDL does not allow escaped quotes, so the
  # string simply runs to the next '"' and no unescaping is performed.  The
  # token value is the text between the quotes, which may span lines.
  def t_STRING(self, t):
    r'"[^"]*"'
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment: /* xxx */ or //
  # The pattern avoids inline flags such as (?s): PLY merges every rule into
  # one master expression, so a global flag would silently apply to all rules
  # (and recent Python versions reject global flags that are not at the very
  # start of a pattern).
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    self.AddLines(t.value.count('\n'))
    return t

  # A symbol or keyword.  Names must start with a letter; leading underscores
  # are not allowed.
  def t_KEYWORD_SYMBOL(self, t):
    r'[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols
    t.type = self.keywords.get(t.value, 'SYMBOL')
    return t

  # Called by PLY when no rule matches the input at the current position.
  # Writes a file/line message plus the offending source line to stderr.
  # NOTE(review): the bad input is not skipped here, so PLY is expected to
  # raise LexError afterwards (Main catches it) -- confirm against the PLY
  # version in third_party.
  def t_ANY_error(self, t):
    line = self.lexobj.lineno
    pos = self.lexobj.lexpos - self.index[line]
    filename = self.lexobj.filename
    out = self.ErrorMessage(filename, line, pos, "Unrecognized input")
    sys.stderr.write(out + '\n')

  def AddLines(self, count):
    """Advance the line counter by count and record the new line offsets.

    Every added line is recorded with the lexer's current position.  When
    count > 1 (blank lines, multi-line strings or comments) only the last
    entry is the true start of its line; the earlier entries belong to lines
    that lie inside the token just consumed.
    """
    self.lexobj.lineno += count
    self.index.extend([self.lexobj.lexpos] * count)

  def FileLineMsg(self, file, line, msg):
    """Return msg prefixed with 'file(line) : ', using a one-based line.

    When file is empty or None the prefix is '<BuiltIn> : ' instead.
    """
    if file:
      return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  def SourceLine(self, file, line, pos):
    """Return source line 'line' followed by a caret line marking column pos.

    The file argument is not used.
    """
    # expandtabs(pos) turns the leading tab into pos spaces.
    caret = '\t^'.expandtabs(pos)
    return "%s\n%s" % (self.lines[line], caret)

  def ErrorMessage(self, file, line, pos, msg):
    """Return a blank line, the file/line message and the marked source."""
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  def SetData(self, filename, data):
    """Load new input text and reset all line bookkeeping.

    Args:
      filename: name reported in error messages.
      data: the complete source text to tokenize.
    """
    self.lexobj.filename = filename
    self.lexobj.lineno = 0
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)

  def __init__(self):
    """Build the underlying PLY lexer from the rules defined on this class."""
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
+
+
+#
+# FilesToTokens
+#
+# From a set of source file names, generate a list of tokens.
+#
def FilesToTokens(filenames, verbose=False):
  """Tokenize every named file and return all token values in order.

  Args:
    filenames: iterable of paths to IDL source files.
    verbose: when true, print a line for each file as it is loaded.

  Returns:
    A single list holding the value (not the token object) of every token,
    with the files' tokens concatenated in the order given.
  """
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    # Close the file as soon as it has been read instead of relying on
    # garbage collection.
    with open(filename) as source_file:
      data = source_file.read()
    lexer.SetData(filename, data)
    if verbose:
      # Report the file just loaded, not the whole list of names.
      print('\tLoaded %s...' % filename)
    while 1:
      t = lexer.lexobj.token()
      if t is None:
        break
      outlist.append(t.value)
  return outlist
+
+#
+# TextToTokens
+#
+# From a block of text, generate a list of tokens
+#
def TextToTokens(source):
  """Tokenize a block of text and return the list of token values.

  The text is lexed under the placeholder file name 'AUTO'.
  """
  lexer = IDLLexer()
  lexer.SetData('AUTO', source)
  # token() returns None once the input is exhausted, which ends the iterator.
  return [tok.value for tok in iter(lexer.lexobj.token, None)]
+
+
+#
+# Test
+#
+# From a set of filenames, generate a token list, which is then converted
+# to a text block by joining with a single space. Then re-tokenize the new
+# text block to verify it generates the same set.
+#
def Test(tokens, output=False, verbose=False):
  """Round-trip check: re-lexing the joined tokens must give the same text.

  The token values are joined with single spaces, the result is tokenized
  again, and the two space-joined strings are compared.

  Args:
    tokens: list of token value strings to verify.
    output: when true, write the first text to 'original.txt' and the
      re-tokenized text to 'tokized.txt' in the current directory.
    verbose: when true, announce that the test is running.

  Returns:
    0 after printing "Pass" when both texts match, otherwise -1 after
    printing "Failed".
  """
  if verbose:
    print("Testing lexer")
  src1 = ' '.join(tokens)
  src2 = ' '.join(TextToTokens(src1))

  if output:
    # Use context managers so both files are flushed and closed before the
    # result is reported.
    with open('original.txt', 'w') as original_file:
      original_file.write(src1)
    with open('tokized.txt', 'w') as tokenized_file:
      tokenized_file.write(src2)

  if src1 == src2:
    print("Pass")
    return 0

  print("Failed")
  return -1
+
+
def Main(args):
  """Command-line entry point: lex the named IDL files.

  Args:
    args: command-line arguments without the program name.  Recognized
      flags are --output (print the tokens, and make the self-test write its
      text files), --test (run the round-trip self-test) and --verbose.
      Remaining arguments are IDL file names.

  Returns:
    The process exit status: 0 on success, 1 for an unrecognized option,
    -1 when lexing fails, or the result of Test when --test is given.
  """
  usage = 'Usage: idl_lexer.py --test --output --verbose [<src.idl> ...]'
  try:
    # All three options are plain flags.  A trailing '=' would make getopt
    # demand a value and swallow the next argument (e.g. a file name).
    long_opts = ['output', 'test', 'verbose']
    opts, filenames = getopt.getopt(args, '', long_opts)
  except getopt.error as e:
    sys.stderr.write('Illegal option: %s\n' % str(e))
    sys.stderr.write(usage + '\n')
    return 1

  flags = set(opt for opt, _ in opts)
  output = '--output' in flags
  test = '--test' in flags
  verbose = '--verbose' in flags

  try:
    tokens = FilesToTokens(filenames, verbose)

    if output:
      print(' '.join(tokens))
    if test:
      return Test(tokens, output=output, verbose=verbose)
    return 0

  except lex.LexError as le:
    sys.stderr.write(str(le) + '\n')
    return -1
+
+
# Script entry point: the process exits with the status returned by Main.
if __name__ == '__main__':
  exit_status = Main(sys.argv[1:])
  sys.exit(exit_status)
+
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698