Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(408)

Side by Side Diff: ppapi/generators/idl_lexer.py

Issue 6697028: Add IDL Lexer (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/python
2 #
3 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
6
7 """ Lexer for PPAPI IDL """
8
9
10 import getopt
11 import os.path
12 import re
13 import sys
14
15 #
16 # Try to load the ply module, if not, then assume it is in the third_party
17 # directory, relative to ppapi
18 #
19 try:
20 from ply import lex
21 from ply import yacc
22 except:
Nick Bray 2011/03/15 21:44:06 Don't bother? Just modify the path, then import?
noelallen1 2011/03/17 01:20:02 I try instead of forcing it, we do know now if it'
Nick Bray 2011/03/21 20:25:29 You're trading one ugly for another. I personally
23 module_path, module_name = os.path.split(__file__)
24 third_party = os.path.join(module_path, '..', '..', 'third_party')
25 sys.path.append(third_party)
26 from ply import lex
27 from ply import yacc
Nick Bray 2011/03/15 21:44:06 Unused?
noelallen1 2011/03/17 01:20:02 Done.
28
29
30 #
31 # IDL Lexer
32 #
33 # Tokenizes PPAPI IDL source text using the PLY lex module.
34 #
35
class IDLLexer(object):
    """Lexer for PPAPI IDL, driven by PLY's lex module.

    PLY builds the lexer by reflecting on this object (see __init__):
      * 'tokens' lists every token type the lexer may emit.
      * 'literals' lists single characters returned as their own type.
      * 't_ignore' lists characters skipped between tokens.
      * 't_<NAME>' strings are regular expressions for token <NAME>.
      * 't_<NAME>' methods carry their regular expression as the docstring,
        so those methods are documented with '#' comments instead; adding a
        real docstring would replace the pattern.

    Line numbers kept on the underlying lexer object are zero based.
    """

    # Every token type this lexer can emit.  Each value of 'keywords' below
    # must appear here, otherwise PLY rejects the token at lex time.
    tokens = [
        # Symbol and keyword types
        'COMMENT',
        'DESCRIBE',
        'ENUM',
        'SYMBOL',
        'INTERFACE',
        'READONLY',
        'STRUCT',
        'TYPEDEF',

        # Data types
        'FLOAT',
        'INT',
        'HEX',
        'STRING',

        # Operators
        'LSHIFT',
    ]

    # Maps reserved words to their token type.  Anything matched by the
    # keyword/symbol rule that is not listed here becomes a SYMBOL.
    keywords = {
        'describe': 'DESCRIBE',
        'enum': 'ENUM',
        'interface': 'INTERFACE',
        'readonly': 'READONLY',
        'struct': 'STRUCT',
        'typedef': 'TYPEDEF',
    }

    # Single characters passed through as tokens of their own type.
    literals = '"*.(){}[],;:=+-'

    # Characters skipped between tokens.
    t_ignore = ' \t'

    # One or more line endings.  Produces no token (nothing is returned); it
    # only advances the line counter.
    def t_LINE_END(self, t):
        r'\n+'
        self.AddLines(len(t.value))

    # Constant values.  Note that FLOAT and INT accept a leading minus sign.
    t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|\d+[Ee][+-]?\d+'
    t_HEX = r'0x[a-fA-F0-9]+'
    t_INT = r'-?\d+'
    t_LSHIFT = r'<<'

    # A double quoted string.  Escaped quotes are not supported and no escape
    # sequences are decoded; the token value is the raw text between the
    # quotes, which may span several lines.
    def t_STRING(self, t):
        r'"[^"]*"'
        t.value = t.value[1:-1]
        self.AddLines(t.value.count('\n'))
        return t

    # A C or C++ style comment: /* xxx */ or // to the end of the line.
    # The pattern avoids inline global flags such as (?s) or (?m): PLY merges
    # all rules into one expression, so such flags would apply to every rule,
    # and recent Python versions refuse flags that are not at the very start
    # of the expression.
    def t_COMMENT(self, t):
        r'(/\*[\s\S]*?\*/)|(//[^\n]*)'
        self.AddLines(t.value.count('\n'))
        return t

    # A symbol or keyword.  Must start with a letter; leading underscores are
    # not allowed.
    def t_KEYWORD_SYMBOL(self, t):
        r'[A-Za-z][A-Za-z_0-9]*'

        # All non-keywords are assumed to be symbols
        t.type = self.keywords.get(t.value, 'SYMBOL')
        return t

    # Called by PLY when no rule matches the input.  Writes the location and
    # the offending source line to stderr.  The input position is not
    # advanced here, which makes PLY raise LexError once this returns.
    def t_ANY_error(self, t):
        line = self.lexobj.lineno
        pos = self.lexobj.lexpos - self.index[line]
        filename = self.lexobj.filename
        out = self.ErrorMessage(filename, line, pos, "Unrecognized input")
        sys.stderr.write(out + '\n')

    def AddLines(self, count):
        """Advance the current line number by 'count' lines.

        The start offset of every line is computed up front by SetData, so
        only the line counter needs to move here.
        """
        self.lexobj.lineno += count

    def FileLineMsg(self, file, line, msg):
        """Return 'file(line) : msg' using a one based line number.

        When 'file' is empty or None the message is attributed to <BuiltIn>.
        """
        if file:
            return "%s(%d) : %s" % (file, line + 1, msg)
        return "<BuiltIn> : %s" % msg

    def SourceLine(self, file, line, pos):
        """Return source line 'line' followed by a caret under column 'pos'.

        'line' is a zero based index into the data given to SetData.
        """
        # expandtabs(pos) turns the tab into 'pos' spaces (none when pos is 0).
        caret = '\t^'.expandtabs(pos)
        return "%s\n%s" % (self.lines[line], caret)

    def ErrorMessage(self, file, line, pos, msg):
        """Return a multi-line error report: location, source line, caret."""
        return "\n%s\n%s" % (
            self.FileLineMsg(file, line, msg),
            self.SourceLine(file, line, pos))

    def SetData(self, filename, data):
        """Load 'data' into the lexer and reset all position tracking.

        Args:
          filename: name used when reporting errors for this data.
          data: the complete source text to tokenize.
        """
        self.lexobj.filename = filename
        self.lexobj.lineno = 0
        self.lines = data.split('\n')
        # index[n] is the offset of the first character of line n, so an
        # absolute lexer position can be turned into a column.  It is built
        # from the data itself so that it stays correct across tokens that
        # span several lines (comments, strings).
        self.index = [0] + [i + 1 for i, ch in enumerate(data) if ch == '\n']
        self.lexobj.input(data)

    def __init__(self):
        # Build the PLY lexer from the rules declared on this object; no
        # cached lex table is used and optimization is off.
        self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
134
135
136 #
137 # FilesToTokens
138 #
139 # From a set of source file names, generate a list of tokens.
140 #
def FilesToTokens(filenames, verbose=False):
    """Tokenize each named file and return all token values in one list.

    Args:
      filenames: iterable of paths to IDL source files.
      verbose: when true, report each file on stdout as it is loaded.

    Returns:
      A flat list of token values, in the order the files were given.
    """
    lexer = IDLLexer()
    outlist = []
    for filename in filenames:
        # Close the file as soon as it has been read.
        with open(filename) as srcfile:
            data = srcfile.read()
        lexer.SetData(filename, data)
        if verbose:
            # Report the file just loaded, not the whole list of names.
            sys.stdout.write('\tLoaded %s...\n' % filename)
        # token() returns None once the current input is exhausted.
        outlist.extend(t.value for t in iter(lexer.lexobj.token, None))
    return outlist
153
154 #
155 # TextToTokens
156 #
157 # From a block of text, generate a list of tokens
158 #
def TextToTokens(source):
    """Tokenize a block of text and return the list of token values.

    The text is lexed under the file name 'AUTO'.
    """
    lexer = IDLLexer()
    lexer.SetData('AUTO', source)
    # token() yields None when the input is exhausted, which ends the scan.
    return [tok.value for tok in iter(lexer.lexobj.token, None)]
168
169
170 #
171 # Test
172 #
173 # From a list of tokens, build a text block by joining them with a single
174 # space. Then re-tokenize that text block and verify that it produces the
175 # same token list.
176 #
def Test(tokens, output=False, verbose=False):
    """Check that lexing is stable when the tokens are joined and re-lexed.

    Args:
      tokens: list of token value strings.
      output: when true, write both texts to 'original.txt' and
        'tokized.txt' in the current directory.
      verbose: when true, announce the test on stdout.

    Returns:
      0 when the re-lexed text matches the original, -1 otherwise.
    """
    if verbose:
        sys.stdout.write("Testing lexer\n")
    src1 = ' '.join(tokens)
    src2 = ' '.join(TextToTokens(src1))

    if output:
        # Use context managers so both files are flushed and closed.
        with open('original.txt', 'w') as original:
            original.write(src1)
        with open('tokized.txt', 'w') as tokenized:
            tokenized.write(src2)

    if src1 == src2:
        sys.stdout.write("Pass\n")
        return 0

    sys.stdout.write("Failed\n")
    return -1
193
194
def Main(args):
    """Command line entry point.

    Args:
      args: command line arguments, without the program name.

    Returns:
      1 for an illegal option, -1 when lexing raises lex.LexError, otherwise
      0 (or the result of Test when --test is given).
    """
    usage = 'Usage: idl_lexer.py --test --output --verbose [<src.idl> ...]'
    try:
        # All three options are plain flags.  'output' must not be declared
        # as 'output=' or it would consume the following argument, and
        # 'verbose' has to be listed or getopt rejects it.
        long_opts = ['output', 'test', 'verbose']
        opts, filenames = getopt.getopt(args, '', long_opts)

    except getopt.error as e:
        sys.stderr.write('Illegal option: %s\n' % str(e))
        sys.stderr.write(usage + '\n')
        return 1

    given = set(opt for opt, _ in opts)
    output = '--output' in given
    test = '--test' in given
    verbose = '--verbose' in given

    try:
        tokens = FilesToTokens(filenames, verbose)

        if output:
            sys.stdout.write(' '.join(tokens) + '\n')
        if test:
            return Test(tokens, output=output, verbose=verbose)
        return 0

    except lex.LexError as le:
        sys.stderr.write('%s\n' % le)
        return -1
230
231
232 if __name__ == '__main__':
233 sys.exit(Main(sys.argv[1:]))
234
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698