OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
2 # | |
3 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
4 # Use of this source code is governed by a BSD-style license that can be | |
5 # found in the LICENSE file. | |
6 | |
7 """ Lexer for PPAPI IDL """ | |
8 | |
9 | |
10 import getopt | |
11 import os.path | |
12 import re | |
13 import sys | |
14 | |
15 # | |
16 # Try to load the ply module, if not, then assume it is in the third_party | |
17 # directory, relative to ppapi | |
18 # | |
19 try: | |
20 from ply import lex | |
21 from ply import yacc | |
22 except: | |
Nick Bray
2011/03/15 21:44:06
Don't bother? Just modify the path, then import?
noelallen1
2011/03/17 01:20:02
I try instead of forcing it, we do know now if it'
Nick Bray
2011/03/21 20:25:29
You're trading one ugly for another. I personally
| |
23 module_path, module_name = os.path.split(__file__) | |
24 third_party = os.path.join(module_path, '..', '..', 'third_party') | |
25 sys.path.append(third_party) | |
26 from ply import lex | |
27 from ply import yacc | |
Nick Bray
2011/03/15 21:44:06
Unused?
noelallen1
2011/03/17 01:20:02
Done.
| |
28 | |
29 | |
30 # | |
31 # IDL Lexer | |
32 # | |
33 # There are only | |
34 # | |
35 | |
class IDLLexer(object):
  """Lexer for the PPAPI IDL language, built on top of ply.lex.

  ply.lex constructs the lexer from the class attributes below:
    tokens   - declares every token type this lexer may emit
    literals - single characters handed back to the parser as themselves
    t_ignore - characters silently skipped between tokens
    t_<NAME> - a regex string, or a method whose docstring is the regex,
               matching token <NAME>; methods may also transform the token
  """

  # Every token type a rule may assign must be declared here, including
  # the keyword types produced by t_KEYWORD_SYMBOL below.  ('READONLY'
  # was previously mapped in |keywords| but missing from this list.)
  tokens = [
    # Symbol and keywords types
    'COMMENT',
    'DESCRIBE',
    'ENUM',
    'SYMBOL',
    'INTERFACE',
    'READONLY',
    'STRUCT',
    'TYPEDEF',

    # Data types
    'FLOAT',
    'INT',
    'HEX',
    'STRING',

    # Operators
    'LSHIFT'
  ]

  # Map of keyword strings to their token types.  Any identifier not
  # found here is emitted as a generic SYMBOL (see t_KEYWORD_SYMBOL).
  keywords = {
    'describe' : 'DESCRIBE',
    'enum' : 'ENUM',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',
  }

  # Single characters returned to the parser as literal tokens.
  literals = '"*.(){}[],;:=+-'
  # Whitespace (other than newlines, handled below) is skipped entirely.
  t_ignore = ' \t'

  # A line ending '\n', we use this to increment the line number
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # Constant values
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|\d+[Ee][+-]?\d+'
  t_HEX = r'0x[a-fA-F0-9]+'
  t_INT = r'-?\d+'
  t_LSHIFT = r'<<'

  # A double-quoted string.  Escaped quotes are deliberately not
  # supported by the IDL, so no unescaping is performed.
  def t_STRING(self, t):
    r'"[^"]*"'
    # Strip the surrounding quotes, then count embedded newlines so line
    # tracking stays accurate for multi-line strings.
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment: /* xxx */ or //
  # The pattern avoids global inline flags like (?s)/(?m): ply joins all
  # token regexes into one master pattern, so a global flag here would
  # silently change the meaning of '.' and '$' in every other rule (and
  # mid-pattern global flags are an error on modern Python).
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    self.AddLines(t.value.count('\n'))
    return t

  # A symbol or keyword.  Leading underscores are not allowed.
  def t_KEYWORD_SYMBOL(self, t):
    r'[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols
    t.type = self.keywords.get(t.value, 'SYMBOL')
    return t

  # Called by ply for any input no rule above matches; reports the file,
  # line and column of the unrecognized character on stderr.
  def t_ANY_error(self, t):
    line = self.lexobj.lineno
    pos = self.lexobj.lexpos - self.index[line]
    file = self.lexobj.filename
    out = self.ErrorMessage(file, line, pos, "Unrecognized input")
    sys.stderr.write(out + '\n')

  # Advance the line counter and record the offset at which each new line
  # begins, so errors can report a column.  When count > 1 (a single match
  # spanning several lines) every new line is recorded at the current
  # lexpos, which approximates the intermediate newline positions.
  def AddLines(self, count):
    self.lexobj.lineno += count
    for i in range(count):
      self.index.append(self.lexobj.lexpos)

  # Format "<file>(<line>) : <msg>" with a 1-based line number, or a
  # <BuiltIn> marker when no file name is available.
  def FileLineMsg(self, file, line, msg):
    if file: return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  # Return the source line followed by a caret marking column |pos|.
  def SourceLine(self, file, line, pos):
    caret = '\t^'.expandtabs(pos)
    return "%s\n%s" % (self.lines[line], caret)

  # Build a complete error message: a location header plus the offending
  # source line with a caret under the bad column.
  def ErrorMessage(self, file, line, pos, msg):
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  # Attach a named input buffer to the lexer and reset the line-tracking
  # state used for error reporting.
  def SetData(self, filename, data):
    self.lexobj.filename = filename
    self.lexobj.lineno = 0
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)

  def __init__(self):
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
134 | |
135 | |
136 # | |
137 # FilesToTokens | |
138 # | |
139 # From a set of source file names, generate a list of tokens. | |
140 # | |
def FilesToTokens(filenames, verbose=False):
  """Tokenize each file named in |filenames|.

  Returns a single flat list of the token values from all files, in order.
  Raises IOError if a file cannot be read, and lets lex errors propagate.
  """
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    # Close the handle promptly instead of relying on garbage collection.
    srcfile = open(filename)
    try:
      data = srcfile.read()
    finally:
      srcfile.close()
    lexer.SetData(filename, data)
    # BUG FIX: the original printed |filenames| (the entire list) once per
    # file; report the individual file actually being loaded.
    if verbose:
      print('\tLoaded %s...' % filename)
    # lexobj.token() returns None once the input is exhausted.
    for tok in iter(lexer.lexobj.token, None):
      outlist.append(tok.value)
  return outlist
153 | |
154 # | |
155 # TextToTokens | |
156 # | |
157 # From a block of text, generate a list of tokens | |
158 # | |
def TextToTokens(source):
  """Tokenize the text block |source|, returning the list of token values."""
  lexer = IDLLexer()
  values = []
  lexer.SetData('AUTO', source)
  token = lexer.lexobj.token()
  while token is not None:
    values.append(token.value)
    token = lexer.lexobj.token()
  return values
168 | |
169 | |
170 # | |
171 # Test | |
172 # | |
173 # From a set of filenames, generate a token list, which is then converted | |
174 # to a text block by joining with a single space. Then re-tokenize the new | |
175 # text block to verify it generates the same set. | |
176 # | |
def Test(tokens, output=False, verbose=False):
  """Verify that tokenizing is stable.

  Joins |tokens| into one text block with single spaces, re-tokenizes that
  block, and checks that the same sequence comes back.  Returns 0 on
  success and -1 on mismatch.  When |output| is set, both joined forms are
  also written to disk for inspection.
  """
  if verbose:
    print("Testing lexer")
  src1 = ' '.join(tokens)
  src2 = ' '.join(TextToTokens(src1))

  if output:
    # NOTE(review): 'tokized.txt' looks like a typo of 'tokenized.txt',
    # but the name is kept as-is so any tooling reading it still works.
    with open('original.txt', 'w') as f:
      f.write(src1)
    with open('tokized.txt', 'w') as f:
      f.write(src2)

  if src1 == src2:
    print("Pass")
    return 0

  print("Failed")
  return -1
193 | |
194 | |
def Main(args):
  """Command-line entry point.

  Flags: --output echoes (and, with --test, writes) the token stream,
  --test runs the round-trip self check, --verbose prints progress.
  Remaining arguments are IDL file names.  Returns 0 on success,
  1 for bad options, and -1 on lexer failure.
  """
  usage = 'Usage: idl_lexer.py --test --output --verbose [<src.idl> ...]'
  try:
    # BUG FIX: accept every flag the usage string advertises.  The old
    # list was ['output=', 'test'], which rejected --verbose outright and
    # required an argument for --output even though it is a plain flag.
    long_opts = ['output', 'test', 'verbose']
    opts, filenames = getopt.getopt(args, '', long_opts)

  except getopt.error as e:
    sys.stderr.write('Illegal option: %s\n' % str(e))
    sys.stderr.write(usage + '\n')
    return 1

  output = False
  test = False
  verbose = False

  for opt, val in opts:
    if opt == '--output':
      output = True

    if opt == '--test':
      test = True

    if opt == '--verbose':
      verbose = True

  try:
    tokens = FilesToTokens(filenames, verbose)

    if output:
      print(' '.join(tokens))
    if test:
      return Test(tokens, output=output, verbose=verbose)
    return 0

  except lex.LexError as le:
    sys.stderr.write('%s\n' % str(le))
    return -1
230 | |
231 | |
# Script entry point: propagate Main's return code as the process exit
# status so callers (e.g. build scripts) can detect failure.
if __name__ == '__main__':
  retcode = Main(sys.argv[1:])
  sys.exit(retcode)
234 | |
OLD | NEW |