#!/usr/bin/python
#
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

""" Lexer for PPAPI IDL """


import getopt
import os.path
import re
import sys

#
# Try to load the ply module; if that fails, assume it is in the third_party
# directory, relative to ppapi.
#
try:
  from ply import lex
except:
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(module_path, '..', '..', 'third_party')
  sys.path.append(third_party)
  from ply import lex

#
# IDL Lexer
#
class IDLLexer(object):
  # 'tokens' is a value required by lex which specifies the complete list
  # of valid token types.
  tokens = [
    # Symbol and keyword types
    'COMMENT',
    'DESCRIBE',
    'ENUM',
    'SYMBOL',
    'INTERFACE',
    'READONLY',
    'STRUCT',
    'TYPEDEF',

    # Data types
    'FLOAT',
    'INT',
    'HEX',
    'STRING',

    # Operators
    'LSHIFT'
  ]

  # 'keywords' is a map of string to token type.  All SYMBOL tokens are
  # matched against keywords, to determine if the token is actually a keyword.
  keywords = {
    'describe' : 'DESCRIBE',
    'enum' : 'ENUM',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',
Nick Bray
2011/03/21 20:25:29
I believe the style guide says:
'typedef': 'TYPEDEF',
  }

  # 'literals' is a value expected by lex which specifies a list of valid
  # literal tokens, meaning the token type and token value are identical.
  literals = '"*.(){}[],;:=+-'

  # Token definitions
  #
  # Lex assumes any value or function in the form of 't_<TYPE>' represents a
  # regular expression where a match will emit a token of type <TYPE>.  In the
  # case of a function, the function is called when a match is made.

  # 't_ignore' is a special match of items to ignore
  t_ignore = ' \t'

  # Constant values
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|-?\d+[Ee][+-]?\d+'
  t_HEX = r'0x[a-fA-F0-9]+'
  t_INT = r'-?\d+'
  t_LSHIFT = r'<<'

  # A line ending '\n'; we use it to increment the line number.
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # We do not process escapes in the IDL strings.  Strings are exclusively
  # used for attributes, and not used as typical 'C' constants.
  def t_STRING(self, t):
    r'"[^"]*"'
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment:  /* xxx */ or //
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    self.AddLines(t.value.count('\n'))

    # C++ comments should keep the newline
    if t.value[:2] == '//': t.value += '\n'
    return t

  # A symbol or keyword.
  def t_KEYWORD_SYMBOL(self, t):
    r'[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols
    t.type = self.keywords.get(t.value, 'SYMBOL')
    return t

  def t_ANY_error(self, t):
    line = self.lexobj.lineno
    pos = self.lexobj.lexpos - self.index[line]
    file = self.lexobj.filename
    out = self.ErrorMessage(file, line, pos, "Unrecognized input")
    sys.stderr.write(out + '\n')

  def AddLines(self, count):
    # Set the lexer position for the beginning of the next line.  In the case
    # of multiple lines, tokens cannot exist on any of the lines except the
    # last one, so the recorded values for previous lines are unused.  We
    # still fill the array, however, to make sure the line count is correct.
    self.lexobj.lineno += count
    for i in range(count):
      self.index.append(self.lexobj.lexpos)

  def FileLineMsg(self, file, line, msg):
    if file: return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  def SourceLine(self, file, line, pos):
    caret = '\t^'.expandtabs(pos)
    return "%s\n%s" % (self.lines[line], caret)

  def ErrorMessage(self, file, line, pos, msg):
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  def SetData(self, filename, data):
    self.lexobj.filename = filename
    self.lexobj.lineno = 0
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)

  def __init__(self):
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)

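To make the token types above concrete, here is a minimal sketch of driving the lexer class directly on a made-up IDL fragment. It is not part of the file under review, and it assumes the file is importable as idl_lexer (the module name implied by the usage string in Main below):

import sys

from idl_lexer import IDLLexer

lexer = IDLLexer()
lexer.SetData('example.idl', 'interface Foo { /* id */ int32_t bar; };')
while 1:
  tok = lexer.lexobj.token()   # ply returns None once the input is exhausted
  if tok is None: break
  sys.stdout.write('%s %s\n' % (tok.type, tok.value))

This prints each token's type and value: INTERFACE and SYMBOL tokens for 'interface', 'Foo', 'int32_t' and 'bar', a COMMENT token, and the literal tokens for the punctuation.
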
#
# FilesToTokens
#
# From a set of source file names, generate a list of tokens.
#
def FilesToTokens(filenames, verbose=False):
Nick Bray
2011/03/21 20:25:29
Implement this in terms of TextToTokens
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    data = open(filename).read()
    lexer.SetData(filename, data)
    if verbose: sys.stdout.write(' Loaded %s...\n' % filename)
    while 1:
      t = lexer.lexobj.token()
      if t is None: break
      outlist.append(t)
  return outlist

#
# TextToTokens
#
# From a block of text, generate a list of tokens.
#
def TextToTokens(source):
  lexer = IDLLexer()
  outlist = []
  lexer.SetData('AUTO', source)
  while 1:
    t = lexer.lexobj.token()
    if t is None: break
    outlist.append(t.value)
  return outlist

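One possible reading of Nick Bray's suggestion above (sharing the token loop between FilesToTokens and TextToTokens) is sketched below. The helper name TextToTokenObjects is hypothetical and not part of the file under review:

def TextToTokenObjects(source, filename='AUTO'):
  # Shared tokenizing loop: returns the full LexToken objects.
  lexer = IDLLexer()
  lexer.SetData(filename, source)
  outlist = []
  while 1:
    t = lexer.lexobj.token()
    if t is None: break
    outlist.append(t)
  return outlist

def FilesToTokens(filenames, verbose=False):
  outlist = []
  for filename in filenames:
    data = open(filename).read()
    if verbose: sys.stdout.write(' Loaded %s...\n' % filename)
    outlist.extend(TextToTokenObjects(data, filename))
  return outlist

def TextToTokens(source):
  return [t.value for t in TextToTokenObjects(source)]

As written in the reviewed file, TextToTokens returns token values while FilesToTokens returns token objects, so one cannot call the other directly; a shared helper like this is one way to remove the duplication.
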

#
# TestSame
#
# From a set of token values, generate a new source text by joining with a
# single space.  The new source is then tokenized and compared against the
# old set.
#
def TestSame(values, output=False, verbose=False):
  src1 = ' '.join(values)
  src2 = ' '.join(TextToTokens(src1))

  if output:
    sys.stdout.write('Generating original.txt and tokenized.txt\n')
    open('original.txt', 'w').write(src1)
    open('tokenized.txt', 'w').write(src2)

  if src1 == src2:
    sys.stdout.write('Same: Pass\n')
    return 0

  sys.stdout.write('Same: Failed\n')
  return -1


#
# TestExpect
#
# From a set of token pairs, verify that the type of the second token in each
# pair matches the value of the first, so that an input of:
#   INT 123 FLOAT 1.1
# generates a passing test: the first token is the SYMBOL 'INT', the second
# is the INT 123, the third is the SYMBOL 'FLOAT', the fourth is the
# FLOAT 1.1, etc...
def TestExpect(tokens):
  count = len(tokens)
  index = 0
  errors = 0
  while index < count:
    type = tokens[index].value
    token = tokens[index + 1]
    index += 2

    if type != token.type:
      sys.stderr.write('Mismatch: Expected %s, but got %s = %s.\n' %
                       (type, token.type, token.value))
      errors += 1

  if not errors:
    sys.stdout.write('Expect: Pass\n')
    return 0

  sys.stdout.write('Expect: Failed\n')
  return -1


def Main(args):
  try:
    long_opts = ['output', 'verbose', 'test_expect', 'test_same']
    usage = 'Usage: idl_lexer.py %s [<src.idl> ...]' % ' '.join(
        ['--%s' % opt for opt in long_opts])

    opts, filenames = getopt.getopt(args, '', long_opts)
  except getopt.error, e:
    sys.stderr.write('Illegal option: %s\n%s\n' % (str(e), usage))
    return 1

  output = False
  test_same = False
  test_expect = False
  verbose = False

  for opt, val in opts:
    if opt == '--output':
      output = True

    if opt == '--test_expect':
      test_expect = True

    if opt == '--test_same':
      test_same = True

    if opt == '--verbose':
      verbose = True

  try:
    tokens = FilesToTokens(filenames, verbose)
    values = [tok.value for tok in tokens]
    if output: sys.stdout.write(' <> '.join(values) + '\n')
    if test_same:
      if TestSame(values, output = output, verbose = verbose):
        return -1

    if test_expect:
      if TestExpect(tokens):
        return -1
    return 0

  except lex.LexError as le:
    sys.stderr.write('%s\n' % str(le))
    return -1


if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))

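Typical invocations, based on the usage string and option handling in Main above (the .idl file name is illustrative only):

  python idl_lexer.py --output --verbose some_file.idl
  python idl_lexer.py --test_same some_file.idl

The first prints the token values joined by ' <> '; the second re-tokenizes the joined values and reports 'Same: Pass' or 'Same: Failed'.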