Chromium Code Reviews

Side by Side Diff: ppapi/generators/idl_lexer.py

Issue 6697028: Add IDL Lexer (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Removed debug print of sys.path Created 9 years, 9 months ago
#!/usr/bin/python
#
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

""" Lexer for PPAPI IDL """


import getopt
import os.path
import re
import sys

#
# Try to load the ply module; if that fails, assume it lives in the
# third_party directory, relative to ppapi.
#
try:
  from ply import lex
except ImportError:
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(module_path, '..', '..', 'third_party')
  sys.path.append(third_party)
  from ply import lex

#
# IDL Lexer
#
class IDLLexer(object):
  # 'tokens' is a value required by lex which specifies the complete list
  # of valid token types.
  tokens = [
    # Symbol and keyword types
    'COMMENT',
    'DESCRIBE',
    'ENUM',
    'READONLY',
    'SYMBOL',
    'INTERFACE',
    'STRUCT',
    'TYPEDEF',

    # Data types
    'FLOAT',
    'INT',
    'HEX',
    'STRING',

    # Operators
    'LSHIFT'
  ]

  # 'keywords' is a map of string to token type. All SYMBOL tokens are
  # matched against keywords, to determine if the token is actually a keyword.
  keywords = {
    'describe' : 'DESCRIBE',
    'enum' : 'ENUM',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',
Nick Bray 2011/03/21 20:25:29 I believe the style guide says: 'typedef': 'TYPEDEF',
  }

  # 'literals' is a value expected by lex which specifies a list of valid
  # literal tokens, meaning the token type and token value are identical.
  literals = '"*.(){}[],;:=+-'

  # Token definitions
  #
  # Lex assumes any value or function in the form of 't_<TYPE>' represents a
  # regular expression where a match will emit a token of type <TYPE>. In the
  # case of a function, the function is called when a match is made.
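  #
  # For illustration only (not part of this patch): a string rule such as
  #   t_ARROW = r'->'
  # would, if 'ARROW' were also added to 'tokens' above, emit an ARROW token
  # for each '->' in the input; a function rule can additionally run code on
  # a match, as t_STRING and t_COMMENT below do.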

  # 't_ignore' is a special match of items to ignore
  t_ignore = ' \t'

  # Constant values
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|-?\d+[Ee][+-]?\d+'
  t_HEX = r'0x[a-fA-F0-9]+'
  t_INT = r'-?\d+'
  t_LSHIFT = r'<<'

  # A line ending '\n'; we use this to increment the line number.
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # We do not process escapes in IDL strings. Strings are used exclusively
  # for attributes, not as typical 'C' constants.
  def t_STRING(self, t):
    r'"[^"]*"'
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t
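
  # For example (illustrative): the attribute text "1.0" is lexed into a
  # STRING token whose value is 1.0, with the surrounding quotes stripped.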

  # A C or C++ style comment: /* xxx */ or //
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    self.AddLines(t.value.count('\n'))

    # C++ comments should keep the newline
    if t.value[:2] == '//': t.value += '\n'
    return t

  # A symbol or keyword.
  def t_KEYWORD_SYMBOL(self, t):
    r'[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols
    t.type = self.keywords.get(t.value, 'SYMBOL')
    return t
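
  # For example (illustrative): the input 'struct' is emitted as a STRUCT
  # token via the keywords map above, while 'Foo1' falls through to SYMBOL.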

  def t_ANY_error(self, t):
    line = self.lexobj.lineno
    pos = self.lexobj.lexpos - self.index[line]
    file = self.lexobj.filename
    out = self.ErrorMessage(file, line, pos, "Unrecognized input")
    sys.stderr.write(out + '\n')

  def AddLines(self, count):
    # Set the lexer position for the beginning of the next line. In the case
    # of multiple lines, tokens cannot exist on any of the lines except the
    # last one, so the recorded values for the previous lines are unused. We
    # still fill the array, however, to make sure the line count is correct.
    self.lexobj.lineno += count
    for i in range(count):
      self.index.append(self.lexobj.lexpos)

  def FileLineMsg(self, file, line, msg):
    if file: return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  def SourceLine(self, file, line, pos):
    caret = '\t^'.expandtabs(pos)
    return "%s\n%s" % (self.lines[line], caret)

  def ErrorMessage(self, file, line, pos, msg):
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  def SetData(self, filename, data):
    self.lexobj.filename = filename
    self.lexobj.lineno = 0
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)

  def __init__(self):
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)

#
# FilesToTokens
#
# From a set of source file names, generate a list of tokens.
#
def FilesToTokens(filenames, verbose=False):
Nick Bray 2011/03/21 20:25:29 Implement this in terms of TextToTokens
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    data = open(filename).read()
    lexer.SetData(filename, data)
    if verbose: sys.stdout.write('  Loaded %s...\n' % filename)
    while True:
      t = lexer.lexobj.token()
      if t is None: break
      outlist.append(t)
  return outlist
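
# One possible shape for the refactor suggested in the review comment above
# (a sketch only; it assumes a shared helper, since TextToTokens returns
# token values while FilesToTokens returns the LexToken objects themselves):
#
#   def _TokenizeData(lexer, filename, data):
#     lexer.SetData(filename, data)
#     outlist = []
#     while True:
#       t = lexer.lexobj.token()
#       if t is None: break
#       outlist.append(t)
#     return outlist
#
# FilesToTokens and TextToTokens would then both delegate to this helper.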

#
# TextToTokens
#
# From a block of text, generate a list of tokens.
#
def TextToTokens(source):
  lexer = IDLLexer()
  outlist = []
  lexer.SetData('AUTO', source)
  while True:
    t = lexer.lexobj.token()
    if t is None: break
    outlist.append(t.value)
  return outlist


#
# TestSame
#
# From a set of token values, generate a new source text by joining with a
# single space. The new source is then tokenized and compared against the
# old set.
#
def TestSame(values, output=False, verbose=False):
  src1 = ' '.join(values)
  src2 = ' '.join(TextToTokens(src1))

  if output:
    sys.stdout.write('Generating original.txt and tokenized.txt\n')
    open('original.txt', 'w').write(src1)
    open('tokenized.txt', 'w').write(src2)

  if src1 == src2:
    sys.stdout.write('Same: Pass\n')
    return 0

  sys.stdout.write('Same: Failed\n')
  return -1


#
# TestExpect
#
# From a set of token pairs, verify that the type field of the second
# matches the value of the first, so that the input:
#   INT 123 FLOAT 1.1
# generates a passing test, where the first token is the SYMBOL INT,
# the second is the INT 123, the third is the SYMBOL FLOAT and the
# fourth is the FLOAT 1.1, etc...
def TestExpect(tokens):
  count = len(tokens)
  index = 0
  errors = 0
  while index < count:
    type = tokens[index].value
    token = tokens[index + 1]
    index += 2

    if type != token.type:
      sys.stderr.write('Mismatch: Expected %s, but got %s = %s.\n' %
                       (type, token.type, token.value))
      errors += 1

  if not errors:
    sys.stdout.write('Expect: Pass\n')
    return 0

  sys.stdout.write('Expect: Failed\n')
  return -1


def Main(args):
  try:
    long_opts = ['output', 'verbose', 'test_expect', 'test_same']
    usage = 'Usage: idl_lexer.py %s [<src.idl> ...]' % ' '.join(
        ['--%s' % opt for opt in long_opts])

    opts, filenames = getopt.getopt(args, '', long_opts)
  except getopt.error as e:
    sys.stderr.write('Illegal option: %s\n%s\n' % (str(e), usage))
    return 1

  output = False
  test_same = False
  test_expect = False
  verbose = False

  for opt, val in opts:
    if opt == '--output':
      output = True

    if opt == '--test_expect':
      test_expect = True

    if opt == '--test_same':
      test_same = True

    if opt == '--verbose':
      verbose = True

  try:
    tokens = FilesToTokens(filenames, verbose)
    values = [tok.value for tok in tokens]
    if output: sys.stdout.write(' <> '.join(values) + '\n')
    if test_same:
      if TestSame(values, output=output, verbose=verbose):
        return -1

    if test_expect:
      if TestExpect(tokens):
        return -1
    return 0

  except lex.LexError as le:
    sys.stderr.write('%s\n' % str(le))
    return -1


if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))
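
A minimal sketch of how the lexer might be exercised from another script, assuming this file is importable as idl_lexer (the sample IDL text below is illustrative, not taken from the patch):

  import idl_lexer

  # TextToTokens returns the token values as a flat list of strings.
  values = idl_lexer.TextToTokens('interface Foo { readonly int32_t bar; };')
  print ' '.join(values)

The same tokenization can also be driven from the command line, e.g. python idl_lexer.py --test_same --verbose <src.idl>, which runs the round-trip self test over a source file.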