Chromium Code Reviews

```python
#!/usr/bin/python
#
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

""" Lexer for PPAPI IDL """


import getopt
import os.path
import re
import sys

#
# Try to load the ply module; if it is not found, assume it is in the
# third_party directory, relative to ppapi.
#
try:
  from ply import lex
except ImportError:
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(module_path, '..', '..', 'third_party')
  sys.path.append(third_party)
  from ply import lex

#
# IDL Lexer
#
class IDLLexer(object):
  # 'tokens' is a value required by lex which specifies the complete list
  # of valid token types. ('READONLY' is included here so that the keyword
  # map below only produces declared token types.)
  tokens = [
    # Symbol and keyword types
    'COMMENT',
    'DESCRIBE',
    'ENUM',
    'SYMBOL',
    'INTERFACE',
    'READONLY',
    'STRUCT',
    'TYPEDEF',

    # Data types
    'FLOAT',
    'INT',
    'HEX',
    'STRING',

    # Operators
    'LSHIFT'
  ]

  # 'keywords' is a map of string to token type. All SYMBOL tokens are
  # matched against keywords, to determine if the token is actually a keyword.
  keywords = {
    'describe' : 'DESCRIBE',
    'enum' : 'ENUM',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',
```

> **Nick Bray** 2011/03/21 20:25:29:
> I believe the style guide says: `'typedef': 'TYPEDEF',`

```python
  }

  # 'literals' is a value expected by lex which specifies a list of valid
  # literal tokens, meaning the token type and token value are identical.
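  # For example, '(' or ';' is emitted as a token whose type and value are
  # the character itself.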
  literals = '"*.(){}[],;:=+-'

  # Token definitions
  #
  # Lex assumes any value or function in the form of 't_<TYPE>' represents a
  # regular expression where a match will emit a token of type <TYPE>. In the
  # case of a function, the function is called when a match is made.

  # 't_ignore' is a special match of items to ignore
  t_ignore = ' \t'

  # Constant values
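  # Examples: FLOAT matches '1.0', '.5', and '1e-3'; HEX matches '0x1A';
  # INT matches '-42'.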
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|-?\d+[Ee][+-]?\d+'
  t_HEX = r'0x[a-fA-F0-9]+'
  t_INT = r'-?\d+'
  t_LSHIFT = r'<<'

  # A line ending '\n'; we use this to increment the line number.
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # We do not process escapes in IDL strings. Strings are used exclusively
  # for attributes, not as typical 'C' constants.
  def t_STRING(self, t):
    r'"[^"]*"'
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment: /* xxx */ or // xxx
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    self.AddLines(t.value.count('\n'))

    # C++ comments should keep the newline
    if t.value[:2] == '//': t.value += '\n'
    return t

  # A symbol or keyword.
  def t_KEYWORD_SYMBOL(self, t):
    r'[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols.
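    # Examples: 'struct' becomes a STRUCT token, while 'Foo' stays a SYMBOL.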
    t.type = self.keywords.get(t.value, 'SYMBOL')
    return t

  def t_ANY_error(self, t):
    line = self.lexobj.lineno
    pos = self.lexobj.lexpos - self.index[line]
    file = self.lexobj.filename
    out = self.ErrorMessage(file, line, pos, "Unrecognized input")
    sys.stderr.write(out + '\n')

  def AddLines(self, count):
    # Set the lexer position for the beginning of the next line. In the case
    # of multiple lines, tokens can not exist on any of the lines except the
    # last one, so the recorded values for the previous lines are unused. We
    # still fill the array, however, to make sure the line count is correct.
    self.lexobj.lineno += count
    for i in range(count):
      self.index.append(self.lexobj.lexpos)

  def FileLineMsg(self, file, line, msg):
    if file: return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  def SourceLine(self, file, line, pos):
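    # Expanding the tab in '\t^' to 'pos' spaces places the caret directly
    # under column 'pos' of the offending source line.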
    caret = '\t^'.expandtabs(pos)
    return "%s\n%s" % (self.lines[line], caret)

  def ErrorMessage(self, file, line, pos, msg):
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  def SetData(self, filename, data):
    self.lexobj.filename = filename
    self.lexobj.lineno = 0
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)

  def __init__(self):
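    # Build the lexer from the t_* rules on this object; table caching is
    # disabled (lextab=None, optimize=0).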
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)


#
# FilesToTokens
#
# From a set of source file names, generate a list of tokens.
#
def FilesToTokens(filenames, verbose=False):
```

> **Nick Bray** 2011/03/21 20:25:29:
> Implement this in terms of TextToTokens
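One way to read this suggestion (a hypothetical sketch, not code from this CL): hoist the shared token-pulling loop into a helper, since FilesToTokens needs whole token objects while TextToTokens keeps only their values:

```python
# Hypothetical refactor sketch; DataToTokens is an illustrative name that
# does not appear in this CL.
def DataToTokens(filename, data, verbose=False):
  lexer = IDLLexer()
  lexer.SetData(filename, data)
  if verbose: sys.stdout.write(' Loaded %s...\n' % filename)
  outlist = []
  while 1:
    t = lexer.lexobj.token()
    if t is None: break
    outlist.append(t)
  return outlist

def FilesToTokens(filenames, verbose=False):
  outlist = []
  for filename in filenames:
    outlist.extend(DataToTokens(filename, open(filename).read(), verbose))
  return outlist

def TextToTokens(source):
  return [t.value for t in DataToTokens('AUTO', source)]
```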
```python
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    data = open(filename).read()
    lexer.SetData(filename, data)
    if verbose: sys.stdout.write(' Loaded %s...\n' % filename)
    while 1:
      t = lexer.lexobj.token()
      if t is None: break
      outlist.append(t)
  return outlist

#
# TextToTokens
#
# From a block of text, generate a list of tokens.
#
def TextToTokens(source):
  lexer = IDLLexer()
  outlist = []
  lexer.SetData('AUTO', source)
  while 1:
    t = lexer.lexobj.token()
    if t is None: break
    outlist.append(t.value)
  return outlist


#
# TestSame
#
# From a set of token values, generate a new source text by joining with a
# single space. The new source is then tokenized and compared against the
# old set.
#
def TestSame(values, output=False, verbose=False):
  src1 = ' '.join(values)
  src2 = ' '.join(TextToTokens(src1))

  if output:
    sys.stdout.write('Generating original.txt and tokenized.txt\n')
    open('original.txt', 'w').write(src1)
    open('tokenized.txt', 'w').write(src2)

  if src1 == src2:
    sys.stdout.write('Same: Pass\n')
    return 0

  sys.stdout.write('Same: Failed\n')
  return -1


#
# TestExpect
#
# From a list of alternating type/token pairs, verify that the type field of
# each second item matches the value of the first, so that:
#   INT 123 FLOAT 1.1
# generates a passing test, where the first token is the SYMBOL 'INT', the
# second token is the INT 123, the third token is the SYMBOL 'FLOAT', and
# the fourth is the FLOAT 1.1, etc...
def TestExpect(tokens):
  count = len(tokens)
  index = 0
  errors = 0
  while index < count:
    type = tokens[index].value
    token = tokens[index + 1]
    index += 2

    if type != token.type:
      sys.stderr.write('Mismatch: Expected %s, but got %s = %s.\n' %
                       (type, token.type, token.value))
      errors += 1

  if not errors:
    sys.stdout.write('Expect: Pass\n')
    return 0

  sys.stdout.write('Expect: Failed\n')
  return -1


def Main(args):
  try:
    long_opts = ['output', 'verbose', 'test_expect', 'test_same']
    usage = 'Usage: idl_lexer.py %s [<src.idl> ...]' % ' '.join(
        ['--%s' % opt for opt in long_opts])

    opts, filenames = getopt.getopt(args, '', long_opts)
  except getopt.error as e:
    sys.stderr.write('Illegal option: %s\n%s\n' % (str(e), usage))
    return 1

  output = False
  test_same = False
  test_expect = False
  verbose = False

  for opt, val in opts:
    if opt == '--output':
      output = True

    if opt == '--test_expect':
      test_expect = True

    if opt == '--test_same':
      test_same = True

    if opt == '--verbose':
      verbose = True

  try:
    tokens = FilesToTokens(filenames, verbose)
    values = [tok.value for tok in tokens]
    if output: sys.stdout.write(' <> '.join(values) + '\n')
    if test_same:
      if TestSame(values, output = output, verbose = verbose):
        return -1

    if test_expect:
      if TestExpect(tokens):
        return -1
    return 0

  except lex.LexError as le:
    sys.stderr.write('%s\n' % str(le))
    return -1


if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))
```
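A quick way to exercise the lexer (a hypothetical snippet; the IDL fragment below is invented for illustration):

```python
# Hypothetical usage sketch: tokenize a small IDL fragment and inspect the
# recovered token values.
source = 'interface Foo { readonly int32_t bar; };'
print TextToTokens(source)
# -> ['interface', 'Foo', '{', 'readonly', 'int32_t', 'bar', ';', '}', ';']
```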