Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/python | |
| 2 # | |
| 3 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 4 # Use of this source code is governed by a BSD-style license that can be | |
| 5 # found in the LICENSE file. | |
| 6 | |
| 7 """ Lexer for PPAPI IDL """ | |
| 8 | |
| 9 | |
| 10 import getopt | |
| 11 import os.path | |
| 12 import re | |
| 13 import sys | |
| 14 | |
#
# Try to load the ply module, if not, then assume it is in the third_party
# directory, relative to ppapi
#
try:
  from ply import lex
  from ply import yacc
except ImportError:
  # 'ply' is not on sys.path; add Chromium's third_party directory (two
  # levels up from this file) and retry.  Catching only ImportError (not a
  # bare 'except:') so that a genuine error inside ply still surfaces.
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(module_path, '..', '..', 'third_party')
  sys.path.append(third_party)
  from ply import lex
  from ply import yacc
|
Nick Bray
2011/03/15 21:44:06
Unused?
noelallen1
2011/03/17 01:20:02
Done.
| |
| 28 | |
| 29 | |
#
# IDL Lexer
#
# There are only a small number of token and keyword types; everything
# else the lexer matches is treated as a generic SYMBOL.
#
class IDLLexer(object):
  """Lexer for the PPAPI IDL language, built on PLY's lex module.

  PLY constructs the lexer from this class's attributes:
    * 'tokens'     - list of token type names the lexer may produce.
    * 't_<NAME>'   - string attributes are simple regex rules; methods are
                     regex rules (the docstring is the pattern) that can
                     also adjust the token or lexer state.
    * 't_ignore'   - characters skipped between tokens.
    * 'literals'   - single characters returned as themselves.
  """

  # Token type names (required by PLY; yacc also validates against this).
  tokens = [
    # Symbol and keywords types
    'COMMENT',
    'DESCRIBE',
    'ENUM',
    'INTERFACE',
    # BUG FIX: 'READONLY' was produced by the keywords table below but was
    # missing from this declaration list.
    'READONLY',
    'STRUCT',
    'SYMBOL',
    'TYPEDEF',

    # Data types
    'FLOAT',
    'INT',
    'HEX',
    'STRING',

    # Operators
    'LSHIFT'
  ]

  # Keyword text -> token type.  Anything not listed here that matches the
  # symbol rule becomes a generic SYMBOL token.
  keywords = {
    'describe' : 'DESCRIBE',
    'enum' : 'ENUM',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',
  }

  # Single characters PLY returns as literal tokens.
  literals = '"*.(){}[],;:=+-'

  # Whitespace (other than newlines) is ignored between tokens.
  t_ignore = ' \t'

  # A line ending '\n', we use this to increment the line number
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # Constant values
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|\d+[Ee][+-]?\d+'
  t_HEX = r'0x[a-fA-F0-9]+'
  t_INT = r'-?\d+'
  t_LSHIFT = r'<<'

  def t_STRING(self, t):
    r'"[^"]*"'
    # Escaped quotes are not allowed in this IDL, so the value is simply
    # the text between the surrounding quote characters.
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment: /* xxx */ or //
  def t_COMMENT(self, t):
    r'(/\*(?s).*?\*/)|((?m)//.*?$)'
    self.AddLines(t.value.count('\n'))
    return t

  # A symbol or keyword.  Leading underscores are deliberately not allowed.
  # (Renamed from the original 't_KEYWORD_SYBOL' typo; the rule name is
  # internal to PLY, so the rename does not change the token stream.)
  def t_KEYWORD_SYMBOL(self, t):
    r'[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols
    t.type = self.keywords.get(t.value, 'SYMBOL')
    return t

  def t_ANY_error(self, t):
    """Report unrecognized input at the current source position."""
    line = self.lexobj.lineno
    pos = self.lexobj.lexpos - self.index[line]
    fname = self.lexobj.filename
    out = self.ErrorMessage(fname, line, pos, "Unrecognized input")
    sys.stderr.write(out + '\n')

  def AddLines(self, count):
    """Advance the line counter by 'count' newlines, recording offsets.

    NOTE(review): when count > 1 each skipped line is recorded with the
    same lexpos, so column positions are exact only for the final line.
    """
    self.lexobj.lineno += count
    for i in range(count):
      self.index.append(self.lexobj.lexpos)

  def FileLineMsg(self, file, line, msg):
    """Format 'msg' with file name and 1-based line number, if known."""
    if file: return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  def SourceLine(self, file, line, pos):
    """Return the source line with a caret underneath column 'pos'."""
    caret = '\t^'.expandtabs(pos)
    return "%s\n%s" % (self.lines[line], caret)

  def ErrorMessage(self, file, line, pos, msg):
    """Build a multi-line error message pointing at the offending spot."""
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  def SetData(self, filename, data):
    """Load 'data' (the contents of 'filename') into the lexer."""
    self.lexobj.filename = filename
    self.lexobj.lineno = 0
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)

  def __init__(self):
    # Build the PLY lexer from this object's t_* rules.
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
| 134 | |
| 135 | |
#
# FilesToTokens
#
# From a set of source file names, generate a list of tokens.
#
def FilesToTokens(filenames, verbose=False):
  """Tokenize each file in 'filenames' and return all token values.

  Args:
    filenames: iterable of paths to IDL source files.
    verbose: if True, print a note as each file is loaded.
  Returns:
    A flat list of token values from all files, in order.
  """
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    # Read and close the file promptly rather than leaking the handle.
    srcfile = open(filename)
    try:
      data = srcfile.read()
    finally:
      srcfile.close()
    lexer.SetData(filename, data)
    if verbose:
      # BUG FIX: the original formatted the whole 'filenames' list here
      # instead of the file just loaded.
      sys.stdout.write('\tLoaded %s...\n' % filename)
    while 1:
      t = lexer.lexobj.token()
      if t is None: break
      outlist.append(t.value)
  return outlist
| 153 | |
#
# TextToTokens
#
# From a block of text, generate a list of tokens
#
def TextToTokens(source):
  """Tokenize 'source' and return the list of token values."""
  lexer = IDLLexer()
  lexer.SetData('AUTO', source)
  # lexobj.token() yields None at end-of-input, which iter() uses as
  # its sentinel to stop.
  return [tok.value for tok in iter(lexer.lexobj.token, None)]
| 168 | |
| 169 | |
#
# Test
#
# From a set of filenames, generate a token list, which is then converted
# to a text block by joining with a single space.  Then re-tokenize the new
# text block to verify it generates the same set.
#
def Test(tokens, output=False, verbose=False):
  """Round-trip 'tokens' through the lexer and compare the streams.

  Args:
    tokens: list of token values to verify.
    output: if True, also write both token streams to files on disk.
    verbose: if True, announce that the test is running.
  Returns:
    0 on success, -1 on mismatch.
  """
  if verbose:
    sys.stdout.write('Testing lexer\n')
  src1 = ' '.join(tokens)
  src2 = ' '.join(TextToTokens(src1))

  if output:
    # Close the output handles explicitly instead of relying on GC.
    out1 = open('original.txt', 'w')
    out1.write(src1)
    out1.close()
    out2 = open('tokized.txt', 'w')
    out2.write(src2)
    out2.close()

  if src1 == src2:
    sys.stdout.write('Pass\n')
    return 0

  sys.stdout.write('Failed\n')
  return -1
| 193 | |
| 194 | |
def Main(args):
  """Command-line driver: tokenize the given IDL files.

  Flags: --output prints the token stream (and, with --test, saves both
  streams to disk), --test round-trips the tokens through the lexer,
  --verbose adds progress messages.

  Args:
    args: command-line arguments (excluding the program name).
  Returns:
    Process exit code: 0 on success, 1 on bad options, -1 on lex error.
  """
  usage = 'Usage: idl_lexer.py --test --output --verbose [<src.idl> ...]'
  try:
    # BUG FIX: the original list was ['output=', 'test'] -- '--verbose'
    # (documented in usage and handled below) was rejected outright, and
    # 'output=' demanded an argument that the flag handling never used.
    long_opts = ['output', 'test', 'verbose']
    opts, filenames = getopt.getopt(args, '', long_opts)

  except getopt.error as e:
    sys.stderr.write('Illegal option: %s\n' % str(e))
    sys.stderr.write(usage + '\n')
    return 1

  output = False
  test = False
  verbose = False

  for opt, val in opts:
    if opt == '--output':
      output = True

    if opt == '--test':
      test = True

    if opt == '--verbose':
      verbose = True

  try:
    tokens = FilesToTokens(filenames, verbose)

    if output:
      sys.stdout.write(' '.join(tokens) + '\n')
    if test:
      return Test(tokens, output=output, verbose=verbose)
    return 0

  except lex.LexError as le:
    sys.stderr.write('%s\n' % str(le))
    return -1
| 230 | |
| 231 | |
# Script entry point: run Main over the command-line arguments and exit
# with its return code.
if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))
| 234 | |
| OLD | NEW |