#!/usr/bin/python
#
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

""" Lexer for PPAPI IDL """


import getopt
import os.path
import re
import sys

#
# Try to load the ply module; if that fails, assume it is in the third_party
# directory, relative to ppapi.
#
try:
  from ply import lex
except:
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(module_path, '..', '..', 'third_party')
  sys.path.append(third_party)
  from ply import lex

#
# IDL Lexer
#
class IDLLexer(object):
  # 'tokens' is a value required by lex which specifies the complete list
  # of valid token types.
  tokens = [
    # Symbol and keyword types
    'COMMENT',
    'DESCRIBE',
    'ENUM',
    'SYMBOL',
    'INTERFACE',
    'READONLY',
    'STRUCT',
    'TYPEDEF',

    # Data types
    'FLOAT',
    'INT',
    'HEX',
    'STRING',

    # Operators
    'LSHIFT'
  ]

  # 'keywords' is a map of string to token type.  All SYMBOL tokens are
  # matched against keywords, to determine if the token is actually a keyword.
  keywords = {
    'describe' : 'DESCRIBE',
    'enum' : 'ENUM',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',
Nick Bray
2011/03/21 20:25:29
I believe the style guide says:
'typedef': 'TYPEDEF',
  }

  # 'literals' is a value expected by lex which specifies a list of valid
  # literal tokens, meaning the token type and token value are identical.
  literals = '"*.(){}[],;:=+-'

  # Token definitions
  #
  # Lex assumes any value or function in the form of 't_<TYPE>' represents a
  # regular expression where a match will emit a token of type <TYPE>.  In the
  # case of a function, the function is called when a match is made.

  # 't_ignore' is a special match of items to ignore
  t_ignore = ' \t'

  # Constant values
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|-?\d+[Ee][+-]?\d+'
  t_HEX = r'0x[a-fA-F0-9]+'
  t_INT = r'-?\d+'
  t_LSHIFT = r'<<'

  # A line ending '\n'; we use it to increment the line number.
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # We do not process escapes in the IDL strings.  Strings are exclusively
  # used for attributes, and not used as typical 'C' constants.
  def t_STRING(self, t):
    r'"[^"]*"'
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment:  /* xxx */ or //
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    self.AddLines(t.value.count('\n'))

    # C++ comments should keep the newline
    if t.value[:2] == '//': t.value += '\n'
    return t

  # A symbol or keyword.
  def t_KEYWORD_SYMBOL(self, t):
    r'[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols
    t.type = self.keywords.get(t.value, 'SYMBOL')
    return t

  def t_ANY_error(self, t):
    line = self.lexobj.lineno
    pos = self.lexobj.lexpos - self.index[line]
    file = self.lexobj.filename
    out = self.ErrorMessage(file, line, pos, "Unrecognized input")
    sys.stderr.write(out + '\n')

  def AddLines(self, count):
    # Set the lexer position for the beginning of the next line.  In the case
    # of multiple lines, tokens cannot exist on any of the lines except the
    # last one, so the recorded values for previous lines are unused.  We
    # still fill the array, however, to make sure the line count is correct.
    self.lexobj.lineno += count
    for i in range(count):
      self.index.append(self.lexobj.lexpos)

  def FileLineMsg(self, file, line, msg):
    if file: return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  def SourceLine(self, file, line, pos):
    caret = '\t^'.expandtabs(pos)
    return "%s\n%s" % (self.lines[line], caret)

  def ErrorMessage(self, file, line, pos, msg):
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  def SetData(self, filename, data):
    self.lexobj.filename = filename
    self.lexobj.lineno = 0
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)

  def __init__(self):
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)

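To make the token types above concrete, here is a minimal sketch of driving the lexer class directly on a made-up IDL fragment. It is not part of the file under review, and it assumes the file is importable as idl_lexer (the module name implied by the usage string in Main below):

import sys

from idl_lexer import IDLLexer

lexer = IDLLexer()
lexer.SetData('example.idl', 'interface Foo { /* id */ int32_t bar; };')
while 1:
  tok = lexer.lexobj.token()   # ply returns None once the input is exhausted
  if tok is None: break
  sys.stdout.write('%s %s\n' % (tok.type, tok.value))

This prints each token's type and value: INTERFACE and SYMBOL tokens for 'interface', 'Foo', 'int32_t' and 'bar', a COMMENT token, and the literal tokens for the punctuation.
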
#
# FilesToTokens
#
# From a set of source file names, generate a list of tokens.
#
def FilesToTokens(filenames, verbose=False):
Nick Bray
2011/03/21 20:25:29
Implement this in terms of TextToTokens
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    data = open(filename).read()
    lexer.SetData(filename, data)
    if verbose: sys.stdout.write(' Loaded %s...\n' % filename)
    while 1:
      t = lexer.lexobj.token()
      if t is None: break
      outlist.append(t)
  return outlist

#
# TextToTokens
#
# From a block of text, generate a list of tokens.
#
def TextToTokens(source):
  lexer = IDLLexer()
  outlist = []
  lexer.SetData('AUTO', source)
  while 1:
    t = lexer.lexobj.token()
    if t is None: break
    outlist.append(t.value)
  return outlist

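One possible reading of Nick Bray's suggestion above (sharing the token loop between FilesToTokens and TextToTokens) is sketched below. The helper name TextToTokenObjects is hypothetical and not part of the file under review:

def TextToTokenObjects(source, filename='AUTO'):
  # Shared tokenizing loop: returns the full LexToken objects.
  lexer = IDLLexer()
  lexer.SetData(filename, source)
  outlist = []
  while 1:
    t = lexer.lexobj.token()
    if t is None: break
    outlist.append(t)
  return outlist

def FilesToTokens(filenames, verbose=False):
  outlist = []
  for filename in filenames:
    data = open(filename).read()
    if verbose: sys.stdout.write(' Loaded %s...\n' % filename)
    outlist.extend(TextToTokenObjects(data, filename))
  return outlist

def TextToTokens(source):
  return [t.value for t in TextToTokenObjects(source)]

As written in the reviewed file, TextToTokens returns token values while FilesToTokens returns token objects, so one cannot call the other directly; a shared helper like this is one way to remove the duplication.
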

#
# TestSame
#
# From a set of token values, generate a new source text by joining with a
# single space.  The new source is then tokenized and compared against the
# old set.
#
def TestSame(values, output=False, verbose=False):
  src1 = ' '.join(values)
  src2 = ' '.join(TextToTokens(src1))

  if output:
    sys.stdout.write('Generating original.txt and tokenized.txt\n')
    open('original.txt', 'w').write(src1)
    open('tokenized.txt', 'w').write(src2)

  if src1 == src2:
    sys.stdout.write('Same: Pass\n')
    return 0

  sys.stdout.write('Same: Failed\n')
  return -1


#
# TestExpect
#
# From a set of token pairs, verify that the type of the second token in each
# pair matches the value of the first, so that an input of:
#   INT 123 FLOAT 1.1
# generates a passing test: the first token is the SYMBOL 'INT', the second
# is the INT 123, the third is the SYMBOL 'FLOAT', the fourth is the
# FLOAT 1.1, etc...
def TestExpect(tokens):
  count = len(tokens)
  index = 0
  errors = 0
  while index < count:
    type = tokens[index].value
    token = tokens[index + 1]
    index += 2

    if type != token.type:
      sys.stderr.write('Mismatch: Expected %s, but got %s = %s.\n' %
                       (type, token.type, token.value))
      errors += 1

  if not errors:
    sys.stdout.write('Expect: Pass\n')
    return 0

  sys.stdout.write('Expect: Failed\n')
  return -1


def Main(args):
  try:
    long_opts = ['output', 'verbose', 'test_expect', 'test_same']
    usage = 'Usage: idl_lexer.py %s [<src.idl> ...]' % ' '.join(
        ['--%s' % opt for opt in long_opts])

    opts, filenames = getopt.getopt(args, '', long_opts)
  except getopt.error, e:
    sys.stderr.write('Illegal option: %s\n%s\n' % (str(e), usage))
    return 1

  output = False
  test_same = False
  test_expect = False
  verbose = False

  for opt, val in opts:
    if opt == '--output':
      output = True

    if opt == '--test_expect':
      test_expect = True

    if opt == '--test_same':
      test_same = True

    if opt == '--verbose':
      verbose = True

  try:
    tokens = FilesToTokens(filenames, verbose)
    values = [tok.value for tok in tokens]
    if output: sys.stdout.write(' <> '.join(values) + '\n')
    if test_same:
      if TestSame(values, output = output, verbose = verbose):
        return -1

    if test_expect:
      if TestExpect(tokens):
        return -1
    return 0

  except lex.LexError as le:
    sys.stderr.write('%s\n' % str(le))
    return -1


if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))

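Typical invocations, based on the usage string and option handling in Main above (the .idl file name is illustrative only):

  python idl_lexer.py --output --verbose some_file.idl
  python idl_lexer.py --test_same some_file.idl

The first prints the token values joined by ' <> '; the second re-tokenizes the joined values and reports 'Same: Pass' or 'Same: Failed'.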