tools/idl_parser/idl_lexer.py - Issue 1713673002: Remove //tools/idl_parser.

Side by Side Diff: tools/idl_parser/idl_lexer.py

Issue 1713673002: Remove //tools/idl_parser. (Closed) Base URL: https://github.com/domokit/mojo.git@master

Patch Set: rebased Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 #!/usr/bin/env python

2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.

5

6 """ Lexer for PPAPI IDL

7

8 The lexer uses the PLY library to build a tokenizer which understands both

9 WebIDL and Pepper tokens.

10

11 WebIDL, and WebIDL regular expressions can be found at:

12 http://www.w3.org/TR/2012/CR-WebIDL-20120419/

13 PLY can be found at:

14 http://www.dabeaz.com/ply/

15 """

16

17 import os.path

18 import sys

19

20 #

21 # Try to load the ply module, if not, then assume it is in the third_party

22 # directory.

23 #

24 try:

25 # Disable lint check which fails to find the ply module.

26 # pylint: disable=F0401

27 from ply import lex

28 except ImportError:

29 module_path, module_name = os.path.split(__file__)

30 third_party = os.path.join(module_path, '..', '..', 'third_party')

31 sys.path.append(third_party)

32 # pylint: disable=F0401

33 from ply import lex

34

35 #

36 # IDL Lexer

37 #

class IDLLexer(object):
  """Tokenizer for WebIDL and Pepper IDL, built on PLY's lex module.

  The PLY lexer object is created lazily by Lexer(), which hands this
  instance to lex.lex(). PLY then picks up 'literals', 'tokens', 't_ignore'
  and every 't_<TYPE>' member defined below.

  Instance state:
    index: starting lexer position of each line seen so far.
    lines: the current input split on newlines (set by Tokenize).
    last: the most recent token returned by token().
    _lex_errors: number of lexing errors reported so far.
  """

  # 'literals' is a value expected by lex which specifies a list of valid
  # literal tokens, meaning the token type and token value are identical.
  literals = r'"*.(){}[],;:=+-/~|&^?<>'

  # 't_ignore' contains ignored characters (spaces and tabs)
  t_ignore = ' \t'

  # 'tokens' is a value required by lex which specifies the complete list
  # of valid token types.
  tokens = [
    # Data types
    'float',
    'integer',
    'string',

    # Symbol and keywords types
    'COMMENT',
    'identifier',

    # MultiChar operators
    'ELLIPSIS',
  ]

  # 'keywords' is a map of string to token type.  All tokens matching
  # KEYWORD_OR_SYMBOL are matched against keywords dictionary, to determine
  # if the token is actually a keyword.
  keywords = {
    'any' : 'ANY',
    'attribute' : 'ATTRIBUTE',
    'boolean' : 'BOOLEAN',
    'byte' : 'BYTE',
    'ByteString' : 'BYTESTRING',
    'callback' : 'CALLBACK',
    'const' : 'CONST',
    'creator' : 'CREATOR',
    'Date' : 'DATE',
    'deleter' : 'DELETER',
    'dictionary' : 'DICTIONARY',
    'DOMString' : 'DOMSTRING',
    'double' : 'DOUBLE',
    'enum'  : 'ENUM',
    'exception' : 'EXCEPTION',
    'false' : 'FALSE',
    'float' : 'FLOAT',
    'getter': 'GETTER',
    'implements' : 'IMPLEMENTS',
    'Infinity' : 'INFINITY',
    'inherit' : 'INHERIT',
    'interface' : 'INTERFACE',
    'iterable': 'ITERABLE',
    'legacycaller' : 'LEGACYCALLER',
    'legacyiterable' : 'LEGACYITERABLE',
    'long' : 'LONG',
    'maplike': 'MAPLIKE',
    # NOTE(review): WebIDL spells this literal 'NaN'; confirm whether the
    # 'Nan' spelling is intentional before changing it.
    'Nan' : 'NAN',
    'null' : 'NULL',
    'object' : 'OBJECT',
    'octet' : 'OCTET',
    'optional' : 'OPTIONAL',
    'or' : 'OR',
    'partial' : 'PARTIAL',
    'Promise' : 'PROMISE',
    'readonly' : 'READONLY',
    'RegExp' : 'REGEXP',
    'required' : 'REQUIRED',
    'sequence' : 'SEQUENCE',
    'serializer' : 'SERIALIZER',
    'setlike' : 'SETLIKE',
    'setter': 'SETTER',
    'short' : 'SHORT',
    'static' : 'STATIC',
    'stringifier' : 'STRINGIFIER',
    'typedef' : 'TYPEDEF',
    'true' : 'TRUE',
    'unsigned' : 'UNSIGNED',
    'unrestricted' : 'UNRESTRICTED',
    'void' : 'VOID'
  }

  # Token definitions
  #
  # Lex assumes any value or function in the form of 't_<TYPE>' represents a
  # regular expression where a match will emit a token of type <TYPE>.  In the
  # case of a function, the function is called when a match is made.  These
  # definitions come from WebIDL.
  #
  # The regular expression MUST stay the docstring of each 't_' function, so
  # explanatory text for these rules lives in comments instead.  The order of
  # the definitions is kept as-is on purpose (e.g. float precedes integer).
  #
  # These need to be methods for lexer construction, despite not using self.
  # pylint: disable=R0201
  def t_ELLIPSIS(self, t):
    r'\.\.\.'
    return t

  # Regex needs to be in the docstring
  # pylint: disable=C0301
  # Either digits around a decimal point with an optional exponent, or plain
  # digits followed by a mandatory exponent.
  def t_float(self, t):
    r'-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)'
    return t

  # Decimal, hexadecimal (0x / 0X prefix) or octal (leading 0) integers.
  def t_integer(self, t):
    r'-?([1-9][0-9]*|0[Xx][0-9A-Fa-f]+|0[0-7]*)'
    return t


  # A line ending '\n', we use this to increment the line number.  No token
  # is returned, so line endings never reach the caller.
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # We do not process escapes in the IDL strings.  Strings are exclusively
  # used for attributes and enums, and not used as typical 'C' constants.
  def t_string(self, t):
    r'"[^"]*"'
    # Drop the surrounding quotes; the value is the raw text between them.
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment:  /* xxx */ or //
  # Consecutive '//' lines are folded into a single COMMENT token.
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    self.AddLines(t.value.count('\n'))
    return t

  # A symbol or keyword.
  def t_KEYWORD_OR_SYMBOL(self, t):
    r'_?[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols
    t.type = self.keywords.get(t.value, 'identifier')

    # We strip leading underscores so that you can specify symbols with the
    # same value as a keywords (E.g. a dictionary named 'interface').
    if t.value[0] == '_':
      t.value = t.value[1:]
    return t

  # Error rule: writes a message with a source-line marker to stderr and
  # bumps the error counter.  It does not advance the lexer position itself.
  def t_ANY_error(self, t):
    msg = 'Unrecognized input'
    line = self.Lexer().lineno

    # If that line has not been accounted for, then we must have hit
    # EoF, so compute the beginning of the line that caused the problem.
    if line >= len(self.index):
      # Find the offset in the line of the first word causing the issue
      word = t.value.split()[0]
      offs = self.lines[line - 1].find(word)
      # Add the computed line's starting position
      self.index.append(self.Lexer().lexpos - offs)
      msg = 'Unexpected EoF reached after'

    pos = self.Lexer().lexpos - self.index[line]
    out = self.ErrorMessage(line, pos, msg)
    sys.stderr.write(out + '\n')
    self._lex_errors += 1


  def AddLines(self, count):
    """Advance the line counter by |count| and record line start positions.

    Set the lexer position for the beginning of the next line.  In the case
    of multiple lines, tokens can not exist on any of the lines except the
    last one, so the recorded value for previous lines are unused.  We still
    fill the array however, to make sure the line count is correct.
    """
    self.Lexer().lineno += count
    for _ in range(count):
      self.index.append(self.Lexer().lexpos)

  def FileLineMsg(self, line, msg):
    """Return |msg| prefixed with the file name and |line| + 1.

    When the lexer has no file name the prefix is '<BuiltIn>' instead.
    """
    filename = self.Lexer().filename
    if filename:
      return "%s(%d) : %s" % (filename, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  def SourceLine(self, line, pos):
    """Return the source text of |line| with a caret under column |pos|."""
    caret = ' ' * pos + '^'
    # We decrement the line number since the array is 0 based while the
    # line numbers are 1 based.
    return "%s\n%s" % (self.lines[line - 1], caret)

  def ErrorMessage(self, line, pos, msg):
    """Return the file/line message followed by the marked source line."""
    return "\n%s\n%s" % (
        self.FileLineMsg(line, msg),
        self.SourceLine(line, pos))

  #
  # Tokenizer
  #
  # The token function returns the next token provided by IDLLexer for matching
  # against the leaf patterns.
  #
  def token(self):
    """Return the next token, remembering it in self.last when not empty."""
    tok = self.Lexer().token()
    if tok:
      self.last = tok
    return tok


  def GetTokens(self):
    """Return a list of all remaining tokens from the current input."""
    outlist = []
    while True:
      t = self.Lexer().token()
      if not t:
        break
      outlist.append(t)
    return outlist

  def Tokenize(self, data, filename='__no_file__'):
    """Load |data| as the new lexer input, reported under |filename|.

    The line counter restarts at 1 and the per-line position index is reset,
    so that positions recorded for an earlier input are not used when
    computing error columns for this one.
    """
    lexer = self.Lexer()
    lexer.lineno = 1
    lexer.filename = filename
    lexer.input(data)
    self.lines = data.split('\n')
    # Same starting value as in __init__.
    self.index = [0]

  def KnownTokens(self):
    """Return this instance's list of token types."""
    return self.tokens

  def Lexer(self):
    """Return the PLY lexer for this object, building it on first use."""
    if self._lexobj is None:
      self._lexobj = lex.lex(object=self, lextab=None, optimize=0)
    return self._lexobj

  def _AddToken(self, token):
    """Append |token| to the token list; raise RuntimeError on duplicates."""
    if token in self.tokens:
      raise RuntimeError('Same token: ' + token)
    self.tokens.append(token)

  def _AddTokens(self, tokens):
    """Add every entry of |tokens| through _AddToken."""
    for token in tokens:
      self._AddToken(token)

  def _AddKeywords(self, keywords):
    """Register each keyword, using its upper-cased form as the token type."""
    for key in keywords:
      value = key.upper()
      self._AddToken(value)
      self.keywords[key] = value

  def _DelKeywords(self, keywords):
    """Remove each keyword and its upper-cased token type."""
    for key in keywords:
      self.tokens.remove(key.upper())
      del self.keywords[key]

  def __init__(self):
    """Set up per-instance token and keyword tables from the class defaults."""
    self.index = [0]
    self._lex_errors = 0
    # 'linex' is not referenced anywhere else in this class; it is kept only
    # so the set of instance attributes stays unchanged.
    self.linex = []
    self.filename = None
    # Instance-level copies shadow the class-level tables, so adding or
    # deleting keywords on one instance leaves the class defaults untouched.
    self.keywords = {}
    self.tokens = []
    self._AddTokens(IDLLexer.tokens)
    self._AddKeywords(IDLLexer.keywords)
    self._lexobj = None
    self.last = None
    self.lines = None

291

# When executed directly as a script, just construct an IDLLexer instance.
if __name__ == '__main__':
  lexer_object = IDLLexer()

OLD	NEW

« no previous file with comments | « tools/idl_parser/__init__.py ('k') | tools/idl_parser/idl_lexer_test.py » ('j') | no next file with comments »