mojo/public/bindings/pylib/parse/mojo_lexer.py - Issue 226263002: Mojo: Move mojo/public/bindings to mojo/public/tools/bindings.

Side by Side Diff: mojo/public/bindings/pylib/parse/mojo_lexer.py

Issue 226263002: Mojo: Move mojo/public/bindings to mojo/public/tools/bindings. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: rebased Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 # Copyright 2014 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.

4

5 import re

6 import sys

7 import os.path

8

9 # Try to load the ply module, if not, then assume it is in the third_party

10 # directory.

11 try:

12 # Disable lint check which fails to find the ply module.

13 # pylint: disable=F0401

14 from ply.lex import TOKEN

15 except ImportError:

16 module_path, module_name = os.path.split(__file__)

17 third_party = os.path.join(module_path, os.pardir, os.pardir, os.pardir,

18 os.pardir, os.pardir, 'third_party')

19 sys.path.append(third_party)

20 # pylint: disable=F0401

21 from ply.lex import TOKEN

22

23

class LexError(Exception):
  """Error raised when the lexer meets input it cannot tokenize.

  Attributes:
    filename: Name of the file that was being lexed.
    lineno: Line number at which the problem was detected.
    msg: Human-readable description of the problem.
  """

  def __init__(self, filename, lineno, msg):
    """Records where the error happened and what went wrong.

    Args:
      filename: Name of the file that was being lexed.
      lineno: Line number (an integer; it is formatted with %d).
      msg: Description of the problem.
    """
    # Forward the arguments to Exception so that .args is populated. Without
    # this, copy/pickle try to rebuild the exception by calling LexError()
    # with no arguments, which fails because all three are required.
    super(LexError, self).__init__(filename, lineno, msg)
    self.filename = filename
    self.lineno = lineno
    self.msg = msg

  def __str__(self):
    """Returns the error formatted as "<filename>:<lineno>: Error: <msg>"."""
    return "%s:%d: Error: %s" % (self.filename, self.lineno, self.msg)

  def __repr__(self):
    """Returns the same text as str(self)."""
    return str(self)

35

36

class Lexer(object):
  """PLY token rules for the Mojo IDL.

  The class only declares the rule set: the `tokens` tuple, the `t_*` regex
  rules and the `t_error` handler. Building the actual lexer from it is done
  elsewhere (presumably via ply.lex.lex(object=Lexer(filename)) -- confirm
  against the caller).

  NOTE: PLY reads the docstring of a `t_*` function as that rule's regular
  expression, so rule functions are documented with comments, never with
  docstrings.
  """

  def __init__(self, filename):
    """Stores the file name that is reported in LexError messages.

    Args:
      filename: Name of the file being lexed.
    """
    self.filename = filename

  ######################--   PRIVATE   --######################

  ##
  ## Internal auxiliary methods
  ##
  def _error(self, msg, token):
    """Raises a LexError for |token|.

    Args:
      msg: Description of the problem.
      token: The offending token; only its `lineno` attribute is read.

    Raises:
      LexError: Always.
    """
    raise LexError(self.filename, token.lineno, msg)

  ##
  ## Reserved keywords
  ##
  keywords = (
    'HANDLE',
    'DATA_PIPE_CONSUMER',
    'DATA_PIPE_PRODUCER',
    'MESSAGE_PIPE',
    'SHARED_BUFFER',

    'IMPORT',
    'MODULE',
    'STRUCT',
    'INTERFACE',
    'ENUM',
  )

  # Maps the lower-case spelling found in source text (e.g. 'struct') to the
  # token type (e.g. 'STRUCT'). Consulted by t_NAME.
  keyword_map = {}
  for keyword in keywords:
    keyword_map[keyword.lower()] = keyword

  ##
  ## All the tokens recognized by the lexer
  ##
  tokens = keywords + (
    # Identifiers
    'NAME',

    # Constants
    'ORDINAL',
    'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
    'FLOAT_CONST',
    'CHAR_CONST',

    # String literals
    'STRING_LITERAL',

    # Operators
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',

    # Assignment
    'EQUALS',

    # Request / response
    'RESPONSE',

    # Delimiters
    'LPAREN', 'RPAREN',         # ( )
    'LBRACKET', 'RBRACKET',     # [ ]
    'LBRACE', 'RBRACE',         # { }
    'LANGLE', 'RANGLE',         # < >
    'SEMI',                     # ;
    'COMMA', 'DOT'              # , .
  )

  ##
  ## Regexes for use in tokens
  ##
  ## NOTE: alternation must be a bare '|'. In Python's `re`, '\|' matches a
  ## literal pipe character, which silently breaks every pattern below.
  ##

  # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
  identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'

  hex_prefix = '0[xX]'
  hex_digits = '[0-9a-fA-F]+'

  # integer constants (K&R2: A.2.5.1)
  integer_suffix_opt = \
      r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
  decimal_constant = \
      '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
  octal_constant = '0[0-7]*'+integer_suffix_opt
  hex_constant = hex_prefix+hex_digits+integer_suffix_opt

  # An octal-looking constant that contains the digit 8 or 9.
  bad_octal_constant = '0[0-7]*[89]'

  # character constants (K&R2: A.2.5.2)
  # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
  # directives with Windows paths as filenames (..\..\dir\file)
  # For the same reason, decimal_escape allows all digit sequences. We want to
  # parse all correct code, even if it means to sometimes parse incorrect
  # code.
  #
  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
  decimal_escape = r"""(\d+)"""
  hex_escape = r"""(x[0-9a-fA-F]+)"""
  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""

  escape_sequence = \
      r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
  cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
  char_const = "'"+cconst_char+"'"
  # An opening quote followed by any number of valid chars and then a newline
  # or end of input, i.e. the closing quote never arrives.
  unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
  bad_char_const = \
      r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+ \
      bad_escape+r"""[^'\n]*')"""

  # string literals (K&R2: A.2.6)
  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
  string_literal = '"'+string_char+'*"'
  # A quoted string with one invalid escape somewhere among valid chars.
  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

  # floating constants (K&R2: A.2.5.3)
  exponent_part = r"""([eE][-+]?[0-9]+)"""
  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
  floating_constant = \
      '(((('+fractional_constant+')'+ \
      exponent_part+'?)|([0-9]+'+exponent_part+')))'

  # Ordinals
  ordinal = r'@[0-9]+'
  missing_ordinal_value = r'@'
  # Don't allow ordinal values in octal (even invalid octal, like 09) or
  # hexadecimal.
  octal_or_hex_ordinal_disallowed = r'@((0[0-9]+)|('+hex_prefix+hex_digits+'))'

  ##
  ## Rules for the normal state
  ##
  t_ignore = ' \t\r'

  # Newlines: produce no token, but keep the line counter in step so that
  # error messages carry the right line number.
  def t_NEWLINE(self, t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")

  # Operators
  t_PLUS              = r'\+'
  t_MINUS             = r'-'
  t_TIMES             = r'\*'
  t_DIVIDE            = r'/'
  t_MOD               = r'%'
  t_OR                = r'\|'    # escaped on purpose: a literal '|' character
  t_AND               = r'&'
  t_NOT               = r'~'
  t_XOR               = r'\^'
  t_LSHIFT            = r'<<'
  t_RSHIFT            = r'>>'

  # =
  t_EQUALS            = r'='

  # =>
  t_RESPONSE          = r'=>'

  # Delimiters
  t_LPAREN            = r'\('
  t_RPAREN            = r'\)'
  t_LBRACKET          = r'\['
  t_RBRACKET          = r'\]'
  t_LBRACE            = r'\{'
  t_RBRACE            = r'\}'
  t_LANGLE            = r'<'
  t_RANGLE            = r'>'
  t_COMMA             = r','
  t_DOT               = r'\.'
  t_SEMI              = r';'

  t_STRING_LITERAL    = string_literal

  # The following floating and integer constants are defined as
  # functions to impose a strict order (otherwise, decimal
  # is placed before the others because its regex is longer,
  # and this is bad)
  #
  @TOKEN(floating_constant)
  def t_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_constant)
  def t_INT_CONST_HEX(self, t):
    return t

  # Must come before the octal rule, which would otherwise accept the valid
  # leading digits of something like 09.
  @TOKEN(bad_octal_constant)
  def t_BAD_CONST_OCT(self, t):
    msg = "Invalid octal constant"
    self._error(msg, t)

  @TOKEN(octal_constant)
  def t_INT_CONST_OCT(self, t):
    return t

  @TOKEN(decimal_constant)
  def t_INT_CONST_DEC(self, t):
    return t

  # Must come before bad_char_const, to prevent it from
  # catching valid char constants as invalid
  #
  @TOKEN(char_const)
  def t_CHAR_CONST(self, t):
    return t

  @TOKEN(unmatched_quote)
  def t_UNMATCHED_QUOTE(self, t):
    msg = "Unmatched '"
    self._error(msg, t)

  @TOKEN(bad_char_const)
  def t_BAD_CHAR_CONST(self, t):
    msg = "Invalid char constant %s" % t.value
    self._error(msg, t)

  # unmatched string literals are caught by the preprocessor

  @TOKEN(bad_string_literal)
  def t_BAD_STRING_LITERAL(self, t):
    msg = "String contains invalid escape code"
    self._error(msg, t)

  # Handle ordinal-related tokens in the right order: the "disallowed" form
  # first, then a well-formed ordinal, then a bare '@'.
  @TOKEN(octal_or_hex_ordinal_disallowed)
  def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
    msg = "Octal and hexadecimal ordinal values not allowed"
    self._error(msg, t)

  @TOKEN(ordinal)
  def t_ORDINAL(self, t):
    return t

  @TOKEN(missing_ordinal_value)
  def t_BAD_ORDINAL(self, t):
    msg = "Missing ordinal value"
    self._error(msg, t)

  # Identifiers; an identifier that is spelled like a reserved word is
  # retyped to the matching keyword token.
  @TOKEN(identifier)
  def t_NAME(self, t):
    t.type = self.keyword_map.get(t.value, "NAME")
    return t

  # Ignore C and C++ style comments. No token is returned, but the newlines
  # inside the comment are still counted: t_NEWLINE never sees them, and
  # skipping them would make every later error report the wrong line.
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    t.lexer.lineno += t.value.count("\n")

  # Called when no rule matches at the current position; reports the first
  # unmatched character.
  def t_error(self, t):
    msg = 'Illegal character %s' % repr(t.value[0])
    self._error(msg, t)

OLD	NEW

« no previous file with comments | « mojo/public/bindings/pylib/parse/__init__.py ('k') | mojo/public/bindings/pylib/parse/mojo_lexer_unittest.py » ('j') | no next file with comments »