| OLD | NEW |
| (Empty) |
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import imp | |
| 6 import os.path | |
| 7 import sys | |
| 8 | |
| 9 def _GetDirAbove(dirname): | |
| 10 """Returns the directory "above" this file containing |dirname| (which must | |
| 11 also be "above" this file).""" | |
| 12 path = os.path.abspath(__file__) | |
| 13 while True: | |
| 14 path, tail = os.path.split(path) | |
| 15 assert tail | |
| 16 if tail == dirname: | |
| 17 return path | |
| 18 | |
# Prefer a ply installed on the import path; otherwise fall back to the copy
# bundled under //public/third_party.
try:
  imp.find_module("ply")
except ImportError:
  sys.path.append(os.path.join(_GetDirAbove("public"), "public/third_party"))
from ply.lex import TOKEN
| 24 | |
| 25 from ..error import Error | |
| 26 | |
| 27 | |
class LexError(Error):
  """Error raised for problems encountered while lexing a mojom file."""

  def __init__(self, filename, message, lineno):
    super(LexError, self).__init__(filename, message, lineno=lineno)
| 33 | |
| 34 | |
# We have methods which look like they could be functions:
# pylint: disable=R0201
class Lexer(object):
  """Tokenizer for mojom IDL source, implemented with PLY (Python Lex-Yacc).

  PLY discovers token rules by reflecting over this class: every attribute
  named t_<TOKEN> is a rule — either a plain regex string, or a method whose
  regex comes from the @TOKEN decorator (or its docstring).  Method rules
  fire in definition order, which the constant/ordinal rules below rely on;
  string rules are tried in order of decreasing regex length.  Do not
  reorder or add docstrings to the rule methods.
  """

  def __init__(self, filename):
    # Only used to label LexError diagnostics; the input text itself is fed
    # to the generated lexer separately by the caller.
    self.filename = filename

  ######################-- PRIVATE --######################

  ##
  ## Internal auxiliary methods
  ##
  def _error(self, msg, token):
    """Raises a LexError carrying |msg|, this lexer's filename and the
    offending token's line number."""
    raise LexError(self.filename, msg, token.lineno)

  ##
  ## Reserved keywords
  ##
  # Keyword token names; the concrete source spelling is the lower-cased
  # name (see keyword_map below).
  keywords = (
    'HANDLE',

    'IMPORT',
    'MODULE',
    'STRUCT',
    'UNION',
    'INTERFACE',
    'ENUM',
    'CONST',
    'TRUE',
    'FALSE',
    'DEFAULT',
    'ARRAY',
    'MAP'
  )

  # Maps source spelling -> token name (e.g. "struct" -> 'STRUCT'); used by
  # t_NAME to reclassify identifiers that are really keywords.
  keyword_map = {}
  for keyword in keywords:
    keyword_map[keyword.lower()] = keyword

  ##
  ## All the tokens recognized by the lexer
  ##
  tokens = keywords + (
    # Identifiers
    'NAME',

    # Constants
    'ORDINAL',
    'INT_CONST_DEC', 'INT_CONST_HEX',
    'FLOAT_CONST',

    # String literals
    'STRING_LITERAL',

    # Operators
    'MINUS',
    'PLUS',
    'AMP',
    'QSTN',

    # Assignment
    'EQUALS',

    # Request / response
    'RESPONSE',

    # Delimiters
    'LPAREN', 'RPAREN',          # ( )
    'LBRACKET', 'RBRACKET',      # [ ]
    'LBRACE', 'RBRACE',          # { }
    'LANGLE', 'RANGLE',          # < >
    'SEMI',                      # ;
    'COMMA', 'DOT'               # , .
  )

  ##
  ## Regexes for use in tokens (building blocks only; the rules that use
  ## them appear further down)
  ##

  # valid C identifiers (K&R2: A.2.3)
  identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'

  hex_prefix = '0[xX]'
  hex_digits = '[0-9a-fA-F]+'

  # integer constants (K&R2: A.2.5.1)
  decimal_constant = '0|([1-9][0-9]*)'
  hex_constant = hex_prefix+hex_digits
  # Don't allow octal constants (even invalid octal).
  octal_constant_disallowed = '0[0-9]+'

  # character constants (K&R2: A.2.5.2)
  # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
  # directives with Windows paths as filenames (..\..\dir\file)
  # For the same reason, decimal_escape allows all digit sequences. We want to
  # parse all correct code, even if it means to sometimes parse incorrect
  # code.
  #
  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
  decimal_escape = r"""(\d+)"""
  hex_escape = r"""(x[0-9a-fA-F]+)"""
  # A backslash followed by anything that is not a recognized escape; only
  # used to diagnose bad string literals (see t_BAD_STRING_LITERAL).
  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""

  escape_sequence = \
      r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'

  # string literals (K&R2: A.2.6)
  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
  string_literal = '"'+string_char+'*"'
  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

  # floating constants (K&R2: A.2.5.3)
  exponent_part = r"""([eE][-+]?[0-9]+)"""
  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
  floating_constant = \
      '(((('+fractional_constant+')'+ \
      exponent_part+'?)|([0-9]+'+exponent_part+')))'

  # Ordinals
  ordinal = r'@[0-9]+'
  missing_ordinal_value = r'@'
  # Don't allow ordinal values in octal (even invalid octal, like 09) or
  # hexadecimal.
  octal_or_hex_ordinal_disallowed = r'@((0[0-9]+)|('+hex_prefix+hex_digits+'))'

  ##
  ## Rules for the normal state
  ##
  # Characters silently skipped between tokens (space, tab, CR).
  t_ignore = ' \t\r'

  # Newlines. NOTE: the raw string below is this rule's regex (PLY treats a
  # rule function's docstring as its pattern), so no docstring can be added.
  def t_NEWLINE(self, t):
    r'\n+'
    t.lexer.lineno += len(t.value)

  # Operators
  t_MINUS = r'-'
  t_PLUS = r'\+'
  t_AMP = r'&'
  t_QSTN = r'\?'

  # =
  t_EQUALS = r'='

  # => (tried before t_EQUALS: PLY orders string rules by regex length)
  t_RESPONSE = r'=>'

  # Delimiters
  t_LPAREN = r'\('
  t_RPAREN = r'\)'
  t_LBRACKET = r'\['
  t_RBRACKET = r'\]'
  t_LBRACE = r'\{'
  t_RBRACE = r'\}'
  t_LANGLE = r'<'
  t_RANGLE = r'>'
  t_COMMA = r','
  t_DOT = r'\.'
  t_SEMI = r';'

  t_STRING_LITERAL = string_literal

  # The following floating and integer constants are defined as
  # functions to impose a strict order (otherwise, decimal
  # is placed before the others because its regex is longer,
  # and this is bad)
  #
  @TOKEN(floating_constant)
  def t_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_constant)
  def t_INT_CONST_HEX(self, t):
    return t

  # Must run before t_INT_CONST_DEC: otherwise "09" would lex as the decimal
  # constant 0 followed by 9 instead of being rejected.
  @TOKEN(octal_constant_disallowed)
  def t_OCTAL_CONSTANT_DISALLOWED(self, t):
    msg = "Octal values not allowed"
    self._error(msg, t)

  @TOKEN(decimal_constant)
  def t_INT_CONST_DEC(self, t):
    return t

  # unmatched string literals are caught by the preprocessor

  @TOKEN(bad_string_literal)
  def t_BAD_STRING_LITERAL(self, t):
    msg = "String contains invalid escape code"
    self._error(msg, t)

  # Handle ordinal-related tokens in the right order:
  # disallowed forms first, then valid ordinals, then a bare '@'.
  @TOKEN(octal_or_hex_ordinal_disallowed)
  def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
    msg = "Octal and hexadecimal ordinal values not allowed"
    self._error(msg, t)

  @TOKEN(ordinal)
  def t_ORDINAL(self, t):
    return t

  # A bare '@' with no digits after it.
  @TOKEN(missing_ordinal_value)
  def t_BAD_ORDINAL(self, t):
    msg = "Missing ordinal value"
    self._error(msg, t)

  @TOKEN(identifier)
  def t_NAME(self, t):
    # Reclassify identifiers that are reserved keywords (e.g. "struct").
    t.type = self.keyword_map.get(t.value, "NAME")
    return t

  # Ignore C and C++ style comments
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    # Keep reported line numbers accurate across multi-line comments.
    t.lexer.lineno += t.value.count("\n")

  def t_error(self, t):
    # PLY fallback hook: fires on any character no other rule matched.
    msg = "Illegal character %s" % repr(t.value[0])
    self._error(msg, t)
| OLD | NEW |