Chromium Code Reviews

Side by Side Diff: mojo/public/bindings/parse/mojo_lexer.py (new file)

Issue 130443003: Add support for using expressions as enum values. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: . Created 6 years, 11 months ago
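The operator, parenthesis, and constant tokens defined in this lexer are what allow an enum value to be written as an expression rather than a bare integer literal; a hypothetical declaration of the kind this change targets might look like enum Flags { FLAG_A = 1, FLAG_B = (1 << 1) + 1 }; (the enum and value names are made up for illustration).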
# PLY based Lexer class, based on pycparser by Eli Bendersky.
#
# Copyright (C) 2008-2013, Eli Bendersky
# License: BSD

import re
import sys
import os.path

# Try to load the ply module; if that fails, assume it lives in the
# third_party directory.
try:
  # Disable lint check which fails to find the ply module.
  # pylint: disable=F0401
  from ply.lex import TOKEN
except ImportError:
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(
      module_path, os.pardir, os.pardir, os.pardir, os.pardir, 'third_party')
  sys.path.append(third_party)
  # pylint: disable=F0401
  from ply.lex import TOKEN


class Lexer(object):
  ######################-- PRIVATE --######################

  ##
  ## Internal auxiliary methods
  ##
  def _error(self, msg, token):
    print('%s at line %d' % (msg, token.lineno))
    self.lexer.skip(1)

  ##
  ## Reserved keywords
  ##
  keywords = (
    'HANDLE',
    'DATA_PIPE_CONSUMER',
    'DATA_PIPE_PRODUCER',
    'MESSAGE_PIPE',

    'MODULE',
    'STRUCT',
    'INTERFACE',
    'ENUM',
    'VOID',
  )

  keyword_map = {}
  for keyword in keywords:
    keyword_map[keyword.lower()] = keyword

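  # Keywords do not get dedicated t_* rules; t_NAME (below) matches an
  # identifier and then promotes it to the corresponding keyword token via
  # keyword_map.
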
  ##
  ## All the tokens recognized by the lexer
  ##
  tokens = keywords + (
    # Identifiers
    'NAME',

    # Constants
    'ORDINAL',
    'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
    'FLOAT_CONST', 'HEX_FLOAT_CONST',
    'CHAR_CONST',
    'WCHAR_CONST',

    # String literals
    'STRING_LITERAL',
    'WSTRING_LITERAL',

    # Operators
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment
    'EQUALS',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters
    'LPAREN', 'RPAREN',      # ( )
    'LBRACKET', 'RBRACKET',  # [ ]
    'LBRACE', 'RBRACE',      # { }
    'SEMI', 'COLON',         # ; :
    'COMMA',                 # ,
  )

  ##
  ## Regexes for use in tokens
  ##

  # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
  identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'

  hex_prefix = '0[xX]'
  hex_digits = '[0-9a-fA-F]+'

  # integer constants (K&R2: A.2.5.1)
  integer_suffix_opt = \
      r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
  decimal_constant = \
      '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
  octal_constant = '0[0-7]*'+integer_suffix_opt
  hex_constant = hex_prefix+hex_digits+integer_suffix_opt

  bad_octal_constant = '0[0-7]*[89]'

  # character constants (K&R2: A.2.5.2)
  # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
  # directives with Windows paths as filenames (..\..\dir\file).
  # For the same reason, decimal_escape allows all digit sequences. We want to
  # parse all correct code, even if that means sometimes accepting incorrect
  # code as well.
  #
  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
  decimal_escape = r"""(\d+)"""
  hex_escape = r"""(x[0-9a-fA-F]+)"""
  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""

  escape_sequence = \
      r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
  cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
  char_const = "'"+cconst_char+"'"
  wchar_const = 'L'+char_const
  unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
  bad_char_const = \
      r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+ \
      bad_escape+r"""[^'\n]*')"""

  # string literals (K&R2: A.2.6)
  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
  string_literal = '"'+string_char+'*"'
  wstring_literal = 'L'+string_literal
  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

  # floating constants (K&R2: A.2.5.3)
  exponent_part = r"""([eE][-+]?[0-9]+)"""
  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
  floating_constant = \
      '(((('+fractional_constant+')'+ \
      exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'
  binary_exponent_part = r'''([pP][+-]?[0-9]+)'''
  hex_fractional_constant = \
      '((('+hex_digits+r""")?\."""+hex_digits+')|('+hex_digits+r"""\.))"""
  hex_floating_constant = \
      '('+hex_prefix+'('+hex_digits+'|'+hex_fractional_constant+')'+ \
      binary_exponent_part+'[FfLl]?)'

  ##
  ## Rules for the normal state
  ##
  t_ignore = ' \t'

  # Newlines
  def t_NEWLINE(self, t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")

  # Operators
  t_PLUS    = r'\+'
  t_MINUS   = r'-'
  t_TIMES   = r'\*'
  t_DIVIDE  = r'/'
  t_MOD     = r'%'
  t_OR      = r'\|'
  t_AND     = r'&'
  t_NOT     = r'~'
  t_XOR     = r'\^'
  t_LSHIFT  = r'<<'
  t_RSHIFT  = r'>>'
  t_LOR     = r'\|\|'
  t_LAND    = r'&&'
  t_LNOT    = r'!'
  t_LT      = r'<'
  t_GT      = r'>'
  t_LE      = r'<='
  t_GE      = r'>='
  t_EQ      = r'=='
  t_NE      = r'!='

  # =
  t_EQUALS = r'='

  # ?
  t_CONDOP = r'\?'

  # Delimiters
  t_LPAREN   = r'\('
  t_RPAREN   = r'\)'
  t_LBRACKET = r'\['
  t_RBRACKET = r'\]'
  t_LBRACE   = r'\{'
  t_RBRACE   = r'\}'
  t_COMMA    = r','
  t_SEMI     = r';'
  t_COLON    = r':'

  t_STRING_LITERAL = string_literal
  t_ORDINAL = r'@[0-9]*'

  # The floating and integer constants below are defined as functions to
  # impose a strict matching order (as plain string rules, PLY would sort
  # them by regex length, which would place the decimal rule first and let
  # it match the leading digits of hex and octal constants).
  #
  @TOKEN(floating_constant)
  def t_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_floating_constant)
  def t_HEX_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_constant)
  def t_INT_CONST_HEX(self, t):
    return t

  @TOKEN(bad_octal_constant)
  def t_BAD_CONST_OCT(self, t):
    msg = "Invalid octal constant"
    self._error(msg, t)

  @TOKEN(octal_constant)
  def t_INT_CONST_OCT(self, t):
    return t

  @TOKEN(decimal_constant)
  def t_INT_CONST_DEC(self, t):
    return t

  # Must come before bad_char_const, to prevent it from
  # catching valid char constants as invalid
  #
  @TOKEN(char_const)
  def t_CHAR_CONST(self, t):
    return t

  @TOKEN(wchar_const)
  def t_WCHAR_CONST(self, t):
    return t

  @TOKEN(unmatched_quote)
  def t_UNMATCHED_QUOTE(self, t):
    msg = "Unmatched '"
    self._error(msg, t)

  @TOKEN(bad_char_const)
  def t_BAD_CHAR_CONST(self, t):
    msg = "Invalid char constant %s" % t.value
    self._error(msg, t)

  @TOKEN(wstring_literal)
  def t_WSTRING_LITERAL(self, t):
    return t

  # unmatched string literals are caught by the preprocessor

  @TOKEN(bad_string_literal)
  def t_BAD_STRING_LITERAL(self, t):
    msg = "String contains invalid escape code"
    self._error(msg, t)

  @TOKEN(identifier)
  def t_NAME(self, t):
    t.type = self.keyword_map.get(t.value, "NAME")
    return t

  # Ignore C and C++ style comments
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    pass

  def t_error(self, t):
    msg = 'Illegal character %s' % repr(t.value[0])
    self._error(msg, t)
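
The file above only defines the token rules; it does not construct a PLY lexer itself. As a rough illustration of how the class can be driven through PLY's object mode, here is a minimal sketch; the driver code, the import path, and the sample mojom-style text are assumptions made for illustration and are not part of this change:

import ply.lex as lex

from mojo_lexer import Lexer  # hypothetical import path, for illustration only

# PLY's object mode picks up every t_* rule defined on the instance.
lexer_obj = Lexer()
# _error() calls self.lexer.skip(1), so store the built lexer back on the
# instance before feeding it any input.
lexer_obj.lexer = lex.lex(object=lexer_obj)

# Tokenize a mojom-style enum whose values are expressions.
lexer_obj.lexer.input('enum Flags { FLAG_A = 1 << 3, FLAG_B = 0x10 + 2 };')
for tok in iter(lexer_obj.lexer.token, None):
  print(tok.type, tok.value)

# Token types produced, in order: ENUM, NAME, LBRACE, NAME, EQUALS,
# INT_CONST_DEC, LSHIFT, INT_CONST_DEC, COMMA, NAME, EQUALS, INT_CONST_HEX,
# PLUS, INT_CONST_DEC, RBRACE, SEMI.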