Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(178)

Side by Side Diff: mojo/public/bindings/parse/mojo_lexer.py

Issue 130443003: Add support for using expressions as enum values. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: copyright Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | mojo/public/bindings/parse/mojo_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # PLY based Lexer class, based on pycparser by Eli Bendersky.
2 #
3 # Copyright (c) 2012, Eli Bendersky
4 # All rights reserved.
5 #
6 # Redistribution and use in source and binary forms, with or without modificatio n,
7 # are permitted provided that the following conditions are met:
8 #
9 # * Redistributions of source code must retain the above copyright notice, this
10 #   list of conditions and the following disclaimer.
11 # * Redistributions in binary form must reproduce the above copyright notice,
12 #   this list of conditions and the following disclaimer in the documentation
13 #   and/or other materials provided with the distribution.
14 # * Neither the name of Eli Bendersky nor the names of its contributors may
15 #   be used to endorse or promote products derived from this software without
16 #   specific prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AN D
19 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUT E
24 # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
27 # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE .
28
import re
import sys
import os.path

# Try to load the ply module, if not, then assume it is in the third_party
# directory.
try:
  # Disable lint check which fails to find the ply module.
  # pylint: disable=F0401
  from ply.lex import TOKEN
except ImportError:
  # Fall back to the copy of ply checked into the Chromium tree: walk four
  # levels up from this file (mojo/public/bindings/parse) to the src root,
  # then into third_party, and put that directory on sys.path.
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(
      module_path, os.pardir, os.pardir, os.pardir, os.pardir, 'third_party')
  sys.path.append(third_party)
  # pylint: disable=F0401
  from ply.lex import TOKEN
46
47
class Lexer(object):
  """Tokenizer definition for mojom IDL files, built on PLY (Python Lex-Yacc).

  PLY builds the actual lexer by introspecting this object: every `t_*`
  attribute is a token rule.  String rules (t_PLUS, t_LPAREN, ...) are matched
  longest-regex-first; function rules are matched in the order they are
  defined, which is why the constant-matching rules below are functions --
  their relative order is significant.  For rule functions, PLY takes the
  regex either from the @TOKEN decorator or from the function's docstring
  (t_NEWLINE, t_COMMENT), so those docstrings must remain regexes.

  Derived from pycparser's lexer by Eli Bendersky (see license above).
  """
  ######################-- PRIVATE --######################

  ##
  ## Internal auxiliary methods
  ##
  def _error(self, msg, token):
    """Prints a lexing error and skips one character so lexing can resume."""
    print('%s at line %d' % (msg, token.lineno))
    # NOTE(review): self.lexer is not assigned anywhere in this class; it is
    # presumably attached by the code that calls ply.lex.lex() -- confirm
    # against the caller before relying on it.
    self.lexer.skip(1)

  ##
  ## Reserved keywords
  ##
  # Token names for the reserved words.  The source-text spelling is the
  # lower-case form (e.g. 'handle' lexes as a HANDLE token -- see t_NAME).
  keywords = (
    'HANDLE',
    'DATA_PIPE_CONSUMER',
    'DATA_PIPE_PRODUCER',
    'MESSAGE_PIPE',

    'MODULE',
    'STRUCT',
    'INTERFACE',
    'ENUM',
    'VOID',
  )

  # Maps the lower-case source spelling of each keyword to its token name,
  # used by t_NAME to promote identifiers to keyword tokens.
  keyword_map = {}
  for keyword in keywords:
    keyword_map[keyword.lower()] = keyword

  ##
  ## All the tokens recognized by the lexer
  ##
  tokens = keywords + (
    # Identifiers
    'NAME',

    # constants
    'ORDINAL',
    'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
    'FLOAT_CONST', 'HEX_FLOAT_CONST',
    'CHAR_CONST',
    'WCHAR_CONST',

    # String literals
    'STRING_LITERAL',
    'WSTRING_LITERAL',

    # Operators
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment
    'EQUALS',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters
    'LPAREN', 'RPAREN',         # ( )
    'LBRACKET', 'RBRACKET',     # [ ]
    'LBRACE', 'RBRACE',         # { }
    'SEMI', 'COLON',            # ; :
    'COMMA',                    # ,
  )

  ##
  ## Regexes for use in tokens (building blocks, composed below)
  ##
  ##

  # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
  identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'

  hex_prefix = '0[xX]'
  hex_digits = '[0-9a-fA-F]+'

  # integer constants (K&R2: A.2.5.1)
  integer_suffix_opt = \
      r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
  decimal_constant = \
      '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
  octal_constant = '0[0-7]*'+integer_suffix_opt
  hex_constant = hex_prefix+hex_digits+integer_suffix_opt

  # '0' followed by octal digits but containing an 8 or 9: reported as an
  # error by t_BAD_CONST_OCT rather than silently mis-lexed.
  bad_octal_constant = '0[0-7]*[89]'

  # character constants (K&R2: A.2.5.2)
  # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
  # directives with Windows paths as filenames (..\..\dir\file)
  # For the same reason, decimal_escape allows all digit sequences. We want to
  # parse all correct code, even if it means to sometimes parse incorrect
  # code.
  #
  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
  decimal_escape = r"""(\d+)"""
  hex_escape = r"""(x[0-9a-fA-F]+)"""
  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""

  escape_sequence = \
      r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
  cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
  char_const = "'"+cconst_char+"'"
  wchar_const = 'L'+char_const
  # A "'" whose closing quote never appears before newline / end of input.
  unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
  bad_char_const = \
      r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+ \
      bad_escape+r"""[^'\n]*')"""

  # string literals (K&R2: A.2.6)
  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
  string_literal = '"'+string_char+'*"'
  wstring_literal = 'L'+string_literal
  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

  # floating constants (K&R2: A.2.5.3)
  exponent_part = r"""([eE][-+]?[0-9]+)"""
  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
  floating_constant = \
      '(((('+fractional_constant+')'+ \
      exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'
  binary_exponent_part = r'''([pP][+-]?[0-9]+)'''
  hex_fractional_constant = \
      '((('+hex_digits+r""")?\."""+hex_digits+')|('+hex_digits+r"""\.))"""
  hex_floating_constant = \
      '('+hex_prefix+'('+hex_digits+'|'+hex_fractional_constant+')'+ \
      binary_exponent_part+'[FfLl]?)'

  ##
  ## Rules for the normal state
  ##
  # Whitespace (other than newlines) carries no meaning; skip it.
  t_ignore = ' \t'

  # Newlines: consumed silently, but line numbers are tracked for error
  # messages.  The raw-string "docstring" below IS the rule's regex (PLY
  # convention) -- it must not be turned into documentation text.
  def t_NEWLINE(self, t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")

  # Operators
  t_PLUS = r'\+'
  t_MINUS = r'-'
  t_TIMES = r'\*'
  t_DIVIDE = r'/'
  t_MOD = r'%'
  t_OR = r'\|'
  t_AND = r'&'
  t_NOT = r'~'
  t_XOR = r'\^'
  t_LSHIFT = r'<<'
  t_RSHIFT = r'>>'
  t_LOR = r'\|\|'
  t_LAND = r'&&'
  t_LNOT = r'!'
  t_LT = r'<'
  t_GT = r'>'
  t_LE = r'<='
  t_GE = r'>='
  t_EQ = r'=='
  t_NE = r'!='

  # =
  t_EQUALS = r'='

  # ?
  t_CONDOP = r'\?'

  # Delimiters
  t_LPAREN = r'\('
  t_RPAREN = r'\)'
  t_LBRACKET = r'\['
  t_RBRACKET = r'\]'
  t_LBRACE = r'\{'
  t_RBRACE = r'\}'
  t_COMMA = r','
  t_SEMI = r';'
  t_COLON = r':'

  t_STRING_LITERAL = string_literal
  # Ordinal markers such as '@1'.  NOTE(review): the '*' also accepts a bare
  # '@' with no digits -- confirm whether that is intended.
  t_ORDINAL = r'@[0-9]*'

  # The following floating and integer constants are defined as
  # functions to impose a strict order (otherwise, decimal
  # is placed before the others because its regex is longer,
  # and this is bad)
  #
  @TOKEN(floating_constant)
  def t_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_floating_constant)
  def t_HEX_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_constant)
  def t_INT_CONST_HEX(self, t):
    return t

  # Must come before t_INT_CONST_OCT so that e.g. '019' is reported as an
  # error instead of lexing as '01' followed by '9'.
  @TOKEN(bad_octal_constant)
  def t_BAD_CONST_OCT(self, t):
    msg = "Invalid octal constant"
    self._error(msg, t)

  @TOKEN(octal_constant)
  def t_INT_CONST_OCT(self, t):
    return t

  @TOKEN(decimal_constant)
  def t_INT_CONST_DEC(self, t):
    return t

  # Must come before bad_char_const, to prevent it from
  # catching valid char constants as invalid
  #
  @TOKEN(char_const)
  def t_CHAR_CONST(self, t):
    return t

  @TOKEN(wchar_const)
  def t_WCHAR_CONST(self, t):
    return t

  @TOKEN(unmatched_quote)
  def t_UNMATCHED_QUOTE(self, t):
    msg = "Unmatched '"
    self._error(msg, t)

  @TOKEN(bad_char_const)
  def t_BAD_CHAR_CONST(self, t):
    msg = "Invalid char constant %s" % t.value
    self._error(msg, t)

  @TOKEN(wstring_literal)
  def t_WSTRING_LITERAL(self, t):
    return t

  # unmatched string literals are caught by the preprocessor

  @TOKEN(bad_string_literal)
  def t_BAD_STRING_LITERAL(self, t):
    msg = "String contains invalid escape code"
    self._error(msg, t)

  # Identifiers.  Any identifier whose lower-case spelling matches a reserved
  # word is promoted to that keyword's token type.
  @TOKEN(identifier)
  def t_NAME(self, t):
    t.type = self.keyword_map.get(t.value, "NAME")
    return t

  # Ignore C and C++ style comments.  The raw-string "docstring" below IS the
  # rule's regex (PLY convention); consecutive '//' lines are consumed as one
  # comment.
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    pass

  # Catch-all for characters matched by no other rule: report and skip one
  # character (via _error) so lexing can continue.
  def t_error(self, t):
    msg = 'Illegal character %s' % repr(t.value[0])
    self._error(msg, t)
OLDNEW
« no previous file with comments | « no previous file | mojo/public/bindings/parse/mojo_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698