Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(138)

Side by Side Diff: mojo/public/bindings/pylib/parse/mojo_lexer.py

Issue 226263002: Mojo: Move mojo/public/bindings to mojo/public/tools/bindings. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebased Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import re
6 import sys
7 import os.path
8
9 # Try to load the ply module, if not, then assume it is in the third_party
10 # directory.
11 try:
12 # Disable lint check which fails to find the ply module.
13 # pylint: disable=F0401
14 from ply.lex import TOKEN
15 except ImportError:
16 module_path, module_name = os.path.split(__file__)
17 third_party = os.path.join(module_path, os.pardir, os.pardir, os.pardir,
18 os.pardir, os.pardir, 'third_party')
19 sys.path.append(third_party)
20 # pylint: disable=F0401
21 from ply.lex import TOKEN
22
23
class LexError(Exception):
  """Error raised by the lexer, tagged with a file name and line number.

  Attributes:
    filename: Name of the file the error is reported against.
    lineno: Line number the error is reported against (formatted with %d, so
        it must be an integer).
    msg: Human-readable description of the problem.
  """

  def __init__(self, filename, lineno, msg):
    # Forward the arguments to the base class so that self.args is populated.
    # Copying and pickling re-create an exception by calling its class with
    # self.args; with an empty args tuple that call would fail because all
    # three parameters are required.
    Exception.__init__(self, filename, lineno, msg)
    self.filename = filename
    self.lineno = lineno
    self.msg = msg

  def __str__(self):
    # "<filename>:<lineno>: Error: <msg>"
    return "%s:%d: Error: %s" % (self.filename, self.lineno, self.msg)

  def __repr__(self):
    # The repr is deliberately the same text as str().
    return str(self)
35
36
class Lexer(object):
  """Token rules in the form consumed by ply.lex.

  The class attributes `tokens`, `t_ignore` and the `t_*` strings/functions
  define the token set. Each function rule's regex is its docstring or is
  attached with the @TOKEN decorator, so those functions must not be given
  ordinary docstrings. Every lexing error is reported by raising LexError.

  Args:
    filename: Name stored on the instance and used only when building
        LexError instances.
  """

  def __init__(self, filename):
    self.filename = filename

  ######################-- PRIVATE --######################

  ##
  ## Internal auxiliary methods
  ##
  def _error(self, msg, token):
    """Raises LexError for `token` (its lineno attribute) with message `msg`."""
    raise LexError(self.filename, token.lineno, msg)

  ##
  ## Reserved keywords
  ##
  keywords = (
    'HANDLE',
    'DATA_PIPE_CONSUMER',
    'DATA_PIPE_PRODUCER',
    'MESSAGE_PIPE',
    'SHARED_BUFFER',

    'IMPORT',
    'MODULE',
    'STRUCT',
    'INTERFACE',
    'ENUM',
  )

  # Maps the lower-case spelling found in the input (e.g. 'struct') to the
  # token type name (e.g. 'STRUCT'); consulted by t_NAME.
  keyword_map = {}
  for keyword in keywords:
    keyword_map[keyword.lower()] = keyword

  ##
  ## All the tokens recognized by the lexer
  ##
  tokens = keywords + (
    # Identifiers
    'NAME',

    # Constants
    'ORDINAL',
    'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
    'FLOAT_CONST',
    'CHAR_CONST',

    # String literals
    'STRING_LITERAL',

    # Operators
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',

    # Assignment
    'EQUALS',

    # Request / response
    'RESPONSE',

    # Delimiters
    'LPAREN', 'RPAREN',         # ( )
    'LBRACKET', 'RBRACKET',     # [ ]
    'LBRACE', 'RBRACE',         # { }
    'LANGLE', 'RANGLE',         # < >
    'SEMI',                     # ;
    'COMMA', 'DOT'              # , .
  )

  ##
  ## Regexes for use in tokens
  ##

  # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
  identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'

  hex_prefix = '0[xX]'
  hex_digits = '[0-9a-fA-F]+'

  # integer constants (K&R2: A.2.5.1)
  integer_suffix_opt = \
      r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
  decimal_constant = \
      '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
  octal_constant = '0[0-7]*'+integer_suffix_opt
  hex_constant = hex_prefix+hex_digits+integer_suffix_opt

  bad_octal_constant = '0[0-7]*[89]'

  # character constants (K&R2: A.2.5.2)
  # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
  # directives with Windows paths as filenames (..\..\dir\file)
  # For the same reason, decimal_escape allows all digit sequences. We want to
  # parse all correct code, even if it means to sometimes parse incorrect
  # code.
  #
  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
  decimal_escape = r"""(\d+)"""
  hex_escape = r"""(x[0-9a-fA-F]+)"""
  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""

  escape_sequence = \
      r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
  cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
  char_const = "'"+cconst_char+"'"
  unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
  bad_char_const = \
      r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+ \
      bad_escape+r"""[^'\n]*')"""

  # string literals (K&R2: A.2.6)
  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
  string_literal = '"'+string_char+'*"'
  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

  # floating constants (K&R2: A.2.5.3)
  exponent_part = r"""([eE][-+]?[0-9]+)"""
  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
  floating_constant = \
      '(((('+fractional_constant+')'+ \
      exponent_part+'?)|([0-9]+'+exponent_part+')))'

  # Ordinals
  ordinal = r'@[0-9]+'
  missing_ordinal_value = r'@'
  # Don't allow ordinal values in octal (even invalid octal, like 09) or
  # hexadecimal.
  octal_or_hex_ordinal_disallowed = r'@((0[0-9]+)|('+hex_prefix+hex_digits+'))'

  ##
  ## Rules for the normal state
  ##
  t_ignore = ' \t\r'

  # Newlines
  def t_NEWLINE(self, t):
    r'\n+'
    # Produces no token; only keeps the lexer's line counter up to date.
    t.lexer.lineno += t.value.count("\n")

  # Operators
  t_PLUS              = r'\+'
  t_MINUS             = r'-'
  t_TIMES             = r'\*'
  t_DIVIDE            = r'/'
  t_MOD               = r'%'
  t_OR                = r'\|'
  t_AND               = r'&'
  t_NOT               = r'~'
  t_XOR               = r'\^'
  t_LSHIFT            = r'<<'
  t_RSHIFT            = r'>>'

  # =
  t_EQUALS            = r'='

  # =>
  t_RESPONSE          = r'=>'

  # Delimiters
  t_LPAREN            = r'\('
  t_RPAREN            = r'\)'
  t_LBRACKET          = r'\['
  t_RBRACKET          = r'\]'
  t_LBRACE            = r'\{'
  t_RBRACE            = r'\}'
  t_LANGLE            = r'<'
  t_RANGLE            = r'>'
  t_COMMA             = r','
  t_DOT               = r'\.'
  t_SEMI              = r';'

  t_STRING_LITERAL    = string_literal

  # The following floating and integer constants are defined as
  # functions to impose a strict order (otherwise, decimal
  # is placed before the others because its regex is longer,
  # and this is bad)
  #
  @TOKEN(floating_constant)
  def t_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_constant)
  def t_INT_CONST_HEX(self, t):
    return t

  @TOKEN(bad_octal_constant)
  def t_BAD_CONST_OCT(self, t):
    msg = "Invalid octal constant"
    self._error(msg, t)

  @TOKEN(octal_constant)
  def t_INT_CONST_OCT(self, t):
    return t

  @TOKEN(decimal_constant)
  def t_INT_CONST_DEC(self, t):
    return t

  # Must come before bad_char_const, to prevent it from
  # catching valid char constants as invalid
  #
  @TOKEN(char_const)
  def t_CHAR_CONST(self, t):
    return t

  @TOKEN(unmatched_quote)
  def t_UNMATCHED_QUOTE(self, t):
    msg = "Unmatched '"
    self._error(msg, t)

  @TOKEN(bad_char_const)
  def t_BAD_CHAR_CONST(self, t):
    msg = "Invalid char constant %s" % t.value
    self._error(msg, t)

  # unmatched string literals are caught by the preprocessor

  @TOKEN(bad_string_literal)
  def t_BAD_STRING_LITERAL(self, t):
    msg = "String contains invalid escape code"
    self._error(msg, t)

  # Handle ordinal-related tokens in the right order:
  @TOKEN(octal_or_hex_ordinal_disallowed)
  def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
    msg = "Octal and hexadecimal ordinal values not allowed"
    self._error(msg, t)

  @TOKEN(ordinal)
  def t_ORDINAL(self, t):
    return t

  @TOKEN(missing_ordinal_value)
  def t_BAD_ORDINAL(self, t):
    msg = "Missing ordinal value"
    self._error(msg, t)

  @TOKEN(identifier)
  def t_NAME(self, t):
    # Anything spelled like an identifier is a NAME unless it is one of the
    # reserved (lower-case) keywords, in which case it gets the keyword type.
    t.type = self.keyword_map.get(t.value, "NAME")
    return t

  # Ignore C and C++ style comments
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    # One match can span several lines (a block comment, or a run of
    # consecutive // comments joined by newlines). Those newlines never reach
    # t_NEWLINE, so count them here; otherwise every later token and error
    # would be reported with a line number that is too low.
    t.lexer.lineno += t.value.count("\n")

  def t_error(self, t):
    # Called when no rule matches at the current position; t.value is the
    # remaining input, so its first character is the offending one.
    msg = 'Illegal character %s' % repr(t.value[0])
    self._error(msg, t)
OLDNEW
« no previous file with comments | « mojo/public/bindings/pylib/parse/__init__.py ('k') | mojo/public/bindings/pylib/parse/mojo_lexer_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698