Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(106)

Side by Side Diff: mojo/public/tools/bindings/pylib/mojom/parse/lexer.py

Issue 814543006: Move //mojo/{public, edk} underneath //third_party (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebase Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import imp
6 import os.path
7 import sys
8
def _GetDirAbove(dirname):
  """Returns the directory "above" this file containing |dirname|.

  Walks upward from this file's absolute path one component at a time,
  starting with the file name itself, and returns the parent of the first
  component whose name equals |dirname|.

  Args:
    dirname: Name of a single path component expected to appear in this
        file's absolute path.

  Returns:
    The absolute path of the directory that directly contains |dirname|.

  Raises:
    AssertionError: if no component of this file's path is named |dirname|.
  """
  start = os.path.abspath(__file__)
  path = start
  while True:
    path, tail = os.path.split(path)
    # An empty tail means the filesystem root has been reached. Raise
    # explicitly instead of relying on `assert`, which is stripped under -O
    # and would leave this loop spinning forever.
    if not tail:
      raise AssertionError(
          "%r is not a component of %r" % (dirname, start))
    if tail == dirname:
      return path
18
# Use PLY from the existing module search path when it can be found there;
# otherwise extend sys.path with the "public/third_party" directory located
# relative to this file (presumably where a bundled PLY lives -- confirm).
try:
  imp.find_module("ply")
except ImportError:
  sys.path.append(os.path.join(_GetDirAbove("public"), "public/third_party"))
from ply.lex import TOKEN
24
25 from ..error import Error
26
27
class LexError(Error):
  """Class for errors from the lexer."""

  def __init__(self, filename, message, lineno):
    """Initializes the error by delegating to Error.

    Args:
      filename: Forwarded positionally to Error.__init__.
      message: Forwarded positionally to Error.__init__.
      lineno: Forwarded to Error.__init__ as the |lineno| keyword argument.
    """
    Error.__init__(self, filename, message, lineno=lineno)
33
34
# We have methods which look like they could be functions:
# pylint: disable=R0201
class Lexer(object):
  """Token rules for a PLY-based lexer.

  The class follows PLY's conventions: |tokens| lists every token type and
  each `t_*` attribute or method supplies the regex for one rule. Rules
  written as methods are tried in definition order, ahead of the rules written
  as plain strings, so the order of the methods below is significant.

  Note that for `t_*` methods the docstring (or the @TOKEN argument) *is* the
  rule's regex, so those methods are documented with comments only.

  Lexing errors are reported by raising LexError carrying |filename| and the
  offending token's line number.
  """

  def __init__(self, filename):
    """Stores |filename|, which is only used when building a LexError."""
    self.filename = filename

  ######################--   PRIVATE   --######################

  ##
  ## Internal auxiliary methods
  ##
  def _error(self, msg, token):
    """Raises LexError for |token| with message |msg|.

    Args:
      msg: Human-readable description of the problem.
      token: The token being processed; its |lineno| is reported.

    Raises:
      LexError: always.
    """
    raise LexError(self.filename, msg, token.lineno)

  ##
  ## Reserved keywords
  ##
  keywords = (
    'HANDLE',

    'IMPORT',
    'MODULE',
    'STRUCT',
    'UNION',
    'INTERFACE',
    'ENUM',
    'CONST',
    'TRUE',
    'FALSE',
    'DEFAULT',
    'ARRAY',
    'MAP'
  )

  # Maps the lower-case spelling found in source text to the token type name.
  # Built with a comprehension so that no loop variable is left behind as a
  # stray class attribute.
  keyword_map = {keyword.lower(): keyword for keyword in keywords}

  ##
  ## All the tokens recognized by the lexer
  ##
  tokens = keywords + (
    # Identifiers
    'NAME',

    # Constants
    'ORDINAL',
    'INT_CONST_DEC', 'INT_CONST_HEX',
    'FLOAT_CONST',

    # String literals
    'STRING_LITERAL',

    # Operators
    'MINUS',
    'PLUS',
    'AMP',
    'QSTN',

    # Assignment
    'EQUALS',

    # Request / response
    'RESPONSE',

    # Delimiters
    'LPAREN', 'RPAREN',         # ( )
    'LBRACKET', 'RBRACKET',     # [ ]
    'LBRACE', 'RBRACE',         # { }
    'LANGLE', 'RANGLE',         # < >
    'SEMI',                     # ;
    'COMMA', 'DOT'              # , .
  )

  ##
  ## Regexes for use in tokens
  ##

  # valid C identifiers (K&R2: A.2.3)
  identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'

  hex_prefix = '0[xX]'
  hex_digits = '[0-9a-fA-F]+'

  # integer constants (K&R2: A.2.5.1)
  decimal_constant = '0|([1-9][0-9]*)'
  hex_constant = hex_prefix+hex_digits
  # Don't allow octal constants (even invalid octal).
  octal_constant_disallowed = '0[0-9]+'

  # character constants (K&R2: A.2.5.2)
  # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
  # directives with Windows paths as filenames (..\..\dir\file)
  # For the same reason, decimal_escape allows all digit sequences. We want to
  # parse all correct code, even if it means to sometimes parse incorrect
  # code.
  #
  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
  decimal_escape = r"""(\d+)"""
  hex_escape = r"""(x[0-9a-fA-F]+)"""
  # Every digit must be excluded here (0-9, not just 0-7): decimal_escape
  # accepts any digit sequence, and the bad-string rule is tried before the
  # good-string rule, so leaving 8 and 9 out would reject "\8" and "\9" while
  # accepting e.g. "\18".
  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-9])"""

  escape_sequence = \
      r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'

  # string literals (K&R2: A.2.6)
  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
  string_literal = '"'+string_char+'*"'
  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

  # floating constants (K&R2: A.2.5.3)
  exponent_part = r"""([eE][-+]?[0-9]+)"""
  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
  floating_constant = \
      '(((('+fractional_constant+')'+ \
      exponent_part+'?)|([0-9]+'+exponent_part+')))'

  # Ordinals
  ordinal = r'@[0-9]+'
  missing_ordinal_value = r'@'
  # Don't allow ordinal values in octal (even invalid octal, like 09) or
  # hexadecimal.
  octal_or_hex_ordinal_disallowed = r'@((0[0-9]+)|('+hex_prefix+hex_digits+'))'

  ##
  ## Rules for the normal state
  ##
  t_ignore = ' \t\r'

  # Newlines: produce no token, only advance the line counter by the number of
  # newline characters matched.
  def t_NEWLINE(self, t):
    r'\n+'
    t.lexer.lineno += len(t.value)

  # Operators
  t_MINUS             = r'-'
  t_PLUS              = r'\+'
  t_AMP               = r'&'
  t_QSTN              = r'\?'

  # =
  t_EQUALS            = r'='

  # =>
  t_RESPONSE          = r'=>'

  # Delimiters
  t_LPAREN            = r'\('
  t_RPAREN            = r'\)'
  t_LBRACKET          = r'\['
  t_RBRACKET          = r'\]'
  t_LBRACE            = r'\{'
  t_RBRACE            = r'\}'
  t_LANGLE            = r'<'
  t_RANGLE            = r'>'
  t_COMMA             = r','
  t_DOT               = r'\.'
  t_SEMI              = r';'

  t_STRING_LITERAL    = string_literal

  # The following floating and integer constants are defined as
  # functions to impose a strict order (otherwise, decimal
  # is placed before the others because its regex is longer,
  # and this is bad)
  #
  @TOKEN(floating_constant)
  def t_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_constant)
  def t_INT_CONST_HEX(self, t):
    return t

  # Must precede t_INT_CONST_DEC, which would otherwise match the leading "0".
  @TOKEN(octal_constant_disallowed)
  def t_OCTAL_CONSTANT_DISALLOWED(self, t):
    msg = "Octal values not allowed"
    self._error(msg, t)

  @TOKEN(decimal_constant)
  def t_INT_CONST_DEC(self, t):
    return t

  # unmatched string literals are caught by the preprocessor

  @TOKEN(bad_string_literal)
  def t_BAD_STRING_LITERAL(self, t):
    msg = "String contains invalid escape code"
    self._error(msg, t)

  # Handle ordinal-related tokens in the right order:
  @TOKEN(octal_or_hex_ordinal_disallowed)
  def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
    msg = "Octal and hexadecimal ordinal values not allowed"
    self._error(msg, t)

  @TOKEN(ordinal)
  def t_ORDINAL(self, t):
    return t

  @TOKEN(missing_ordinal_value)
  def t_BAD_ORDINAL(self, t):
    msg = "Missing ordinal value"
    self._error(msg, t)

  # Identifiers whose exact text is a key of keyword_map become that keyword's
  # token type; everything else is a NAME.
  @TOKEN(identifier)
  def t_NAME(self, t):
    t.type = self.keyword_map.get(t.value, "NAME")
    return t

  # Ignore C and C++ style comments, while still counting the lines they span.
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    t.lexer.lineno += t.value.count("\n")

  # Called when no rule matches; reports the first unmatched character.
  def t_error(self, t):
    msg = "Illegal character %s" % repr(t.value[0])
    self._error(msg, t)
OLDNEW
« no previous file with comments | « mojo/public/tools/bindings/pylib/mojom/parse/ast.py ('k') | mojo/public/tools/bindings/pylib/mojom/parse/parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698