# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import imp
import os.path
import sys

def _GetDirAbove(dirname):
  """Returns the directory "above" this file containing |dirname| (which must
  also be "above" this file)."""
  path = os.path.abspath(__file__)
  while True:
    path, tail = os.path.split(path)
    assert tail
    if tail == dirname:
      return path

try:
  imp.find_module("ply")
except ImportError:
  sys.path.append(os.path.join(_GetDirAbove("public"), "public/third_party"))
from ply.lex import TOKEN

from ..error import Error


class LexError(Error):
  """Class for errors from the lexer."""

  def __init__(self, filename, message, lineno):
    Error.__init__(self, filename, message, lineno=lineno)


# We have methods which look like they could be functions:
# pylint: disable=R0201
class Lexer(object):

  def __init__(self, filename):
    self.filename = filename

  ######################-- PRIVATE --######################

  ##
  ## Internal auxiliary methods
  ##
  def _error(self, msg, token):
    raise LexError(self.filename, msg, token.lineno)

  ##
  ## Reserved keywords
  ##
  keywords = (
    'HANDLE',

    'IMPORT',
    'MODULE',
    'STRUCT',
    'UNION',
    'INTERFACE',
    'ENUM',
    'CONST',
    'TRUE',
    'FALSE',
    'DEFAULT',
    'ARRAY',
    'MAP'
  )

  keyword_map = {}
  for keyword in keywords:
    keyword_map[keyword.lower()] = keyword

  ##
  ## All the tokens recognized by the lexer
  ##
  tokens = keywords + (
    # Identifiers
    'NAME',

    # Constants
    'ORDINAL',
    'INT_CONST_DEC', 'INT_CONST_HEX',
    'FLOAT_CONST',

    # String literals
    'STRING_LITERAL',

    # Operators
    'MINUS',
    'PLUS',
    'AMP',
    'QSTN',

    # Assignment
    'EQUALS',

    # Request / response
    'RESPONSE',

    # Delimiters
    'LPAREN', 'RPAREN',          # ( )
    'LBRACKET', 'RBRACKET',      # [ ]
    'LBRACE', 'RBRACE',          # { }
    'LANGLE', 'RANGLE',          # < >
    'SEMI',                      # ;
    'COMMA', 'DOT'               # , .
  )

  ##
  ## Regexes for use in tokens
  ##

  # valid C identifiers (K&R2: A.2.3)
  identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'

  hex_prefix = '0[xX]'
  hex_digits = '[0-9a-fA-F]+'

  # integer constants (K&R2: A.2.5.1)
  decimal_constant = '0|([1-9][0-9]*)'
  hex_constant = hex_prefix+hex_digits
  # Don't allow octal constants (even invalid octal).
  octal_constant_disallowed = '0[0-9]+'

  # character constants (K&R2: A.2.5.2)
  # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
  # directives with Windows paths as filenames (e.g., ..\..\dir\file).
  # For the same reason, decimal_escape allows all digit sequences. We want to
  # parse all correct code, even if that means sometimes parsing incorrect
  # code as well.
  #
  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
  decimal_escape = r"""(\d+)"""
  hex_escape = r"""(x[0-9a-fA-F]+)"""
  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""

  escape_sequence = \
      r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'

  # string literals (K&R2: A.2.6)
  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
  string_literal = '"'+string_char+'*"'
  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

  # floating constants (K&R2: A.2.5.3)
  exponent_part = r"""([eE][-+]?[0-9]+)"""
  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
  floating_constant = \
      '(((('+fractional_constant+')'+ \
      exponent_part+'?)|([0-9]+'+exponent_part+')))'

  # Ordinals
  ordinal = r'@[0-9]+'
  missing_ordinal_value = r'@'
  # Don't allow ordinal values in octal (even invalid octal, like 09) or
  # hexadecimal.
  octal_or_hex_ordinal_disallowed = r'@((0[0-9]+)|('+hex_prefix+hex_digits+'))'
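  # For illustration (these examples are not in the original file): given the
  # token rules below, "@7" and "@0" lex as ORDINAL, a bare "@" triggers the
  # "Missing ordinal value" error, and "@07" or "@0x7" hit the octal/hex
  # ordinal error first, since PLY tries function-based rules in the order
  # they are defined.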
  ##
  ## Rules for the normal state
  ##
  t_ignore = ' \t\r'

  # Newlines
  def t_NEWLINE(self, t):
    r'\n+'
    t.lexer.lineno += len(t.value)

  # Operators
  t_MINUS = r'-'
  t_PLUS = r'\+'
  t_AMP = r'&'
  t_QSTN = r'\?'

  # =
  t_EQUALS = r'='

  # =>
  t_RESPONSE = r'=>'

  # Delimiters
  t_LPAREN = r'\('
  t_RPAREN = r'\)'
  t_LBRACKET = r'\['
  t_RBRACKET = r'\]'
  t_LBRACE = r'\{'
  t_RBRACE = r'\}'
  t_LANGLE = r'<'
  t_RANGLE = r'>'
  t_COMMA = r','
  t_DOT = r'\.'
  t_SEMI = r';'

  t_STRING_LITERAL = string_literal

  # The following floating and integer constants are defined as
  # functions to impose a strict order (otherwise, decimal
  # is placed before the others because its regex is longer,
  # and this is bad)
  #
  @TOKEN(floating_constant)
  def t_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_constant)
  def t_INT_CONST_HEX(self, t):
    return t

  @TOKEN(octal_constant_disallowed)
  def t_OCTAL_CONSTANT_DISALLOWED(self, t):
    msg = "Octal values not allowed"
    self._error(msg, t)

  @TOKEN(decimal_constant)
  def t_INT_CONST_DEC(self, t):
    return t

  # unmatched string literals are caught by the preprocessor

  @TOKEN(bad_string_literal)
  def t_BAD_STRING_LITERAL(self, t):
    msg = "String contains invalid escape code"
    self._error(msg, t)

  # Handle ordinal-related tokens in the right order:
  @TOKEN(octal_or_hex_ordinal_disallowed)
  def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
    msg = "Octal and hexadecimal ordinal values not allowed"
    self._error(msg, t)

  @TOKEN(ordinal)
  def t_ORDINAL(self, t):
    return t

  @TOKEN(missing_ordinal_value)
  def t_BAD_ORDINAL(self, t):
    msg = "Missing ordinal value"
    self._error(msg, t)

  @TOKEN(identifier)
  def t_NAME(self, t):
    t.type = self.keyword_map.get(t.value, "NAME")
    return t

  # Ignore C and C++ style comments
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    t.lexer.lineno += t.value.count("\n")

  def t_error(self, t):
    msg = "Illegal character %s" % repr(t.value[0])
    self._error(msg, t)
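# ------------------------------------------------------------------------------
# Usage sketch (not part of the original module): PLY builds the working lexer
# from the t_* rules above via lex.lex(object=...); a caller is expected to
# hand it a Lexer instance. The file name and mojom snippet below are made up
# purely for illustration.
#
#   import ply.lex as lex
#   from mojom.parse.lexer import Lexer
#
#   mojom_lexer = lex.lex(object=Lexer("example.mojom"))
#   mojom_lexer.input("interface Foo { Frobinate(int32 x) => (bool ok); };")
#   for tok in mojom_lexer:
#     print("%s(%r)" % (tok.type, tok.value))
#
# This would emit INTERFACE, NAME, LBRACE, and so on, and raise LexError (with
# the file name and line number) on any illegal character, bad escape, or
# disallowed ordinal.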