Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Side by Side Diff: mojom/lexer.cc

Issue 1432613003: Remove unused lexer code. (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « mojom/lexer.h ('k') | mojom/lexer_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "mojom/lexer.h"
6
7 #include <map>
8 #include <string>
9
10 #include "base/lazy_instance.h"
11
12 namespace mojo {
13 namespace mojom {
14
15 namespace {
16
17 class KeywordsDict {
18 public:
19 KeywordsDict();
20
21 private:
22 std::map<std::string, mojom::TokenType> keywords_;
23 friend std::map<std::string, mojom::TokenType>& Keywords();
24
25 DISALLOW_COPY_AND_ASSIGN(KeywordsDict);
26 };
27 static base::LazyInstance<KeywordsDict> g_keywords = LAZY_INSTANCE_INITIALIZER;
28
29 std::map<std::string, mojom::TokenType>& Keywords() {
30 return g_keywords.Get().keywords_;
31 }
32
33 KeywordsDict::KeywordsDict() {
34 keywords_["import"] = TokenType::IMPORT;
35 keywords_["module"] = TokenType::MODULE;
36 keywords_["struct"] = TokenType::STRUCT;
37 keywords_["union"] = TokenType::UNION;
38 keywords_["interface"] = TokenType::INTERFACE;
39 keywords_["enum"] = TokenType::ENUM;
40 keywords_["const"] = TokenType::CONST;
41 keywords_["true"] = TokenType::TRUE;
42 keywords_["false"] = TokenType::FALSE;
43 keywords_["default"] = TokenType::DEFAULT;
44 }
45
// Locale-independent replacement for isalpha(): true only for the ASCII
// letters a-z and A-Z.
bool IsAlpha(char c) {
  const bool is_lower = c >= 'a' && c <= 'z';
  const bool is_upper = c >= 'A' && c <= 'Z';
  return is_lower || is_upper;
}
50
// Locale-independent replacement for isdigit(): true only for the ASCII
// decimal digits 0-9.
bool IsDigit(char c) {
  if (c < '0')
    return false;
  return c <= '9';
}
55
56 bool IsHexDigit(char c) {
57 return (IsDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'));
58 }
59
60 // Non-localized versions of isalnum.
61 bool IsAlnum(char c) {
62 return IsAlpha(c) || IsDigit(c);
63 }
64
65 // MojomLexer tokenizes a mojom source file. It is NOT thread-safe.
66 class MojomLexer {
67 public:
68 explicit MojomLexer(const std::string& source);
69 ~MojomLexer();
70
71 // Returns the list of tokens in the source file.
72 std::vector<Token> Tokenize();
73
74 private:
75 // The GetNextToken.* functions all return true if they could find a token
76 // (even an error token) and false otherwise.
77 bool GetNextToken(Token* result);
78 bool GetNextTokenSingleChar(Token* result);
79 bool GetNextTokenEqualsOrResponse(Token* result);
80 bool GetNextTokenIdentifier(Token* result);
81 bool GetNextTokenDecConst(Token* result);
82 bool GetNextTokenHexConst(Token* result);
83 bool GetNextTokenOrdinal(Token* result);
84 bool GetNextTokenStringLiteral(Token* result);
85
86 void ConsumeSkippable();
87 void ConsumeDigits();
88 void ConsumeEol();
89 void Consume(size_t num);
90
91 bool eos(size_t offset_plus) {
92 return offset_ + offset_plus >= source_.size();
93 }
94
95 const std::string source_;
96 size_t offset_;
97 size_t line_no_;
98 size_t offset_in_line_;
99
100 DISALLOW_COPY_AND_ASSIGN(MojomLexer);
101 };
102
103 std::vector<Token> MojomLexer::Tokenize() {
104 offset_ = 0;
105 line_no_ = 0;
106 offset_in_line_ = 0;
107
108 std::vector<Token> result;
109 Token cur;
110 while (GetNextToken(&cur)) {
111 result.push_back(cur);
112
113 // As soon as an error token is found, stop tokenizing.
114 if (cur.error()) {
115 break;
116 }
117 }
118
119 return result;
120 }
121
122 bool MojomLexer::GetNextToken(Token* result) {
123 // Skip all spaces which may be in front of the next token.
124 ConsumeSkippable();
125
126 // If we found the end of the source signal that is so.
127 if (eos(0))
128 return false;
129
130 // Save the current position in the source code.
131 result->char_pos = offset_;
132 result->line_no = line_no_;
133 result->line_pos = offset_in_line_;
134
135 if (GetNextTokenSingleChar(result) || GetNextTokenEqualsOrResponse(result) ||
136 GetNextTokenIdentifier(result) || GetNextTokenHexConst(result) ||
137 GetNextTokenDecConst(result) || GetNextTokenDecConst(result) ||
138 GetNextTokenOrdinal(result) || GetNextTokenStringLiteral(result))
139 return true;
140
141 result->token = source_.substr(offset_, 1);
142 result->token_type = TokenType::ERROR_ILLEGAL_CHAR;
143 return true;
144 }
145
146 void MojomLexer::ConsumeSkippable() {
147 if (eos(0))
148 return;
149
150 bool found_non_space = false;
151 while (!found_non_space && !eos(0)) {
152 switch (source_[offset_]) {
153 case ' ':
154 case '\t':
155 case '\r':
156 Consume(1);
157 break;
158 case '\n':
159 ConsumeEol();
160 break;
161 default:
162 found_non_space = true;
163 break;
164 }
165 }
166 }
167
168 // Finds all single-character tokens except for '='.
169 bool MojomLexer::GetNextTokenSingleChar(Token* result) {
170 switch (source_[offset_]) {
171 case '(':
172 result->token_type = TokenType::LPAREN;
173 break;
174 case ')':
175 result->token_type = TokenType::RPAREN;
176 break;
177 case '[':
178 result->token_type = TokenType::LBRACKET;
179 break;
180 case ']':
181 result->token_type = TokenType::RBRACKET;
182 break;
183 case '{':
184 result->token_type = TokenType::LBRACE;
185 break;
186 case '}':
187 result->token_type = TokenType::RBRACE;
188 break;
189 case '<':
190 result->token_type = TokenType::LANGLE;
191 break;
192 case '>':
193 result->token_type = TokenType::RANGLE;
194 break;
195 case ';':
196 result->token_type = TokenType::SEMI;
197 break;
198 case ',':
199 result->token_type = TokenType::COMMA;
200 break;
201 case '.':
202 result->token_type = TokenType::DOT;
203 break;
204 case '-':
205 result->token_type = TokenType::MINUS;
206 break;
207 case '+':
208 result->token_type = TokenType::PLUS;
209 break;
210 case '&':
211 result->token_type = TokenType::AMP;
212 break;
213 case '?':
214 result->token_type = TokenType::QSTN;
215 break;
216 default:
217 return false;
218 break;
219 }
220
221 result->token = source_.substr(offset_, 1);
222 Consume(1);
223 return true;
224 }
225
226 // Finds '=' or '=>'.
227 bool MojomLexer::GetNextTokenEqualsOrResponse(Token* result) {
228 if (source_[offset_] != '=')
229 return false;
230 Consume(1);
231
232 if (eos(0) || source_[offset_] != '>') {
233 result->token_type = TokenType::EQUALS;
234 result->token = "=";
235 } else {
236 result->token_type = TokenType::RESPONSE;
237 result->token = "=>";
238 Consume(1);
239 }
240 return true;
241 }
242
243 // valid C identifiers (K&R2: A.2.3)
244 bool MojomLexer::GetNextTokenIdentifier(Token* result) {
245 char c = source_[offset_];
246
247 // Identifiers start with a letter or underscore.
248 if (!(IsAlpha(c) || c == '_'))
249 return false;
250 size_t start_offset = offset_;
251
252 // Identifiers contain letters numbers and underscores.
253 while (!eos(0) && (IsAlnum(source_[offset_]) || c == '_'))
254 Consume(1);
255
256 result->token = source_.substr(start_offset, offset_ - start_offset);
257 result->token_type = TokenType::IDENTIFIER;
258
259 if (Keywords().count(result->token))
260 result->token_type = Keywords()[result->token];
261
262 return true;
263 }
264
265 // integer constants (K&R2: A.2.5.1) dec
266 // floating constants (K&R2: A.2.5.3)
267 bool MojomLexer::GetNextTokenDecConst(Token* result) {
268 if (!IsDigit(source_[offset_]))
269 return false;
270
271 result->token_type = TokenType::INT_CONST_DEC;
272 // If the number starts with a zero and is not a floating point number.
273 if (source_[offset_] == '0' &&
274 (eos(1) || (source_[offset_] == 'e' && source_[offset_] == 'E' &&
275 source_[offset_] == '.'))) {
276 // TODO(azani): Catch and error on octal.
277 result->token = "0";
278 Consume(1);
279 return true;
280 }
281
282 size_t start_offset = offset_;
283
284 // First, we consume all the digits.
285 ConsumeDigits();
286
287 // If there is a fractional part, we consume the . and the following digits.
288 if (!eos(0) && source_[offset_] == '.') {
289 result->token_type = TokenType::FLOAT_CONST;
290 Consume(1);
291 ConsumeDigits();
292 }
293
294 // If there is an exponential part, we consume the e and the following digits.
295 if (!eos(0) && (source_[offset_] == 'e' || source_[offset_] == 'E')) {
296 if (!eos(2) && (source_[offset_ + 1] == '-' || source_[offset_ + 1]) &&
297 IsDigit(source_[offset_ + 2])) {
298 result->token_type = TokenType::FLOAT_CONST;
299 Consume(2); // Consume e/E and +/-
300 ConsumeDigits();
301 } else if (!eos(1) && IsDigit(source_[offset_ + 1])) {
302 result->token_type = TokenType::FLOAT_CONST;
303 Consume(1); // Consume e/E
304 ConsumeDigits();
305 }
306 }
307
308 result->token = source_.substr(start_offset, offset_ - start_offset);
309 return true;
310 }
311
312 // integer constants (K&R2: A.2.5.1) hex
313 bool MojomLexer::GetNextTokenHexConst(Token* result) {
314 // Hex numbers start with a 0, x and then some hex numeral.
315 if (eos(2) || source_[offset_] != '0' ||
316 (source_[offset_ + 1] != 'x' && source_[offset_ + 1] != 'X') ||
317 !IsHexDigit(source_[offset_ + 2]))
318 return false;
319
320 result->token_type = TokenType::INT_CONST_HEX;
321 size_t start_offset = offset_;
322 Consume(2);
323
324 while (IsHexDigit(source_[offset_]))
325 Consume(1);
326
327 result->token = source_.substr(start_offset, offset_ - start_offset);
328 return true;
329 }
330
331 bool MojomLexer::GetNextTokenOrdinal(Token* result) {
332 // Ordinals start with '@' and then some digit.
333 if (eos(1) || source_[offset_] != '@' || !IsDigit(source_[offset_ + 1]))
334 return false;
335 size_t start_offset = offset_;
336 // Consumes '@'.
337 Consume(1);
338
339 result->token_type = TokenType::ORDINAL;
340 ConsumeDigits();
341
342 result->token = source_.substr(start_offset, offset_ - start_offset);
343 return true;
344 }
345
346 bool MojomLexer::GetNextTokenStringLiteral(Token* result) {
347 // Ordinals start with '@' and then some digit.
348 if (source_[offset_] != '"')
349 return false;
350
351 size_t start_offset = offset_;
352 // Consumes '"'.
353 Consume(1);
354
355 while (source_[offset_] != '"') {
356 if (source_[offset_] == '\n' || eos(0)) {
357 result->token_type = TokenType::ERROR_UNTERMINATED_STRING_LITERAL;
358 result->token = source_.substr(start_offset, offset_ - start_offset);
359 return true;
360 }
361
362 // This block will be skipped if the backslash is at the end of the source.
363 if (source_[offset_] == '\\' && !eos(1)) {
364 // Consume the backslash. This will ensure \" is consumed.
365 Consume(1);
366 }
367 Consume(1);
368 }
369 // Consume the closing doublequotes.
370 Consume(1);
371
372 result->token_type = TokenType::STRING_LITERAL;
373
374 result->token = source_.substr(start_offset, offset_ - start_offset);
375 return true;
376 }
377
378 void MojomLexer::ConsumeDigits() {
379 while (!eos(0) && IsDigit(source_[offset_]))
380 Consume(1);
381 }
382
383 void MojomLexer::ConsumeEol() {
384 ++offset_;
385 ++line_no_;
386 offset_in_line_ = 0;
387 }
388
389 void MojomLexer::Consume(size_t num) {
390 offset_ += num;
391 offset_in_line_ += num;
392 }
393
394 MojomLexer::MojomLexer(const std::string& source)
395 : source_(source), offset_(0), line_no_(0), offset_in_line_(0) {
396 }
397
398 MojomLexer::~MojomLexer() {
399 }
400
401 } // namespace
402
403 Token::Token()
404 : token_type(TokenType::ERROR_UNKNOWN),
405 char_pos(0),
406 line_no(0),
407 line_pos(0) {
408 }
409
410 Token::~Token() {
411 }
412
413 // Accepts the text of a mojom file and returns the ordered list of tokens
414 // found in the file.
415 std::vector<Token> Tokenize(const std::string& source) {
416 return MojomLexer(source).Tokenize();
417 }
418
419 } // namespace mojom
420 } // namespace mojo
OLDNEW
« no previous file with comments | « mojom/lexer.h ('k') | mojom/lexer_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698