OLD | NEW |
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "tools/gn/tokenizer.h" | 5 #include "tools/gn/tokenizer.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
| 8 #include "base/strings/string_util.h" |
8 #include "tools/gn/input_file.h" | 9 #include "tools/gn/input_file.h" |
9 | 10 |
10 namespace { | 11 namespace { |
11 | 12 |
12 bool CouldBeTwoCharOperatorBegin(char c) { | 13 bool CouldBeTwoCharOperatorBegin(char c) { |
13 return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' || | 14 return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' || |
14 c == '+' || c == '|' || c == '&'; | 15 c == '+' || c == '|' || c == '&'; |
15 } | 16 } |
16 | 17 |
17 bool CouldBeTwoCharOperatorEnd(char c) { | 18 bool CouldBeTwoCharOperatorEnd(char c) { |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
100 } | 101 } |
101 size_t token_begin = cur_; | 102 size_t token_begin = cur_; |
102 AdvanceToEndOfToken(location, type); | 103 AdvanceToEndOfToken(location, type); |
103 if (has_error()) | 104 if (has_error()) |
104 break; | 105 break; |
105 size_t token_end = cur_; | 106 size_t token_end = cur_; |
106 | 107 |
107 base::StringPiece token_value(&input_.data()[token_begin], | 108 base::StringPiece token_value(&input_.data()[token_begin], |
108 token_end - token_begin); | 109 token_end - token_begin); |
109 | 110 |
110 if (type == Token::UNCLASSIFIED_OPERATOR) | 111 if (type == Token::UNCLASSIFIED_OPERATOR) { |
111 type = GetSpecificOperatorType(token_value); | 112 type = GetSpecificOperatorType(token_value); |
112 if (type == Token::IDENTIFIER) { | 113 } else if (type == Token::IDENTIFIER) { |
113 if (token_value == "if") | 114 if (token_value == "if") |
114 type = Token::IF; | 115 type = Token::IF; |
115 else if (token_value == "else") | 116 else if (token_value == "else") |
116 type = Token::ELSE; | 117 type = Token::ELSE; |
117 else if (token_value == "true") | 118 else if (token_value == "true") |
118 type = Token::TRUE_TOKEN; | 119 type = Token::TRUE_TOKEN; |
119 else if (token_value == "false") | 120 else if (token_value == "false") |
120 type = Token::FALSE_TOKEN; | 121 type = Token::FALSE_TOKEN; |
| 122 } else if (type == Token::UNCLASSIFIED_COMMENT) { |
| 123 if (AtStartOfLine(token_begin)) |
| 124 type = Token::LINE_COMMENT; |
| 125 else |
| 126 type = Token::SUFFIX_COMMENT; |
121 } | 127 } |
122 | 128 |
123 // TODO(brettw) This just strips comments from the token stream. This | 129 tokens_.push_back(Token(location, type, token_value)); |
124 // is probably wrong, they should be removed at a later stage so we can | |
125 // do things like rewrite the file. But this makes the parser simpler and | |
126 // is OK for now. | |
127 if (type != Token::COMMENT) | |
128 tokens_.push_back(Token(location, type, token_value)); | |
129 } | 130 } |
130 if (err_->has_error()) | 131 if (err_->has_error()) |
131 tokens_.clear(); | 132 tokens_.clear(); |
132 return tokens_; | 133 return tokens_; |
133 } | 134 } |
134 | 135 |
135 // static | 136 // static |
136 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) { | 137 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) { |
137 DCHECK_GT(n, 0); | 138 DCHECK_GT(n, 0); |
138 | 139 |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
192 return Token::LEFT_BRACE; | 193 return Token::LEFT_BRACE; |
193 if (next_char == '}') | 194 if (next_char == '}') |
194 return Token::RIGHT_BRACE; | 195 return Token::RIGHT_BRACE; |
195 | 196 |
196 if (next_char == '.') | 197 if (next_char == '.') |
197 return Token::DOT; | 198 return Token::DOT; |
198 if (next_char == ',') | 199 if (next_char == ',') |
199 return Token::COMMA; | 200 return Token::COMMA; |
200 | 201 |
201 if (next_char == '#') | 202 if (next_char == '#') |
202 return Token::COMMENT; | 203 return Token::UNCLASSIFIED_COMMENT; |
203 | 204 |
204 // For the case of '-' differentiate between a negative number and anything | 205 // For the case of '-' differentiate between a negative number and anything |
205 // else. | 206 // else. |
206 if (next_char == '-') { | 207 if (next_char == '-') { |
207 if (!CanIncrement()) | 208 if (!CanIncrement()) |
208 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of | 209 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of |
209 // file. | 210 // file. |
210 char following_char = input_[cur_ + 1]; | 211 char following_char = input_[cur_ + 1]; |
211 if (IsAsciiDigit(following_char)) | 212 if (IsAsciiDigit(following_char)) |
212 return Token::INTEGER; | 213 return Token::INTEGER; |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
278 case Token::RIGHT_BRACKET: | 279 case Token::RIGHT_BRACKET: |
279 case Token::LEFT_BRACE: | 280 case Token::LEFT_BRACE: |
280 case Token::RIGHT_BRACE: | 281 case Token::RIGHT_BRACE: |
281 case Token::LEFT_PAREN: | 282 case Token::LEFT_PAREN: |
282 case Token::RIGHT_PAREN: | 283 case Token::RIGHT_PAREN: |
283 case Token::DOT: | 284 case Token::DOT: |
284 case Token::COMMA: | 285 case Token::COMMA: |
285 Advance(); // All are one char. | 286 Advance(); // All are one char. |
286 break; | 287 break; |
287 | 288 |
288 case Token::COMMENT: | 289 case Token::UNCLASSIFIED_COMMENT: |
289 // Eat to EOL. | 290 // Eat to EOL. |
290 while (!at_end() && !IsCurrentNewline()) | 291 while (!at_end() && !IsCurrentNewline()) |
291 Advance(); | 292 Advance(); |
292 break; | 293 break; |
293 | 294 |
294 case Token::INVALID: | 295 case Token::INVALID: |
295 default: | 296 default: |
296 *err_ = Err(location, "Everything is all messed up", | 297 *err_ = Err(location, "Everything is all messed up", |
297 "Please insert system disk in drive A: and press any key."); | 298 "Please insert system disk in drive A: and press any key."); |
298 NOTREACHED(); | 299 NOTREACHED(); |
299 return; | 300 return; |
300 } | 301 } |
301 } | 302 } |
302 | 303 |
| 304 bool Tokenizer::AtStartOfLine(size_t location) const { |
| 305 while (location > 0) { |
| 306 --location; |
| 307 char c = input_[location]; |
| 308 if (c == '\n') |
| 309 return true; |
| 310 if (c != ' ') |
| 311 return false; |
| 312 } |
| 313 return true; |
| 314 } |
| 315 |
303 bool Tokenizer::IsCurrentWhitespace() const { | 316 bool Tokenizer::IsCurrentWhitespace() const { |
304 DCHECK(!at_end()); | 317 DCHECK(!at_end()); |
305 char c = input_[cur_]; | 318 char c = input_[cur_]; |
306 // Note that tab (0x09) is illegal. | 319 // Note that tab (0x09), vertical tab (0x0B), and formfeed (0x0C) are illegal. |
307 return c == 0x0A || c == 0x0B || c == 0x0C || c == 0x0D || c == 0x20; | 320 return c == 0x0A || c == 0x0D || c == 0x20; |
308 } | 321 } |
309 | 322 |
310 bool Tokenizer::IsCurrentStringTerminator(char quote_char) const { | 323 bool Tokenizer::IsCurrentStringTerminator(char quote_char) const { |
311 DCHECK(!at_end()); | 324 DCHECK(!at_end()); |
312 if (cur_char() != quote_char) | 325 if (cur_char() != quote_char) |
313 return false; | 326 return false; |
314 | 327 |
315 // Check for escaping. \" is not a string terminator, but \\" is. Count | 328 // Check for escaping. \" is not a string terminator, but \\" is. Count |
316 // the number of preceding backslashes. | 329 // the number of preceding backslashes. |
317 int num_backslashes = 0; | 330 int num_backslashes = 0; |
(...skipping 14 matching lines...) Expand all Loading... |
332 if (IsCurrentNewline()) { | 345 if (IsCurrentNewline()) { |
333 line_number_++; | 346 line_number_++; |
334 char_in_line_ = 1; | 347 char_in_line_ = 1; |
335 } else { | 348 } else { |
336 char_in_line_++; | 349 char_in_line_++; |
337 } | 350 } |
338 cur_++; | 351 cur_++; |
339 } | 352 } |
340 | 353 |
341 Location Tokenizer::GetCurrentLocation() const { | 354 Location Tokenizer::GetCurrentLocation() const { |
342 return Location(input_file_, line_number_, char_in_line_); | 355 return Location( |
| 356 input_file_, line_number_, char_in_line_, static_cast<int>(cur_)); |
343 } | 357 } |
344 | 358 |
345 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const { | 359 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const { |
346 std::string help; | 360 std::string help; |
347 if (cur_char() == ';') { | 361 if (cur_char() == ';') { |
348 // Semicolon. | 362 // Semicolon. |
349 help = "Semicolons are not needed, delete this one."; | 363 help = "Semicolons are not needed, delete this one."; |
350 } else if (cur_char() == '\t') { | 364 } else if (cur_char() == '\t') { |
351 // Tab. | 365 // Tab. |
352 help = "You got a tab character in here. Tabs are evil. " | 366 help = "You got a tab character in here. Tabs are evil. " |
353 "Convert to spaces."; | 367 "Convert to spaces."; |
354 } else if (cur_char() == '/' && cur_ + 1 < input_.size() && | 368 } else if (cur_char() == '/' && cur_ + 1 < input_.size() && |
355 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) { | 369 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) { |
356 // Different types of comments. | 370 // Different types of comments. |
357 help = "Comments should start with # instead"; | 371 help = "Comments should start with # instead"; |
358 } else { | 372 } else { |
359 help = "I have no idea what this is."; | 373 help = "I have no idea what this is."; |
360 } | 374 } |
361 | 375 |
362 return Err(location, "Invalid token.", help); | 376 return Err(location, "Invalid token.", help); |
363 } | 377 } |
OLD | NEW |