| OLD | NEW |
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "tools/gn/tokenizer.h" | 5 #include "tools/gn/tokenizer.h" |
| 6 | 6 |
| 7 #include "base/logging.h" | 7 #include "base/logging.h" |
| 8 #include "base/strings/string_util.h" |
| 8 #include "tools/gn/input_file.h" | 9 #include "tools/gn/input_file.h" |
| 9 | 10 |
| 10 namespace { | 11 namespace { |
| 11 | 12 |
| 12 bool CouldBeTwoCharOperatorBegin(char c) { | 13 bool CouldBeTwoCharOperatorBegin(char c) { |
| 13 return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' || | 14 return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' || |
| 14 c == '+' || c == '|' || c == '&'; | 15 c == '+' || c == '|' || c == '&'; |
| 15 } | 16 } |
| 16 | 17 |
| 17 bool CouldBeTwoCharOperatorEnd(char c) { | 18 bool CouldBeTwoCharOperatorEnd(char c) { |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 100 } | 101 } |
| 101 size_t token_begin = cur_; | 102 size_t token_begin = cur_; |
| 102 AdvanceToEndOfToken(location, type); | 103 AdvanceToEndOfToken(location, type); |
| 103 if (has_error()) | 104 if (has_error()) |
| 104 break; | 105 break; |
| 105 size_t token_end = cur_; | 106 size_t token_end = cur_; |
| 106 | 107 |
| 107 base::StringPiece token_value(&input_.data()[token_begin], | 108 base::StringPiece token_value(&input_.data()[token_begin], |
| 108 token_end - token_begin); | 109 token_end - token_begin); |
| 109 | 110 |
| 110 if (type == Token::UNCLASSIFIED_OPERATOR) | 111 if (type == Token::UNCLASSIFIED_OPERATOR) { |
| 111 type = GetSpecificOperatorType(token_value); | 112 type = GetSpecificOperatorType(token_value); |
| 112 if (type == Token::IDENTIFIER) { | 113 } else if (type == Token::IDENTIFIER) { |
| 113 if (token_value == "if") | 114 if (token_value == "if") |
| 114 type = Token::IF; | 115 type = Token::IF; |
| 115 else if (token_value == "else") | 116 else if (token_value == "else") |
| 116 type = Token::ELSE; | 117 type = Token::ELSE; |
| 117 else if (token_value == "true") | 118 else if (token_value == "true") |
| 118 type = Token::TRUE_TOKEN; | 119 type = Token::TRUE_TOKEN; |
| 119 else if (token_value == "false") | 120 else if (token_value == "false") |
| 120 type = Token::FALSE_TOKEN; | 121 type = Token::FALSE_TOKEN; |
| 122 } else if (type == Token::UNCLASSIFIED_COMMENT) { |
| 123 if (AtStartOfLine(token_begin)) |
| 124 type = Token::LINE_COMMENT; |
| 125 else |
| 126 type = Token::SUFFIX_COMMENT; |
| 121 } | 127 } |
| 122 | 128 |
| 123 // TODO(brettw) This just strips comments from the token stream. This | 129 tokens_.push_back(Token(location, type, token_value)); |
| 124 // is probably wrong, they should be removed at a later stage so we can | |
| 125 // do things like rewrite the file. But this makes the parser simpler and | |
| 126 // is OK for now. | |
| 127 if (type != Token::COMMENT) | |
| 128 tokens_.push_back(Token(location, type, token_value)); | |
| 129 } | 130 } |
| 130 if (err_->has_error()) | 131 if (err_->has_error()) |
| 131 tokens_.clear(); | 132 tokens_.clear(); |
| 132 return tokens_; | 133 return tokens_; |
| 133 } | 134 } |
| 134 | 135 |
| 135 // static | 136 // static |
| 136 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) { | 137 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) { |
| 137 DCHECK_GT(n, 0); | 138 DCHECK_GT(n, 0); |
| 138 | 139 |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 192 return Token::LEFT_BRACE; | 193 return Token::LEFT_BRACE; |
| 193 if (next_char == '}') | 194 if (next_char == '}') |
| 194 return Token::RIGHT_BRACE; | 195 return Token::RIGHT_BRACE; |
| 195 | 196 |
| 196 if (next_char == '.') | 197 if (next_char == '.') |
| 197 return Token::DOT; | 198 return Token::DOT; |
| 198 if (next_char == ',') | 199 if (next_char == ',') |
| 199 return Token::COMMA; | 200 return Token::COMMA; |
| 200 | 201 |
| 201 if (next_char == '#') | 202 if (next_char == '#') |
| 202 return Token::COMMENT; | 203 return Token::UNCLASSIFIED_COMMENT; |
| 203 | 204 |
| 204 // For the case of '-' differentiate between a negative number and anything | 205 // For the case of '-' differentiate between a negative number and anything |
| 205 // else. | 206 // else. |
| 206 if (next_char == '-') { | 207 if (next_char == '-') { |
| 207 if (!CanIncrement()) | 208 if (!CanIncrement()) |
| 208 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of | 209 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of |
| 209 // file. | 210 // file. |
| 210 char following_char = input_[cur_ + 1]; | 211 char following_char = input_[cur_ + 1]; |
| 211 if (IsAsciiDigit(following_char)) | 212 if (IsAsciiDigit(following_char)) |
| 212 return Token::INTEGER; | 213 return Token::INTEGER; |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 278 case Token::RIGHT_BRACKET: | 279 case Token::RIGHT_BRACKET: |
| 279 case Token::LEFT_BRACE: | 280 case Token::LEFT_BRACE: |
| 280 case Token::RIGHT_BRACE: | 281 case Token::RIGHT_BRACE: |
| 281 case Token::LEFT_PAREN: | 282 case Token::LEFT_PAREN: |
| 282 case Token::RIGHT_PAREN: | 283 case Token::RIGHT_PAREN: |
| 283 case Token::DOT: | 284 case Token::DOT: |
| 284 case Token::COMMA: | 285 case Token::COMMA: |
| 285 Advance(); // All are one char. | 286 Advance(); // All are one char. |
| 286 break; | 287 break; |
| 287 | 288 |
| 288 case Token::COMMENT: | 289 case Token::UNCLASSIFIED_COMMENT: |
| 289 // Eat to EOL. | 290 // Eat to EOL. |
| 290 while (!at_end() && !IsCurrentNewline()) | 291 while (!at_end() && !IsCurrentNewline()) |
| 291 Advance(); | 292 Advance(); |
| 292 break; | 293 break; |
| 293 | 294 |
| 294 case Token::INVALID: | 295 case Token::INVALID: |
| 295 default: | 296 default: |
| 296 *err_ = Err(location, "Everything is all messed up", | 297 *err_ = Err(location, "Everything is all messed up", |
| 297 "Please insert system disk in drive A: and press any key."); | 298 "Please insert system disk in drive A: and press any key."); |
| 298 NOTREACHED(); | 299 NOTREACHED(); |
| 299 return; | 300 return; |
| 300 } | 301 } |
| 301 } | 302 } |
| 302 | 303 |
| 304 bool Tokenizer::AtStartOfLine(size_t location) const { |
| 305 while (location > 0) { |
| 306 --location; |
| 307 char c = input_[location]; |
| 308 if (c == '\n') |
| 309 return true; |
| 310 if (c != ' ') |
| 311 return false; |
| 312 } |
| 313 return true; |
| 314 } |
| 315 |
| 303 bool Tokenizer::IsCurrentWhitespace() const { | 316 bool Tokenizer::IsCurrentWhitespace() const { |
| 304 DCHECK(!at_end()); | 317 DCHECK(!at_end()); |
| 305 char c = input_[cur_]; | 318 char c = input_[cur_]; |
| 306 // Note that tab (0x09) is illegal. | 319 // Note that tab (0x09), vertical tab (0x0B), and formfeed (0x0C) are illegal. |
| 307 return c == 0x0A || c == 0x0B || c == 0x0C || c == 0x0D || c == 0x20; | 320 return c == 0x0A || c == 0x0D || c == 0x20; |
| 308 } | 321 } |
| 309 | 322 |
| 310 bool Tokenizer::IsCurrentStringTerminator(char quote_char) const { | 323 bool Tokenizer::IsCurrentStringTerminator(char quote_char) const { |
| 311 DCHECK(!at_end()); | 324 DCHECK(!at_end()); |
| 312 if (cur_char() != quote_char) | 325 if (cur_char() != quote_char) |
| 313 return false; | 326 return false; |
| 314 | 327 |
| 315 // Check for escaping. \" is not a string terminator, but \\" is. Count | 328 // Check for escaping. \" is not a string terminator, but \\" is. Count |
| 316 // the number of preceding backslashes. | 329 // the number of preceding backslashes. |
| 317 int num_backslashes = 0; | 330 int num_backslashes = 0; |
| (...skipping 14 matching lines...) Expand all Loading... |
| 332 if (IsCurrentNewline()) { | 345 if (IsCurrentNewline()) { |
| 333 line_number_++; | 346 line_number_++; |
| 334 char_in_line_ = 1; | 347 char_in_line_ = 1; |
| 335 } else { | 348 } else { |
| 336 char_in_line_++; | 349 char_in_line_++; |
| 337 } | 350 } |
| 338 cur_++; | 351 cur_++; |
| 339 } | 352 } |
| 340 | 353 |
| 341 Location Tokenizer::GetCurrentLocation() const { | 354 Location Tokenizer::GetCurrentLocation() const { |
| 342 return Location(input_file_, line_number_, char_in_line_); | 355 return Location( |
| 356 input_file_, line_number_, char_in_line_, static_cast<int>(cur_)); |
| 343 } | 357 } |
| 344 | 358 |
| 345 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const { | 359 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const { |
| 346 std::string help; | 360 std::string help; |
| 347 if (cur_char() == ';') { | 361 if (cur_char() == ';') { |
| 348 // Semicolon. | 362 // Semicolon. |
| 349 help = "Semicolons are not needed, delete this one."; | 363 help = "Semicolons are not needed, delete this one."; |
| 350 } else if (cur_char() == '\t') { | 364 } else if (cur_char() == '\t') { |
| 351 // Tab. | 365 // Tab. |
| 352 help = "You got a tab character in here. Tabs are evil. " | 366 help = "You got a tab character in here. Tabs are evil. " |
| 353 "Convert to spaces."; | 367 "Convert to spaces."; |
| 354 } else if (cur_char() == '/' && cur_ + 1 < input_.size() && | 368 } else if (cur_char() == '/' && cur_ + 1 < input_.size() && |
| 355 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) { | 369 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) { |
| 356 // Different types of comments. | 370 // Different types of comments. |
| 357 help = "Comments should start with # instead"; | 371 help = "Comments should start with # instead"; |
| 358 } else { | 372 } else { |
| 359 help = "I have no idea what this is."; | 373 help = "I have no idea what this is."; |
| 360 } | 374 } |
| 361 | 375 |
| 362 return Err(location, "Invalid token.", help); | 376 return Err(location, "Invalid token.", help); |
| 363 } | 377 } |
| OLD | NEW |