| Index: tools/gn/tokenizer.cc
|
| diff --git a/tools/gn/tokenizer.cc b/tools/gn/tokenizer.cc
|
| index 87a4d909f35343c4b3f79fa0cc3cd42b636554eb..b4f364f2a6e382125abcd5a9e852cc6efd8afa83 100644
|
| --- a/tools/gn/tokenizer.cc
|
| +++ b/tools/gn/tokenizer.cc
|
| @@ -5,6 +5,7 @@
|
| #include "tools/gn/tokenizer.h"
|
|
|
| #include "base/logging.h"
|
| +#include "base/strings/string_util.h"
|
| #include "tools/gn/input_file.h"
|
|
|
| namespace {
|
| @@ -107,9 +108,9 @@ std::vector<Token> Tokenizer::Run() {
|
| base::StringPiece token_value(&input_.data()[token_begin],
|
| token_end - token_begin);
|
|
|
| - if (type == Token::UNCLASSIFIED_OPERATOR)
|
| + if (type == Token::UNCLASSIFIED_OPERATOR) {
|
| type = GetSpecificOperatorType(token_value);
|
| - if (type == Token::IDENTIFIER) {
|
| + } else if (type == Token::IDENTIFIER) {
|
| if (token_value == "if")
|
| type = Token::IF;
|
| else if (token_value == "else")
|
| @@ -118,14 +119,14 @@ std::vector<Token> Tokenizer::Run() {
|
| type = Token::TRUE_TOKEN;
|
| else if (token_value == "false")
|
| type = Token::FALSE_TOKEN;
|
| + } else if (type == Token::UNCLASSIFIED_COMMENT) {
|
| + if (AtStartOfLine(token_begin))
|
| + type = Token::LINE_COMMENT;
|
| + else
|
| + type = Token::SUFFIX_COMMENT;
|
| }
|
|
|
| - // TODO(brettw) This just strips comments from the token stream. This
|
| - // is probably wrong, they should be removed at a later stage so we can
|
| - // do things like rewrite the file. But this makes the parser simpler and
|
| - // is OK for now.
|
| - if (type != Token::COMMENT)
|
| - tokens_.push_back(Token(location, type, token_value));
|
| + tokens_.push_back(Token(location, type, token_value));
|
| }
|
| if (err_->has_error())
|
| tokens_.clear();
|
| @@ -199,7 +200,7 @@ Token::Type Tokenizer::ClassifyCurrent() const {
|
| return Token::COMMA;
|
|
|
| if (next_char == '#')
|
| - return Token::COMMENT;
|
| + return Token::UNCLASSIFIED_COMMENT;
|
|
|
| // For the case of '-' differentiate between a negative number and anything
|
| // else.
|
| @@ -285,7 +286,7 @@ void Tokenizer::AdvanceToEndOfToken(const Location& location,
|
| Advance(); // All are one char.
|
| break;
|
|
|
| - case Token::COMMENT:
|
| + case Token::UNCLASSIFIED_COMMENT:
|
| // Eat to EOL.
|
| while (!at_end() && !IsCurrentNewline())
|
| Advance();
|
| @@ -300,11 +301,23 @@ void Tokenizer::AdvanceToEndOfToken(const Location& location,
|
| }
|
| }
|
|
|
| +bool Tokenizer::AtStartOfLine(size_t location) const {  // True if only ' ' chars separate |location| from the previous '\n' or the start of input_.
|
| + while (location > 0) {  // Scan backwards, starting with the character just before |location|.
|
| + --location;
|
| + char c = input_[location];
|
| + if (c == '\n')  // Hit the preceding line break having seen nothing but spaces.
|
| + return true;
|
| + if (c != ' ')  // Only ' ' is skipped; any other character (e.g. '\r') ends the scan with false.
|
| + return false;
|
| + }
|
| + return true;  // Reached index 0 without seeing a non-space character or a '\n'.
|
| +}
|
| +
|
| bool Tokenizer::IsCurrentWhitespace() const {
|
| DCHECK(!at_end());
|
| char c = input_[cur_];
|
| - // Note that tab (0x09) is illegal.
|
| - return c == 0x0A || c == 0x0B || c == 0x0C || c == 0x0D || c == 0x20;
|
| + // Note that tab (0x09), vertical tab (0x0B), and formfeed (0x0C) are illegal.
|
| + return c == 0x0A || c == 0x0D || c == 0x20;  // Accepts only LF, CR, and space.
|
| }
|
|
|
| bool Tokenizer::IsCurrentStringTerminator(char quote_char) const {
|
| @@ -339,7 +352,8 @@ void Tokenizer::Advance() {
|
| }
|
|
|
| Location Tokenizer::GetCurrentLocation() const {
|
| - return Location(input_file_, line_number_, char_in_line_);
|
| + return Location(  // Adds a fourth argument: cur_ (the index used with input_) cast to int.
|
| + input_file_, line_number_, char_in_line_, static_cast<int>(cur_));
|
| }
|
|
|
| Err Tokenizer::GetErrorForInvalidToken(const Location& location) const {
|
|
|