Chromium Code Reviews| Index: tools/gn/tokenizer.cc |
| diff --git a/tools/gn/tokenizer.cc b/tools/gn/tokenizer.cc |
| index 87a4d909f35343c4b3f79fa0cc3cd42b636554eb..ea7c55ab8b95ea10892bcb39d672ea7e5c844961 100644 |
| --- a/tools/gn/tokenizer.cc |
| +++ b/tools/gn/tokenizer.cc |
| @@ -5,6 +5,7 @@ |
| #include "tools/gn/tokenizer.h" |
| #include "base/logging.h" |
| +#include "base/strings/string_util.h" |
| #include "tools/gn/input_file.h" |
| namespace { |
| @@ -107,9 +108,9 @@ std::vector<Token> Tokenizer::Run() { |
| base::StringPiece token_value(&input_.data()[token_begin], |
| token_end - token_begin); |
| - if (type == Token::UNCLASSIFIED_OPERATOR) |
| + if (type == Token::UNCLASSIFIED_OPERATOR) { |
| type = GetSpecificOperatorType(token_value); |
| - if (type == Token::IDENTIFIER) { |
| + } else if (type == Token::IDENTIFIER) { |
| if (token_value == "if") |
| type = Token::IF; |
| else if (token_value == "else") |
| @@ -118,14 +119,22 @@ std::vector<Token> Tokenizer::Run() { |
| type = Token::TRUE_TOKEN; |
| else if (token_value == "false") |
| type = Token::FALSE_TOKEN; |
| + } else if (type == Token::UNCLASSIFIED_COMMENT) { |
| + // Scan back to the previous \n and trim. If only whitespace precedes |
| + // the comment, it is on a line by itself; otherwise it is a suffix comment. |
| + size_t newline_location = input_.find_last_of('\n', token_begin); |
| + base::StringPiece to_newline = input_.substr( |
| + newline_location + 1, token_begin - (newline_location + 1)); |
| + std::string trimmed; |
| + // TODO(scottmg): Should write TrimWhitespace for StringPiece. |
| + base::TrimWhitespace(to_newline.as_string(), base::TRIM_ALL, &trimmed); |
|
brettw
2014/09/23 21:33:15
I'd like to resolve this if possible, the tokenize[r …comment truncated in extraction]
scottmg
2014/09/23 22:15:37
Done. (As a special function, that makes more sense[, …comment truncated in extraction]
|
| + if (trimmed.empty()) |
| + type = Token::LINE_COMMENT; |
| + else |
| + type = Token::SUFFIX_COMMENT; |
| } |
| - // TODO(brettw) This just strips comments from the token stream. This |
| - // is probably wrong, they should be removed at a later stage so we can |
| - // do things like rewrite the file. But this makes the parser simpler and |
| - // is OK for now. |
| - if (type != Token::COMMENT) |
| - tokens_.push_back(Token(location, type, token_value)); |
| + tokens_.push_back(Token(location, type, token_value)); |
| } |
| if (err_->has_error()) |
| tokens_.clear(); |
| @@ -199,7 +208,7 @@ Token::Type Tokenizer::ClassifyCurrent() const { |
| return Token::COMMA; |
| if (next_char == '#') |
| - return Token::COMMENT; |
| + return Token::UNCLASSIFIED_COMMENT; |
| // For the case of '-' differentiate between a negative number and anything |
| // else. |
| @@ -285,7 +294,7 @@ void Tokenizer::AdvanceToEndOfToken(const Location& location, |
| Advance(); // All are one char. |
| break; |
| - case Token::COMMENT: |
| + case Token::UNCLASSIFIED_COMMENT: |
| // Eat to EOL. |
| while (!at_end() && !IsCurrentNewline()) |
| Advance(); |
| @@ -339,7 +348,7 @@ void Tokenizer::Advance() { |
| } |
| Location Tokenizer::GetCurrentLocation() const { |
| - return Location(input_file_, line_number_, char_in_line_); |
| + return Location(input_file_, line_number_, char_in_line_, cur_); |
| } |
| Err Tokenizer::GetErrorForInvalidToken(const Location& location) const { |