Index: tools/gn/tokenizer.cc |
diff --git a/tools/gn/tokenizer.cc b/tools/gn/tokenizer.cc |
index 87a4d909f35343c4b3f79fa0cc3cd42b636554eb..ea7c55ab8b95ea10892bcb39d672ea7e5c844961 100644 |
--- a/tools/gn/tokenizer.cc |
+++ b/tools/gn/tokenizer.cc |
@@ -5,6 +5,7 @@ |
#include "tools/gn/tokenizer.h" |
#include "base/logging.h" |
+#include "base/strings/string_util.h" |
#include "tools/gn/input_file.h" |
namespace { |
@@ -107,9 +108,9 @@ std::vector<Token> Tokenizer::Run() { |
base::StringPiece token_value(&input_.data()[token_begin], |
token_end - token_begin); |
- if (type == Token::UNCLASSIFIED_OPERATOR) |
+ if (type == Token::UNCLASSIFIED_OPERATOR) { |
type = GetSpecificOperatorType(token_value); |
- if (type == Token::IDENTIFIER) { |
+ } else if (type == Token::IDENTIFIER) { |
if (token_value == "if") |
type = Token::IF; |
else if (token_value == "else") |
@@ -118,14 +119,22 @@ std::vector<Token> Tokenizer::Run() { |
type = Token::TRUE_TOKEN; |
else if (token_value == "false") |
type = Token::FALSE_TOKEN; |
+ } else if (type == Token::UNCLASSIFIED_COMMENT) { |
+      // Scan back to the previous \n and trim the intervening text. If it's |
+      // only whitespace, the comment is on a line by itself; otherwise it's |
+      // a suffix comment. |
+ size_t newline_location = input_.find_last_of('\n', token_begin); |
+ base::StringPiece to_newline = input_.substr( |
+ newline_location + 1, token_begin - (newline_location + 1)); |
+ std::string trimmed; |
+ // TODO(scottmg): Should write TrimWhitespace for StringPiece. |
+ base::TrimWhitespace(to_newline.as_string(), base::TRIM_ALL, &trimmed); |
[Rietveld review thread — interleaved here by extraction; not part of the patch:
  brettw (2014/09/23 21:33:15): I'd like to resolve this if possible; the tokenize[r …message truncated]
  scottmg (2014/09/23 22:15:37): Done. (As a special function — that makes more sense[ …message truncated])]
|
+ if (trimmed.empty()) |
+ type = Token::LINE_COMMENT; |
+ else |
+ type = Token::SUFFIX_COMMENT; |
} |
- // TODO(brettw) This just strips comments from the token stream. This |
- // is probably wrong, they should be removed at a later stage so we can |
- // do things like rewrite the file. But this makes the parser simpler and |
- // is OK for now. |
- if (type != Token::COMMENT) |
- tokens_.push_back(Token(location, type, token_value)); |
+ tokens_.push_back(Token(location, type, token_value)); |
} |
if (err_->has_error()) |
tokens_.clear(); |
@@ -199,7 +208,7 @@ Token::Type Tokenizer::ClassifyCurrent() const { |
return Token::COMMA; |
if (next_char == '#') |
- return Token::COMMENT; |
+ return Token::UNCLASSIFIED_COMMENT; |
// For the case of '-' differentiate between a negative number and anything |
// else. |
@@ -285,7 +294,7 @@ void Tokenizer::AdvanceToEndOfToken(const Location& location, |
Advance(); // All are one char. |
break; |
- case Token::COMMENT: |
+ case Token::UNCLASSIFIED_COMMENT: |
// Eat to EOL. |
while (!at_end() && !IsCurrentNewline()) |
Advance(); |
@@ -339,7 +348,7 @@ void Tokenizer::Advance() { |
} |
Location Tokenizer::GetCurrentLocation() const { |
- return Location(input_file_, line_number_, char_in_line_); |
+ return Location(input_file_, line_number_, char_in_line_, cur_); |
} |
Err Tokenizer::GetErrorForInvalidToken(const Location& location) const { |