Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(152)

Unified Diff: trunk/src/tools/gn/tokenizer.cc

Issue 21084010: Revert 214254 "Add initial prototype for the GN meta-buildsystem." (Closed) Base URL: svn://svn.chromium.org/chrome/
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « trunk/src/tools/gn/tokenizer.h ('k') | trunk/src/tools/gn/tokenizer_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: trunk/src/tools/gn/tokenizer.cc
===================================================================
--- trunk/src/tools/gn/tokenizer.cc (revision 214322)
+++ trunk/src/tools/gn/tokenizer.cc (working copy)
@@ -1,309 +0,0 @@
-// Copyright (c) 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "tools/gn/tokenizer.h"
-
-#include "base/logging.h"
-#include "tools/gn/input_file.h"
-
-namespace {
-
-// Character-class helpers used by the tokenizer. All of them look at a single
-// byte and have no state.
-
-// Returns true if |c| may continue an integer literal, i.e. it is an ASCII
-// decimal digit. A leading '-' is recognized separately when the token is
-// classified and is consumed before this predicate is consulted, so a minus
-// sign must not be accepted here: doing so would glue "5-2" or "5-=" into a
-// single bogus integer token.
-bool IsNumberChar(char c) {
-  return c >= '0' && c <= '9';
-}
-
-// Returns true if |c| can be the first character of a two-character operator.
-bool CouldBeTwoCharOperatorBegin(char c) {
-  return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' ||
-         c == '+' || c == '|' || c == '&';
-}
-
-// Returns true if |c| can be the second character of a two-character
-// operator.
-bool CouldBeTwoCharOperatorEnd(char c) {
-  return c == '=' || c == '|' || c == '&';
-}
-
-// Returns true if |c| on its own can be an operator.
-bool CouldBeOneCharOperator(char c) {
-  return c == '=' || c == '<' || c == '>' || c == '+' || c == '!' ||
-         c == ':' || c == '|' || c == '&' || c == '-';
-}
-
-// Returns true if |c| can begin any operator, one or two characters long.
-bool CouldBeOperator(char c) {
-  return CouldBeOneCharOperator(c) || CouldBeTwoCharOperatorBegin(c);
-}
-
-// Returns true if |c| is the list separator (a comma).
-bool IsSeparatorChar(char c) {
-  return c == ',';
-}
-
-// Returns true if |c| is an opening or closing paren, bracket or brace.
-bool IsScoperChar(char c) {
-  return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}';
-}
-
-}  // namespace
-
-// Builds a tokenizer over the contents of |input_file|. Scanning starts at
-// byte offset 0, which is reported as line 1, character 1. |err| is the
-// object that tokenization errors are written to.
-Tokenizer::Tokenizer(const InputFile* input_file, Err* err)
-    : input_file_(input_file),
-      input_(input_file->contents()),
-      err_(err),
-      cur_(0),
-      line_number_(1),
-      char_in_line_(1) {}
-
-// No explicit cleanup is performed.
-Tokenizer::~Tokenizer() {}
-
-// static
-// Convenience entry point: tokenizes |input_file| with a temporary Tokenizer
-// and returns whatever Run() produces. Errors are written to |err|.
-std::vector<Token> Tokenizer::Tokenize(const InputFile* input_file, Err* err) {
-  return Tokenizer(input_file, err).Run();
-}
-
-// Scans the whole input and returns the tokens found, in order. Comment
-// tokens are recognized but dropped. If an error has been recorded by the
-// time scanning stops, the returned vector is empty.
-std::vector<Token> Tokenizer::Run() {
-  std::vector<Token> result;
-  for (;;) {
-    if (done())
-      break;
-    AdvanceToNextToken();
-    if (done())
-      break;
-
-    const Location start = GetCurrentLocation();
-    const Token::Type kind = ClassifyCurrent();
-    if (kind == Token::INVALID) {
-      *err_ = GetErrorForInvalidToken(start);
-      break;
-    }
-
-    const size_t begin_offset = cur_;
-    AdvanceToEndOfToken(start, kind);
-    if (has_error())
-      break;
-
-    // TODO(brettw) Comments are simply discarded here. That is probably the
-    // wrong layer: removing them later would allow things like rewriting the
-    // file. Dropping them now keeps the parser simpler and is OK for now.
-    if (kind == Token::COMMENT)
-      continue;
-
-    const size_t length = cur_ - begin_offset;
-    result.push_back(Token(
-        start,
-        kind,
-        base::StringPiece(input_.data() + begin_offset, length)));
-  }
-
-  if (err_->has_error())
-    result.clear();
-  return result;
-}
-
-// static
-// Returns the byte offset in |buf| at which the 1-based line |n| begins.
-// Line 1 always begins at offset 0. If |buf| has fewer than |n| lines the
-// result is static_cast<size_t>(-1).
-size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) {
-  DCHECK(n > 0);
-
-  if (n == 1)
-    return 0;
-
-  int line = 1;
-  for (size_t offset = 0; offset < buf.size(); ++offset) {
-    if (!IsNewline(buf, offset))
-      continue;
-    // The next line starts on the byte right after the newline.
-    if (++line == n)
-      return offset + 1;
-  }
-  return static_cast<size_t>(-1);
-}
-
-// static
-// Returns true if the byte at |offset| in |buffer| is a line feed. |offset|
-// must be within the buffer.
-bool Tokenizer::IsNewline(const base::StringPiece& buffer, size_t offset) {
-  DCHECK(offset < buffer.size());
-  // Only '\n' is recognized; other line ending styles may need handling here.
-  const char ch = buffer[offset];
-  return ch == '\n';
-}
-
-
-// Skips over whitespace, stopping at the first non-whitespace character or
-// at the end of the input.
-void Tokenizer::AdvanceToNextToken() {
-  for (;;) {
-    if (at_end() || !IsCurrentWhitespace())
-      return;
-    Advance();
-  }
-}
-
-// Decides what kind of token starts at the current position by looking at
-// the current character (and, for '-', the one after it). Must not be called
-// at the end of the input. Returns Token::INVALID when the character cannot
-// start any token.
-Token::Type Tokenizer::ClassifyCurrent() const {
-  DCHECK(!at_end());
-  const char c = cur_char();
-  const bool is_minus = (c == '-');
-
-  if (c >= '0' && c <= '9')
-    return Token::INTEGER;
-  if (c == '"')
-    return Token::STRING;
-
-  // '-' is deliberately excluded here and resolved at the bottom.
-  if (!is_minus && CouldBeOperator(c))
-    return Token::OPERATOR;
-
-  if (IsIdentifierFirstChar(c))
-    return Token::IDENTIFIER;
-  if (IsScoperChar(c))
-    return Token::SCOPER;
-  if (IsSeparatorChar(c))
-    return Token::SEPARATOR;
-  if (c == '#')
-    return Token::COMMENT;
-
-  if (!is_minus)
-    return Token::INVALID;
-
-  // A minus directly followed by a digit starts a negative number. In every
-  // other case, including a minus as the very last character of the file, it
-  // is an operator.
-  if (CanIncrement()) {
-    const char after = input_[cur_ + 1];
-    if (after >= '0' && after <= '9')
-      return Token::INTEGER;
-  }
-  return Token::OPERATOR;
-}
-
-// Moves the cursor from the first character of a token of kind |type| to the
-// position just past its last character. |location| is where the token
-// started and is used only for error reporting. On malformed input an error
-// is stored in |*err_|; the caller is expected to check for it.
-void Tokenizer::AdvanceToEndOfToken(const Location& location,
-                                    Token::Type type) {
-  switch (type) {
-    case Token::INTEGER:
-      // The first character (a digit or a leading '-') is always consumed;
-      // after that keep going while IsNumberChar() accepts the character.
-      do {
-        Advance();
-      } while (!at_end() && IsNumberChar(cur_char()));
-      if (!at_end()) {
-        // Require the char after a number to be some kind of space, scope,
-        // or operator.
-        char c = cur_char();
-        if (!IsCurrentWhitespace() && !CouldBeOperator(c) &&
-            !IsScoperChar(c) && !IsSeparatorChar(c)) {
-          *err_ = Err(GetCurrentLocation(),
-                      "This is not a valid number.",
-                      "Learn to count.");
-          // Highlight the number.
-          err_->AppendRange(LocationRange(location, GetCurrentLocation()));
-        }
-      }
-      break;
-
-    case Token::STRING: {
-      char initial = cur_char();
-      Advance();  // Advance past initial "
-      for (;;) {
-        if (at_end()) {
-          *err_ = Err(LocationRange(location, GetCurrentLocation()),
-                      "Unterminated string literal.",
-                      "Don't leave me hanging like this!");
-          break;
-        }
-        if (IsCurrentStringTerminator(initial)) {
-          Advance();  // Skip past last "
-          break;
-        }
-        if (IsCurrentNewline()) {
-          *err_ = Err(LocationRange(location, GetCurrentLocation()),
-                      "Newline in string constant.");
-          // Stop at the first offending newline. Scanning on would let a
-          // later newline, or the end-of-file check above, overwrite this
-          // more specific diagnostic.
-          break;
-        }
-        Advance();
-      }
-      break;
-    }
-
-    case Token::OPERATOR:
-      // Some operators are two characters, some are one. Consume an extra
-      // character when the current and next characters can form a pair.
-      if (CouldBeTwoCharOperatorBegin(cur_char())) {
-        if (CanIncrement() && CouldBeTwoCharOperatorEnd(input_[cur_ + 1]))
-          Advance();
-      }
-      Advance();
-      break;
-
-    case Token::IDENTIFIER:
-      while (!at_end() && IsIdentifierContinuingChar(cur_char()))
-        Advance();
-      break;
-
-    case Token::SCOPER:
-    case Token::SEPARATOR:
-      Advance();  // All are one char.
-      break;
-
-    case Token::COMMENT:
-      // Eat to EOL. The newline itself is left for the whitespace skipper.
-      while (!at_end() && !IsCurrentNewline())
-        Advance();
-      break;
-
-    case Token::INVALID:
-      // Callers are not supposed to pass INVALID here.
-      *err_ = Err(location, "Everything is all messed up",
-                  "Please insert system disk in drive A: and press any key.");
-      NOTREACHED();
-      return;
-  }
-}
-
-// Returns true if the current character is LF, VT, FF, CR or a space. Must
-// not be called at the end of the input.
-bool Tokenizer::IsCurrentWhitespace() const {
-  DCHECK(!at_end());
-  switch (input_[cur_]) {
-    // Note that tab (0x09) is illegal, so it is not listed.
-    case 0x0A:
-    case 0x0B:
-    case 0x0C:
-    case 0x0D:
-    case 0x20:
-      return true;
-    default:
-      return false;
-  }
-}
-
-// Returns true if the current character is an unescaped |quote_char|, i.e.
-// one that ends the string literal being scanned. Must not be called at the
-// end of the input.
-bool Tokenizer::IsCurrentStringTerminator(char quote_char) const {
-  DCHECK(!at_end());
-  if (cur_char() != quote_char)
-    return false;
-
-  // Check for escaping. \" is not a string terminator, but \\" is. Count
-  // the number of preceding backslashes. The index stays a size_t so the
-  // offset is never narrowed to int.
-  size_t num_backslashes = 0;
-  for (size_t i = cur_; i > 0 && input_[i - 1] == '\\'; --i)
-    ++num_backslashes;
-
-  // Even backslashes mean that they were escaping each other and don't count
-  // as escaping this quote.
-  return (num_backslashes % 2) == 0;
-}
-
-// Returns true if the character at the cursor is a newline, as decided by
-// IsNewline().
-bool Tokenizer::IsCurrentNewline() const {
-  return Tokenizer::IsNewline(input_, cur_);
-}
-
-// Moves the cursor forward by one byte and keeps the line/character counters
-// in step: stepping over a newline starts a new line at character 1, any
-// other byte bumps the character position. Must not be called at the end of
-// the input.
-void Tokenizer::Advance() {
-  DCHECK(cur_ < input_.size());
-  const bool leaving_newline = IsCurrentNewline();
-  ++cur_;
-  if (leaving_newline) {
-    ++line_number_;
-    char_in_line_ = 1;
-    return;
-  }
-  ++char_in_line_;
-}
-
-// Packages the input file together with the current line number and
-// character-in-line position into a Location.
-Location Tokenizer::GetCurrentLocation() const {
-  const Location here(input_file_, line_number_, char_in_line_);
-  return here;
-}
-
-// Builds the "Invalid token." error for the character at the cursor, with a
-// help message tailored to a few common mistakes (semicolons, tabs, C-style
-// comment openers). |location| is the position reported in the error.
-Err Tokenizer::GetErrorForInvalidToken(const Location& location) const {
-  const char bad = cur_char();
-
-  // Fallback used when none of the recognizable mistakes below match.
-  const char* help_text = "I have no idea what this is.";
-
-  if (bad == ';') {
-    help_text = "Semicolons are not needed, delete this one.";
-  } else if (bad == '\t') {
-    help_text = "You got a tab character in here. "
-                "Tabs are evil. Convert to spaces.";
-  } else if (bad == '/') {
-    // Only a slash followed by '/' or '*' looks like a comment opener.
-    const bool has_following = cur_ + 1 < input_.size();
-    if (has_following &&
-        (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) {
-      help_text = "Comments should start with # instead";
-    }
-  }
-
-  return Err(location, "Invalid token.", std::string(help_text));
-}
« no previous file with comments | « trunk/src/tools/gn/tokenizer.h ('k') | trunk/src/tools/gn/tokenizer_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698