| Index: mojom/lexer.cc
|
| diff --git a/mojom/lexer.cc b/mojom/lexer.cc
|
| deleted file mode 100644
|
| index e55e2fbca0b2f5160a69e8b37257b74c2cd861bd..0000000000000000000000000000000000000000
|
| --- a/mojom/lexer.cc
|
| +++ /dev/null
|
| @@ -1,420 +0,0 @@
|
| -// Copyright 2015 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#include "mojom/lexer.h"
|
| -
|
| -#include <map>
|
| -#include <string>
|
| -
|
| -#include "base/lazy_instance.h"
|
| -
|
| -namespace mojo {
|
| -namespace mojom {
|
| -
|
| -namespace {
|
| -
|
| -class KeywordsDict {
|
| - public:
|
| - KeywordsDict();
|
| -
|
| - private:
|
| - std::map<std::string, mojom::TokenType> keywords_;
|
| - friend std::map<std::string, mojom::TokenType>& Keywords();
|
| -
|
| - DISALLOW_COPY_AND_ASSIGN(KeywordsDict);
|
| -};
|
| -static base::LazyInstance<KeywordsDict> g_keywords = LAZY_INSTANCE_INITIALIZER;
|
| -
|
| -std::map<std::string, mojom::TokenType>& Keywords() {
|
| - return g_keywords.Get().keywords_;
|
| -}
|
| -
|
| -KeywordsDict::KeywordsDict() {
|
| - keywords_["import"] = TokenType::IMPORT;
|
| - keywords_["module"] = TokenType::MODULE;
|
| - keywords_["struct"] = TokenType::STRUCT;
|
| - keywords_["union"] = TokenType::UNION;
|
| - keywords_["interface"] = TokenType::INTERFACE;
|
| - keywords_["enum"] = TokenType::ENUM;
|
| - keywords_["const"] = TokenType::CONST;
|
| - keywords_["true"] = TokenType::TRUE;
|
| - keywords_["false"] = TokenType::FALSE;
|
| - keywords_["default"] = TokenType::DEFAULT;
|
| -}
|
| -
|
// Locale-independent replacement for isalpha(): ASCII letters only.
bool IsAlpha(char c) {
  if (c >= 'a' && c <= 'z')
    return true;
  return c >= 'A' && c <= 'Z';
}
|
| -
|
// Locale-independent replacement for isdigit(): ASCII decimal digits only.
bool IsDigit(char c) {
  return c >= '0' && c <= '9';
}
|
| -
|
// True if |c| is an ASCII hexadecimal digit (locale-independent).
bool IsHexDigit(char c) {
  return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
         (c >= 'A' && c <= 'F');
}
|
| -
|
// Locale-independent replacement for isalnum(): ASCII letters and digits.
bool IsAlnum(char c) {
  return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
         (c >= 'A' && c <= 'Z');
}
|
| -
|
| -// MojomLexer tokenizes a mojom source file. It is NOT thread-safe.
|
| -class MojomLexer {
|
| - public:
|
| - explicit MojomLexer(const std::string& source);
|
| - ~MojomLexer();
|
| -
|
| - // Returns the list of tokens in the source file.
|
| - std::vector<Token> Tokenize();
|
| -
|
| - private:
|
| - // The GetNextToken.* functions all return true if they could find a token
|
| - // (even an error token) and false otherwise.
|
| - bool GetNextToken(Token* result);
|
| - bool GetNextTokenSingleChar(Token* result);
|
| - bool GetNextTokenEqualsOrResponse(Token* result);
|
| - bool GetNextTokenIdentifier(Token* result);
|
| - bool GetNextTokenDecConst(Token* result);
|
| - bool GetNextTokenHexConst(Token* result);
|
| - bool GetNextTokenOrdinal(Token* result);
|
| - bool GetNextTokenStringLiteral(Token* result);
|
| -
|
| - void ConsumeSkippable();
|
| - void ConsumeDigits();
|
| - void ConsumeEol();
|
| - void Consume(size_t num);
|
| -
|
| - bool eos(size_t offset_plus) {
|
| - return offset_ + offset_plus >= source_.size();
|
| - }
|
| -
|
| - const std::string source_;
|
| - size_t offset_;
|
| - size_t line_no_;
|
| - size_t offset_in_line_;
|
| -
|
| - DISALLOW_COPY_AND_ASSIGN(MojomLexer);
|
| -};
|
| -
|
| -std::vector<Token> MojomLexer::Tokenize() {
|
| - offset_ = 0;
|
| - line_no_ = 0;
|
| - offset_in_line_ = 0;
|
| -
|
| - std::vector<Token> result;
|
| - Token cur;
|
| - while (GetNextToken(&cur)) {
|
| - result.push_back(cur);
|
| -
|
| - // As soon as an error token is found, stop tokenizing.
|
| - if (cur.error()) {
|
| - break;
|
| - }
|
| - }
|
| -
|
| - return result;
|
| -}
|
| -
|
| -bool MojomLexer::GetNextToken(Token* result) {
|
| - // Skip all spaces which may be in front of the next token.
|
| - ConsumeSkippable();
|
| -
|
| - // If we found the end of the source signal that is so.
|
| - if (eos(0))
|
| - return false;
|
| -
|
| - // Save the current position in the source code.
|
| - result->char_pos = offset_;
|
| - result->line_no = line_no_;
|
| - result->line_pos = offset_in_line_;
|
| -
|
| - if (GetNextTokenSingleChar(result) || GetNextTokenEqualsOrResponse(result) ||
|
| - GetNextTokenIdentifier(result) || GetNextTokenHexConst(result) ||
|
| - GetNextTokenDecConst(result) || GetNextTokenDecConst(result) ||
|
| - GetNextTokenOrdinal(result) || GetNextTokenStringLiteral(result))
|
| - return true;
|
| -
|
| - result->token = source_.substr(offset_, 1);
|
| - result->token_type = TokenType::ERROR_ILLEGAL_CHAR;
|
| - return true;
|
| -}
|
| -
|
| -void MojomLexer::ConsumeSkippable() {
|
| - if (eos(0))
|
| - return;
|
| -
|
| - bool found_non_space = false;
|
| - while (!found_non_space && !eos(0)) {
|
| - switch (source_[offset_]) {
|
| - case ' ':
|
| - case '\t':
|
| - case '\r':
|
| - Consume(1);
|
| - break;
|
| - case '\n':
|
| - ConsumeEol();
|
| - break;
|
| - default:
|
| - found_non_space = true;
|
| - break;
|
| - }
|
| - }
|
| -}
|
| -
|
| -// Finds all single-character tokens except for '='.
|
| -bool MojomLexer::GetNextTokenSingleChar(Token* result) {
|
| - switch (source_[offset_]) {
|
| - case '(':
|
| - result->token_type = TokenType::LPAREN;
|
| - break;
|
| - case ')':
|
| - result->token_type = TokenType::RPAREN;
|
| - break;
|
| - case '[':
|
| - result->token_type = TokenType::LBRACKET;
|
| - break;
|
| - case ']':
|
| - result->token_type = TokenType::RBRACKET;
|
| - break;
|
| - case '{':
|
| - result->token_type = TokenType::LBRACE;
|
| - break;
|
| - case '}':
|
| - result->token_type = TokenType::RBRACE;
|
| - break;
|
| - case '<':
|
| - result->token_type = TokenType::LANGLE;
|
| - break;
|
| - case '>':
|
| - result->token_type = TokenType::RANGLE;
|
| - break;
|
| - case ';':
|
| - result->token_type = TokenType::SEMI;
|
| - break;
|
| - case ',':
|
| - result->token_type = TokenType::COMMA;
|
| - break;
|
| - case '.':
|
| - result->token_type = TokenType::DOT;
|
| - break;
|
| - case '-':
|
| - result->token_type = TokenType::MINUS;
|
| - break;
|
| - case '+':
|
| - result->token_type = TokenType::PLUS;
|
| - break;
|
| - case '&':
|
| - result->token_type = TokenType::AMP;
|
| - break;
|
| - case '?':
|
| - result->token_type = TokenType::QSTN;
|
| - break;
|
| - default:
|
| - return false;
|
| - break;
|
| - }
|
| -
|
| - result->token = source_.substr(offset_, 1);
|
| - Consume(1);
|
| - return true;
|
| -}
|
| -
|
| -// Finds '=' or '=>'.
|
| -bool MojomLexer::GetNextTokenEqualsOrResponse(Token* result) {
|
| - if (source_[offset_] != '=')
|
| - return false;
|
| - Consume(1);
|
| -
|
| - if (eos(0) || source_[offset_] != '>') {
|
| - result->token_type = TokenType::EQUALS;
|
| - result->token = "=";
|
| - } else {
|
| - result->token_type = TokenType::RESPONSE;
|
| - result->token = "=>";
|
| - Consume(1);
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -// valid C identifiers (K&R2: A.2.3)
|
| -bool MojomLexer::GetNextTokenIdentifier(Token* result) {
|
| - char c = source_[offset_];
|
| -
|
| - // Identifiers start with a letter or underscore.
|
| - if (!(IsAlpha(c) || c == '_'))
|
| - return false;
|
| - size_t start_offset = offset_;
|
| -
|
| - // Identifiers contain letters numbers and underscores.
|
| - while (!eos(0) && (IsAlnum(source_[offset_]) || c == '_'))
|
| - Consume(1);
|
| -
|
| - result->token = source_.substr(start_offset, offset_ - start_offset);
|
| - result->token_type = TokenType::IDENTIFIER;
|
| -
|
| - if (Keywords().count(result->token))
|
| - result->token_type = Keywords()[result->token];
|
| -
|
| - return true;
|
| -}
|
| -
|
| -// integer constants (K&R2: A.2.5.1) dec
|
| -// floating constants (K&R2: A.2.5.3)
|
| -bool MojomLexer::GetNextTokenDecConst(Token* result) {
|
| - if (!IsDigit(source_[offset_]))
|
| - return false;
|
| -
|
| - result->token_type = TokenType::INT_CONST_DEC;
|
| - // If the number starts with a zero and is not a floating point number.
|
| - if (source_[offset_] == '0' &&
|
| - (eos(1) || (source_[offset_] == 'e' && source_[offset_] == 'E' &&
|
| - source_[offset_] == '.'))) {
|
| - // TODO(azani): Catch and error on octal.
|
| - result->token = "0";
|
| - Consume(1);
|
| - return true;
|
| - }
|
| -
|
| - size_t start_offset = offset_;
|
| -
|
| - // First, we consume all the digits.
|
| - ConsumeDigits();
|
| -
|
| - // If there is a fractional part, we consume the . and the following digits.
|
| - if (!eos(0) && source_[offset_] == '.') {
|
| - result->token_type = TokenType::FLOAT_CONST;
|
| - Consume(1);
|
| - ConsumeDigits();
|
| - }
|
| -
|
| - // If there is an exponential part, we consume the e and the following digits.
|
| - if (!eos(0) && (source_[offset_] == 'e' || source_[offset_] == 'E')) {
|
| - if (!eos(2) && (source_[offset_ + 1] == '-' || source_[offset_ + 1]) &&
|
| - IsDigit(source_[offset_ + 2])) {
|
| - result->token_type = TokenType::FLOAT_CONST;
|
| - Consume(2); // Consume e/E and +/-
|
| - ConsumeDigits();
|
| - } else if (!eos(1) && IsDigit(source_[offset_ + 1])) {
|
| - result->token_type = TokenType::FLOAT_CONST;
|
| - Consume(1); // Consume e/E
|
| - ConsumeDigits();
|
| - }
|
| - }
|
| -
|
| - result->token = source_.substr(start_offset, offset_ - start_offset);
|
| - return true;
|
| -}
|
| -
|
| -// integer constants (K&R2: A.2.5.1) hex
|
| -bool MojomLexer::GetNextTokenHexConst(Token* result) {
|
| - // Hex numbers start with a 0, x and then some hex numeral.
|
| - if (eos(2) || source_[offset_] != '0' ||
|
| - (source_[offset_ + 1] != 'x' && source_[offset_ + 1] != 'X') ||
|
| - !IsHexDigit(source_[offset_ + 2]))
|
| - return false;
|
| -
|
| - result->token_type = TokenType::INT_CONST_HEX;
|
| - size_t start_offset = offset_;
|
| - Consume(2);
|
| -
|
| - while (IsHexDigit(source_[offset_]))
|
| - Consume(1);
|
| -
|
| - result->token = source_.substr(start_offset, offset_ - start_offset);
|
| - return true;
|
| -}
|
| -
|
| -bool MojomLexer::GetNextTokenOrdinal(Token* result) {
|
| - // Ordinals start with '@' and then some digit.
|
| - if (eos(1) || source_[offset_] != '@' || !IsDigit(source_[offset_ + 1]))
|
| - return false;
|
| - size_t start_offset = offset_;
|
| - // Consumes '@'.
|
| - Consume(1);
|
| -
|
| - result->token_type = TokenType::ORDINAL;
|
| - ConsumeDigits();
|
| -
|
| - result->token = source_.substr(start_offset, offset_ - start_offset);
|
| - return true;
|
| -}
|
| -
|
| -bool MojomLexer::GetNextTokenStringLiteral(Token* result) {
|
| - // Ordinals start with '@' and then some digit.
|
| - if (source_[offset_] != '"')
|
| - return false;
|
| -
|
| - size_t start_offset = offset_;
|
| - // Consumes '"'.
|
| - Consume(1);
|
| -
|
| - while (source_[offset_] != '"') {
|
| - if (source_[offset_] == '\n' || eos(0)) {
|
| - result->token_type = TokenType::ERROR_UNTERMINATED_STRING_LITERAL;
|
| - result->token = source_.substr(start_offset, offset_ - start_offset);
|
| - return true;
|
| - }
|
| -
|
| - // This block will be skipped if the backslash is at the end of the source.
|
| - if (source_[offset_] == '\\' && !eos(1)) {
|
| - // Consume the backslash. This will ensure \" is consumed.
|
| - Consume(1);
|
| - }
|
| - Consume(1);
|
| - }
|
| - // Consume the closing doublequotes.
|
| - Consume(1);
|
| -
|
| - result->token_type = TokenType::STRING_LITERAL;
|
| -
|
| - result->token = source_.substr(start_offset, offset_ - start_offset);
|
| - return true;
|
| -}
|
| -
|
| -void MojomLexer::ConsumeDigits() {
|
| - while (!eos(0) && IsDigit(source_[offset_]))
|
| - Consume(1);
|
| -}
|
| -
|
| -void MojomLexer::ConsumeEol() {
|
| - ++offset_;
|
| - ++line_no_;
|
| - offset_in_line_ = 0;
|
| -}
|
| -
|
| -void MojomLexer::Consume(size_t num) {
|
| - offset_ += num;
|
| - offset_in_line_ += num;
|
| -}
|
| -
|
| -MojomLexer::MojomLexer(const std::string& source)
|
| - : source_(source), offset_(0), line_no_(0), offset_in_line_(0) {
|
| -}
|
| -
|
| -MojomLexer::~MojomLexer() {
|
| -}
|
| -
|
| -} // namespace
|
| -
|
| -Token::Token()
|
| - : token_type(TokenType::ERROR_UNKNOWN),
|
| - char_pos(0),
|
| - line_no(0),
|
| - line_pos(0) {
|
| -}
|
| -
|
| -Token::~Token() {
|
| -}
|
| -
|
| -// Accepts the text of a mojom file and returns the ordered list of tokens
|
| -// found in the file.
|
| -std::vector<Token> Tokenize(const std::string& source) {
|
| - return MojomLexer(source).Tokenize();
|
| -}
|
| -
|
| -} // namespace mojom
|
| -} // namespace mojo
|
|
|