Index: mojom/lexer.cc |
diff --git a/mojom/lexer.cc b/mojom/lexer.cc |
deleted file mode 100644 |
index e55e2fbca0b2f5160a69e8b37257b74c2cd861bd..0000000000000000000000000000000000000000 |
--- a/mojom/lexer.cc |
+++ /dev/null |
@@ -1,420 +0,0 @@ |
-// Copyright 2015 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#include "mojom/lexer.h" |
- |
-#include <map> |
-#include <string> |
- |
-#include "base/lazy_instance.h" |
- |
-namespace mojo { |
-namespace mojom { |
- |
-namespace { |
- |
// Holds the mojom keyword table. Constructed lazily via base::LazyInstance
// (see g_keywords below) so the map is built once, on first use.
class KeywordsDict {
 public:
  KeywordsDict();

 private:
  // Maps keyword spelling (e.g. "struct") to its token type.
  std::map<std::string, mojom::TokenType> keywords_;
  // Keywords() is the sole accessor and reaches into the private map.
  friend std::map<std::string, mojom::TokenType>& Keywords();

  DISALLOW_COPY_AND_ASSIGN(KeywordsDict);
};
// Process-wide keyword table, constructed on first access.
static base::LazyInstance<KeywordsDict> g_keywords = LAZY_INSTANCE_INITIALIZER;

// Returns the shared keyword-spelling -> TokenType map.
std::map<std::string, mojom::TokenType>& Keywords() {
  return g_keywords.Get().keywords_;
}
- |
-KeywordsDict::KeywordsDict() { |
- keywords_["import"] = TokenType::IMPORT; |
- keywords_["module"] = TokenType::MODULE; |
- keywords_["struct"] = TokenType::STRUCT; |
- keywords_["union"] = TokenType::UNION; |
- keywords_["interface"] = TokenType::INTERFACE; |
- keywords_["enum"] = TokenType::ENUM; |
- keywords_["const"] = TokenType::CONST; |
- keywords_["true"] = TokenType::TRUE; |
- keywords_["false"] = TokenType::FALSE; |
- keywords_["default"] = TokenType::DEFAULT; |
-} |
- |
// Locale-independent replacement for isalpha: ASCII letters only.
bool IsAlpha(char c) {
  if (c >= 'a' && c <= 'z')
    return true;
  return c >= 'A' && c <= 'Z';
}
- |
// Locale-independent replacement for isdigit: ASCII '0'..'9' only.
bool IsDigit(char c) {
  return c >= '0' && c <= '9';
}
- |
// True for any ASCII hexadecimal digit, upper or lower case.
bool IsHexDigit(char c) {
  return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
         (c >= 'A' && c <= 'F');
}
- |
// Locale-independent replacement for isalnum: ASCII letters and digits.
bool IsAlnum(char c) {
  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
         (c >= '0' && c <= '9');
}
- |
// MojomLexer tokenizes a mojom source file. It is NOT thread-safe.
class MojomLexer {
 public:
  explicit MojomLexer(const std::string& source);
  ~MojomLexer();

  // Returns the list of tokens in the source file. If an error token is
  // produced, it is the last token in the list.
  std::vector<Token> Tokenize();

 private:
  // The GetNextToken.* functions all return true if they could find a token
  // (even an error token) and false otherwise. On success they fill in
  // |result| and advance the lexer past the token's text.
  bool GetNextToken(Token* result);
  bool GetNextTokenSingleChar(Token* result);
  bool GetNextTokenEqualsOrResponse(Token* result);
  bool GetNextTokenIdentifier(Token* result);
  bool GetNextTokenDecConst(Token* result);
  bool GetNextTokenHexConst(Token* result);
  bool GetNextTokenOrdinal(Token* result);
  bool GetNextTokenStringLiteral(Token* result);

  // Whitespace/newline skipping and low-level cursor movement. Consume()
  // advances within the current line; ConsumeEol() advances past a '\n'
  // and updates the line bookkeeping.
  void ConsumeSkippable();
  void ConsumeDigits();
  void ConsumeEol();
  void Consume(size_t num);

  // True if position offset_ + offset_plus is at or past end of source.
  bool eos(size_t offset_plus) {
    return offset_ + offset_plus >= source_.size();
  }

  const std::string source_;   // Full text being tokenized.
  size_t offset_;              // Current position in source_.
  size_t line_no_;             // Zero-based line number of offset_.
  size_t offset_in_line_;      // Zero-based column of offset_ in its line.

  DISALLOW_COPY_AND_ASSIGN(MojomLexer);
};
- |
-std::vector<Token> MojomLexer::Tokenize() { |
- offset_ = 0; |
- line_no_ = 0; |
- offset_in_line_ = 0; |
- |
- std::vector<Token> result; |
- Token cur; |
- while (GetNextToken(&cur)) { |
- result.push_back(cur); |
- |
- // As soon as an error token is found, stop tokenizing. |
- if (cur.error()) { |
- break; |
- } |
- } |
- |
- return result; |
-} |
- |
-bool MojomLexer::GetNextToken(Token* result) { |
- // Skip all spaces which may be in front of the next token. |
- ConsumeSkippable(); |
- |
- // If we found the end of the source signal that is so. |
- if (eos(0)) |
- return false; |
- |
- // Save the current position in the source code. |
- result->char_pos = offset_; |
- result->line_no = line_no_; |
- result->line_pos = offset_in_line_; |
- |
- if (GetNextTokenSingleChar(result) || GetNextTokenEqualsOrResponse(result) || |
- GetNextTokenIdentifier(result) || GetNextTokenHexConst(result) || |
- GetNextTokenDecConst(result) || GetNextTokenDecConst(result) || |
- GetNextTokenOrdinal(result) || GetNextTokenStringLiteral(result)) |
- return true; |
- |
- result->token = source_.substr(offset_, 1); |
- result->token_type = TokenType::ERROR_ILLEGAL_CHAR; |
- return true; |
-} |
- |
-void MojomLexer::ConsumeSkippable() { |
- if (eos(0)) |
- return; |
- |
- bool found_non_space = false; |
- while (!found_non_space && !eos(0)) { |
- switch (source_[offset_]) { |
- case ' ': |
- case '\t': |
- case '\r': |
- Consume(1); |
- break; |
- case '\n': |
- ConsumeEol(); |
- break; |
- default: |
- found_non_space = true; |
- break; |
- } |
- } |
-} |
- |
-// Finds all single-character tokens except for '='. |
-bool MojomLexer::GetNextTokenSingleChar(Token* result) { |
- switch (source_[offset_]) { |
- case '(': |
- result->token_type = TokenType::LPAREN; |
- break; |
- case ')': |
- result->token_type = TokenType::RPAREN; |
- break; |
- case '[': |
- result->token_type = TokenType::LBRACKET; |
- break; |
- case ']': |
- result->token_type = TokenType::RBRACKET; |
- break; |
- case '{': |
- result->token_type = TokenType::LBRACE; |
- break; |
- case '}': |
- result->token_type = TokenType::RBRACE; |
- break; |
- case '<': |
- result->token_type = TokenType::LANGLE; |
- break; |
- case '>': |
- result->token_type = TokenType::RANGLE; |
- break; |
- case ';': |
- result->token_type = TokenType::SEMI; |
- break; |
- case ',': |
- result->token_type = TokenType::COMMA; |
- break; |
- case '.': |
- result->token_type = TokenType::DOT; |
- break; |
- case '-': |
- result->token_type = TokenType::MINUS; |
- break; |
- case '+': |
- result->token_type = TokenType::PLUS; |
- break; |
- case '&': |
- result->token_type = TokenType::AMP; |
- break; |
- case '?': |
- result->token_type = TokenType::QSTN; |
- break; |
- default: |
- return false; |
- break; |
- } |
- |
- result->token = source_.substr(offset_, 1); |
- Consume(1); |
- return true; |
-} |
- |
-// Finds '=' or '=>'. |
-bool MojomLexer::GetNextTokenEqualsOrResponse(Token* result) { |
- if (source_[offset_] != '=') |
- return false; |
- Consume(1); |
- |
- if (eos(0) || source_[offset_] != '>') { |
- result->token_type = TokenType::EQUALS; |
- result->token = "="; |
- } else { |
- result->token_type = TokenType::RESPONSE; |
- result->token = "=>"; |
- Consume(1); |
- } |
- return true; |
-} |
- |
-// valid C identifiers (K&R2: A.2.3) |
-bool MojomLexer::GetNextTokenIdentifier(Token* result) { |
- char c = source_[offset_]; |
- |
- // Identifiers start with a letter or underscore. |
- if (!(IsAlpha(c) || c == '_')) |
- return false; |
- size_t start_offset = offset_; |
- |
- // Identifiers contain letters numbers and underscores. |
- while (!eos(0) && (IsAlnum(source_[offset_]) || c == '_')) |
- Consume(1); |
- |
- result->token = source_.substr(start_offset, offset_ - start_offset); |
- result->token_type = TokenType::IDENTIFIER; |
- |
- if (Keywords().count(result->token)) |
- result->token_type = Keywords()[result->token]; |
- |
- return true; |
-} |
- |
-// integer constants (K&R2: A.2.5.1) dec |
-// floating constants (K&R2: A.2.5.3) |
-bool MojomLexer::GetNextTokenDecConst(Token* result) { |
- if (!IsDigit(source_[offset_])) |
- return false; |
- |
- result->token_type = TokenType::INT_CONST_DEC; |
- // If the number starts with a zero and is not a floating point number. |
- if (source_[offset_] == '0' && |
- (eos(1) || (source_[offset_] == 'e' && source_[offset_] == 'E' && |
- source_[offset_] == '.'))) { |
- // TODO(azani): Catch and error on octal. |
- result->token = "0"; |
- Consume(1); |
- return true; |
- } |
- |
- size_t start_offset = offset_; |
- |
- // First, we consume all the digits. |
- ConsumeDigits(); |
- |
- // If there is a fractional part, we consume the . and the following digits. |
- if (!eos(0) && source_[offset_] == '.') { |
- result->token_type = TokenType::FLOAT_CONST; |
- Consume(1); |
- ConsumeDigits(); |
- } |
- |
- // If there is an exponential part, we consume the e and the following digits. |
- if (!eos(0) && (source_[offset_] == 'e' || source_[offset_] == 'E')) { |
- if (!eos(2) && (source_[offset_ + 1] == '-' || source_[offset_ + 1]) && |
- IsDigit(source_[offset_ + 2])) { |
- result->token_type = TokenType::FLOAT_CONST; |
- Consume(2); // Consume e/E and +/- |
- ConsumeDigits(); |
- } else if (!eos(1) && IsDigit(source_[offset_ + 1])) { |
- result->token_type = TokenType::FLOAT_CONST; |
- Consume(1); // Consume e/E |
- ConsumeDigits(); |
- } |
- } |
- |
- result->token = source_.substr(start_offset, offset_ - start_offset); |
- return true; |
-} |
- |
-// integer constants (K&R2: A.2.5.1) hex |
-bool MojomLexer::GetNextTokenHexConst(Token* result) { |
- // Hex numbers start with a 0, x and then some hex numeral. |
- if (eos(2) || source_[offset_] != '0' || |
- (source_[offset_ + 1] != 'x' && source_[offset_ + 1] != 'X') || |
- !IsHexDigit(source_[offset_ + 2])) |
- return false; |
- |
- result->token_type = TokenType::INT_CONST_HEX; |
- size_t start_offset = offset_; |
- Consume(2); |
- |
- while (IsHexDigit(source_[offset_])) |
- Consume(1); |
- |
- result->token = source_.substr(start_offset, offset_ - start_offset); |
- return true; |
-} |
- |
-bool MojomLexer::GetNextTokenOrdinal(Token* result) { |
- // Ordinals start with '@' and then some digit. |
- if (eos(1) || source_[offset_] != '@' || !IsDigit(source_[offset_ + 1])) |
- return false; |
- size_t start_offset = offset_; |
- // Consumes '@'. |
- Consume(1); |
- |
- result->token_type = TokenType::ORDINAL; |
- ConsumeDigits(); |
- |
- result->token = source_.substr(start_offset, offset_ - start_offset); |
- return true; |
-} |
- |
-bool MojomLexer::GetNextTokenStringLiteral(Token* result) { |
- // Ordinals start with '@' and then some digit. |
- if (source_[offset_] != '"') |
- return false; |
- |
- size_t start_offset = offset_; |
- // Consumes '"'. |
- Consume(1); |
- |
- while (source_[offset_] != '"') { |
- if (source_[offset_] == '\n' || eos(0)) { |
- result->token_type = TokenType::ERROR_UNTERMINATED_STRING_LITERAL; |
- result->token = source_.substr(start_offset, offset_ - start_offset); |
- return true; |
- } |
- |
- // This block will be skipped if the backslash is at the end of the source. |
- if (source_[offset_] == '\\' && !eos(1)) { |
- // Consume the backslash. This will ensure \" is consumed. |
- Consume(1); |
- } |
- Consume(1); |
- } |
- // Consume the closing doublequotes. |
- Consume(1); |
- |
- result->token_type = TokenType::STRING_LITERAL; |
- |
- result->token = source_.substr(start_offset, offset_ - start_offset); |
- return true; |
-} |
- |
// Advances past a (possibly empty) run of decimal digits.
void MojomLexer::ConsumeDigits() {
  while (!eos(0) && IsDigit(source_[offset_]))
    Consume(1);
}
- |
// Advances past a newline character, moving the bookkeeping to the start of
// the next line. Bypasses Consume() so offset_in_line_ can be reset.
void MojomLexer::ConsumeEol() {
  ++offset_;
  ++line_no_;
  offset_in_line_ = 0;
}
- |
// Advances |num| characters within the current line. Must not be used to
// cross a newline (use ConsumeEol() for that).
void MojomLexer::Consume(size_t num) {
  offset_ += num;
  offset_in_line_ += num;
}
- |
// Copies the source text and positions the cursor at the start. Tokenize()
// re-resets the cursor, so a lexer instance can be reused.
MojomLexer::MojomLexer(const std::string& source)
    : source_(source), offset_(0), line_no_(0), offset_in_line_(0) {
}

MojomLexer::~MojomLexer() {
}
- |
-} // namespace |
- |
// Tokens default to an error state; the lexer overwrites every field when it
// produces a real token.
Token::Token()
    : token_type(TokenType::ERROR_UNKNOWN),
      char_pos(0),
      line_no(0),
      line_pos(0) {
}

Token::~Token() {
}
- |
-// Accepts the text of a mojom file and returns the ordered list of tokens |
-// found in the file. |
-std::vector<Token> Tokenize(const std::string& source) { |
- return MojomLexer(source).Tokenize(); |
-} |
- |
-} // namespace mojom |
-} // namespace mojo |