| Index: src/asmjs/asm-scanner.cc
|
| diff --git a/src/asmjs/asm-scanner.cc b/src/asmjs/asm-scanner.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..949b44a65f7bb773cdd190ed5240855aaa139ffe
|
| --- /dev/null
|
| +++ b/src/asmjs/asm-scanner.cc
|
| @@ -0,0 +1,413 @@
|
| +// Copyright 2017 the V8 project authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "src/asmjs/asm-scanner.h"
|
| +
|
| +#include "src/conversions.h"
|
| +#include "src/flags.h"
|
| +#include "src/parsing/scanner.h"
|
| +#include "src/unicode-cache.h"
|
| +
|
| +namespace v8 {
|
| +namespace internal {
|
| +
|
| +namespace {
|
| +// Upper bound on how many identifiers the scanner hands out token ids for.
|
| +// Capping the count ensures both global and local identifiers can be
|
| +// assigned a token id in the range of an int32_t.
|
| +constexpr int kMaxIdentifierCount = 0xf000000;
|
| +}  // namespace
|
| +
|
| +// Puts every scanner field into its initial state and pre-populates the name
|
| +// tables: entries of the STDLIB_* lists are registered in property_names_,
|
| +// entries of KEYWORD_NAME_LIST in global_names_. Each entry maps the
|
| +// stringified name to its kToken_<name> constant.
|
| +AsmJsScanner::AsmJsScanner()
|
| +    : token_(kUninitialized),
|
| +      preceding_token_(kUninitialized),
|
| +      next_token_(kUninitialized),
|
| +      rewind_(false),
|
| +      in_local_scope_(false),
|
| +      global_count_(0),
|
| +      double_value_(0.0),
|
| +      unsigned_value_(0),
|
| +      preceded_by_newline_(false) {
|
| +  // Lists whose entries carry four fields; only the name is used here.
|
| +#define ADD_PROPERTY(name, _a, _b, _c) property_names_[#name] = kToken_##name;
|
| +  STDLIB_MATH_FUNCTION_LIST(ADD_PROPERTY)
|
| +  STDLIB_ARRAY_TYPE_LIST(ADD_PROPERTY)
|
| +#undef ADD_PROPERTY
|
| +  // Lists whose entries are a bare name.
|
| +#define ADD_PROPERTY(name) property_names_[#name] = kToken_##name;
|
| +  STDLIB_MATH_VALUE_LIST(ADD_PROPERTY)
|
| +  STDLIB_OTHER_LIST(ADD_PROPERTY)
|
| +#undef ADD_PROPERTY
|
| +  // Keywords are looked up through the global name table.
|
| +#define ADD_KEYWORD(name) global_names_[#name] = kToken_##name;
|
| +  KEYWORD_NAME_LIST(ADD_KEYWORD)
|
| +#undef ADD_KEYWORD
|
| +}
|
| +
|
| +// Takes ownership of |stream| as the scanner's input and immediately scans
|
| +// the first token from it by calling Next().
|
| +void AsmJsScanner::SetStream(std::unique_ptr<Utf16CharacterStream> stream) {
|
| +  stream_ = std::move(stream);
|
| +  Next();
|
| +}
|
| +
|
| +// Advances the scanner by one token and stores the result in token_.
|
| +// Whitespace and comments are skipped; preceded_by_newline_ records whether
|
| +// a '\n' was seen by this loop before the token.
|
| +void AsmJsScanner::Next() {
|
| +  // A pending Rewind() is undone by replaying the parked token; nothing is
|
| +  // read from the stream in that case.
|
| +  if (rewind_) {
|
| +    preceding_token_ = token_;
|
| +    token_ = next_token_;
|
| +    next_token_ = kUninitialized;
|
| +    rewind_ = false;
|
| +    return;
|
| +  }
|
| +
|
| +  // Once a parse error or the end of input is reached, stay there.
|
| +  if (token_ == kParseError || token_ == kEndOfInput) {
|
| +    return;
|
| +  }
|
| +
|
| +#if DEBUG
|
| +  // Optional trace of the token that is about to be replaced.
|
| +  if (FLAG_trace_asm_scanner) {
|
| +    if (Token() == kDouble) {
|
| +      PrintF("%lf ", AsDouble());
|
| +    } else if (Token() == kUnsigned) {
|
| +      PrintF("%" PRIu64 " ", AsUnsigned());
|
| +    } else {
|
| +      const std::string token_name = Name(Token());
|
| +      PrintF("%s ", token_name.c_str());
|
| +    }
|
| +  }
|
| +#endif
|
| +
|
| +  preceding_token_ = token_;
|
| +  preceded_by_newline_ = false;
|
| +  while (true) {
|
| +    const uc32 c = stream_->Advance();
|
| +    switch (c) {
|
| +      case kEndOfInput:
|
| +        token_ = kEndOfInput;
|
| +        return;
|
| +
|
| +      case '\n':
|
| +        // Remember the newline for optional semicolon support, then keep
|
| +        // scanning.
|
| +        preceded_by_newline_ = true;
|
| +        break;
|
| +
|
| +      case '\r':
|
| +      case '\t':
|
| +      case ' ':
|
| +        // Plain whitespace is skipped.
|
| +        break;
|
| +
|
| +      case '"':
|
| +      case '\'':
|
| +        ConsumeString(c);
|
| +        return;
|
| +
|
| +      case '!':
|
| +      case '=':
|
| +      case '>':
|
| +      case '<':
|
| +        ConsumeCompareOrShift(c);
|
| +        return;
|
| +
|
| +      case '/': {
|
| +        // Either the start of a comment or a division operator.
|
| +        const uc32 following = stream_->Advance();
|
| +        if (following == '*') {
|
| +          if (!ConsumeCComment()) {
|
| +            token_ = kParseError;
|
| +            return;
|
| +          }
|
| +        } else if (following == '/') {
|
| +          ConsumeCPPComment();
|
| +        } else {
|
| +          stream_->Back();
|
| +          token_ = '/';
|
| +          return;
|
| +        }
|
| +        // A comment was skipped: leave the switch and loop again to find
|
| +        // the next real token.
|
| +        break;
|
| +      }
|
| +
|
| +#define SINGLE_CHAR_CASE(single_char_token) case single_char_token:
|
| +        SIMPLE_SINGLE_TOKEN_LIST(SINGLE_CHAR_CASE)
|
| +#undef SINGLE_CHAR_CASE
|
| +        // Single-character tokens use their ASCII code as the token id.
|
| +        token_ = c;
|
| +        return;
|
| +
|
| +      default:
|
| +        if (IsIdentifierStart(c)) {
|
| +          ConsumeIdentifier(c);
|
| +        } else if (IsNumberStart(c)) {
|
| +          ConsumeNumber(c);
|
| +        } else {
|
| +          // TODO(bradnelson): Support unicode (probably via UnicodeCache).
|
| +          token_ = kParseError;
|
| +        }
|
| +        return;
|
| +    }
|
| +  }
|
| +}
|
| +
|
| +// Steps back by one token. The token being un-read is parked in next_token_
|
| +// so that the following Next() can replay it without touching the stream.
|
| +// Only one level of rewind is supported (checked with a DCHECK).
|
| +void AsmJsScanner::Rewind() {
|
| +  DCHECK(!rewind_);
|
| +
|
| +  // Shift the token window back by one position.
|
| +  next_token_ = token_;
|
| +  token_ = preceding_token_;
|
| +  preceding_token_ = kUninitialized;
|
| +
|
| +  // Drop the per-token state that belonged to the un-read token.
|
| +  identifier_string_.clear();
|
| +  preceded_by_newline_ = false;
|
| +
|
| +  rewind_ = true;
|
| +}
|
| +
|
| +// Forgets every local identifier registered so far by emptying local_names_.
|
| +void AsmJsScanner::ResetLocals() {
|
| +  local_names_.clear();
|
| +}
|
| +
|
| +#if DEBUG
|
| +// Debug-only helper that maps |token| back to a printable name.
|
| +// Lookup order: printable ASCII (returned as a one-character string), then
|
| +// local, global and property name tables (linear search by token id), then
|
| +// the long-symbol and special-token lists. Any other token is considered
|
| +// unreachable.
|
| +std::string AsmJsScanner::Name(token_t token) const {
|
| +  if (token >= 32 && token < 127) {
|
| +    return std::string(1, static_cast<char>(token));
|
| +  }
|
| +  for (const auto& entry : local_names_) {
|
| +    if (entry.second == token) {
|
| +      return entry.first;
|
| +    }
|
| +  }
|
| +  for (const auto& entry : global_names_) {
|
| +    if (entry.second == token) {
|
| +      return entry.first;
|
| +    }
|
| +  }
|
| +  for (const auto& entry : property_names_) {
|
| +    if (entry.second == token) {
|
| +      return entry.first;
|
| +    }
|
| +  }
|
| +  switch (token) {
|
| +#define V(rawname, name) \
|
| +  case kToken_##name:    \
|
| +    return rawname;
|
| +    LONG_SYMBOL_NAME_LIST(V)
|
| +#undef V
|
| +#define V(name, value, string_name) \
|
| +  case name:                        \
|
| +    return string_name;
|
| +    SPECIAL_TOKEN_LIST(V)
|
| +// Undefine the helper so it does not leak into the rest of the file.
|
| +#undef V
|
| +    default:
|
| +      break;
|
| +  }
|
| +  UNREACHABLE();
|
| +  return "{unreachable}";
|
| +}
|
| +#endif
|
| +
|
| +// Returns the underlying stream's current position, narrowed to int.
|
| +// Must not be called while a Rewind() is pending (checked with a DCHECK).
|
| +int AsmJsScanner::GetPosition() const {
|
| +  DCHECK(!rewind_);
|
| +  const auto stream_position = stream_->pos();
|
| +  return static_cast<int>(stream_position);
|
| +}
|
| +
|
| +// Repositions the stream at |pos|, discards the current, preceding and
|
| +// parked tokens together with any pending rewind, and then scans the first
|
| +// token at the new position.
|
| +void AsmJsScanner::Seek(int pos) {
|
| +  stream_->Seek(pos);
|
| +
|
| +  // Nothing scanned before the seek is meaningful any more.
|
| +  rewind_ = false;
|
| +  next_token_ = kUninitialized;
|
| +  token_ = kUninitialized;
|
| +  preceding_token_ = kUninitialized;
|
| +
|
| +  Next();
|
| +}
|
| +
|
| +// Scans an identifier whose first character |ch| has already been read and
|
| +// sets token_ to its token id. Known names reuse their existing id; unknown
|
| +// names are registered and receive a fresh one:
|
| +//  - after a '.', the name is resolved via property_names_;
|
| +//  - otherwise local_names_ is consulted first, and global_names_ only when
|
| +//    not in local scope.
|
| +void AsmJsScanner::ConsumeIdentifier(uc32 ch) {
|
| +  // Collect the identifier text; the loop stops on the first character that
|
| +  // does not belong to it, which is then handed back to the stream.
|
| +  identifier_string_.clear();
|
| +  for (; IsIdentifierPart(ch); ch = stream_->Advance()) {
|
| +    identifier_string_ += ch;
|
| +  }
|
| +  stream_->Back();
|
| +
|
| +  // Property access: only the property table is relevant.
|
| +  if (preceding_token_ == '.') {
|
| +    const auto known = property_names_.find(identifier_string_);
|
| +    if (known != property_names_.end()) {
|
| +      token_ = known->second;
|
| +      return;
|
| +    }
|
| +    // New property names draw their id from the global counter.
|
| +    CHECK(global_count_ < kMaxIdentifierCount);
|
| +    token_ = kGlobalsStart + global_count_++;
|
| +    property_names_[identifier_string_] = token_;
|
| +    return;
|
| +  }
|
| +
|
| +  // Locals are always looked up first.
|
| +  const auto local = local_names_.find(identifier_string_);
|
| +  if (local != local_names_.end()) {
|
| +    token_ = local->second;
|
| +    return;
|
| +  }
|
| +
|
| +  // In local scope an unknown name becomes a new local; local ids count
|
| +  // downwards from kLocalsStart.
|
| +  if (in_local_scope_) {
|
| +    CHECK(local_names_.size() < kMaxIdentifierCount);
|
| +    token_ = kLocalsStart - static_cast<token_t>(local_names_.size());
|
| +    local_names_[identifier_string_] = token_;
|
| +    return;
|
| +  }
|
| +
|
| +  // Global scope: reuse a known global or register a new one; global ids
|
| +  // count upwards from kGlobalsStart.
|
| +  const auto global = global_names_.find(identifier_string_);
|
| +  if (global != global_names_.end()) {
|
| +    token_ = global->second;
|
| +    return;
|
| +  }
|
| +  CHECK(global_count_ < kMaxIdentifierCount);
|
| +  token_ = kGlobalsStart + global_count_++;
|
| +  global_names_[identifier_string_] = token_;
|
| +}
|
| +
|
| +// Scans a numeric literal whose first character |ch| has already been read.
|
| +// Sets token_ to:
|
| +//  - kUnsigned, with the value in unsigned_value_, for literals without a
|
| +//    dot that fit into a uint32_t;
|
| +//  - kDouble, with the value in double_value_, for literals containing a dot;
|
| +//  - '.', when the text is a lone dot or a dot followed by characters that
|
| +//    do not form a number (those characters are returned to the stream);
|
| +//  - kParseError, for anything else that fails to convert or does not fit.
|
| +void AsmJsScanner::ConsumeNumber(uc32 ch) {
|
| +  std::string number(1, static_cast<char>(ch));
|
| +  bool has_dot = ch == '.';
|
| +  // Set once a radix marker ('b', 'o' or 'x') has been seen. From then on an
|
| +  // 'e' is a hex digit rather than an exponent marker, so a following '+' or
|
| +  // '-' is an operator and must not be folded into the literal ("0xe+1").
|
| +  bool has_prefix = false;
|
| +  for (;;) {
|
| +    ch = stream_->Advance();
|
| +    const char last = number[number.size() - 1];
|
| +    const bool is_exponent_sign = (ch == '-' || ch == '+') && !has_prefix &&
|
| +                                  (last == 'e' || last == 'E');
|
| +    if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
|
| +        (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' ||
|
| +        ch == 'x' || is_exponent_sign) {
|
| +      // TODO(bradnelson): Test weird cases ending in -.
|
| +      if (ch == '.') {
|
| +        has_dot = true;
|
| +      }
|
| +      if (ch == 'b' || ch == 'o' || ch == 'x') {
|
| +        has_prefix = true;
|
| +      }
|
| +      number.push_back(static_cast<char>(ch));
|
| +    } else {
|
| +      break;
|
| +    }
|
| +  }
|
| +  // The character that ended the loop is not part of the number.
|
| +  stream_->Back();
|
| +  // Special case the most common number.
|
| +  if (number.size() == 1 && number[0] == '0') {
|
| +    unsigned_value_ = 0;
|
| +    token_ = kUnsigned;
|
| +    return;
|
| +  }
|
| +  // Pick out dot.
|
| +  if (number.size() == 1 && number[0] == '.') {
|
| +    token_ = '.';
|
| +    return;
|
| +  }
|
| +  // Decode numbers.
|
| +  UnicodeCache cache;
|
| +  double_value_ = StringToDouble(
|
| +      &cache,
|
| +      Vector<uint8_t>(
|
| +          const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(number.data())),
|
| +          static_cast<int>(number.size())),
|
| +      ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY | ALLOW_IMPLICIT_OCTAL);
|
| +  if (std::isnan(double_value_)) {
|
| +    // Check if string to number conversion didn't consume all the characters.
|
| +    // This happens if the character filter let through something invalid
|
| +    // like: 0123ef for example.
|
| +    // TODO(bradnelson): Check if this happens often enough to be a perf
|
| +    // problem.
|
| +    if (number[0] == '.') {
|
| +      // Treat the leading dot as its own token and return everything that
|
| +      // was collected after it to the stream.
|
| +      for (size_t k = 1; k < number.size(); ++k) {
|
| +        stream_->Back();
|
| +      }
|
| +      token_ = '.';
|
| +      return;
|
| +    }
|
| +    // Anything else that doesn't parse is an error.
|
| +    token_ = kParseError;
|
| +    return;
|
| +  }
|
| +  if (has_dot) {
|
| +    token_ = kDouble;
|
| +    return;
|
| +  }
|
| +  // Converting a double that does not fit into a uint32_t is undefined
|
| +  // behavior, so literals above 2^32 - 1 (e.g. "1e20") are rejected instead.
|
| +  if (double_value_ > static_cast<double>(0xFFFFFFFFu)) {
|
| +    token_ = kParseError;
|
| +    return;
|
| +  }
|
| +  unsigned_value_ = static_cast<uint32_t>(double_value_);
|
| +  token_ = kUnsigned;
|
| +}
|
| +
|
| +// Skips the remainder of a block comment whose opening "/*" has already
|
| +// been read. Returns true once the closing "*/" has been consumed and false
|
| +// if the input ends first. A newline inside the comment is recorded in
|
| +// preceded_by_newline_ so that optional-semicolon handling still sees it.
|
| +bool AsmJsScanner::ConsumeCComment() {
|
| +  for (;;) {
|
| +    uc32 ch = stream_->Advance();
|
| +    // A run of '*' may be followed by the '/' that closes the comment.
|
| +    while (ch == '*') {
|
| +      ch = stream_->Advance();
|
| +      if (ch == '/') {
|
| +        return true;
|
| +      }
|
| +    }
|
| +    if (ch == '\n') {
|
| +      preceded_by_newline_ = true;
|
| +    }
|
| +    if (ch == kEndOfInput) {
|
| +      return false;
|
| +    }
|
| +  }
|
| +}
|
| +
|
| +// Skips the remainder of a line comment whose opening "//" has already been
|
| +// read, up to and including the terminating newline or the end of input.
|
| +// The newline that ends the comment is consumed here, so it is recorded in
|
| +// preceded_by_newline_ for optional-semicolon handling.
|
| +void AsmJsScanner::ConsumeCPPComment() {
|
| +  for (;;) {
|
| +    uc32 ch = stream_->Advance();
|
| +    if (ch == '\n') {
|
| +      preceded_by_newline_ = true;
|
| +      return;
|
| +    }
|
| +    if (ch == kEndOfInput) {
|
| +      return;
|
| +    }
|
| +  }
|
| +}
|
| +
|
| +// Scans a string literal whose opening |quote| has already been read. The
|
| +// only accepted contents are the characters of "use asm" followed by the
|
| +// same quote character, which yields kToken_UseAsm; any other input yields
|
| +// kParseError.
|
| +void AsmJsScanner::ConsumeString(uc32 quote) {
|
| +  static const char kDirective[] = "use asm";
|
| +  // sizeof counts the terminating NUL, which is not part of the directive.
|
| +  const size_t directive_length = sizeof(kDirective) - 1;
|
| +
|
| +  for (size_t i = 0; i < directive_length; ++i) {
|
| +    if (stream_->Advance() != kDirective[i]) {
|
| +      token_ = kParseError;
|
| +      return;
|
| +    }
|
| +  }
|
| +
|
| +  // The literal must be closed by the quote character that opened it.
|
| +  if (stream_->Advance() == quote) {
|
| +    token_ = kToken_UseAsm;
|
| +  } else {
|
| +    token_ = kParseError;
|
| +  }
|
| +}
|
| +
|
| +// Scans an operator starting with |ch|, one of '<', '>', '=' or '!', which
|
| +// has already been read. Recognizes "<=", ">=", "==", "!=", "<<", ">>" and
|
| +// ">>>"; otherwise the single character |ch| itself becomes the token.
|
| +void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) {
|
| +  const uc32 lookahead = stream_->Advance();
|
| +
|
| +  // Two-character comparisons ending in '='.
|
| +  if (lookahead == '=') {
|
| +    if (ch == '<') {
|
| +      token_ = kToken_LE;
|
| +    } else if (ch == '>') {
|
| +      token_ = kToken_GE;
|
| +    } else if (ch == '=') {
|
| +      token_ = kToken_EQ;
|
| +    } else if (ch == '!') {
|
| +      token_ = kToken_NE;
|
| +    } else {
|
| +      UNREACHABLE();
|
| +    }
|
| +    return;
|
| +  }
|
| +
|
| +  // Left shift.
|
| +  if (ch == '<' && lookahead == '<') {
|
| +    token_ = kToken_SHL;
|
| +    return;
|
| +  }
|
| +
|
| +  // Right shifts: a third '>' distinguishes ">>>" from ">>".
|
| +  if (ch == '>' && lookahead == '>') {
|
| +    if (stream_->Advance() == '>') {
|
| +      token_ = kToken_SHR;
|
| +      return;
|
| +    }
|
| +    stream_->Back();
|
| +    token_ = kToken_SAR;
|
| +    return;
|
| +  }
|
| +
|
| +  // Plain single-character operator: the lookahead belongs to the next token.
|
| +  stream_->Back();
|
| +  token_ = ch;
|
| +}
|
| +
|
| +// Returns true if |ch| may begin an identifier: an ASCII letter, '_' or '$'.
|
| +bool AsmJsScanner::IsIdentifierStart(uc32 ch) {
|
| +  if (ch == '_' || ch == '$') {
|
| +    return true;
|
| +  }
|
| +  const bool is_lower = 'a' <= ch && ch <= 'z';
|
| +  const bool is_upper = 'A' <= ch && ch <= 'Z';
|
| +  return is_upper || is_lower;
|
| +}
|
| +
|
| +// Returns true if |ch| may appear inside an identifier: anything accepted
|
| +// by IsIdentifierStart() or an ASCII digit.
|
| +bool AsmJsScanner::IsIdentifierPart(uc32 ch) {
|
| +  if (IsIdentifierStart(ch)) {
|
| +    return true;
|
| +  }
|
| +  return '0' <= ch && ch <= '9';
|
| +}
|
| +
|
| +// Returns true if |ch| may begin a numeric literal: '.' or an ASCII digit.
|
| +bool AsmJsScanner::IsNumberStart(uc32 ch) {
|
| +  if (ch == '.') {
|
| +    return true;
|
| +  }
|
| +  return '0' <= ch && ch <= '9';
|
| +}
|
| +
|
| +} // namespace internal
|
| +} // namespace v8
|
|
|