Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(334)

Unified Diff: src/asmjs/asm-scanner.cc

Issue 2751693002: [wasm][asm.js] Adding custom asm.js lexer. (Closed)
Patch Set: fix Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/asmjs/asm-scanner.cc
diff --git a/src/asmjs/asm-scanner.cc b/src/asmjs/asm-scanner.cc
new file mode 100644
index 0000000000000000000000000000000000000000..949b44a65f7bb773cdd190ed5240855aaa139ffe
--- /dev/null
+++ b/src/asmjs/asm-scanner.cc
@@ -0,0 +1,413 @@
+// Copyright 2017 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/asmjs/asm-scanner.h"
+
+#include "src/conversions.h"
+#include "src/flags.h"
+#include "src/parsing/scanner.h"
+#include "src/unicode-cache.h"
+
+namespace v8 {
+namespace internal {
+
+namespace {
+// Upper bound on the number of identifiers the scanner will assign token ids
+// to. Capping the count ensures both global and local identifiers can be
+// given a token id in the range of an int32_t.
+constexpr int kMaxIdentifierCount = 0xf000000;
+}  // namespace
+
+// Creates a scanner with no stream attached and all token slots
+// uninitialized, then seeds the name tables: every entry of the STDLIB_*
+// lists is registered in |property_names_| and every entry of
+// KEYWORD_NAME_LIST in |global_names_|, each mapped to its kToken_<name> id.
+AsmJsScanner::AsmJsScanner()
+    : token_(kUninitialized),
+      preceding_token_(kUninitialized),
+      next_token_(kUninitialized),
+      rewind_(false),
+      in_local_scope_(false),
+      global_count_(0),
+      double_value_(0.0),
+      unsigned_value_(0),
+      preceded_by_newline_(false) {
+  // Lists whose entries carry three extra columns that are not needed here.
+#define REGISTER_PROPERTY(name, _unused1, _unused2, _unused3) \
+  property_names_[#name] = kToken_##name;
+  STDLIB_MATH_FUNCTION_LIST(REGISTER_PROPERTY)
+  STDLIB_ARRAY_TYPE_LIST(REGISTER_PROPERTY)
+#undef REGISTER_PROPERTY
+
+  // Lists whose entries are a bare name.
+#define REGISTER_PROPERTY(name) property_names_[#name] = kToken_##name;
+  STDLIB_MATH_VALUE_LIST(REGISTER_PROPERTY)
+  STDLIB_OTHER_LIST(REGISTER_PROPERTY)
+#undef REGISTER_PROPERTY
+
+#define REGISTER_GLOBAL(name) global_names_[#name] = kToken_##name;
+  KEYWORD_NAME_LIST(REGISTER_GLOBAL)
+#undef REGISTER_GLOBAL
+}
+
+// Takes ownership of |stream| as the scanner's input and immediately scans
+// the first token from it.
+void AsmJsScanner::SetStream(std::unique_ptr<Utf16CharacterStream> stream) {
+  stream_ = std::move(stream);
+
+  // Prime the scanner so a current token is available right away.
+  Next();
+}
+
+// Advances to the next token: the result lands in |token_| and the token that
+// was current before the call is kept in |preceding_token_|. Whitespace and
+// comments are skipped; a '\n' read directly by this loop sets
+// |preceded_by_newline_|.
+void AsmJsScanner::Next() {
+  // A pending Rewind() is satisfied by replaying the saved token; the stream
+  // itself is not touched.
+  if (rewind_) {
+    preceding_token_ = token_;
+    token_ = next_token_;
+    next_token_ = kUninitialized;
+    rewind_ = false;
+    return;
+  }
+
+  // End of input and parse errors are sticky: never scan past them.
+  const bool at_terminal_token =
+      token_ == kEndOfInput || token_ == kParseError;
+  if (at_terminal_token) {
+    return;
+  }
+
+#if DEBUG
+  // Optionally trace the token that is about to be replaced.
+  if (FLAG_trace_asm_scanner) {
+    if (Token() == kDouble) {
+      PrintF("%lf ", AsDouble());
+    } else if (Token() == kUnsigned) {
+      PrintF("%" PRIu64 " ", AsUnsigned());
+    } else {
+      const std::string token_name = Name(Token());
+      PrintF("%s ", token_name.c_str());
+    }
+  }
+#endif
+
+  preceding_token_ = token_;
+  preceded_by_newline_ = false;
+
+  // Each iteration reads one character. Characters that produce no token
+  // (whitespace, newlines, comments) `continue`; everything else sets
+  // |token_| and returns.
+  while (true) {
+    const uc32 ch = stream_->Advance();
+    switch (ch) {
+      case ' ':
+      case '\t':
+      case '\r':
+        // Plain whitespace carries no meaning.
+        continue;
+
+      case '\n':
+        // Remember the newline for optional semicolon support, then keep
+        // looking for a token.
+        preceded_by_newline_ = true;
+        continue;
+
+      case kEndOfInput:
+        token_ = kEndOfInput;
+        return;
+
+      case '\'':
+      case '"':
+        ConsumeString(ch);
+        return;
+
+      case '/': {
+        // Either a comment opener or the division operator.
+        const uc32 following = stream_->Advance();
+        if (following == '/') {
+          ConsumeCPPComment();
+          continue;
+        }
+        if (following == '*') {
+          if (ConsumeCComment()) {
+            continue;
+          }
+          // Unterminated block comment.
+          token_ = kParseError;
+          return;
+        }
+        // Not a comment: un-read the lookahead and emit '/'.
+        stream_->Back();
+        token_ = '/';
+        return;
+      }
+
+      case '<':
+      case '>':
+      case '=':
+      case '!':
+        ConsumeCompareOrShift(ch);
+        return;
+
+#define SINGLE_CHAR_CASE(single_char_token) case single_char_token:
+        SIMPLE_SINGLE_TOKEN_LIST(SINGLE_CHAR_CASE)
+#undef SINGLE_CHAR_CASE
+        // Single-character tokens use their ASCII value as the token id.
+        token_ = ch;
+        return;
+
+      default:
+        if (IsIdentifierStart(ch)) {
+          ConsumeIdentifier(ch);
+          return;
+        }
+        if (IsNumberStart(ch)) {
+          ConsumeNumber(ch);
+          return;
+        }
+        // TODO(bradnelson): Support unicode (probably via UnicodeCache).
+        token_ = kParseError;
+        return;
+    }
+  }
+}
+
+// Steps back by one token. The current token is parked in |next_token_| so
+// the following Next() can replay it without reading the stream, and the
+// preceding token becomes current again. Only one level of rewind is
+// supported (checked by the DCHECK).
+void AsmJsScanner::Rewind() {
+  DCHECK(!rewind_);
+  rewind_ = true;
+
+  // Shift the token window back by one slot.
+  next_token_ = token_;
+  token_ = preceding_token_;
+  preceding_token_ = kUninitialized;
+
+  // Per-token side state is cleared as well.
+  identifier_string_.clear();
+  preceded_by_newline_ = false;
+}
+
+// Forgets every local identifier registered so far by emptying
+// |local_names_|.
+void AsmJsScanner::ResetLocals() {
+  local_names_.clear();
+}
+
+#if DEBUG
+// Only used for debugging.
+// Maps |token| back to a printable name. Printable ASCII token ids
+// (32..126) are returned as a one-character string; otherwise the local,
+// global and property name tables are searched (in that order) for an entry
+// with this id, and finally the long-symbol and special-token lists are
+// consulted. Reaching the end without a match is treated as unreachable.
+std::string AsmJsScanner::Name(token_t token) const {
+  if (token >= 32 && token < 127) {
+    return std::string(1, static_cast<char>(token));
+  }
+  // Linear reverse lookups; acceptable because this is debug-only code.
+  for (const auto& i : local_names_) {
+    if (i.second == token) {
+      return i.first;
+    }
+  }
+  for (const auto& i : global_names_) {
+    if (i.second == token) {
+      return i.first;
+    }
+  }
+  for (const auto& i : property_names_) {
+    if (i.second == token) {
+      return i.first;
+    }
+  }
+  switch (token) {
+#define V(rawname, name) \
+  case kToken_##name:    \
+    return rawname;
+    LONG_SYMBOL_NAME_LIST(V)
+#undef V
+#define V(name, value, string_name) \
+  case name:                        \
+    return string_name;
+    SPECIAL_TOKEN_LIST(V)
+#undef V  // Do not let the helper macro leak into the rest of the file.
+    default:
+      break;
+  }
+  UNREACHABLE();
+  return "{unreachable}";
+}
+#endif
+
+// Returns the stream's current position narrowed to an int. Must not be
+// called while a rewind is pending (enforced by the DCHECK).
+int AsmJsScanner::GetPosition() const {
+  DCHECK(!rewind_);
+  const auto stream_position = stream_->pos();
+  return static_cast<int>(stream_position);
+}
+
+// Repositions the underlying stream at |pos|, discards all token state
+// (including any pending rewind) and scans the first token at the new
+// position.
+void AsmJsScanner::Seek(int pos) {
+  stream_->Seek(pos);
+
+  // Nothing scanned before the jump is meaningful any more.
+  rewind_ = false;
+  next_token_ = kUninitialized;
+  token_ = kUninitialized;
+  preceding_token_ = kUninitialized;
+
+  Next();
+}
+
+// Scans an identifier whose first character |ch| has already been read and
+// sets |token_| to its id. The name table consulted depends on context:
+//  - directly after a '.', only |property_names_| is used;
+//  - otherwise |local_names_| is tried first and, outside of a local scope,
+//    |global_names_| as well.
+// Unknown names are registered in the matching table with a fresh id.
+void AsmJsScanner::ConsumeIdentifier(uc32 ch) {
+  // Collect characters for as long as they belong to the identifier.
+  identifier_string_.clear();
+  for (; IsIdentifierPart(ch); ch = stream_->Advance()) {
+    identifier_string_ += ch;
+  }
+  // The character that ended the identifier belongs to the next token.
+  stream_->Back();
+
+  // Property access: "<expr>.<identifier>".
+  if (preceding_token_ == '.') {
+    auto known_property = property_names_.find(identifier_string_);
+    if (known_property != property_names_.end()) {
+      token_ = known_property->second;
+      return;
+    }
+    // New property names draw their ids from the global counter.
+    CHECK(global_count_ < kMaxIdentifierCount);
+    token_ = kGlobalsStart + global_count_++;
+    property_names_[identifier_string_] = token_;
+    return;
+  }
+
+  // Locals are always consulted first.
+  auto known_local = local_names_.find(identifier_string_);
+  if (known_local != local_names_.end()) {
+    token_ = known_local->second;
+    return;
+  }
+
+  if (in_local_scope_) {
+    // Unknown name inside a local scope: local ids count downwards from
+    // kLocalsStart.
+    CHECK(local_names_.size() < kMaxIdentifierCount);
+    token_ = kLocalsStart - static_cast<token_t>(local_names_.size());
+    local_names_[identifier_string_] = token_;
+    return;
+  }
+
+  auto known_global = global_names_.find(identifier_string_);
+  if (known_global != global_names_.end()) {
+    token_ = known_global->second;
+    return;
+  }
+  // Unknown name at global scope: global ids count upwards from
+  // kGlobalsStart.
+  CHECK(global_count_ < kMaxIdentifierCount);
+  token_ = kGlobalsStart + global_count_++;
+  global_names_[identifier_string_] = token_;
+}
+
+// Scans a numeric literal (or a lone '.') whose first character |ch| has
+// already been read. On success |token_| is kDouble when the literal contains
+// a '.', with the value in |double_value_|, or kUnsigned otherwise, with the
+// value in |unsigned_value_|. A '.' that does not start a valid number is
+// returned as the '.' token. Anything else that fails to parse, or a dot-less
+// literal that does not fit into 32 unsigned bits, yields kParseError.
+void AsmJsScanner::ConsumeNumber(uc32 ch) {
+  std::string number;
+  number.assign(1, static_cast<char>(ch));
+  bool has_dot = ch == '.';
+  // Set once a radix prefix letter (b/o/x) has been seen. In such literals a
+  // trailing 'e' is a hex digit, not an exponent marker, so a following
+  // '+'/'-' must not be absorbed (e.g. "0x1e+2" is 0x1e plus 2).
+  bool has_prefix = false;
+  for (;;) {
+    ch = stream_->Advance();
+    const bool is_exponent_sign =
+        (ch == '-' || ch == '+') && !has_prefix &&
+        (number.back() == 'e' || number.back() == 'E');
+    if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
+        (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' ||
+        ch == 'x' || is_exponent_sign) {
+      // TODO(bradnelson): Test weird cases ending in -.
+      if (ch == '.') {
+        has_dot = true;
+      }
+      if (ch == 'b' || ch == 'o' || ch == 'x') {
+        has_prefix = true;
+      }
+      number.push_back(static_cast<char>(ch));
+    } else {
+      break;
+    }
+  }
+  // The character that ended the literal belongs to the next token.
+  stream_->Back();
+  // Special case the most common number.
+  if (number.size() == 1 && number[0] == '0') {
+    unsigned_value_ = 0;
+    token_ = kUnsigned;
+    return;
+  }
+  // Pick out dot.
+  if (number.size() == 1 && number[0] == '.') {
+    token_ = '.';
+    return;
+  }
+  // Decode numbers.
+  UnicodeCache cache;
+  double_value_ = StringToDouble(
+      &cache,
+      Vector<uint8_t>(
+          const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(number.data())),
+          static_cast<int>(number.size())),
+      ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY | ALLOW_IMPLICIT_OCTAL);
+  if (std::isnan(double_value_)) {
+    // Check if string to number conversion didn't consume all the characters.
+    // This happens if the character filter let through something invalid
+    // like: 0123ef for example.
+    // TODO(bradnelson): Check if this happens often enough to be a perf
+    // problem.
+    if (number[0] == '.') {
+      // Un-read everything after the dot and hand back just the '.' token.
+      for (size_t k = 1; k < number.size(); ++k) {
+        stream_->Back();
+      }
+      token_ = '.';
+      return;
+    }
+    // Anything else that doesn't parse is an error.
+    token_ = kParseError;
+    return;
+  }
+  if (has_dot) {
+    token_ = kDouble;
+    return;
+  }
+  // Converting a double outside the uint32_t range (including infinity) to
+  // uint32_t is undefined behaviour, so reject such literals instead.
+  const double max_unsigned = static_cast<double>(0xFFFFFFFFu);
+  if (double_value_ > max_unsigned) {
+    token_ = kParseError;
+    return;
+  }
+  unsigned_value_ = static_cast<uint32_t>(double_value_);
+  token_ = kUnsigned;
+}
+
+// Skips the remainder of a /* ... */ comment whose opening "/*" has already
+// been consumed. Returns true once the closing "*/" has been read and false
+// if the input ends first. A newline inside the comment is recorded in
+// |preceded_by_newline_|, so that a comment spanning several lines counts as
+// a line break for optional semicolon support.
+bool AsmJsScanner::ConsumeCComment() {
+  for (;;) {
+    uc32 ch = stream_->Advance();
+    // A run of '*' closes the comment only if it is directly followed by '/'.
+    while (ch == '*') {
+      ch = stream_->Advance();
+      if (ch == '/') {
+        return true;
+      }
+    }
+    if (ch == '\n') {
+      preceded_by_newline_ = true;
+    }
+    if (ch == kEndOfInput) {
+      return false;
+    }
+  }
+}
+
+// Skips the remainder of a // comment whose opening "//" has already been
+// consumed, up to and including the terminating newline (or the end of
+// input). Because the newline is swallowed here and never reaches the main
+// scanning loop, it is recorded in |preceded_by_newline_| at this point to
+// keep optional semicolon support working after line comments.
+void AsmJsScanner::ConsumeCPPComment() {
+  for (;;) {
+    uc32 ch = stream_->Advance();
+    if (ch == '\n') {
+      preceded_by_newline_ = true;
+      return;
+    }
+    if (ch == kEndOfInput) {
+      return;
+    }
+  }
+}
+
+// Scans a string literal whose opening |quote| has already been read. The
+// only accepted content is exactly "use asm" followed by the same quote
+// character, which yields kToken_UseAsm; any deviation yields kParseError.
+void AsmJsScanner::ConsumeString(uc32 quote) {
+  const char* cursor = "use asm";
+  while (*cursor != '\0') {
+    const bool matches = stream_->Advance() == *cursor;
+    if (!matches) {
+      token_ = kParseError;
+      return;
+    }
+    ++cursor;
+  }
+
+  // The literal has to be closed by the quote that opened it.
+  const bool properly_closed = stream_->Advance() == quote;
+  if (properly_closed) {
+    token_ = kToken_UseAsm;
+  } else {
+    token_ = kParseError;
+  }
+}
+
+// Scans an operator starting with |ch|, which must be one of '<', '>', '='
+// or '!'. Recognizes the two-character comparisons (<=, >=, ==, !=), the
+// shifts (<<, >>, >>>) and otherwise falls back to the single character
+// itself as the token.
+void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) {
+  const uc32 second = stream_->Advance();
+
+  // "<=", ">=", "==" and "!=".
+  if (second == '=') {
+    switch (ch) {
+      case '<':
+        token_ = kToken_LE;
+        return;
+      case '>':
+        token_ = kToken_GE;
+        return;
+      case '=':
+        token_ = kToken_EQ;
+        return;
+      case '!':
+        token_ = kToken_NE;
+        return;
+      default:
+        UNREACHABLE();
+    }
+    return;
+  }
+
+  // "<<".
+  if (ch == '<' && second == '<') {
+    token_ = kToken_SHL;
+    return;
+  }
+
+  // ">>" or ">>>", told apart by one more character of lookahead.
+  if (ch == '>' && second == '>') {
+    const bool has_third_angle = stream_->Advance() == '>';
+    if (has_third_angle) {
+      token_ = kToken_SHR;
+      return;
+    }
+    stream_->Back();
+    token_ = kToken_SAR;
+    return;
+  }
+
+  // Plain single-character operator: un-read the lookahead.
+  stream_->Back();
+  token_ = ch;
+}
+
+// Returns true if |ch| may begin an identifier: an ASCII letter, '_' or '$'.
+bool AsmJsScanner::IsIdentifierStart(uc32 ch) {
+  if (ch == '_' || ch == '$') {
+    return true;
+  }
+  const bool is_upper = ch >= 'A' && ch <= 'Z';
+  const bool is_lower = ch >= 'a' && ch <= 'z';
+  return is_upper || is_lower;
+}
+
+// Returns true if |ch| may appear inside an identifier: anything accepted by
+// IsIdentifierStart() plus the ASCII digits.
+bool AsmJsScanner::IsIdentifierPart(uc32 ch) {
+  const bool is_digit = ch >= '0' && ch <= '9';
+  if (is_digit) {
+    return true;
+  }
+  return IsIdentifierStart(ch);
+}
+
+// Returns true if |ch| may begin a numeric literal: an ASCII digit or '.'.
+bool AsmJsScanner::IsNumberStart(uc32 ch) {
+  const bool is_digit = ch >= '0' && ch <= '9';
+  return is_digit || ch == '.';
+}
+
+} // namespace internal
+} // namespace v8
« src/asmjs/asm-scanner.h ('K') | « src/asmjs/asm-scanner.h ('k') | src/flag-definitions.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698