Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(20)

Unified Diff: src/asmjs/asm-lexer.cc

Issue 2751693002: [wasm][asm.js] Adding custom asm.js lexer. (Closed)
Patch Set: fix warning Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/asmjs/asm-lexer.cc
diff --git a/src/asmjs/asm-lexer.cc b/src/asmjs/asm-lexer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4970cc860fba490cd30cebf3b6d913a46b1838c3
--- /dev/null
+++ b/src/asmjs/asm-lexer.cc
@@ -0,0 +1,346 @@
+// Copyright 2017 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/asmjs/asm-lexer.h"
+
+#include <stdlib.h>
+
+#include "src/objects-inl.h"
+#include "src/parsing/scanner-character-streams.h"
+#include "src/parsing/scanner.h"
+
+namespace v8 {
+namespace internal {
+
+AsmJsLexer::AsmJsLexer(Isolate* isolate, Handle<Script> script, int start,
+ int end)
+ : script_(script),
+ source_(String::cast(script->source()), isolate),
+ stream_(ScannerStream::For(source_, start, end)),
+ token_(0),
+ last_token_(0),
+ next_token_(0),
+ rewind_(false),
+ local_(false),
+ global_count_(0),
+ double_value_(0.0),
+ unsigned_value_(0),
+ preceeded_by_newline_(false) {
+#define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;
+ STDLIB_MATH_FUNCTION_LIST(V)
+ STDLIB_ARRAY_TYPE_LIST(V)
+#undef V
+#define V(name) property_names_[#name] = kToken_##name;
+ STDLIB_MATH_VALUE_LIST(V)
+ STDLIB_OTHER_LIST(V)
+#undef V
+#define V(name) global_names_[#name] = kToken_##name;
+ KEYWORD_NAME_LIST(V)
+#undef V
+ Next();
+}
+
+void AsmJsLexer::Next() {
+ if (rewind_) {
+ last_token_ = token_;
+ token_ = next_token_;
+ next_token_ = 0;
+ rewind_ = false;
+ return;
+ }
+
+ if (token_ == kEndOfInput || token_ == kParseError) {
+ return;
+ }
+
+#if 0
vogelheim 2017/03/14 13:36:37 Please don't do this.
bradn 2017/03/15 07:53:03 Changed to a trace flag. This ends up being useful
+ // Uncomment for debug raw token stream.
+ if (Token() != 0) {
+ if (Token() == kDouble) {
+ fprintf(stderr, "%lf ", AsDouble());
+ } else if (Token() == kUnsigned) {
+ fprintf(stderr, "%lu ", AsUnsigned());
+ } else {
+ fprintf(stderr, "%s ", Name(Token()));
+ }
+ }
+#endif
+
+ preceeded_by_newline_ = false;
+ last_token_ = token_;
+ for (;;) {
+ token_t ch = stream_->Advance();
vogelheim 2017/03/14 13:36:37 (Here & below.) Using token_t for individual chara
bradn 2017/03/15 07:53:02 Switched all of these inside to uc32.
+ if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
Karl 2017/03/14 18:00:47 Would a switch statement be cleaner here?
bradn 2017/03/15 07:53:02 Done.
+ // Skip whitespace.
+ if (ch == '\n') {
+ preceeded_by_newline_ = true;
+ }
+ continue;
+ } else if (ch == kEndOfInput) {
marja 2017/03/14 11:11:47 (general comment) The structure is getting a bit c
bradn 2017/03/15 07:53:03 Decomposed into more functions, hope that helps.
+ token_ = kEndOfInput;
+ break;
marja 2017/03/14 11:11:47 Why break, why not return? (Now it's not trivial t
bradn 2017/03/15 07:53:03 Redone in more functions, avoids the break.
+ } else if (ch < 32 || ch >= 127) {
Karl 2017/03/14 18:00:47 If you use a switch statement, either explicitly e
bradn 2017/03/15 07:53:02 Done.
+ // Disallow non-ascii for now.
+ token_ = kParseError;
+ break;
+ } else if (ch == '\'' || ch == '\"') {
+ // Only string allowed is 'use asm' / "use asm".
+ const char* use_asm = "use asm";
Karl 2017/03/14 18:00:47 Should this be a constexpr?
bradn 2017/03/15 07:53:03 Changed round.
+ const char* pos = use_asm;
+ while (*pos) {
+ token_t och = stream_->Advance();
vogelheim 2017/03/14 13:36:37 och ? [here & below]
bradn 2017/03/15 07:53:02 Renamed and refactored.
+ if (och != *pos) {
+ token_ = kParseError;
+ return;
+ }
+ ++pos;
+ }
+ token_t och = stream_->Advance();
+ if (och != ch) {
marja 2017/03/14 11:11:47 Lost here... what's this? Ahh, it's checking the
bradn 2017/03/15 07:53:02 renamed variable to highlight that.
+ token_ = kParseError;
+ break;
+ }
+ token_ = kToken_UseAsm;
+ break;
+ } else if (ch == '/') {
+ ch = stream_->Advance();
+ if (ch == '/') {
+ for (;;) {
+ ch = stream_->Advance();
+ if (ch == '\n' || ch == kEndOfInput) {
+ break;
+ }
+ }
+ continue;
+ } else if (ch == '*') {
+ for (;;) {
+ ch = stream_->Advance();
+ if (ch == '*') {
+ ch = stream_->Advance();
+ if (ch == '/') {
vogelheim 2017/03/14 13:36:37 +1 to Marja's comments. Also, would this work on
bradn 2017/03/15 07:53:02 Yeah, this was wrong. Factor to function and fixed
+ if (ch == '*') {
marja 2017/03/14 11:11:47 if ch == '/' on the line above, it cannot be '*' h
bradn 2017/03/15 07:53:02 Oops, fixed.
+ stream_->Back();
+ }
+ break;
marja 2017/03/14 11:11:47 I'm lost here anyway, what's this block, what are
bradn 2017/03/15 07:53:02 This was meant to back up if you saw a * inside a
+ }
+ } else if (ch == kEndOfInput) {
+ break;
+ }
+ }
+ continue;
+ } else {
marja 2017/03/14 11:11:47 No idea here anymore which if this else associates
bradn 2017/03/15 07:53:03 Restructured, should be more clear now.
+ stream_->Back();
+ token_ = '/';
+ break;
+ }
+ } else if (ch == '<' || ch == '>' || ch == '=' || ch == '!') {
+ token_t och = stream_->Advance();
+ if (och == '=') {
+ if (ch == '<') {
+ token_ = kToken_LE;
+ break;
+ } else if (ch == '>') {
+ token_ = kToken_GE;
+ break;
+ } else if (ch == '=') {
+ token_ = kToken_EQ;
+ break;
+ } else if (ch == '!') {
+ token_ = kToken_NE;
+ break;
+ } else {
+ UNREACHABLE();
+ }
+ } else if (ch == '<' && och == '<') {
+ token_ = kToken_SHL;
+ break;
+ } else if (ch == '>' && och == '>') {
+ token_t ooch = stream_->Advance();
vogelheim 2017/03/14 13:36:37 ooch ?
bradn 2017/03/15 07:53:02 Hah, terrible name, sorry, dropped variable comple
+ if (ooch == '>') {
+ token_ = kToken_SHR;
+ } else {
+ token_ = kToken_SAR;
+ stream_->Back();
+ }
+ break;
+ } else {
+ stream_->Back();
+ token_ = ch;
+ break;
+ }
+ } else if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
+ ch == '_' || ch == '$') {
vogelheim 2017/03/14 13:36:37 Could you introduce helper functions for the chara
bradn 2017/03/15 07:53:02 Done.
+ name_ = ch;
+ for (;;) {
+ ch = stream_->Advance();
+ if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' ||
+ ch == '$' || (ch >= '0' && ch <= '9')) {
+ name_ += ch;
+ } else {
+ break;
marja 2017/03/14 11:11:47 Why not while(ch >= ...) { name_ += ch; ch =
bradn 2017/03/15 07:53:03 Done.
+ }
+ }
+ stream_->Back();
+ if (last_token_ == '.') {
+ auto i = property_names_.find(name_);
+ if (i != property_names_.end()) {
+ token_ = i->second;
+ break;
marja 2017/03/14 11:11:47 E.g,. here it would be less confusing to use retur
bradn 2017/03/15 07:53:03 Done.
+ }
+ } else {
+ {
+ auto i = local_names_.find(name_);
+ if (i != local_names_.end()) {
+ token_ = i->second;
+ break;
+ }
+ }
+ if (!local_) {
marja 2017/03/14 11:11:47 What's local_?
bradn 2017/03/15 07:53:03 Renamed.
+ auto i = global_names_.find(name_);
+ if (i != global_names_.end()) {
+ token_ = i->second;
+ break;
+ }
+ }
+ }
+ if (last_token_ == '.') {
+ // TODO(bradnelson): Assert no overflow.
+ token_ = kGlobalsStart + global_count_++;
+ property_names_[name_] = token_;
+ } else if (local_) {
+ // TODO(bradnelson): Assert no overflow.
+ token_ = kLocalsStart - local_names_.size();
+ local_names_[name_] = token_;
+ } else {
+ // TODO(bradnelson): Assert no overflow.
+ token_ = kGlobalsStart + global_count_++;
+ global_names_[name_] = token_;
+ }
+ break;
+ } else if (ch == '.' || (ch >= '0' && ch <= '9')) {
+ bool has_dot = ch == '.';
+ name_ = ch;
+ for (;;) {
+ ch = stream_->Advance();
+ if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
marja 2017/03/14 11:11:47 Would it be feasible to have a helper function for
bradn 2017/03/15 07:53:03 I've added a TODO to do this. Might require some c
+ (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'x' ||
+ ((ch == '-' || ch == '+') && (name_[name_.size() - 1] == 'e' ||
+ name_[name_.size() - 1] == 'E'))) {
+ // TODO(bradnelson): Test weird cases ending in -.
+ if (ch == '.') {
+ has_dot = true;
+ }
+ name_ += ch;
+ } else {
+ break;
+ }
+ }
+ stream_->Back();
+ // Special case the most common number.
+ if (name_ == "0") {
+ unsigned_value_ = 0;
+ token_ = kUnsigned;
+ break;
+ }
+ // Pick out dot.
+ if (name_ == ".") {
+ token_ = '.';
+ break;
+ }
+ // Decode numbers.
+ char* end;
+ if (has_dot) {
+ double_value_ = strtod(name_.c_str(), &end);
vogelheim 2017/03/14 13:36:37 strtod may depend on the current locale. Are you r
bradn 2017/03/15 07:53:03 Yeah, it's a fair point these probably aren't idea
vogelheim 2017/03/15 12:07:40 Note that this is a correctness, not a performance
vogelheim 2017/03/15 12:10:11 My gut feeling is that parser & scanner are tightl
+ token_ = kDouble;
+ } else {
+ if (name_.size() > 2 && name_[0] == '0' && name_[1] == 'x') {
+ unsigned_value_ = strtoul(name_.c_str() + 2, &end, 16);
+ } else if (name_.size() > 1 && name_[0] == '0') {
+ unsigned_value_ = strtoul(name_.c_str() + 1, &end, 8);
+ } else {
+ double_value_ = strtod(name_.c_str(), &end);
+ unsigned_value_ = static_cast<uint32_t>(double_value_);
marja 2017/03/14 11:11:47 Why strtod if it's guaranteed to be an integer (no
bradn 2017/03/15 07:53:02 Asm.js uses 1e2 for 100 (as an integer :-) Added a
+ }
+ token_ = kUnsigned;
+ }
+ if (end != name_.c_str() + name_.size()) {
vogelheim 2017/03/14 13:36:37 I'm confused. When does this happen?
bradn 2017/03/15 07:53:03 When a number failed to parse, added a comment + e
+ // Handle mistaken parse of '.' as number.
marja 2017/03/14 11:11:47 How does this relate to the "Pick out dot" above?
bradn 2017/03/15 07:53:02 Reworded. The idea here is that if the number pars
+ if (name_[0] == '.') {
+ for (size_t k = 1; k < name_.size(); ++k) {
+ stream_->Back();
+ }
+ token_ = '.';
+ break;
+ }
+ token_ = kParseError;
+ return;
+ }
+ break;
+ } else {
+ token_ = ch;
+ break;
+ }
+ }
+}
+
+void AsmJsLexer::Rewind() {
+ DCHECK(!rewind_);
+ next_token_ = token_;
+ token_ = last_token_;
+ last_token_ = 0;
+ rewind_ = true;
vogelheim 2017/03/14 13:36:37 This doesn't update name_. Is this intentional?
vogelheim 2017/03/14 13:36:37 This doesn't update preceeded_by_newline_. Is this
bradn 2017/03/15 07:53:02 Clearing it for good measure here (didn't want to
bradn 2017/03/15 07:53:03 Clobbering for good measure here, also commented a
+}
+
+void AsmJsLexer::ResetLocals() { local_names_.clear(); }
+
+const char* AsmJsLexer::Name(token_t token) const {
+ // TODO(bradnelson): Make thread safe (and maybe debug only).
+ if (token >= 32 && token < 127) {
+ static char chname[2];
+ chname[0] = static_cast<char>(token);
vogelheim 2017/03/14 13:36:37 chname[1] = '\0' ??
bradn 2017/03/15 07:53:02 Done. Whoops.
+ return chname;
+ }
+ for (auto i = local_names_.begin(); i != local_names_.end(); ++i) {
vogelheim 2017/03/14 13:36:37 style nitpick: I'd use the for(auto& i : local_nam
bradn 2017/03/15 07:53:03 Done.
+ if (i->second == token) {
+ return i->first.c_str();
+ }
+ }
+ for (auto i = global_names_.begin(); i != global_names_.end(); ++i) {
+ if (i->second == token) {
+ return i->first.c_str();
+ }
+ }
+ for (auto i = property_names_.begin(); i != property_names_.end(); ++i) {
+ if (i->second == token) {
+ return i->first.c_str();
+ }
+ }
+ switch (token) {
+#define V(rawname, name) \
+ case kToken_##name: \
+ return rawname;
+ LONG_SYMBOL_NAME_LIST(V)
+#undef V
+ default:
+ break;
+ }
+ if (token == kUnsigned) {
+ return "{unsigned value}";
+ } else if (token == kDouble) {
+ return "{double value}";
+ } else if (token == kParseError) {
+ return "{parse error}";
+ } else if (token == kEndOfInput) {
+ return "{end of input}";
+ }
+ UNREACHABLE();
+ return "{unreachable}";
+}
+
+int AsmJsLexer::position() const { return static_cast<int>(stream_->pos()); }
+
+void AsmJsLexer::Seek(int pos) { stream_->Seek(pos); }
+
+} // namespace internal
+} // namespace v8

Powered by Google App Engine
This is Rietveld 408576698