Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10)

Unified Diff: src/asmjs/asm-scanner.h

Issue 2751693002: [wasm][asm.js] Adding custom asm.js lexer. (Closed)
Patch Set: fix Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/asmjs/asm-names.h ('k') | src/asmjs/asm-scanner.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/asmjs/asm-scanner.h
diff --git a/src/asmjs/asm-scanner.h b/src/asmjs/asm-scanner.h
new file mode 100644
index 0000000000000000000000000000000000000000..1e4b9f3d3f369e2994d6847a0be1f2a8eea53627
--- /dev/null
+++ b/src/asmjs/asm-scanner.h
@@ -0,0 +1,158 @@
+// Copyright 2017 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_ASMJS_ASM_SCANNER_H_
+#define V8_ASMJS_ASM_SCANNER_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "src/asmjs/asm-names.h"
+#include "src/base/logging.h"
+#include "src/globals.h"
+
+namespace v8 {
+namespace internal {
+
+class Utf16CharacterStream;
+
+// A custom scanner to extract the token stream needed to parse valid
+// asm.js: http://asmjs.org/spec/latest/
+// This scanner intentionally avoids the portions of JavaScript lexing
+// that are not required to determine if code is valid asm.js code.
+// * Strings are disallowed except for 'use asm'.
+// * Only the subset of keywords needed to check asm.js invariants is
+// included.
+// * Identifiers are accumulated into local + global string tables
+// (for performance).
class V8_EXPORT_PRIVATE AsmJsScanner {
+ public:
+ typedef int32_t token_t;  // Token value; see the enum below for the ranges.
+
+ AsmJsScanner();
+ // Take ownership of the stream to parse (must be called before anything else).
+ void SetStream(std::unique_ptr<Utf16CharacterStream> stream);
+
+ // Get the current token.
+ token_t Token() const { return token_; }
+ // Advance to the next token.
+ void Next();
+ // Back up by one token.
+ void Rewind();
+ // Get the raw string for the current identifier.
+ const std::string& GetIdentifierString() const {
+ // Identifier strings are not available after a rewind.
+ DCHECK(!rewind_);
+ return identifier_string_;
+ }
+ // Check if we just passed a newline.
+ bool IsPrecededByNewline() const {
+ // Newline tracking is not available after a rewind.
+ DCHECK(!rewind_);
+ return preceded_by_newline_;
+ }
+
+#if DEBUG
+ // Debug-only method to go from a token back to its name.
+ // Slow, only use for debugging.
+ std::string Name(token_t token) const;
+#endif  // DEBUG
+
+ // Get the current position (to use with Seek).
+ int GetPosition() const;
+ // Restore a position obtained from GetPosition (token after that position).
+ void Seek(int pos);
+
+ // Select whether identifiers are resolved in global or local scope,
+ // and which scope new identifiers are added to.
+ void EnterLocalScope() { in_local_scope_ = true; }
+ void EnterGlobalScope() { in_local_scope_ = false; }
+ // Drop all current local identifiers.
+ void ResetLocals();
+
+ // Methods to check if a token is an identifier and, if so, in which scope.
+ bool IsLocal() const { return IsLocal(Token()); }
+ bool IsGlobal() const { return IsGlobal(Token()); }
+ static bool IsLocal(token_t token) { return token <= kLocalsStart; }
+ static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
+ // Methods to find the index position of an identifier (count starting from
+ // 0 for each scope separately; local tokens count down from kLocalsStart).
+ static size_t LocalIndex(token_t token) {
+ DCHECK(IsLocal(token));
+ return -(token - kLocalsStart);
+ }
+ static size_t GlobalIndex(token_t token) {
+ DCHECK(IsGlobal(token));
+ return token - kGlobalsStart;
+ }
+
+ // Methods to check if the current token is an asm.js "number" (kDouble,
+ // contains a dot) or an "unsigned" (kUnsigned, no dot), and to read its value.
+ bool IsUnsigned() const { return Token() == kUnsigned; }
+ uint64_t AsUnsigned() const { return unsigned_value_; }
+ bool IsDouble() const { return Token() == kDouble; }
+ double AsDouble() const { return double_value_; }
+
+ // clang-format off
+ enum {
+ // (-10000-kMaxIdentifierCount .. -10000] :: Local identifiers (counting down)
marja 2017/03/20 14:29:24 Nit: you probably meant: [-10000-kMaxIdentifierCou
+ // (-10000 .. -1) :: Builtin tokens like keywords
+ // (also includes some special
+ // ones like end of input)
+ // 0 .. 255 :: Single char tokens
+ // 256 .. 256+kMaxIdentifierCount :: Global identifiers
+ kLocalsStart = -10000,
+#define V(name, _junk1, _junk2, _junk3) kToken_##name,
+ STDLIB_MATH_FUNCTION_LIST(V)
+ STDLIB_ARRAY_TYPE_LIST(V)
+#undef V
+#define V(name) kToken_##name,
+ STDLIB_OTHER_LIST(V)
+ STDLIB_MATH_VALUE_LIST(V)
+ KEYWORD_NAME_LIST(V)
+#undef V
+#define V(rawname, name) kToken_##name,
+ LONG_SYMBOL_NAME_LIST(V)
+#undef V
+#define V(name, value, string_name) name = value,
+ SPECIAL_TOKEN_LIST(V)
+#undef V
+ kGlobalsStart = 256,
+ };
+ // clang-format on
+
+ private:
+ std::unique_ptr<Utf16CharacterStream> stream_;
+ token_t token_;  // Returned by Token().
+ token_t preceding_token_;
+ token_t next_token_;
+ bool rewind_;  // While set, identifier strings and newline info are invalid.
+ std::string identifier_string_;  // Returned by GetIdentifierString().
+ bool in_local_scope_;  // Toggled by EnterLocalScope() / EnterGlobalScope().
+ std::unordered_map<std::string, token_t> local_names_;
+ std::unordered_map<std::string, token_t> global_names_;
+ std::unordered_map<std::string, token_t> property_names_;
+ int global_count_;
+ double double_value_;  // Returned by AsDouble().
+ uint64_t unsigned_value_;  // Returned by AsUnsigned().
+ bool preceded_by_newline_;  // Returned by IsPrecededByNewline().
+
+ // Consume multiple characters.
+ void ConsumeIdentifier(uc32 ch);
+ void ConsumeNumber(uc32 ch);
+ bool ConsumeCComment();
+ void ConsumeCPPComment();
+ void ConsumeString(uc32 quote);
+ void ConsumeCompareOrShift(uc32 ch);
+
+ // Classify character categories.
+ bool IsIdentifierStart(uc32 ch);
+ bool IsIdentifierPart(uc32 ch);
+ bool IsNumberStart(uc32 ch);
+};
+
+} // namespace internal
+} // namespace v8
+#endif
« no previous file with comments | « src/asmjs/asm-names.h ('k') | src/asmjs/asm-scanner.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698