Chromium Code Reviews| Index: src/asmjs/asm-scanner.h |
| diff --git a/src/asmjs/asm-scanner.h b/src/asmjs/asm-scanner.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..1e4b9f3d3f369e2994d6847a0be1f2a8eea53627 |
| --- /dev/null |
| +++ b/src/asmjs/asm-scanner.h |
| @@ -0,0 +1,158 @@ |
| +// Copyright 2017 the V8 project authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#ifndef V8_ASMJS_ASM_SCANNER_H_ |
| +#define V8_ASMJS_ASM_SCANNER_H_ |
| + |
| +#include <memory> |
| +#include <string> |
| +#include <unordered_map> |
| + |
| +#include "src/asmjs/asm-names.h" |
| +#include "src/base/logging.h" |
| +#include "src/globals.h" |
| + |
| +namespace v8 { |
| +namespace internal { |
| + |
| +class Utf16CharacterStream; |
| + |
| +// A custom scanner to extract the token stream needed to parse valid |
| +// asm.js: http://asmjs.org/spec/latest/ |
| +// This scanner intentionally avoids the portions of JavaScript lexing
| +// that are not required to determine whether code is valid asm.js.
| +// * Strings are disallowed except for 'use asm'. |
| +// * Only the subset of keywords needed to check asm.js invariants is
| +// included.
| +// * Identifiers are accumulated into local + global string tables |
| +// (for performance). |
| +class V8_EXPORT_PRIVATE AsmJsScanner { |
| + public: |
| + typedef int32_t token_t; |
| + |
| + AsmJsScanner(); |
| + // Pick the stream to parse (must be called before anything else). |
| + void SetStream(std::unique_ptr<Utf16CharacterStream> stream); |
| + |
| + // Get current token. |
| + token_t Token() const { return token_; } |
| + // Advance to the next token. |
| + void Next(); |
| + // Back up by one token. |
| + void Rewind(); |
| + // Get raw string for current identifier. |
| + const std::string& GetIdentifierString() const { |
| + // Identifier strings don't work after a rewind. |
| + DCHECK(!rewind_); |
| + return identifier_string_; |
| + } |
| + // Check if we just passed a newline. |
| + bool IsPrecededByNewline() const { |
| + // Newline tracking doesn't work if you back up. |
| + DCHECK(!rewind_); |
| + return preceded_by_newline_; |
| + } |
| + |
| +#if DEBUG |
| + // Debug only method to go from a token back to its name. |
| + // Slow, only use for debugging. |
| + std::string Name(token_t token) const; |
| +#endif |
| + |
| + // Get current position (to use with Seek). |
| + int GetPosition() const; |
| + // Restores old position (token after that position). |
| + void Seek(int pos); |
| + |
| + // Select whether identifiers are resolved in global or local scope, |
| + // and which scope new identifiers are added to. |
| + void EnterLocalScope() { in_local_scope_ = true; } |
| + void EnterGlobalScope() { in_local_scope_ = false; } |
| + // Drop all current local identifiers. |
| + void ResetLocals(); |
| + |
| + // Methods to check if a token is an identifier and which scope. |
| + bool IsLocal() const { return IsLocal(Token()); } |
| + bool IsGlobal() const { return IsGlobal(Token()); } |
| + static bool IsLocal(token_t token) { return token <= kLocalsStart; } |
| + static bool IsGlobal(token_t token) { return token >= kGlobalsStart; } |
| + // Methods to find the index position of an identifier (count starting from |
| + // 0 for each scope separately). |
| + static size_t LocalIndex(token_t token) { |
| + DCHECK(IsLocal(token)); |
| + return -(token - kLocalsStart); |
| + } |
| + static size_t GlobalIndex(token_t token) { |
| + DCHECK(IsGlobal(token)); |
| + return token - kGlobalsStart; |
| + } |
| + |
| + // Methods to check if the current token is an asm.js "number" (contains a |
| + // dot) or an "unsigned" (a number without a dot). |
| + bool IsUnsigned() const { return Token() == kUnsigned; } |
| + uint64_t AsUnsigned() const { return unsigned_value_; } |
| + bool IsDouble() const { return Token() == kDouble; } |
| + double AsDouble() const { return double_value_; } |
| + |
| + // clang-format off |
| + enum { |
| + // (-10000-kMaxIdentifierCount .. -10000] :: Local identifiers (counting down)
|
marja
2017/03/20 14:29:24
Nit: you probably meant:
[-10000-kMaxIdentifierCou
|
| + // (-10000 .. -1) :: Builtin tokens like keywords
| + // (also includes some special
| + // ones like end of input)
| + // 0 .. 255 :: Single char tokens |
| + // 256 .. 256+kMaxIdentifierCount :: Global identifiers |
| + kLocalsStart = -10000, |
| +#define V(name, _junk1, _junk2, _junk3) kToken_##name, |
| + STDLIB_MATH_FUNCTION_LIST(V) |
| + STDLIB_ARRAY_TYPE_LIST(V) |
| +#undef V |
| +#define V(name) kToken_##name, |
| + STDLIB_OTHER_LIST(V) |
| + STDLIB_MATH_VALUE_LIST(V) |
| + KEYWORD_NAME_LIST(V) |
| +#undef V |
| +#define V(rawname, name) kToken_##name, |
| + LONG_SYMBOL_NAME_LIST(V) |
| +#undef V |
| +#define V(name, value, string_name) name = value, |
| + SPECIAL_TOKEN_LIST(V) |
| +#undef V |
| + kGlobalsStart = 256, |
| + }; |
| + // clang-format on |
| + |
| + private: |
| + std::unique_ptr<Utf16CharacterStream> stream_; |
| + token_t token_; |
| + token_t preceding_token_; |
| + token_t next_token_; |
| + bool rewind_; |
| + std::string identifier_string_; |
| + bool in_local_scope_; |
| + std::unordered_map<std::string, token_t> local_names_; |
| + std::unordered_map<std::string, token_t> global_names_; |
| + std::unordered_map<std::string, token_t> property_names_; |
| + int global_count_; |
| + double double_value_; |
| + uint64_t unsigned_value_; |
| + bool preceded_by_newline_; |
| + |
| + // Consume multiple characters. |
| + void ConsumeIdentifier(uc32 ch); |
| + void ConsumeNumber(uc32 ch); |
| + bool ConsumeCComment(); |
| + void ConsumeCPPComment(); |
| + void ConsumeString(uc32 quote); |
| + void ConsumeCompareOrShift(uc32 ch); |
| + |
| + // Classify character categories. |
| + bool IsIdentifierStart(uc32 ch); |
| + bool IsIdentifierPart(uc32 ch); |
| + bool IsNumberStart(uc32 ch); |
| +}; |
| + |
| +} // namespace internal |
| +} // namespace v8 |
| +#endif |