OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2017 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #ifndef V8_ASMJS_ASM_SCANNER_H_ | |
6 #define V8_ASMJS_ASM_SCANNER_H_ | |
7 | |
8 #include <memory> | |
9 #include <string> | |
10 #include <unordered_map> | |
11 | |
12 #include "src/asmjs/asm-names.h" | |
13 #include "src/base/logging.h" | |
14 #include "src/globals.h" | |
15 | |
16 namespace v8 { | |
17 namespace internal { | |
18 | |
19 class Utf16CharacterStream; | |
20 | |
21 // A custom scanner to extract the token stream needed to parse valid | |
22 // asm.js: http://asmjs.org/spec/latest/ | |
23 // This scanner intentionally avoids the portions of JavaScript lexing | |
24 // that are not required to determine if code is valid asm.js code. | |
25 // * Strings are disallowed except for 'use asm'. | |
26 // * Only the subset of keywords needed to check asm.js invariants is | |
27 // included. | |
28 // * Identifiers are accumulated into local + global string tables | |
29 // (for performance). | |
30 class AsmJsScanner { | |
31 public: | |
32 typedef int32_t token_t; | |
33 | |
34 AsmJsScanner(); | |
35 // Pick the stream to parse (must be called before anything else). | |
36 void SetStream(std::unique_ptr<Utf16CharacterStream> stream); | |
37 | |
38 // Get current token. | |
39 token_t Token() const { return token_; } | |
40 // Advance to the next token. | |
41 void Next(); | |
42 // Back up by one token. | |
43 void Rewind(); | |
44 // Get raw string for current identifier.
vogelheim
2017/03/16 12:46:48
indentifier -> identifier
Unless it's a string th
bradn
2017/03/16 17:03:15
Done.
| |
45 const std::string& GetIdentifierString() const { | |
46 // Identifier strings don't work after a rewind. | |
47 DCHECK(!rewind_); | |
48 return identifier_string_; | |
49 } | |
50 // Check if we just passed a newline. | |
51 bool IsPrecededByNewline() const { | |
52 // Newline tracking doesn't work if you back up. | |
53 DCHECK(!rewind_); | |
54 return preceded_by_newline_; | |
55 } | |
56 | |
57 #if DEBUG | |
58 // Debug only method to go from a token back to its name. | |
59 // Slow, only use for debugging. | |
60 std::string Name(token_t token) const; | |
61 #endif | |
62 | |
63 // Get current position (to use with Seek). | |
64 int GetPosition() const; | |
65 // Restores old position (token after that position). | |
66 void Seek(int pos); | |
67 | |
68 // Select whether identifiers are resolved in global or local scope, | |
69 // and which scope new identifiers are added to. | |
70 void EnterLocalScope() { in_local_scope_ = true; } | |
71 void EnterGlobalScope() { in_local_scope_ = false; } | |
72 // Drop all current local identifiers. | |
73 void ResetLocals(); | |
74 | |
75 // Methods to check if a token is an identifier and which scope. | |
76 bool IsLocal() const { return IsLocal(Token()); } | |
77 bool IsGlobal() const { return IsGlobal(Token()); } | |
78 static bool IsLocal(token_t token) { return token <= kLocalsStart; } | |
79 static bool IsGlobal(token_t token) { return token >= kGlobalsStart; } | |
80 // Methods to find the index position of an identifier (count starting from | |
81 // 0 for each scope separately). | |
82 static size_t LocalIndex(token_t token) { | |
83 DCHECK(IsLocal(token)); | |
84 return -(token - kLocalsStart); | |
85 } | |
86 static size_t GlobalIndex(token_t token) { | |
87 DCHECK(IsGlobal(token)); | |
88 return token - kGlobalsStart; | |
89 } | |
90 | |
91 // Methods to check if the current token is an asm.js "number" (contains a | |
92 // dot) or an "unsigned" (a number without a dot). | |
93 bool IsUnsigned() const { return Token() == kUnsigned; } | |
94 uint64_t AsUnsigned() const { return unsigned_value_; } | |
95 bool IsDouble() const { return Token() == kDouble; } | |
96 double AsDouble() const { return double_value_; } | |
97 | |
98 // clang-format off | |
99 enum { | |
100 // [-10000 .. -10000-kMaxIdentifierCount) :: Local identifiers | |
101 // (-10000 .. -1) :: Builtin tokens like keywords | |
102 // (also includes some special | |
103 // ones like end of input) | |
104 // 0 .. 255 :: Single char tokens | |
105 // 256 .. 256+kMaxIdentifierCount :: Global identifiers | |
106 kLocalsStart = -10000, | |
107 #define V(name, _junk1, _junk2, _junk3) kToken_##name, | |
108 STDLIB_MATH_FUNCTION_LIST(V) | |
109 STDLIB_ARRAY_TYPE_LIST(V) | |
110 #undef V | |
111 #define V(name) kToken_##name, | |
112 STDLIB_OTHER_LIST(V) | |
113 STDLIB_MATH_VALUE_LIST(V) | |
114 KEYWORD_NAME_LIST(V) | |
115 #undef V | |
116 #define V(rawname, name) kToken_##name, | |
117 LONG_SYMBOL_NAME_LIST(V) | |
118 #undef V | |
119 #define V(name, value, string_name) name = value, | |
120 SPECIAL_TOKEN_LIST(V) | |
121 #undef V | |
122 kGlobalsStart = 256, | |
123 }; | |
124 // clang-format on | |
125 | |
126 private: | |
127 std::unique_ptr<Utf16CharacterStream> stream_; | |
128 token_t token_; | |
129 token_t preceding_token_; | |
130 token_t next_token_; | |
131 bool rewind_; | |
132 std::string identifier_string_; | |
133 bool in_local_scope_; | |
134 std::unordered_map<std::string, token_t> local_names_; | |
135 std::unordered_map<std::string, token_t> global_names_; | |
136 std::unordered_map<std::string, token_t> property_names_; | |
137 int global_count_; | |
138 double double_value_; | |
139 uint64_t unsigned_value_; | |
140 bool preceded_by_newline_; | |
141 | |
142 // Consume multiple characters. | |
143 void ConsumeIdentifier(uc32 ch); | |
144 void ConsumeNumber(uc32 ch); | |
145 bool ConsumeCComment(); | |
146 void ConsumeCPPComment(); | |
147 void ConsumeString(uc32 quote); | |
148 void ConsumeCompareOrShift(uc32 ch); | |
149 | |
150 // Classify character categories. | |
151 bool IsIdentifierStart(uc32 ch); | |
152 bool IsIdentifierPart(uc32 ch); | |
153 bool IsNumberStart(uc32 ch); | |
154 }; | |
155 | |
156 } // namespace internal | |
157 } // namespace v8 | |
158 #endif | |
OLD | NEW |