OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2017 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "src/asmjs/asm-scanner.h" | |
6 | |
7 #include "src/conversions.h" | |
8 #include "src/flags.h" | |
9 #include "src/parsing/scanner.h" | |
10 #include "src/unicode-cache.h" | |
11 | |
12 namespace v8 { | |
13 namespace internal { | |
14 | |
namespace {
// Upper bound on the number of identifiers the scanner hands out token ids
// for. Capping the count ensures both global and local identifiers can be
// assigned a token id in the range of an int32_t.
const int kMaxIdentifierCount = 0xf000000;
}  // namespace
20 | |
21 AsmJsScanner::AsmJsScanner() | |
22 : token_(kUninitialized), | |
23 preceding_token_(kUninitialized), | |
24 next_token_(kUninitialized), | |
25 rewind_(false), | |
26 in_local_scope_(false), | |
27 global_count_(0), | |
28 double_value_(0.0), | |
29 unsigned_value_(0), | |
30 preceded_by_newline_(false) { | |
31 #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name; | |
32 STDLIB_MATH_FUNCTION_LIST(V) | |
33 STDLIB_ARRAY_TYPE_LIST(V) | |
34 #undef V | |
35 #define V(name) property_names_[#name] = kToken_##name; | |
36 STDLIB_MATH_VALUE_LIST(V) | |
37 STDLIB_OTHER_LIST(V) | |
38 #undef V | |
39 #define V(name) global_names_[#name] = kToken_##name; | |
40 KEYWORD_NAME_LIST(V) | |
41 #undef V | |
42 } | |
43 | |
44 void AsmJsScanner::SetStream(std::unique_ptr<Utf16CharacterStream> stream) { | |
45 stream_ = std::move(stream); | |
46 Next(); | |
47 } | |
48 | |
49 void AsmJsScanner::Next() { | |
50 if (rewind_) { | |
51 preceding_token_ = token_; | |
52 token_ = next_token_; | |
53 next_token_ = kUninitialized; | |
54 rewind_ = false; | |
55 return; | |
56 } | |
57 | |
58 if (token_ == kEndOfInput || token_ == kParseError) { | |
59 return; | |
60 } | |
61 | |
62 #if DEBUG | |
63 if (FLAG_trace_asm_scanner) { | |
64 if (Token() == kDouble) { | |
65 PrintF("%lf ", AsDouble()); | |
66 } else if (Token() == kUnsigned) { | |
67 PrintF("%" PRIu64 " ", AsUnsigned()); | |
68 } else { | |
69 std::string name = Name(Token()); | |
70 PrintF("%s ", name.c_str()); | |
71 } | |
72 } | |
73 #endif | |
74 | |
75 preceded_by_newline_ = false; | |
76 preceding_token_ = token_; | |
77 for (;;) { | |
78 uc32 ch = stream_->Advance(); | |
79 switch (ch) { | |
80 case ' ': | |
81 case '\t': | |
82 case '\r': | |
83 // Ignore whitespace. | |
84 break; | |
85 | |
86 case '\n': | |
87 // Track when we've passed a newline for optional semicolon support, | |
88 // but keep scanning. | |
89 preceded_by_newline_ = true; | |
90 break; | |
91 | |
92 case kEndOfInput: | |
93 token_ = kEndOfInput; | |
94 return; | |
95 | |
96 case '\'': | |
97 case '"': | |
98 ConsumeString(ch); | |
99 return; | |
100 | |
101 case '/': | |
102 ch = stream_->Advance(); | |
103 if (ch == '/') { | |
104 ConsumeCPPComment(); | |
105 } else if (ch == '*') { | |
106 if (!ConsumeCComment()) { | |
107 token_ = kParseError; | |
108 return; | |
109 } | |
110 } else { | |
111 stream_->Back(); | |
112 token_ = '/'; | |
113 return; | |
114 } | |
115 // Breaks out of switch, but loops again (i.e. the case when we parsed | |
116 // a comment, but need to continue to look for the next token). | |
117 break; | |
118 | |
119 case '<': | |
120 case '>': | |
121 case '=': | |
122 case '!': | |
123 ConsumeCompareOrShift(ch); | |
124 return; | |
125 | |
126 #define V(single_char_token) case single_char_token: | |
127 SIMPLE_SINGLE_TOKEN_LIST(V) | |
128 #undef V | |
129 // Use fixed token IDs for ASCII. | |
130 token_ = ch; | |
131 return; | |
132 | |
133 default: | |
134 if (IsIdentifierStart(ch)) { | |
135 ConsumeIdentifier(ch); | |
136 } else if (IsNumberStart(ch)) { | |
137 ConsumeNumber(ch); | |
138 } else { | |
139 // TODO(bradnelson): Support unicode (probably via UnicodeCache). | |
140 token_ = kParseError; | |
141 } | |
142 return; | |
143 } | |
144 } | |
145 } | |
146 | |
147 void AsmJsScanner::Rewind() { | |
148 DCHECK(!rewind_); | |
149 next_token_ = token_; | |
150 token_ = preceding_token_; | |
151 preceding_token_ = kUninitialized; | |
152 rewind_ = true; | |
153 preceded_by_newline_ = false; | |
154 identifier_string_.clear(); | |
155 } | |
156 | |
157 void AsmJsScanner::ResetLocals() { local_names_.clear(); } | |
158 | |
#if DEBUG
// Only used for debugging.
// Returns a printable name for |token|. Tokens with values in [32, 127) are
// returned as a one-character string. Otherwise the local, global and
// property name tables are searched by value, in that order, followed by the
// long-symbol and special-token lists.
std::string AsmJsScanner::Name(token_t token) const {
  if (token >= 32 && token < 127) {
    return std::string(1, static_cast<char>(token));
  }
  for (const auto& entry : local_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  for (const auto& entry : global_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  for (const auto& entry : property_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  switch (token) {
#define V(rawname, name) \
  case kToken_##name:    \
    return rawname;
    LONG_SYMBOL_NAME_LIST(V)
#undef V
#define V(name, value, string_name) \
  case name:                        \
    return string_name;
    SPECIAL_TOKEN_LIST(V)
// Undefine the helper so it does not leak into the rest of the file.
#undef V
    default:
      break;
  }
  UNREACHABLE();
  return "{unreachable}";
}
#endif
197 | |
198 int AsmJsScanner::GetPosition() const { | |
199 DCHECK(!rewind_); | |
200 return static_cast<int>(stream_->pos()); | |
201 } | |
202 | |
203 void AsmJsScanner::Seek(int pos) { | |
204 stream_->Seek(pos); | |
205 preceding_token_ = kUninitialized; | |
206 token_ = kUninitialized; | |
207 next_token_ = kUninitialized; | |
208 rewind_ = false; | |
209 Next(); | |
210 } | |
211 | |
212 void AsmJsScanner::ConsumeIdentifier(uc32 ch) { | |
213 // Consume characters while still part of the identifier. | |
214 identifier_string_.clear(); | |
215 while (IsIdentifierPart(ch)) { | |
216 identifier_string_ += ch; | |
217 ch = stream_->Advance(); | |
218 } | |
219 // Go back one for next time. | |
220 stream_->Back(); | |
221 | |
222 // Decode what the identifier means. | |
223 if (preceding_token_ == '.') { | |
224 auto i = property_names_.find(identifier_string_); | |
225 if (i != property_names_.end()) { | |
226 token_ = i->second; | |
227 return; | |
228 } | |
229 } else { | |
230 { | |
231 auto i = local_names_.find(identifier_string_); | |
232 if (i != local_names_.end()) { | |
233 token_ = i->second; | |
234 return; | |
235 } | |
236 } | |
237 if (!in_local_scope_) { | |
238 auto i = global_names_.find(identifier_string_); | |
239 if (i != global_names_.end()) { | |
240 token_ = i->second; | |
241 return; | |
242 } | |
243 } | |
244 } | |
245 if (preceding_token_ == '.') { | |
246 CHECK(global_count_ < kMaxIdentifierCount); | |
247 token_ = kGlobalsStart + global_count_++; | |
248 property_names_[identifier_string_] = token_; | |
249 } else if (in_local_scope_) { | |
250 CHECK(local_names_.size() < kMaxIdentifierCount); | |
251 token_ = kLocalsStart - static_cast<token_t>(local_names_.size()); | |
252 local_names_[identifier_string_] = token_; | |
253 } else { | |
254 CHECK(global_count_ < kMaxIdentifierCount); | |
255 token_ = kGlobalsStart + global_count_++; | |
256 global_names_[identifier_string_] = token_; | |
257 } | |
258 } | |
259 | |
260 void AsmJsScanner::ConsumeNumber(uc32 ch) { | |
261 std::string number; | |
262 number = ch; | |
263 bool has_dot = ch == '.'; | |
264 for (;;) { | |
265 ch = stream_->Advance(); | |
266 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || | |
267 (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' || | |
268 ch == 'x' || | |
269 ((ch == '-' || ch == '+') && (number[number.size() - 1] == 'e' || | |
270 number[number.size() - 1] == 'E'))) { | |
271 // TODO(bradnelson): Test weird cases ending in -. | |
272 if (ch == '.') { | |
273 has_dot = true; | |
274 } | |
275 number.push_back(ch); | |
276 } else { | |
277 break; | |
278 } | |
279 } | |
280 stream_->Back(); | |
281 // Special case the most common number. | |
282 if (number.size() == 1 && number[0] == '0') { | |
283 unsigned_value_ = 0; | |
284 token_ = kUnsigned; | |
285 return; | |
286 } | |
287 // Pick out dot. | |
288 if (number.size() == 1 && number[0] == '.') { | |
289 token_ = '.'; | |
290 return; | |
291 } | |
292 // Decode numbers. | |
293 UnicodeCache cache; | |
294 double_value_ = StringToDouble( | |
295 &cache, | |
296 Vector<uint8_t>( | |
297 const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(number.data())), | |
298 number.size()), | |
299 ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY | ALLOW_IMPLICIT_OCTAL); | |
300 if (std::isnan(double_value_)) { | |
301 // Check if string to number conversion didn't consume all the characters. | |
302 // This happens if the character filter let through something invalid | |
303 // like: 0123ef for example. | |
304 // TODO(bradnelson): Check if this happens often enough to be a perf | |
305 // problem. | |
306 if (number[0] == '.') { | |
307 for (size_t k = 1; k < number.size(); ++k) { | |
308 stream_->Back(); | |
309 } | |
310 token_ = '.'; | |
311 return; | |
312 } | |
313 // Anything else that doesn't parse is an error. | |
314 token_ = kParseError; | |
315 return; | |
316 } | |
317 if (has_dot) { | |
318 token_ = kDouble; | |
319 } else { | |
320 unsigned_value_ = static_cast<uint32_t>(double_value_); | |
321 token_ = kUnsigned; | |
322 } | |
323 } | |
324 | |
325 bool AsmJsScanner::ConsumeCComment() { | |
326 for (;;) { | |
327 uc32 ch = stream_->Advance(); | |
328 while (ch == '*') { | |
329 ch = stream_->Advance(); | |
330 if (ch == '/') { | |
331 return true; | |
332 } | |
333 if (ch == kEndOfInput) { | |
vogelheim
2017/03/16 12:46:47
I think you can just drop this if.
If ch is kEndO
bradn
2017/03/16 17:03:15
Done.
| |
334 return false; | |
335 } | |
336 } | |
337 if (ch == kEndOfInput) { | |
338 return false; | |
339 } | |
340 } | |
341 } | |
342 | |
343 void AsmJsScanner::ConsumeCPPComment() { | |
344 for (;;) { | |
345 uc32 ch = stream_->Advance(); | |
346 if (ch == '\n' || ch == kEndOfInput) { | |
347 return; | |
348 } | |
349 } | |
350 } | |
351 | |
352 void AsmJsScanner::ConsumeString(uc32 quote) { | |
353 // Only string allowed is 'use asm' / "use asm". | |
354 const char* expected = "use asm"; | |
355 for (; *expected != '\0'; ++expected) { | |
356 if (stream_->Advance() != *expected) { | |
357 token_ = kParseError; | |
358 return; | |
359 } | |
360 } | |
361 if (stream_->Advance() != quote) { | |
362 token_ = kParseError; | |
363 return; | |
364 } | |
365 token_ = kToken_UseAsm; | |
366 } | |
367 | |
368 void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) { | |
369 uc32 next_ch = stream_->Advance(); | |
370 if (next_ch == '=') { | |
371 switch (ch) { | |
372 case '<': | |
373 token_ = kToken_LE; | |
374 break; | |
375 case '>': | |
376 token_ = kToken_GE; | |
377 break; | |
378 case '=': | |
379 token_ = kToken_EQ; | |
380 break; | |
381 case '!': | |
382 token_ = kToken_NE; | |
383 break; | |
384 default: | |
385 UNREACHABLE(); | |
386 } | |
387 } else if (ch == '<' && next_ch == '<') { | |
388 token_ = kToken_SHL; | |
389 } else if (ch == '>' && next_ch == '>') { | |
390 if (stream_->Advance() == '>') { | |
391 token_ = kToken_SHR; | |
392 } else { | |
393 token_ = kToken_SAR; | |
394 stream_->Back(); | |
395 } | |
396 } else { | |
397 stream_->Back(); | |
398 token_ = ch; | |
399 } | |
400 } | |
401 | |
402 bool AsmJsScanner::IsIdentifierStart(uc32 ch) { | |
403 return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' || | |
404 ch == '$'; | |
405 } | |
406 | |
407 bool AsmJsScanner::IsIdentifierPart(uc32 ch) { | |
408 return IsIdentifierStart(ch) || (ch >= '0' && ch <= '9'); | |
409 } | |
410 | |
411 bool AsmJsScanner::IsNumberStart(uc32 ch) { | |
412 return ch == '.' || (ch >= '0' && ch <= '9'); | |
413 } | |
414 | |
415 } // namespace internal | |
416 } // namespace v8 | |
OLD | NEW |