Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: src/asmjs/asm-scanner.cc

Issue 2751693002: [wasm][asm.js] Adding custom asm.js lexer. (Closed)
Patch Set: fix Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/asmjs/asm-scanner.h"
6
7 #include "src/conversions.h"
8 #include "src/flags.h"
9 #include "src/parsing/scanner.h"
10 #include "src/unicode-cache.h"
11
12 namespace v8 {
13 namespace internal {
14
namespace {
// Upper bound on the number of identifiers the scanner will hand out token
// ids for. Capping the count ensures that both global and local identifiers
// can be assigned a token id within the range of an int32_t.
constexpr int kMaxIdentifierCount = 0xf000000;
}  // namespace
20
21 AsmJsScanner::AsmJsScanner()
22 : token_(kUninitialized),
23 preceding_token_(kUninitialized),
24 next_token_(kUninitialized),
25 rewind_(false),
26 in_local_scope_(false),
27 global_count_(0),
28 double_value_(0.0),
29 unsigned_value_(0),
30 preceded_by_newline_(false) {
31 #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;
32 STDLIB_MATH_FUNCTION_LIST(V)
33 STDLIB_ARRAY_TYPE_LIST(V)
34 #undef V
35 #define V(name) property_names_[#name] = kToken_##name;
36 STDLIB_MATH_VALUE_LIST(V)
37 STDLIB_OTHER_LIST(V)
38 #undef V
39 #define V(name) global_names_[#name] = kToken_##name;
40 KEYWORD_NAME_LIST(V)
41 #undef V
42 }
43
44 void AsmJsScanner::SetStream(std::unique_ptr<Utf16CharacterStream> stream) {
45 stream_ = std::move(stream);
46 Next();
47 }
48
49 void AsmJsScanner::Next() {
50 if (rewind_) {
51 preceding_token_ = token_;
52 token_ = next_token_;
53 next_token_ = kUninitialized;
54 rewind_ = false;
55 return;
56 }
57
58 if (token_ == kEndOfInput || token_ == kParseError) {
59 return;
60 }
61
62 #if DEBUG
63 if (FLAG_trace_asm_scanner) {
64 if (Token() == kDouble) {
65 PrintF("%lf ", AsDouble());
66 } else if (Token() == kUnsigned) {
67 PrintF("%" PRIu64 " ", AsUnsigned());
68 } else {
69 std::string name = Name(Token());
70 PrintF("%s ", name.c_str());
71 }
72 }
73 #endif
74
75 preceded_by_newline_ = false;
76 preceding_token_ = token_;
77 for (;;) {
78 uc32 ch = stream_->Advance();
79 switch (ch) {
80 case ' ':
81 case '\t':
82 case '\r':
83 // Ignore whitespace.
84 break;
85
86 case '\n':
87 // Track when we've passed a newline for optional semicolon support,
88 // but keep scanning.
89 preceded_by_newline_ = true;
90 break;
91
92 case kEndOfInput:
93 token_ = kEndOfInput;
94 return;
95
96 case '\'':
97 case '"':
98 ConsumeString(ch);
99 return;
100
101 case '/':
102 ch = stream_->Advance();
103 if (ch == '/') {
104 ConsumeCPPComment();
105 } else if (ch == '*') {
106 if (!ConsumeCComment()) {
107 token_ = kParseError;
108 return;
109 }
110 } else {
111 stream_->Back();
112 token_ = '/';
113 return;
114 }
115 // Breaks out of switch, but loops again (i.e. the case when we parsed
116 // a comment, but need to continue to look for the next token).
117 break;
118
119 case '<':
120 case '>':
121 case '=':
122 case '!':
123 ConsumeCompareOrShift(ch);
124 return;
125
126 #define V(single_char_token) case single_char_token:
127 SIMPLE_SINGLE_TOKEN_LIST(V)
128 #undef V
129 // Use fixed token IDs for ASCII.
130 token_ = ch;
131 return;
132
133 default:
134 if (IsIdentifierStart(ch)) {
135 ConsumeIdentifier(ch);
136 } else if (IsNumberStart(ch)) {
137 ConsumeNumber(ch);
138 } else {
139 // TODO(bradnelson): Support unicode (probably via UnicodeCache).
140 token_ = kParseError;
141 }
142 return;
143 }
144 }
145 }
146
147 void AsmJsScanner::Rewind() {
148 DCHECK(!rewind_);
149 next_token_ = token_;
150 token_ = preceding_token_;
151 preceding_token_ = kUninitialized;
152 rewind_ = true;
153 preceded_by_newline_ = false;
154 identifier_string_.clear();
155 }
156
157 void AsmJsScanner::ResetLocals() { local_names_.clear(); }
158
#if DEBUG
// Only used for debugging.
//
// Returns a printable name for |token|:
//   - tokens in [32, 127) are returned as the single character they encode;
//   - otherwise the local, global and property name tables are searched (in
//     that order) for an identifier mapped to |token|;
//   - finally the long-symbol and special-token lists are consulted.
// Hitting none of these is treated as unreachable.
std::string AsmJsScanner::Name(token_t token) const {
  if (token >= 32 && token < 127) {
    return std::string(1, static_cast<char>(token));
  }
  // The tables map name -> token, so a reverse lookup is a linear scan.
  for (const auto& entry : local_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  for (const auto& entry : global_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  for (const auto& entry : property_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  switch (token) {
#define V(rawname, name) \
  case kToken_##name:    \
    return rawname;
    LONG_SYMBOL_NAME_LIST(V)
#undef V
#define V(name, value, string_name) \
  case name:                        \
    return string_name;
    SPECIAL_TOKEN_LIST(V)
    // Undefine the helper so it does not leak into the rest of the file.
#undef V
    default:
      break;
  }
  UNREACHABLE();
  return "{unreachable}";
}
#endif
197
198 int AsmJsScanner::GetPosition() const {
199 DCHECK(!rewind_);
200 return static_cast<int>(stream_->pos());
201 }
202
203 void AsmJsScanner::Seek(int pos) {
204 stream_->Seek(pos);
205 preceding_token_ = kUninitialized;
206 token_ = kUninitialized;
207 next_token_ = kUninitialized;
208 rewind_ = false;
209 Next();
210 }
211
212 void AsmJsScanner::ConsumeIdentifier(uc32 ch) {
213 // Consume characters while still part of the identifier.
214 identifier_string_.clear();
215 while (IsIdentifierPart(ch)) {
216 identifier_string_ += ch;
217 ch = stream_->Advance();
218 }
219 // Go back one for next time.
220 stream_->Back();
221
222 // Decode what the identifier means.
223 if (preceding_token_ == '.') {
224 auto i = property_names_.find(identifier_string_);
225 if (i != property_names_.end()) {
226 token_ = i->second;
227 return;
228 }
229 } else {
230 {
231 auto i = local_names_.find(identifier_string_);
232 if (i != local_names_.end()) {
233 token_ = i->second;
234 return;
235 }
236 }
237 if (!in_local_scope_) {
238 auto i = global_names_.find(identifier_string_);
239 if (i != global_names_.end()) {
240 token_ = i->second;
241 return;
242 }
243 }
244 }
245 if (preceding_token_ == '.') {
246 CHECK(global_count_ < kMaxIdentifierCount);
247 token_ = kGlobalsStart + global_count_++;
248 property_names_[identifier_string_] = token_;
249 } else if (in_local_scope_) {
250 CHECK(local_names_.size() < kMaxIdentifierCount);
251 token_ = kLocalsStart - static_cast<token_t>(local_names_.size());
252 local_names_[identifier_string_] = token_;
253 } else {
254 CHECK(global_count_ < kMaxIdentifierCount);
255 token_ = kGlobalsStart + global_count_++;
256 global_names_[identifier_string_] = token_;
257 }
258 }
259
260 void AsmJsScanner::ConsumeNumber(uc32 ch) {
261 std::string number;
262 number = ch;
263 bool has_dot = ch == '.';
264 for (;;) {
265 ch = stream_->Advance();
266 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
267 (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' ||
268 ch == 'x' ||
269 ((ch == '-' || ch == '+') && (number[number.size() - 1] == 'e' ||
270 number[number.size() - 1] == 'E'))) {
271 // TODO(bradnelson): Test weird cases ending in -.
272 if (ch == '.') {
273 has_dot = true;
274 }
275 number.push_back(ch);
276 } else {
277 break;
278 }
279 }
280 stream_->Back();
281 // Special case the most common number.
282 if (number.size() == 1 && number[0] == '0') {
283 unsigned_value_ = 0;
284 token_ = kUnsigned;
285 return;
286 }
287 // Pick out dot.
288 if (number.size() == 1 && number[0] == '.') {
289 token_ = '.';
290 return;
291 }
292 // Decode numbers.
293 UnicodeCache cache;
294 double_value_ = StringToDouble(
295 &cache,
296 Vector<uint8_t>(
297 const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(number.data())),
298 static_cast<int>(number.size())),
299 ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY | ALLOW_IMPLICIT_OCTAL);
300 if (std::isnan(double_value_)) {
301 // Check if string to number conversion didn't consume all the characters.
302 // This happens if the character filter let through something invalid
303 // like: 0123ef for example.
304 // TODO(bradnelson): Check if this happens often enough to be a perf
305 // problem.
306 if (number[0] == '.') {
307 for (size_t k = 1; k < number.size(); ++k) {
308 stream_->Back();
309 }
310 token_ = '.';
311 return;
312 }
313 // Anything else that doesn't parse is an error.
314 token_ = kParseError;
315 return;
316 }
317 if (has_dot) {
318 token_ = kDouble;
319 } else {
320 unsigned_value_ = static_cast<uint32_t>(double_value_);
321 token_ = kUnsigned;
322 }
323 }
324
325 bool AsmJsScanner::ConsumeCComment() {
326 for (;;) {
327 uc32 ch = stream_->Advance();
328 while (ch == '*') {
329 ch = stream_->Advance();
330 if (ch == '/') {
331 return true;
332 }
333 }
334 if (ch == kEndOfInput) {
335 return false;
336 }
337 }
338 }
339
340 void AsmJsScanner::ConsumeCPPComment() {
341 for (;;) {
342 uc32 ch = stream_->Advance();
343 if (ch == '\n' || ch == kEndOfInput) {
344 return;
345 }
346 }
347 }
348
349 void AsmJsScanner::ConsumeString(uc32 quote) {
350 // Only string allowed is 'use asm' / "use asm".
351 const char* expected = "use asm";
352 for (; *expected != '\0'; ++expected) {
353 if (stream_->Advance() != *expected) {
354 token_ = kParseError;
355 return;
356 }
357 }
358 if (stream_->Advance() != quote) {
359 token_ = kParseError;
360 return;
361 }
362 token_ = kToken_UseAsm;
363 }
364
365 void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) {
366 uc32 next_ch = stream_->Advance();
367 if (next_ch == '=') {
368 switch (ch) {
369 case '<':
370 token_ = kToken_LE;
371 break;
372 case '>':
373 token_ = kToken_GE;
374 break;
375 case '=':
376 token_ = kToken_EQ;
377 break;
378 case '!':
379 token_ = kToken_NE;
380 break;
381 default:
382 UNREACHABLE();
383 }
384 } else if (ch == '<' && next_ch == '<') {
385 token_ = kToken_SHL;
386 } else if (ch == '>' && next_ch == '>') {
387 if (stream_->Advance() == '>') {
388 token_ = kToken_SHR;
389 } else {
390 token_ = kToken_SAR;
391 stream_->Back();
392 }
393 } else {
394 stream_->Back();
395 token_ = ch;
396 }
397 }
398
399 bool AsmJsScanner::IsIdentifierStart(uc32 ch) {
400 return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' ||
401 ch == '$';
402 }
403
404 bool AsmJsScanner::IsIdentifierPart(uc32 ch) {
405 return IsIdentifierStart(ch) || (ch >= '0' && ch <= '9');
406 }
407
408 bool AsmJsScanner::IsNumberStart(uc32 ch) {
409 return ch == '.' || (ch >= '0' && ch <= '9');
410 }
411
412 } // namespace internal
413 } // namespace v8
OLDNEW
« src/asmjs/asm-scanner.h ('K') | « src/asmjs/asm-scanner.h ('k') | src/flag-definitions.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698