Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2017 the V8 project authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "src/asmjs/asm-scanner.h" | |
| 6 | |
| 7 #include "src/conversions.h" | |
| 8 #include "src/flags.h" | |
| 9 #include "src/parsing/scanner.h" | |
| 10 #include "src/unicode-cache.h" | |
| 11 | |
| 12 namespace v8 { | |
| 13 namespace internal { | |
| 14 | |
namespace {
// Upper bound on how many identifiers the scanner will number. Capping the
// count ensures that both global and local identifiers can be assigned a
// token id in the range of an int32_t.
constexpr int kMaxIdentifierCount = 0xf000000;
}  // namespace
| 20 | |
| 21 AsmJsScanner::AsmJsScanner() | |
| 22 : token_(kUninitialized), | |
| 23 preceding_token_(kUninitialized), | |
| 24 next_token_(kUninitialized), | |
| 25 rewind_(false), | |
| 26 in_local_scope_(false), | |
| 27 global_count_(0), | |
| 28 double_value_(0.0), | |
| 29 unsigned_value_(0), | |
| 30 preceded_by_newline_(false) { | |
| 31 #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name; | |
| 32 STDLIB_MATH_FUNCTION_LIST(V) | |
| 33 STDLIB_ARRAY_TYPE_LIST(V) | |
| 34 #undef V | |
| 35 #define V(name) property_names_[#name] = kToken_##name; | |
| 36 STDLIB_MATH_VALUE_LIST(V) | |
| 37 STDLIB_OTHER_LIST(V) | |
| 38 #undef V | |
| 39 #define V(name) global_names_[#name] = kToken_##name; | |
| 40 KEYWORD_NAME_LIST(V) | |
| 41 #undef V | |
| 42 } | |
| 43 | |
| 44 void AsmJsScanner::SetStream(std::unique_ptr<Utf16CharacterStream> stream) { | |
| 45 stream_ = std::move(stream); | |
| 46 Next(); | |
| 47 } | |
| 48 | |
| 49 void AsmJsScanner::Next() { | |
| 50 if (rewind_) { | |
| 51 preceding_token_ = token_; | |
| 52 token_ = next_token_; | |
| 53 next_token_ = kUninitialized; | |
| 54 rewind_ = false; | |
| 55 return; | |
| 56 } | |
| 57 | |
| 58 if (token_ == kEndOfInput || token_ == kParseError) { | |
| 59 return; | |
| 60 } | |
| 61 | |
| 62 #if DEBUG | |
| 63 if (FLAG_trace_asm_scanner) { | |
| 64 if (Token() == kDouble) { | |
| 65 PrintF("%lf ", AsDouble()); | |
| 66 } else if (Token() == kUnsigned) { | |
| 67 PrintF("%" PRIu64 " ", AsUnsigned()); | |
| 68 } else { | |
| 69 std::string name = Name(Token()); | |
| 70 PrintF("%s ", name.c_str()); | |
| 71 } | |
| 72 } | |
| 73 #endif | |
| 74 | |
| 75 preceded_by_newline_ = false; | |
| 76 preceding_token_ = token_; | |
| 77 for (;;) { | |
| 78 uc32 ch = stream_->Advance(); | |
| 79 switch (ch) { | |
| 80 case ' ': | |
| 81 case '\t': | |
| 82 case '\r': | |
| 83 // Ignore whitespace. | |
| 84 break; | |
| 85 | |
| 86 case '\n': | |
| 87 // Track when we've passed a newline for optional semicolon support, | |
| 88 // but keep scanning. | |
| 89 preceded_by_newline_ = true; | |
| 90 break; | |
| 91 | |
| 92 case kEndOfInput: | |
| 93 token_ = kEndOfInput; | |
| 94 return; | |
| 95 | |
| 96 case '\'': | |
| 97 case '"': | |
| 98 ConsumeString(ch); | |
| 99 return; | |
| 100 | |
| 101 case '/': | |
| 102 ch = stream_->Advance(); | |
| 103 if (ch == '/') { | |
| 104 ConsumeCPPComment(); | |
| 105 } else if (ch == '*') { | |
| 106 if (!ConsumeCComment()) { | |
| 107 token_ = kParseError; | |
| 108 return; | |
| 109 } | |
| 110 } else { | |
| 111 stream_->Back(); | |
| 112 token_ = '/'; | |
| 113 return; | |
| 114 } | |
| 115 // Breaks out of switch, but loops again (i.e. the case when we parsed | |
| 116 // a comment, but need to continue to look for the next token). | |
| 117 break; | |
| 118 | |
| 119 case '<': | |
| 120 case '>': | |
| 121 case '=': | |
| 122 case '!': | |
| 123 ConsumeCompareOrShift(ch); | |
| 124 return; | |
| 125 | |
| 126 #define V(single_char_token) case single_char_token: | |
| 127 SIMPLE_SINGLE_TOKEN_LIST(V) | |
| 128 #undef V | |
| 129 // Use fixed token IDs for ASCII. | |
| 130 token_ = ch; | |
| 131 return; | |
| 132 | |
| 133 default: | |
| 134 if (IsIdentifierStart(ch)) { | |
| 135 ConsumeIdentifier(ch); | |
| 136 } else if (IsNumberStart(ch)) { | |
| 137 ConsumeNumber(ch); | |
| 138 } else { | |
| 139 // TODO(bradnelson): Support unicode (probably via UnicodeCache). | |
| 140 token_ = kParseError; | |
| 141 } | |
| 142 return; | |
| 143 } | |
| 144 } | |
| 145 } | |
| 146 | |
| 147 void AsmJsScanner::Rewind() { | |
| 148 DCHECK(!rewind_); | |
| 149 next_token_ = token_; | |
| 150 token_ = preceding_token_; | |
| 151 preceding_token_ = kUninitialized; | |
| 152 rewind_ = true; | |
| 153 preceded_by_newline_ = false; | |
| 154 identifier_string_.clear(); | |
| 155 } | |
| 156 | |
| 157 void AsmJsScanner::ResetLocals() { local_names_.clear(); } | |
| 158 | |
#if DEBUG
// Only used for debugging.
// Returns a printable spelling for |token|:
//  - printable ASCII token ids map to the character itself,
//  - identifier tokens are found by reverse lookup in the local, global and
//    property name tables (in that order),
//  - everything else comes from the long-symbol and special-token lists.
// Hitting none of these is a programming error.
std::string AsmJsScanner::Name(token_t token) const {
  if (token >= 32 && token < 127) {
    return std::string(1, static_cast<char>(token));
  }
  for (const auto& entry : local_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  for (const auto& entry : global_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  for (const auto& entry : property_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  switch (token) {
#define V(rawname, name) \
  case kToken_##name:    \
    return rawname;
    LONG_SYMBOL_NAME_LIST(V)
#undef V
#define V(name, value, string_name) \
  case name:                        \
    return string_name;
    SPECIAL_TOKEN_LIST(V)
#undef V  // Do not let the helper macro leak past this function.
    default:
      break;
  }
  UNREACHABLE();
  return "{unreachable}";
}
#endif
| 197 | |
| 198 int AsmJsScanner::GetPosition() const { | |
| 199 DCHECK(!rewind_); | |
| 200 return static_cast<int>(stream_->pos()); | |
| 201 } | |
| 202 | |
| 203 void AsmJsScanner::Seek(int pos) { | |
| 204 stream_->Seek(pos); | |
| 205 preceding_token_ = kUninitialized; | |
| 206 token_ = kUninitialized; | |
| 207 next_token_ = kUninitialized; | |
| 208 rewind_ = false; | |
| 209 Next(); | |
| 210 } | |
| 211 | |
| 212 void AsmJsScanner::ConsumeIdentifier(uc32 ch) { | |
| 213 // Consume characters while still part of the identifier. | |
| 214 identifier_string_.clear(); | |
| 215 while (IsIdentifierPart(ch)) { | |
| 216 identifier_string_ += ch; | |
| 217 ch = stream_->Advance(); | |
| 218 } | |
| 219 // Go back one for next time. | |
| 220 stream_->Back(); | |
| 221 | |
| 222 // Decode what the identifier means. | |
| 223 if (preceding_token_ == '.') { | |
| 224 auto i = property_names_.find(identifier_string_); | |
| 225 if (i != property_names_.end()) { | |
| 226 token_ = i->second; | |
| 227 return; | |
| 228 } | |
| 229 } else { | |
| 230 { | |
| 231 auto i = local_names_.find(identifier_string_); | |
| 232 if (i != local_names_.end()) { | |
| 233 token_ = i->second; | |
| 234 return; | |
| 235 } | |
| 236 } | |
| 237 if (!in_local_scope_) { | |
| 238 auto i = global_names_.find(identifier_string_); | |
| 239 if (i != global_names_.end()) { | |
| 240 token_ = i->second; | |
| 241 return; | |
| 242 } | |
| 243 } | |
| 244 } | |
| 245 if (preceding_token_ == '.') { | |
| 246 CHECK(global_count_ < kMaxIdentifierCount); | |
| 247 token_ = kGlobalsStart + global_count_++; | |
| 248 property_names_[identifier_string_] = token_; | |
| 249 } else if (in_local_scope_) { | |
| 250 CHECK(local_names_.size() < kMaxIdentifierCount); | |
| 251 token_ = kLocalsStart - static_cast<token_t>(local_names_.size()); | |
| 252 local_names_[identifier_string_] = token_; | |
| 253 } else { | |
| 254 CHECK(global_count_ < kMaxIdentifierCount); | |
| 255 token_ = kGlobalsStart + global_count_++; | |
| 256 global_names_[identifier_string_] = token_; | |
| 257 } | |
| 258 } | |
| 259 | |
| 260 void AsmJsScanner::ConsumeNumber(uc32 ch) { | |
| 261 std::string number; | |
| 262 number = ch; | |
| 263 bool has_dot = ch == '.'; | |
| 264 for (;;) { | |
| 265 ch = stream_->Advance(); | |
| 266 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || | |
| 267 (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' || | |
| 268 ch == 'x' || | |
| 269 ((ch == '-' || ch == '+') && (number[number.size() - 1] == 'e' || | |
| 270 number[number.size() - 1] == 'E'))) { | |
| 271 // TODO(bradnelson): Test weird cases ending in -. | |
| 272 if (ch == '.') { | |
| 273 has_dot = true; | |
| 274 } | |
| 275 number.push_back(ch); | |
| 276 } else { | |
| 277 break; | |
| 278 } | |
| 279 } | |
| 280 stream_->Back(); | |
| 281 // Special case the most common number. | |
| 282 if (number.size() == 1 && number[0] == '0') { | |
| 283 unsigned_value_ = 0; | |
| 284 token_ = kUnsigned; | |
| 285 return; | |
| 286 } | |
| 287 // Pick out dot. | |
| 288 if (number.size() == 1 && number[0] == '.') { | |
| 289 token_ = '.'; | |
| 290 return; | |
| 291 } | |
| 292 // Decode numbers. | |
| 293 UnicodeCache cache; | |
| 294 double_value_ = StringToDouble( | |
| 295 &cache, | |
| 296 Vector<uint8_t>( | |
| 297 const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(number.data())), | |
| 298 number.size()), | |
| 299 ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY | ALLOW_IMPLICIT_OCTAL); | |
| 300 if (std::isnan(double_value_)) { | |
| 301 // Check if string to number conversion didn't consume all the characters. | |
| 302 // This happens if the character filter let through something invalid | |
| 303 // like: 0123ef for example. | |
| 304 // TODO(bradnelson): Check if this happens often enough to be a perf | |
| 305 // problem. | |
| 306 if (number[0] == '.') { | |
| 307 for (size_t k = 1; k < number.size(); ++k) { | |
| 308 stream_->Back(); | |
| 309 } | |
| 310 token_ = '.'; | |
| 311 return; | |
| 312 } | |
| 313 // Anything else that doesn't parse is an error. | |
| 314 token_ = kParseError; | |
| 315 return; | |
| 316 } | |
| 317 if (has_dot) { | |
| 318 token_ = kDouble; | |
| 319 } else { | |
| 320 unsigned_value_ = static_cast<uint32_t>(double_value_); | |
| 321 token_ = kUnsigned; | |
| 322 } | |
| 323 } | |
| 324 | |
| 325 bool AsmJsScanner::ConsumeCComment() { | |
| 326 for (;;) { | |
| 327 uc32 ch = stream_->Advance(); | |
| 328 while (ch == '*') { | |
| 329 ch = stream_->Advance(); | |
| 330 if (ch == '/') { | |
| 331 return true; | |
| 332 } | |
| 333 if (ch == kEndOfInput) { | |
|
vogelheim
2017/03/16 12:46:47
I think you can just drop this if.
If ch is kEndO
bradn
2017/03/16 17:03:15
Done.
| |
| 334 return false; | |
| 335 } | |
| 336 } | |
| 337 if (ch == kEndOfInput) { | |
| 338 return false; | |
| 339 } | |
| 340 } | |
| 341 } | |
| 342 | |
| 343 void AsmJsScanner::ConsumeCPPComment() { | |
| 344 for (;;) { | |
| 345 uc32 ch = stream_->Advance(); | |
| 346 if (ch == '\n' || ch == kEndOfInput) { | |
| 347 return; | |
| 348 } | |
| 349 } | |
| 350 } | |
| 351 | |
| 352 void AsmJsScanner::ConsumeString(uc32 quote) { | |
| 353 // Only string allowed is 'use asm' / "use asm". | |
| 354 const char* expected = "use asm"; | |
| 355 for (; *expected != '\0'; ++expected) { | |
| 356 if (stream_->Advance() != *expected) { | |
| 357 token_ = kParseError; | |
| 358 return; | |
| 359 } | |
| 360 } | |
| 361 if (stream_->Advance() != quote) { | |
| 362 token_ = kParseError; | |
| 363 return; | |
| 364 } | |
| 365 token_ = kToken_UseAsm; | |
| 366 } | |
| 367 | |
| 368 void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) { | |
| 369 uc32 next_ch = stream_->Advance(); | |
| 370 if (next_ch == '=') { | |
| 371 switch (ch) { | |
| 372 case '<': | |
| 373 token_ = kToken_LE; | |
| 374 break; | |
| 375 case '>': | |
| 376 token_ = kToken_GE; | |
| 377 break; | |
| 378 case '=': | |
| 379 token_ = kToken_EQ; | |
| 380 break; | |
| 381 case '!': | |
| 382 token_ = kToken_NE; | |
| 383 break; | |
| 384 default: | |
| 385 UNREACHABLE(); | |
| 386 } | |
| 387 } else if (ch == '<' && next_ch == '<') { | |
| 388 token_ = kToken_SHL; | |
| 389 } else if (ch == '>' && next_ch == '>') { | |
| 390 if (stream_->Advance() == '>') { | |
| 391 token_ = kToken_SHR; | |
| 392 } else { | |
| 393 token_ = kToken_SAR; | |
| 394 stream_->Back(); | |
| 395 } | |
| 396 } else { | |
| 397 stream_->Back(); | |
| 398 token_ = ch; | |
| 399 } | |
| 400 } | |
| 401 | |
| 402 bool AsmJsScanner::IsIdentifierStart(uc32 ch) { | |
| 403 return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' || | |
| 404 ch == '$'; | |
| 405 } | |
| 406 | |
| 407 bool AsmJsScanner::IsIdentifierPart(uc32 ch) { | |
| 408 return IsIdentifierStart(ch) || (ch >= '0' && ch <= '9'); | |
| 409 } | |
| 410 | |
| 411 bool AsmJsScanner::IsNumberStart(uc32 ch) { | |
| 412 return ch == '.' || (ch >= '0' && ch <= '9'); | |
| 413 } | |
| 414 | |
| 415 } // namespace internal | |
| 416 } // namespace v8 | |
| OLD | NEW |