OLD | NEW |
(Empty) | |
| 1 // Copyright 2017 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "src/asmjs/asm-scanner.h" |
| 6 |
| 7 #include "src/conversions.h" |
| 8 #include "src/flags.h" |
| 9 #include "src/parsing/scanner.h" |
| 10 #include "src/unicode-cache.h" |
| 11 |
| 12 namespace v8 { |
| 13 namespace internal { |
| 14 |
namespace {
// Cap number of identifiers to ensure we can assign both global and
// local ones a token id in the range of an int32_t.
constexpr int kMaxIdentifierCount = 0xf000000;
}  // namespace
| 20 |
| 21 AsmJsScanner::AsmJsScanner() |
| 22 : token_(kUninitialized), |
| 23 preceding_token_(kUninitialized), |
| 24 next_token_(kUninitialized), |
| 25 rewind_(false), |
| 26 in_local_scope_(false), |
| 27 global_count_(0), |
| 28 double_value_(0.0), |
| 29 unsigned_value_(0), |
| 30 preceded_by_newline_(false) { |
| 31 #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name; |
| 32 STDLIB_MATH_FUNCTION_LIST(V) |
| 33 STDLIB_ARRAY_TYPE_LIST(V) |
| 34 #undef V |
| 35 #define V(name) property_names_[#name] = kToken_##name; |
| 36 STDLIB_MATH_VALUE_LIST(V) |
| 37 STDLIB_OTHER_LIST(V) |
| 38 #undef V |
| 39 #define V(name) global_names_[#name] = kToken_##name; |
| 40 KEYWORD_NAME_LIST(V) |
| 41 #undef V |
| 42 } |
| 43 |
| 44 void AsmJsScanner::SetStream(std::unique_ptr<Utf16CharacterStream> stream) { |
| 45 stream_ = std::move(stream); |
| 46 Next(); |
| 47 } |
| 48 |
| 49 void AsmJsScanner::Next() { |
| 50 if (rewind_) { |
| 51 preceding_token_ = token_; |
| 52 token_ = next_token_; |
| 53 next_token_ = kUninitialized; |
| 54 rewind_ = false; |
| 55 return; |
| 56 } |
| 57 |
| 58 if (token_ == kEndOfInput || token_ == kParseError) { |
| 59 return; |
| 60 } |
| 61 |
| 62 #if DEBUG |
| 63 if (FLAG_trace_asm_scanner) { |
| 64 if (Token() == kDouble) { |
| 65 PrintF("%lf ", AsDouble()); |
| 66 } else if (Token() == kUnsigned) { |
| 67 PrintF("%" PRIu64 " ", AsUnsigned()); |
| 68 } else { |
| 69 std::string name = Name(Token()); |
| 70 PrintF("%s ", name.c_str()); |
| 71 } |
| 72 } |
| 73 #endif |
| 74 |
| 75 preceded_by_newline_ = false; |
| 76 preceding_token_ = token_; |
| 77 for (;;) { |
| 78 uc32 ch = stream_->Advance(); |
| 79 switch (ch) { |
| 80 case ' ': |
| 81 case '\t': |
| 82 case '\r': |
| 83 // Ignore whitespace. |
| 84 break; |
| 85 |
| 86 case '\n': |
| 87 // Track when we've passed a newline for optional semicolon support, |
| 88 // but keep scanning. |
| 89 preceded_by_newline_ = true; |
| 90 break; |
| 91 |
| 92 case kEndOfInput: |
| 93 token_ = kEndOfInput; |
| 94 return; |
| 95 |
| 96 case '\'': |
| 97 case '"': |
| 98 ConsumeString(ch); |
| 99 return; |
| 100 |
| 101 case '/': |
| 102 ch = stream_->Advance(); |
| 103 if (ch == '/') { |
| 104 ConsumeCPPComment(); |
| 105 } else if (ch == '*') { |
| 106 if (!ConsumeCComment()) { |
| 107 token_ = kParseError; |
| 108 return; |
| 109 } |
| 110 } else { |
| 111 stream_->Back(); |
| 112 token_ = '/'; |
| 113 return; |
| 114 } |
| 115 // Breaks out of switch, but loops again (i.e. the case when we parsed |
| 116 // a comment, but need to continue to look for the next token). |
| 117 break; |
| 118 |
| 119 case '<': |
| 120 case '>': |
| 121 case '=': |
| 122 case '!': |
| 123 ConsumeCompareOrShift(ch); |
| 124 return; |
| 125 |
| 126 #define V(single_char_token) case single_char_token: |
| 127 SIMPLE_SINGLE_TOKEN_LIST(V) |
| 128 #undef V |
| 129 // Use fixed token IDs for ASCII. |
| 130 token_ = ch; |
| 131 return; |
| 132 |
| 133 default: |
| 134 if (IsIdentifierStart(ch)) { |
| 135 ConsumeIdentifier(ch); |
| 136 } else if (IsNumberStart(ch)) { |
| 137 ConsumeNumber(ch); |
| 138 } else { |
| 139 // TODO(bradnelson): Support unicode (probably via UnicodeCache). |
| 140 token_ = kParseError; |
| 141 } |
| 142 return; |
| 143 } |
| 144 } |
| 145 } |
| 146 |
| 147 void AsmJsScanner::Rewind() { |
| 148 DCHECK(!rewind_); |
| 149 next_token_ = token_; |
| 150 token_ = preceding_token_; |
| 151 preceding_token_ = kUninitialized; |
| 152 rewind_ = true; |
| 153 preceded_by_newline_ = false; |
| 154 identifier_string_.clear(); |
| 155 } |
| 156 |
| 157 void AsmJsScanner::ResetLocals() { local_names_.clear(); } |
| 158 |
#if DEBUG
// Only used for debugging.
// Returns a printable spelling for |token|. Printable ASCII token ids map to
// their character; otherwise the local, global and property name tables are
// searched (in that order) for a name carrying this id, and finally the
// multi-character symbol and special token lists are consulted. An id that is
// found nowhere hits UNREACHABLE().
std::string AsmJsScanner::Name(token_t token) const {
  if (token >= 32 && token < 127) {
    return std::string(1, static_cast<char>(token));
  }
  for (const auto& entry : local_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  for (const auto& entry : global_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  for (const auto& entry : property_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  switch (token) {
#define V(rawname, name) \
  case kToken_##name:    \
    return rawname;
    LONG_SYMBOL_NAME_LIST(V)
#undef V
#define V(name, value, string_name) \
  case name:                        \
    return string_name;
    SPECIAL_TOKEN_LIST(V)
#undef V  // Do not leak the helper macro past this switch.
    default:
      break;
  }
  UNREACHABLE();
  return "{unreachable}";
}
#endif
| 197 |
| 198 int AsmJsScanner::GetPosition() const { |
| 199 DCHECK(!rewind_); |
| 200 return static_cast<int>(stream_->pos()); |
| 201 } |
| 202 |
| 203 void AsmJsScanner::Seek(int pos) { |
| 204 stream_->Seek(pos); |
| 205 preceding_token_ = kUninitialized; |
| 206 token_ = kUninitialized; |
| 207 next_token_ = kUninitialized; |
| 208 rewind_ = false; |
| 209 Next(); |
| 210 } |
| 211 |
| 212 void AsmJsScanner::ConsumeIdentifier(uc32 ch) { |
| 213 // Consume characters while still part of the identifier. |
| 214 identifier_string_.clear(); |
| 215 while (IsIdentifierPart(ch)) { |
| 216 identifier_string_ += ch; |
| 217 ch = stream_->Advance(); |
| 218 } |
| 219 // Go back one for next time. |
| 220 stream_->Back(); |
| 221 |
| 222 // Decode what the identifier means. |
| 223 if (preceding_token_ == '.') { |
| 224 auto i = property_names_.find(identifier_string_); |
| 225 if (i != property_names_.end()) { |
| 226 token_ = i->second; |
| 227 return; |
| 228 } |
| 229 } else { |
| 230 { |
| 231 auto i = local_names_.find(identifier_string_); |
| 232 if (i != local_names_.end()) { |
| 233 token_ = i->second; |
| 234 return; |
| 235 } |
| 236 } |
| 237 if (!in_local_scope_) { |
| 238 auto i = global_names_.find(identifier_string_); |
| 239 if (i != global_names_.end()) { |
| 240 token_ = i->second; |
| 241 return; |
| 242 } |
| 243 } |
| 244 } |
| 245 if (preceding_token_ == '.') { |
| 246 CHECK(global_count_ < kMaxIdentifierCount); |
| 247 token_ = kGlobalsStart + global_count_++; |
| 248 property_names_[identifier_string_] = token_; |
| 249 } else if (in_local_scope_) { |
| 250 CHECK(local_names_.size() < kMaxIdentifierCount); |
| 251 token_ = kLocalsStart - static_cast<token_t>(local_names_.size()); |
| 252 local_names_[identifier_string_] = token_; |
| 253 } else { |
| 254 CHECK(global_count_ < kMaxIdentifierCount); |
| 255 token_ = kGlobalsStart + global_count_++; |
| 256 global_names_[identifier_string_] = token_; |
| 257 } |
| 258 } |
| 259 |
| 260 void AsmJsScanner::ConsumeNumber(uc32 ch) { |
| 261 std::string number; |
| 262 number = ch; |
| 263 bool has_dot = ch == '.'; |
| 264 for (;;) { |
| 265 ch = stream_->Advance(); |
| 266 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || |
| 267 (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' || |
| 268 ch == 'x' || |
| 269 ((ch == '-' || ch == '+') && (number[number.size() - 1] == 'e' || |
| 270 number[number.size() - 1] == 'E'))) { |
| 271 // TODO(bradnelson): Test weird cases ending in -. |
| 272 if (ch == '.') { |
| 273 has_dot = true; |
| 274 } |
| 275 number.push_back(ch); |
| 276 } else { |
| 277 break; |
| 278 } |
| 279 } |
| 280 stream_->Back(); |
| 281 // Special case the most common number. |
| 282 if (number.size() == 1 && number[0] == '0') { |
| 283 unsigned_value_ = 0; |
| 284 token_ = kUnsigned; |
| 285 return; |
| 286 } |
| 287 // Pick out dot. |
| 288 if (number.size() == 1 && number[0] == '.') { |
| 289 token_ = '.'; |
| 290 return; |
| 291 } |
| 292 // Decode numbers. |
| 293 UnicodeCache cache; |
| 294 double_value_ = StringToDouble( |
| 295 &cache, |
| 296 Vector<uint8_t>( |
| 297 const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(number.data())), |
| 298 static_cast<int>(number.size())), |
| 299 ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY | ALLOW_IMPLICIT_OCTAL); |
| 300 if (std::isnan(double_value_)) { |
| 301 // Check if string to number conversion didn't consume all the characters. |
| 302 // This happens if the character filter let through something invalid |
| 303 // like: 0123ef for example. |
| 304 // TODO(bradnelson): Check if this happens often enough to be a perf |
| 305 // problem. |
| 306 if (number[0] == '.') { |
| 307 for (size_t k = 1; k < number.size(); ++k) { |
| 308 stream_->Back(); |
| 309 } |
| 310 token_ = '.'; |
| 311 return; |
| 312 } |
| 313 // Anything else that doesn't parse is an error. |
| 314 token_ = kParseError; |
| 315 return; |
| 316 } |
| 317 if (has_dot) { |
| 318 token_ = kDouble; |
| 319 } else { |
| 320 unsigned_value_ = static_cast<uint32_t>(double_value_); |
| 321 token_ = kUnsigned; |
| 322 } |
| 323 } |
| 324 |
| 325 bool AsmJsScanner::ConsumeCComment() { |
| 326 for (;;) { |
| 327 uc32 ch = stream_->Advance(); |
| 328 while (ch == '*') { |
| 329 ch = stream_->Advance(); |
| 330 if (ch == '/') { |
| 331 return true; |
| 332 } |
| 333 } |
| 334 if (ch == kEndOfInput) { |
| 335 return false; |
| 336 } |
| 337 } |
| 338 } |
| 339 |
| 340 void AsmJsScanner::ConsumeCPPComment() { |
| 341 for (;;) { |
| 342 uc32 ch = stream_->Advance(); |
| 343 if (ch == '\n' || ch == kEndOfInput) { |
| 344 return; |
| 345 } |
| 346 } |
| 347 } |
| 348 |
| 349 void AsmJsScanner::ConsumeString(uc32 quote) { |
| 350 // Only string allowed is 'use asm' / "use asm". |
| 351 const char* expected = "use asm"; |
| 352 for (; *expected != '\0'; ++expected) { |
| 353 if (stream_->Advance() != *expected) { |
| 354 token_ = kParseError; |
| 355 return; |
| 356 } |
| 357 } |
| 358 if (stream_->Advance() != quote) { |
| 359 token_ = kParseError; |
| 360 return; |
| 361 } |
| 362 token_ = kToken_UseAsm; |
| 363 } |
| 364 |
| 365 void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) { |
| 366 uc32 next_ch = stream_->Advance(); |
| 367 if (next_ch == '=') { |
| 368 switch (ch) { |
| 369 case '<': |
| 370 token_ = kToken_LE; |
| 371 break; |
| 372 case '>': |
| 373 token_ = kToken_GE; |
| 374 break; |
| 375 case '=': |
| 376 token_ = kToken_EQ; |
| 377 break; |
| 378 case '!': |
| 379 token_ = kToken_NE; |
| 380 break; |
| 381 default: |
| 382 UNREACHABLE(); |
| 383 } |
| 384 } else if (ch == '<' && next_ch == '<') { |
| 385 token_ = kToken_SHL; |
| 386 } else if (ch == '>' && next_ch == '>') { |
| 387 if (stream_->Advance() == '>') { |
| 388 token_ = kToken_SHR; |
| 389 } else { |
| 390 token_ = kToken_SAR; |
| 391 stream_->Back(); |
| 392 } |
| 393 } else { |
| 394 stream_->Back(); |
| 395 token_ = ch; |
| 396 } |
| 397 } |
| 398 |
| 399 bool AsmJsScanner::IsIdentifierStart(uc32 ch) { |
| 400 return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' || |
| 401 ch == '$'; |
| 402 } |
| 403 |
| 404 bool AsmJsScanner::IsIdentifierPart(uc32 ch) { |
| 405 return IsIdentifierStart(ch) || (ch >= '0' && ch <= '9'); |
| 406 } |
| 407 |
| 408 bool AsmJsScanner::IsNumberStart(uc32 ch) { |
| 409 return ch == '.' || (ch >= '0' && ch <= '9'); |
| 410 } |
| 411 |
| 412 } // namespace internal |
| 413 } // namespace v8 |
OLD | NEW |