| OLD | NEW |
| (Empty) | |
| 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are |
| 4 // met: |
| 5 // |
| 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided |
| 11 // with the distribution. |
| 12 // * Neither the name of Google Inc. nor the names of its |
| 13 // contributors may be used to endorse or promote products derived |
| 14 // from this software without specific prior written permission. |
| 15 // |
| 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 |
| 28 #include "v8.h" |
| 29 #include "lexer.h" |
| 30 #include "char-predicates-inl.h" |
| 31 #include "scanner-character-streams.h" |
| 32 |
| 33 namespace v8 { |
| 34 namespace internal { |
| 35 |
| 36 |
| 37 #ifdef V8_USE_GENERATED_LEXER |
| 38 |
| 39 |
| 40 void Scanner::Initialize(Utf16CharacterStream* source) { |
| 41 delete lexer_; |
| 42 lexer_ = NULL; |
| 43 switch (source->stream_type_) { |
| 44 case Utf16CharacterStream::kUtf8ToUtf16: |
| 45 { |
| 46 Utf8ToUtf16CharacterStream* stream = |
| 47 static_cast<Utf8ToUtf16CharacterStream*>(source); |
| 48 lexer_ = |
| 49 new Lexer<int8_t>(unicode_cache_, stream->data_, stream->length_); |
| 50 break; |
| 51 } |
| 52 case Utf16CharacterStream::kGenericStringUtf16: |
| 53 { |
| 54 GenericStringUtf16CharacterStream* stream = |
| 55 static_cast<GenericStringUtf16CharacterStream*>(source); |
| 56 ASSERT(stream->data_->IsFlat()); |
| 57 if (stream->data_->IsOneByteRepresentation()) { |
| 58 lexer_ = new Lexer<uint8_t>(unicode_cache_, stream->data_, |
| 59 stream->start_position_, stream->end_position_); |
| 60 } else { |
| 61 lexer_ = new Lexer<uint16_t>(unicode_cache_, stream->data_, |
| 62 stream->start_position_, stream->end_position_); |
| 63 } |
| 64 } |
| 65 break; |
| 66 case Utf16CharacterStream::kExternalTwoByteStringUtf16: |
| 67 { |
| 68 ExternalTwoByteStringUtf16CharacterStream* stream = |
| 69 static_cast<ExternalTwoByteStringUtf16CharacterStream*>(source); |
| 70 ASSERT(stream->data_->IsFlat()); |
| 71 ASSERT(stream->data_->IsOneByteRepresentation()); |
| 72 lexer_ = new Lexer<uint16_t>(unicode_cache_, stream->data_, |
| 73 stream->start_position_, stream->end_position_); |
| 74 } |
| 75 break; |
| 76 } |
| 77 ASSERT(lexer_ != NULL); |
| 78 SyncSettings(); |
| 79 lexer_->Scan(); |
| 80 } |
| 81 |
| 82 |
| 83 Scanner::Scanner(UnicodeCache* unicode_cache) |
| 84 : unicode_cache_(unicode_cache), |
| 85 lexer_(NULL), |
| 86 harmony_numeric_literals_(false), |
| 87 harmony_modules_(false), |
| 88 harmony_scoping_(false) { |
| 89 } |
| 90 |
| 91 |
| 92 void Scanner::SyncSettings() { |
| 93 if (lexer_ == NULL) return; |
| 94 lexer_->SetHarmonyModules(harmony_modules_); |
| 95 lexer_->SetHarmonyScoping(harmony_scoping_); |
| 96 lexer_->SetHarmonyNumericLiterals(harmony_numeric_literals_); |
| 97 } |
| 98 |
| 99 |
| 100 #endif |
| 101 |
| 102 |
| 103 static void UpdateLexersAfterGC(v8::Isolate* isolate, |
| 104 GCType, |
| 105 GCCallbackFlags) { |
| 106 reinterpret_cast<i::Isolate*>(isolate)-> |
| 107 lexer_gc_handler()->UpdateLexersAfterGC(); |
| 108 } |
| 109 |
| 110 |
| 111 void LexerGCHandler::AddLexer(LexerBase* lexer) { |
| 112 if (lexers_.empty()) { |
| 113 isolate_->heap()->AddGCEpilogueCallback( |
| 114 &i::UpdateLexersAfterGC, kGCTypeAll, true); |
| 115 } |
| 116 lexers_.insert(lexer); |
| 117 } |
| 118 |
| 119 |
| 120 void LexerGCHandler::RemoveLexer(LexerBase* lexer) { |
| 121 lexers_.erase(lexer); |
| 122 if (lexers_.empty()) { |
| 123 isolate_->heap()->RemoveGCEpilogueCallback(&i::UpdateLexersAfterGC); |
| 124 } |
| 125 } |
| 126 |
| 127 |
| 128 void LexerGCHandler::UpdateLexersAfterGC() { |
| 129 typedef std::set<LexerBase*>::const_iterator It; |
| 130 for (It it = lexers_.begin(); it != lexers_.end(); ++it) { |
| 131 (*it)->UpdateBufferBasedOnHandle(); |
| 132 } |
| 133 } |
| 134 |
| 135 |
| 136 LexerBase::~LexerBase() {} |
| 137 |
| 138 |
| 139 // Returns the next token and advances input. |
| 140 Token::Value LexerBase::Next() { |
| 141 has_line_terminator_before_next_ = false; |
| 142 has_multiline_comment_before_next_ = false; |
| 143 current_ = next_; |
| 144 std::swap(current_literal_, next_literal_); |
| 145 Scan(); |
| 146 return current_.token; |
| 147 } |
| 148 |
| 149 |
| 150 template<typename Char> |
| 151 Lexer<Char>::Lexer(UnicodeCache* unicode_cache, |
| 152 const Char* source_ptr, |
| 153 int length) |
| 154 : LexerBase(unicode_cache), |
| 155 isolate_(NULL), |
| 156 source_ptr_(source_ptr), |
| 157 start_position_(0), |
| 158 end_position_(length), |
| 159 buffer_(NULL), |
| 160 buffer_end_(NULL), |
| 161 start_(NULL), |
| 162 cursor_(NULL), |
| 163 last_octal_end_(NULL) { |
| 164 CHECK(false); // not yet supported |
| 165 } |
| 166 |
| 167 |
| 168 template<typename Char> |
| 169 Lexer<Char>::Lexer(UnicodeCache* unicode_cache, |
| 170 Handle<String> source, |
| 171 int start_position, |
| 172 int end_position) |
| 173 : LexerBase(unicode_cache), |
| 174 isolate_(source->GetIsolate()), |
| 175 // TODO(dcarney): don't need to allocate here, used stored positions |
| 176 source_handle_(isolate_->factory()->NewSubString( |
| 177 source, start_position, end_position)), |
| 178 source_ptr_(NULL), |
| 179 start_position_(start_position), |
| 180 end_position_(end_position), |
| 181 buffer_(NULL), |
| 182 buffer_end_(NULL), |
| 183 start_(NULL), |
| 184 cursor_(NULL), |
| 185 last_octal_end_(NULL) { |
| 186 ASSERT(source->IsFlat()); |
| 187 fprintf(stderr, "%s %d %d %d %d %d\n", |
| 188 __func__, start_position_, end_position_, |
| 189 source->length(), source_handle_->length(), |
| 190 source->IsOneByteRepresentation()); |
| 191 UpdateBufferBasedOnHandle(); |
| 192 current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0; |
| 193 isolate_->lexer_gc_handler()->AddLexer(this); |
| 194 } |
| 195 |
| 196 |
| 197 template<typename Char> |
| 198 Lexer<Char>::~Lexer() { |
| 199 if (!source_handle_.is_null()) { |
| 200 isolate_->lexer_gc_handler()->RemoveLexer(this); |
| 201 } |
| 202 } |
| 203 |
| 204 |
| 205 template<typename Char> |
| 206 void Lexer<Char>::SeekForward(int pos) { |
| 207 cursor_ = buffer_ + pos; |
| 208 start_ = cursor_; |
| 209 has_line_terminator_before_next_ = false; |
| 210 has_multiline_comment_before_next_ = false; |
| 211 Scan(); // Fills in next_. |
| 212 } |
| 213 |
| 214 |
| 215 template<typename Char> |
| 216 void Lexer<Char>::SetEnd(int pos) { |
| 217 buffer_end_ = buffer_ + pos; |
| 218 } |
| 219 |
| 220 |
| 221 template<typename Char> |
| 222 bool Lexer<Char>::ScanRegExpPattern(bool seen_equal) { |
| 223 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| 224 bool in_character_class = false; |
| 225 |
| 226 // Previous token is either '/' or '/=', in the second case, the |
| 227 // pattern starts at =. |
| 228 next_.beg_pos = next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0); |
| 229 |
| 230 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 231 // the scanner should pass uninterpreted bodies to the RegExp |
| 232 // constructor. |
| 233 if (cursor_ >= buffer_end_) return false; |
| 234 |
| 235 while (*cursor_ != '/' || in_character_class) { |
| 236 if (unicode_cache_->IsLineTerminator(*cursor_)) return false; |
| 237 if (*cursor_ == '\\') { // Escape sequence. |
| 238 ++cursor_; |
| 239 if (cursor_ >= buffer_end_ || unicode_cache_->IsLineTerminator(*cursor_)) |
| 240 return false; |
| 241 ++cursor_; |
| 242 if (cursor_ >= buffer_end_) return false; |
| 243 // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
| 244 // only "safe" characters are allowed (letters, digits, underscore), |
| 245 // otherwise the escape isn't valid and the invalid character has |
| 246 // its normal meaning. I.e., we can just continue scanning without |
| 247 // worrying whether the following characters are part of the escape |
| 248 // or not, since any '/', '\\' or '[' is guaranteed to not be part |
| 249 // of the escape sequence. |
| 250 |
| 251 // TODO(896): At some point, parse RegExps more throughly to capture |
| 252 // octal esacpes in strict mode. |
| 253 } else { // Unescaped character. |
| 254 if (*cursor_ == '[') in_character_class = true; |
| 255 if (*cursor_ == ']') in_character_class = false; |
| 256 if (++cursor_ >= buffer_end_) return false; |
| 257 } |
| 258 } |
| 259 next_.end_pos = (cursor_ - buffer_); |
| 260 ++cursor_; // consume '/' |
| 261 return true; |
| 262 } |
| 263 |
| 264 |
| 265 template<typename Char> |
| 266 bool Lexer<Char>::ScanRegExpFlags() { |
| 267 next_.beg_pos = cursor_ - buffer_; |
| 268 // Scan regular expression flags. |
| 269 while (cursor_ < buffer_end_ && unicode_cache_->IsIdentifierPart(*cursor_)) { |
| 270 if (*cursor_ != '\\') { |
| 271 if (++cursor_ >= buffer_end_) break; |
| 272 } else { |
| 273 if (!ScanLiteralUnicodeEscape()) break; |
| 274 if (++cursor_ >= buffer_end_) break; |
| 275 } |
| 276 } |
| 277 next_.end_pos = cursor_ - buffer_; |
| 278 return true; |
| 279 } |
| 280 |
| 281 |
| 282 template<typename Char> |
| 283 uc32 Lexer<Char>::ScanHexNumber(int length) { |
| 284 // We have seen \uXXXX, let's see what it is. |
| 285 uc32 x = 0; |
| 286 for (const Char* s = cursor_ - length; s != cursor_; ++s) { |
| 287 int d = HexValue(*s); |
| 288 if (d < 0) { |
| 289 return -1; |
| 290 } |
| 291 x = x * 16 + d; |
| 292 } |
| 293 return x; |
| 294 } |
| 295 |
| 296 |
| 297 template<typename Char> |
| 298 const Char* Lexer<Char>::ScanHexNumber( |
| 299 const Char* cursor, const Char* end, uc32* result) { |
| 300 uc32 x = 0; |
| 301 for ( ; cursor < end; ++cursor) { |
| 302 int d = HexValue(*cursor); |
| 303 if (d < 0) { |
| 304 *result = -1; |
| 305 return NULL; |
| 306 } |
| 307 x = x * 16 + d; |
| 308 } |
| 309 *result = x; |
| 310 return cursor; |
| 311 } |
| 312 |
| 313 |
| 314 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
| 315 // ECMA-262. Other JS VMs support them. |
| 316 template<typename Char> |
| 317 const Char* Lexer<Char>::ScanOctalEscape( |
| 318 const Char* start, const Char* end, uc32* result) { |
| 319 uc32 x = *result - '0'; |
| 320 const Char* cursor; |
| 321 for (cursor = start; cursor < end; cursor++) { |
| 322 int d = *cursor - '0'; |
| 323 if (d < 0 || d > 7) break; |
| 324 int nx = x * 8 + d; |
| 325 if (nx >= 256) break; |
| 326 x = nx; |
| 327 } |
| 328 *result = x; |
| 329 return cursor; |
| 330 } |
| 331 |
| 332 |
| 333 template<typename Char> |
| 334 bool Lexer<Char>::ScanLiteralUnicodeEscape() { |
| 335 ASSERT(cursor_ < buffer_end_); |
| 336 Char primary_char = *(cursor_); |
| 337 ASSERT(primary_char == '\\'); |
| 338 if (++cursor_ >= buffer_end_) return false; |
| 339 primary_char = *(cursor_); |
| 340 int i = 1; |
| 341 if (primary_char == 'u') { |
| 342 i++; |
| 343 while (i < 6) { |
| 344 if (++cursor_ >= buffer_end_) return false; |
| 345 primary_char = *(cursor_); |
| 346 if (!IsHexDigit(primary_char)) break; |
| 347 i++; |
| 348 } |
| 349 } |
| 350 return i == 6; |
| 351 } |
| 352 |
| 353 |
| 354 template<typename Char> |
| 355 const Char* Lexer<Char>::ScanIdentifierUnicodeEscape( |
| 356 const Char* cursor, const Char* end, uc32* result) { |
| 357 ASSERT(*cursor == '\\'); |
| 358 if (++cursor >= end) return NULL; |
| 359 if (*cursor != 'u') return NULL; |
| 360 ++cursor; |
| 361 if (cursor + 4 > end) return NULL; |
| 362 cursor = ScanHexNumber(cursor, cursor + 4, result); |
| 363 return cursor; |
| 364 } |
| 365 |
| 366 |
| 367 template<typename Char> |
| 368 const Char* Lexer<Char>::ScanEscape( |
| 369 const Char* cursor, const Char* end, LiteralBuffer* literal) { |
| 370 ASSERT(*cursor == '\\'); |
| 371 if (++cursor >= end) return NULL; |
| 372 uc32 c = *cursor; |
| 373 if (++cursor > end) return NULL; |
| 374 // Skip escaped newlines. |
| 375 if (unicode_cache_->IsLineTerminator(c)) { |
| 376 uc32 peek = *cursor; |
| 377 // Allow CR+LF newlines in multiline string literals. |
| 378 if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++; |
| 379 // Allow LF+CR newlines in multiline string literals. |
| 380 if (IsLineFeed(c) && IsCarriageReturn(peek)) cursor++; |
| 381 return cursor; |
| 382 } |
| 383 |
| 384 switch (c) { |
| 385 case '\'': // fall through |
| 386 case '"' : // fall through |
| 387 case '\\': break; |
| 388 case 'b' : c = '\b'; break; |
| 389 case 'f' : c = '\f'; break; |
| 390 case 'n' : c = '\n'; break; |
| 391 case 'r' : c = '\r'; break; |
| 392 case 't' : c = '\t'; break; |
| 393 case 'u' : { |
| 394 ASSERT(cursor + 4 <= end); |
| 395 cursor = ScanHexNumber(cursor, cursor + 4, &c); |
| 396 if (cursor == NULL) return NULL; |
| 397 break; |
| 398 } |
| 399 case 'v' : c = '\v'; break; |
| 400 case 'x' : { |
| 401 ASSERT(cursor + 2 <= end); |
| 402 cursor = ScanHexNumber(cursor, cursor + 2, &c); |
| 403 if (cursor == NULL) return NULL; |
| 404 break; |
| 405 } |
| 406 case '0' : // fall through |
| 407 case '1' : // fall through |
| 408 case '2' : // fall through |
| 409 case '3' : // fall through |
| 410 case '4' : // fall through |
| 411 case '5' : // fall through |
| 412 case '6' : // fall through |
| 413 case '7' : |
| 414 if (end > cursor + 2) end = cursor + 2; |
| 415 cursor = ScanOctalEscape(cursor, end, &c); break; |
| 416 } |
| 417 |
| 418 // According to ECMA-262, section 7.8.4, characters not covered by the |
| 419 // above cases should be illegal, but they are commonly handled as |
| 420 // non-escaped characters by JS VMs. |
| 421 literal->AddChar(c); |
| 422 return cursor; |
| 423 } |
| 424 |
| 425 |
| 426 template<typename Char> |
| 427 LexerBase::Location Lexer<Char>::octal_position() const { |
| 428 if (!last_octal_end_) |
| 429 return Location::invalid(); |
| 430 // The last octal might be an octal escape or an octal number. Whichever it |
| 431 // is, we'll find the start by just scanning back until we hit a non-octal |
| 432 // character. |
| 433 const Char* temp_cursor = last_octal_end_ - 1; |
| 434 while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor <= '7') |
| 435 --temp_cursor; |
| 436 return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_); |
| 437 } |
| 438 |
| 439 |
| 440 template<> |
| 441 const uint8_t* Lexer<uint8_t>::GetNewBufferBasedOnHandle() const { |
| 442 String::FlatContent content = source_handle_->GetFlatContent(); |
| 443 return content.ToOneByteVector().start(); |
| 444 } |
| 445 |
| 446 |
| 447 template <> |
| 448 const uint16_t* Lexer<uint16_t>::GetNewBufferBasedOnHandle() |
| 449 const { |
| 450 String::FlatContent content = source_handle_->GetFlatContent(); |
| 451 return content.ToUC16Vector().start(); |
| 452 } |
| 453 |
| 454 |
| 455 template<> |
| 456 const int8_t* Lexer<int8_t>::GetNewBufferBasedOnHandle() const { |
| 457 String::FlatContent content = source_handle_->GetFlatContent(); |
| 458 return reinterpret_cast<const int8_t*>(content.ToOneByteVector().start()); |
| 459 } |
| 460 |
| 461 |
| 462 template<typename Char> |
| 463 void Lexer<Char>::UpdateBufferBasedOnHandle() { |
| 464 // We get a raw pointer from the Handle, but we also update it every time |
| 465 // there is a GC, so it is safe. |
| 466 DisallowHeapAllocation no_gc; |
| 467 const Char* new_buffer = GetNewBufferBasedOnHandle(); |
| 468 if (new_buffer != buffer_) { |
| 469 int start_offset = start_ - buffer_; |
| 470 int cursor_offset = cursor_ - buffer_; |
| 471 int last_octal_end_offset = last_octal_end_ - buffer_; |
| 472 buffer_ = new_buffer; |
| 473 buffer_end_ = buffer_ + source_handle_->length(); |
| 474 start_ = buffer_ + start_offset; |
| 475 cursor_ = buffer_ + cursor_offset; |
| 476 if (last_octal_end_ != NULL) { |
| 477 last_octal_end_ = buffer_ + last_octal_end_offset; |
| 478 } |
| 479 ResetLiterals(); |
| 480 } |
| 481 } |
| 482 |
| 483 |
| 484 template<> |
| 485 bool Lexer<uint8_t>::IsSubstringOfSource(const TokenDesc& token) { |
| 486 return !token.has_escapes; |
| 487 } |
| 488 |
| 489 |
| 490 template<> |
| 491 bool Lexer<uint16_t>::IsSubstringOfSource( |
| 492 const TokenDesc& token) { |
| 493 if (token.has_escapes) return false; |
| 494 const uint16_t* start = buffer_ + token.beg_pos; |
| 495 const uint16_t* end = buffer_ + token.end_pos; |
| 496 for (const uint16_t* cursor = start; cursor != end; ++cursor) { |
| 497 if (*cursor >= unibrow::Latin1::kMaxChar) return true; |
| 498 } |
| 499 return false; |
| 500 } |
| 501 |
| 502 |
| 503 template<> |
| 504 bool Lexer<int8_t>::IsSubstringOfSource(const TokenDesc& token) { |
| 505 // FIXME: implement. |
| 506 UNREACHABLE(); |
| 507 return false; |
| 508 } |
| 509 |
| 510 |
| 511 template<> |
| 512 bool Lexer<uint8_t>::FillLiteral( |
| 513 const TokenDesc& token, LiteralDesc* literal) { |
| 514 literal->beg_pos = token.beg_pos; |
| 515 const uint8_t* start = buffer_ + token.beg_pos; |
| 516 const uint8_t* end = buffer_ + token.end_pos; |
| 517 if (token.token == Token::STRING) { |
| 518 ++start; |
| 519 --end; |
| 520 } |
| 521 if (IsSubstringOfSource(token)) { |
| 522 literal->is_ascii = true; |
| 523 literal->is_in_buffer = false; |
| 524 literal->offset = start - buffer_; |
| 525 literal->length = end - start; |
| 526 literal->ascii_string = Vector<const char>( |
| 527 reinterpret_cast<const char*>(start), literal->length); |
| 528 return true; |
| 529 } |
| 530 return CopyToLiteralBuffer(start, end, token, literal); |
| 531 } |
| 532 |
| 533 |
| 534 template<> |
| 535 bool Lexer<uint16_t>::FillLiteral( |
| 536 const TokenDesc& token, LiteralDesc* literal) { |
| 537 literal->beg_pos = token.beg_pos; |
| 538 const uint16_t* start = buffer_ + token.beg_pos; |
| 539 const uint16_t* end = buffer_ + token.end_pos; |
| 540 if (token.token == Token::STRING) { |
| 541 ++start; |
| 542 --end; |
| 543 } |
| 544 if (IsSubstringOfSource(token)) { |
| 545 literal->is_ascii = false; |
| 546 literal->is_in_buffer = false; |
| 547 literal->offset = start - buffer_; |
| 548 literal->length = end - start; |
| 549 literal->utf16_string = Vector<const uint16_t>(start, literal->length); |
| 550 return true; |
| 551 } |
| 552 return CopyToLiteralBuffer(start, end, token, literal); |
| 553 } |
| 554 |
| 555 |
| 556 template<> |
| 557 bool Lexer<int8_t>::FillLiteral( |
| 558 const TokenDesc& token, LiteralDesc* literal) { |
| 559 // FIXME: implement. |
| 560 UNREACHABLE(); |
| 561 return false; |
| 562 } |
| 563 |
| 564 |
| 565 template<class Char> |
| 566 bool Lexer<Char>::CopyToLiteralBuffer(const Char* start, |
| 567 const Char* end, |
| 568 const TokenDesc& token, |
| 569 LiteralDesc* literal) { |
| 570 literal->buffer.Reset(); |
| 571 if (token.has_escapes) { |
| 572 for (const Char* cursor = start; cursor != end;) { |
| 573 if (*cursor != '\\') { |
| 574 literal->buffer.AddChar(*cursor++); |
| 575 } else if (token.token == Token::IDENTIFIER) { |
| 576 uc32 c; |
| 577 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c); |
| 578 ASSERT(cursor != NULL); |
| 579 if (cursor == NULL) return false; |
| 580 literal->buffer.AddChar(c); |
| 581 } else { |
| 582 cursor = ScanEscape(cursor, end, &literal->buffer); |
| 583 ASSERT(cursor != NULL); |
| 584 if (cursor == NULL) return false; |
| 585 } |
| 586 } |
| 587 } else { |
| 588 for (const Char* cursor = start; cursor != end;) { |
| 589 literal->buffer.AddChar(*cursor++); |
| 590 } |
| 591 } |
| 592 literal->is_ascii = literal->buffer.is_ascii(); |
| 593 literal->is_in_buffer = true; |
| 594 literal->length = literal->buffer.length(); |
| 595 if (literal->is_ascii) { |
| 596 literal->ascii_string = literal->buffer.ascii_literal(); |
| 597 } else { |
| 598 literal->utf16_string = literal->buffer.utf16_literal(); |
| 599 } |
| 600 return true; |
| 601 } |
| 602 |
| 603 |
| 604 template<class Char> |
| 605 Handle<String> Lexer<Char>::InternalizeLiteral( |
| 606 LiteralDesc* literal) { |
| 607 Factory* factory = isolate_->factory(); |
| 608 if (literal->is_in_buffer) { |
| 609 return literal->is_ascii |
| 610 ? factory->InternalizeOneByteString( |
| 611 Vector<const uint8_t>::cast(literal->ascii_string)) |
| 612 : factory->InternalizeTwoByteString(literal->utf16_string); |
| 613 } |
| 614 if (sizeof(Char) == 1) { |
| 615 SubStringKey<uint8_t> key( |
| 616 source_handle_, literal->offset, literal->length); |
| 617 return factory->InternalizeStringWithKey(&key); |
| 618 } else { |
| 619 SubStringKey<uint16_t> key( |
| 620 source_handle_, literal->offset, literal->length); |
| 621 return factory->InternalizeStringWithKey(&key); |
| 622 } |
| 623 } |
| 624 |
| 625 |
| 626 template<> |
| 627 Handle<String> Lexer<uint8_t>::AllocateLiteral( |
| 628 LiteralDesc* literal, PretenureFlag pretenured) { |
| 629 Factory* factory = isolate_->factory(); |
| 630 if (literal->is_in_buffer) { |
| 631 return literal->is_ascii |
| 632 ? factory->NewStringFromAscii(literal->ascii_string, pretenured) |
| 633 : factory->NewStringFromTwoByte(literal->utf16_string, pretenured); |
| 634 } |
| 635 int from = literal->offset; |
| 636 int length = literal->length; |
| 637 // Save the offset and the length before allocating the string as it may |
| 638 // cause a GC, invalidate the literal, and move the source. |
| 639 Handle<String> result = factory->NewRawOneByteString(length, pretenured); |
| 640 uint8_t* chars = SeqOneByteString::cast(*result)->GetChars(); |
| 641 String::WriteToFlat(*source_handle_, chars, from, from + length); |
| 642 return result; |
| 643 } |
| 644 |
| 645 |
| 646 template<> |
| 647 Handle<String> Lexer<uint16_t>::AllocateLiteral( |
| 648 LiteralDesc* literal, PretenureFlag pretenured) { |
| 649 Factory* factory = isolate_->factory(); |
| 650 if (literal->is_in_buffer) { |
| 651 return literal->is_ascii |
| 652 ? factory->NewStringFromAscii(literal->ascii_string, pretenured) |
| 653 : factory->NewStringFromTwoByte(literal->utf16_string, pretenured); |
| 654 } |
| 655 // Save the offset and the length before allocating the string as it may |
| 656 // cause a GC, invalidate the literal, and move the source. |
| 657 int from = literal->offset; |
| 658 int length = literal->length; |
| 659 Handle<String> result = factory->NewRawTwoByteString(length, pretenured); |
| 660 uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars(); |
| 661 String::WriteToFlat(*source_handle_, chars, from, from + length); |
| 662 return result; |
| 663 } |
| 664 |
| 665 |
| 666 template<> |
| 667 Handle<String> Lexer<int8_t>::AllocateLiteral( |
| 668 LiteralDesc* literal, PretenureFlag pretenured) { |
| 669 // FIXME: implement |
| 670 UNREACHABLE(); |
| 671 return Handle<String>(); |
| 672 } |
| 673 |
| 674 template class Lexer<uint8_t>; |
| 675 template class Lexer<uint16_t>; |
| 676 template class Lexer<int8_t>; |
| 677 |
| 678 } } // v8::internal |
| OLD | NEW |