| OLD | NEW |
| 1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 446 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 457 template <> | 457 template <> |
| 458 const uint16_t* Lexer<uint16_t>::GetNewBufferBasedOnHandle() | 458 const uint16_t* Lexer<uint16_t>::GetNewBufferBasedOnHandle() |
| 459 const { | 459 const { |
| 460 String::FlatContent content = source_handle_->GetFlatContent(); | 460 String::FlatContent content = source_handle_->GetFlatContent(); |
| 461 return content.ToUC16Vector().start(); | 461 return content.ToUC16Vector().start(); |
| 462 } | 462 } |
| 463 | 463 |
| 464 | 464 |
| 465 template<> | 465 template<> |
| 466 const int8_t* Lexer<int8_t>::GetNewBufferBasedOnHandle() const { | 466 const int8_t* Lexer<int8_t>::GetNewBufferBasedOnHandle() const { |
| 467 String::FlatContent content = source_handle_->GetFlatContent(); | 467 UNREACHABLE(); |
| 468 return reinterpret_cast<const int8_t*>(content.ToOneByteVector().start()); | 468 return NULL; |
| 469 } | 469 } |
| 470 | 470 |
| 471 | 471 |
| 472 template<typename Char> | 472 template<typename Char> |
| 473 void Lexer<Char>::UpdateBufferBasedOnHandle() { | 473 void Lexer<Char>::UpdateBufferBasedOnHandle() { |
| 474 // We get a raw pointer from the Handle, but we also update it every time | 474 // We get a raw pointer from the Handle, but we also update it every time |
| 475 // there is a GC, so it is safe. | 475 // there is a GC, so it is safe. |
| 476 DisallowHeapAllocation no_gc; | 476 DisallowHeapAllocation no_gc; |
| 477 const Char* new_buffer = GetNewBufferBasedOnHandle(); | 477 const Char* new_buffer = GetNewBufferBasedOnHandle(); |
| 478 if (new_buffer != buffer_) { | 478 if (new_buffer != buffer_) { |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 520 one_byte_string_.Dispose(); | 520 one_byte_string_.Dispose(); |
| 521 } | 521 } |
| 522 is_one_byte_string_owned_ = false; | 522 is_one_byte_string_owned_ = false; |
| 523 one_byte_string_ = buffer.one_byte_literal(); | 523 one_byte_string_ = buffer.one_byte_literal(); |
| 524 } else { | 524 } else { |
| 525 two_byte_string_ = buffer.two_byte_literal(); | 525 two_byte_string_ = buffer.two_byte_literal(); |
| 526 } | 526 } |
| 527 } | 527 } |
| 528 | 528 |
| 529 | 529 |
| 530 Handle<String> LexerBase::AllocateNextLiteralString(Isolate* isolate, | |
| 531 PretenureFlag tenured) { | |
| 532 if (is_next_literal_one_byte()) { | |
| 533 return isolate->factory()->NewStringFromOneByte( | |
| 534 Vector<const uint8_t>::cast(next_literal_one_byte_string()), tenured); | |
| 535 } else { | |
| 536 return isolate->factory()->NewStringFromTwoByte( | |
| 537 next_literal_two_byte_string(), tenured); | |
| 538 } | |
| 539 } | |
| 540 | |
| 541 | |
| 542 Handle<String> LexerBase::AllocateInternalizedString(Isolate* isolate) { | |
| 543 if (is_literal_one_byte()) { | |
| 544 return isolate->factory()->InternalizeOneByteString( | |
| 545 literal_one_byte_string()); | |
| 546 } else { | |
| 547 return isolate->factory()->InternalizeTwoByteString( | |
| 548 literal_two_byte_string()); | |
| 549 } | |
| 550 } | |
| 551 | |
| 552 | |
| 553 double LexerBase::DoubleValue() { | 530 double LexerBase::DoubleValue() { |
| 554 ASSERT(is_literal_one_byte()); | |
| 555 return StringToDouble( | 531 return StringToDouble( |
| 556 unicode_cache_, Vector<const char>::cast(literal_one_byte_string()), | 532 unicode_cache_, Vector<const char>::cast(literal_one_byte_string()), |
| 557 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); | 533 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); |
| 558 } | 534 } |
| 559 | 535 |
| 560 | 536 |
| 561 int LexerBase::FindNumber(DuplicateFinder* finder, int value) { | 537 int LexerBase::FindNumber(DuplicateFinder* finder, int value) { |
| 562 return finder->AddNumber(literal_one_byte_string(), value); | 538 return finder->AddNumber(literal_one_byte_string(), value); |
| 563 } | 539 } |
| 564 | 540 |
| 565 | 541 |
| 566 int LexerBase::FindSymbol(DuplicateFinder* finder, int value) { | 542 int LexerBase::FindSymbol(DuplicateFinder* finder, int value) { |
| 567 if (is_literal_one_byte()) { | 543 if (is_literal_one_byte()) { |
| 568 return finder->AddOneByteSymbol(literal_one_byte_string(), value); | 544 return finder->AddOneByteSymbol(literal_one_byte_string(), value); |
| 569 } | 545 } |
| 570 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); | 546 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); |
| 571 } | 547 } |
| 572 | 548 |
| 573 | 549 |
| 574 void LexerBase::LogSymbol(ParserRecorder* log, int position) { | 550 void LexerBase::LogSymbol(ParserRecorder* log, int position) { |
| 575 if (is_literal_one_byte()) { | 551 if (is_literal_one_byte()) { |
| 576 log->LogOneByteSymbol(position, literal_one_byte_string()); | 552 log->LogOneByteSymbol(position, literal_one_byte_string()); |
| 577 } else { | 553 } else { |
| 578 log->LogTwoByteSymbol(position, literal_two_byte_string()); | 554 log->LogTwoByteSymbol(position, literal_two_byte_string()); |
| 579 } | 555 } |
| 580 } | 556 } |
| 581 | 557 |
| 582 | 558 |
| 583 static inline bool IsOneByte(const uint8_t* cursor, const uint8_t* end) { | |
| 584 return true; | |
| 585 } | |
| 586 | |
| 587 | |
| 588 static inline bool IsOneByte(const uint16_t* cursor, const uint16_t* end) { | |
| 589 uint16_t acc = 0; | |
| 590 while (cursor != end) { | |
| 591 acc |= *cursor++ >> 8; | |
| 592 } | |
| 593 return acc == 0; | |
| 594 } | |
| 595 | |
| 596 | |
| 597 static inline bool IsOneByte(const int8_t* cursor, const int8_t* end) { | |
| 598 int8_t acc = 0; | |
| 599 while (cursor != end) { | |
| 600 acc |= *cursor++ >> 7; | |
| 601 } | |
| 602 return acc == 0; | |
| 603 } | |
| 604 | |
| 605 | |
| 606 template<> | 559 template<> |
| 607 template<> | 560 template<> |
| 608 inline void Lexer<uint16_t>::SetLiteral<true>(const uint16_t* cursor, | 561 inline void Lexer<uint16_t>::SetLiteral<true>(const uint16_t* cursor, |
| 609 const uint16_t* end, | |
| 610 LiteralDesc* literal) { | 562 LiteralDesc* literal) { |
| 611 Vector<uint8_t> vector = Vector<uint8_t>::New(literal->length); | 563 Vector<uint8_t> vector = Vector<uint8_t>::New(literal->length); |
| 612 uint8_t* data = vector.start(); | 564 CopyChars(vector.start(), cursor, literal->length); |
| 613 while (cursor < end) { | |
| 614 *data++ = *cursor++; | |
| 615 } | |
| 616 literal->SetOneByteString(Vector<const uint8_t>::cast(vector), true); | 565 literal->SetOneByteString(Vector<const uint8_t>::cast(vector), true); |
| 617 } | 566 } |
| 618 | 567 |
| 619 | 568 |
| 620 template<> | 569 template<> |
| 621 template<> | 570 template<> |
| 622 inline void Lexer<uint16_t>::SetLiteral<false>(const uint16_t* start, | 571 inline void Lexer<uint16_t>::SetLiteral<false>(const uint16_t* start, |
| 623 const uint16_t* end, | 572 LiteralDesc* literal) { |
| 624 LiteralDesc* literal) { | |
| 625 literal->SetTwoByteString(Vector<const uint16_t>(start, literal->length)); | 573 literal->SetTwoByteString(Vector<const uint16_t>(start, literal->length)); |
| 626 } | 574 } |
| 627 | 575 |
| 628 | 576 |
| 629 template<> | 577 template<> |
| 630 template<> | 578 template<> |
| 631 inline void Lexer<uint8_t>::SetLiteral<true>(const uint8_t* start, | 579 inline void Lexer<uint8_t>::SetLiteral<true>(const uint8_t* start, |
| 632 const uint8_t* end, | |
| 633 LiteralDesc* literal) { | 580 LiteralDesc* literal) { |
| 634 literal->SetOneByteString( | 581 literal->SetOneByteString( |
| 635 Vector<const uint8_t>(start, literal->length), false); | 582 Vector<const uint8_t>(start, literal->length), false); |
| 636 } | 583 } |
| 637 | 584 |
| 638 | 585 |
| 639 template<> | 586 template<> |
| 640 template<> | 587 template<> |
| 641 inline void Lexer<int8_t>::SetLiteral<true>(const int8_t* start, | 588 inline void Lexer<int8_t>::SetLiteral<true>(const int8_t* start, |
| 642 const int8_t* end, | |
| 643 LiteralDesc* literal) { | 589 LiteralDesc* literal) { |
| 644 const uint8_t* cast = reinterpret_cast<const uint8_t*>(start); | 590 const uint8_t* cast = reinterpret_cast<const uint8_t*>(start); |
| 645 literal->SetOneByteString( | 591 literal->SetOneByteString( |
| 646 Vector<const uint8_t>(cast, literal->length), false); | 592 Vector<const uint8_t>(cast, literal->length), false); |
| 647 } | 593 } |
| 648 | 594 |
| 649 | 595 |
| 650 template<class Char> | 596 template<class Char> |
| 597 static inline void GetStartAndEnd(const Char* buffer, |
| 598 const LexerBase::TokenDesc& token, |
| 599 const Char** start, |
| 600 const Char** end) { |
| 601 *start = buffer + token.beg_pos; |
| 602 *end = buffer + token.end_pos; |
| 603 if (token.token == Token::STRING) { |
| 604 ++(*start); |
| 605 --(*end); |
| 606 } |
| 607 } |
| 608 |
| 609 |
| 610 template<class Char> |
| 611 static inline const Char* LiteralOffsetAndLength( |
| 612 const Char* buffer, |
| 613 const LexerBase::TokenDesc& token, |
| 614 int* offset, |
| 615 int* length) { |
| 616 ASSERT(!Lexer<Char>::MustBeInBuffer(token)); |
| 617 const Char* start = NULL; |
| 618 const Char* end = NULL; |
| 619 GetStartAndEnd<Char>(buffer, token, &start, &end); |
| 620 *offset = start - buffer; |
| 621 *length = end - start; |
| 622 return start; |
| 623 } |
| 624 |
| 625 |
| 626 template<class Char> |
| 651 bool Lexer<Char>::FillLiteral(const TokenDesc& token, LiteralDesc* literal) { | 627 bool Lexer<Char>::FillLiteral(const TokenDesc& token, LiteralDesc* literal) { |
| 652 literal->beg_pos = token.beg_pos; | 628 literal->beg_pos = token.beg_pos; |
| 653 const Char* start = buffer_ + token.beg_pos; | 629 if (!MustBeInBuffer(token)) { |
| 654 const Char* end = buffer_ + token.end_pos; | 630 const Char* start = LiteralOffsetAndLength<Char>(buffer_, |
| 655 if (token.token == Token::STRING) { | 631 token, |
| 656 ++start; | 632 &literal->offset, |
| 657 --end; | 633 &literal->length); |
| 634 if (sizeof(Char) == 1) { |
| 635 SetLiteral<true>(start, literal); |
| 636 } else if (token.is_in_primary_range) { |
| 637 SetLiteral<true>(start, literal); |
| 638 } else { |
| 639 SetLiteral<false>(start, literal); |
| 640 } |
| 641 return true; |
| 658 } | 642 } |
| 659 if (!token.has_escapes) { | 643 return CopyToLiteralBuffer(token, literal); |
| 660 bool is_one_byte = IsOneByte(start, end); | |
| 661 if (sizeof(Char) == 2 || is_one_byte) { | |
| 662 literal->offset = start - buffer_; | |
| 663 literal->length = end - start; | |
| 664 if (sizeof(Char) == 1) { | |
| 665 SetLiteral<true>(start, end, literal); | |
| 666 } else if (is_one_byte) { | |
| 667 SetLiteral<true>(start, end, literal); | |
| 668 } else { | |
| 669 SetLiteral<false>(start, end, literal); | |
| 670 } | |
| 671 return true; | |
| 672 } | |
| 673 } | |
| 674 return CopyToLiteralBuffer(start, end, token, literal); | |
| 675 } | 644 } |
| 676 | 645 |
| 677 | 646 |
| 678 template<class Char> | 647 template<class Char> |
| 679 bool Lexer<Char>::CopyToLiteralBuffer(const Char* start, | 648 bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token, |
| 680 const Char* end, | |
| 681 const TokenDesc& token, | |
| 682 LiteralDesc* literal) { | 649 LiteralDesc* literal) { |
| 683 literal->buffer.Reset(); | 650 literal->buffer.Reset(); |
| 651 const Char* start = NULL; |
| 652 const Char* end = NULL; |
| 653 GetStartAndEnd<Char>(buffer_, token, &start, &end); |
| 684 if (token.has_escapes) { | 654 if (token.has_escapes) { |
| 685 for (const Char* cursor = start; cursor != end;) { | 655 for (const Char* cursor = start; cursor != end;) { |
| 686 if (*cursor != '\\') { | 656 if (*cursor != '\\') { |
| 687 literal->buffer.AddChar(*cursor++); | 657 literal->buffer.AddChar(*cursor++); |
| 688 } else if (token.token == Token::IDENTIFIER) { | 658 } else if (token.token == Token::IDENTIFIER) { |
| 689 uc32 c; | 659 uc32 c; |
| 690 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c); | 660 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c); |
| 691 ASSERT(cursor != NULL); | 661 ASSERT(cursor != NULL); |
| 692 if (cursor == NULL) return false; | 662 if (cursor == NULL) return false; |
| 693 literal->buffer.AddChar(c); | 663 literal->buffer.AddChar(c); |
| 694 } else { | 664 } else { |
| 695 cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer); | 665 cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer); |
| 696 ASSERT(cursor != NULL); | 666 ASSERT(cursor != NULL); |
| 697 if (cursor == NULL) return false; | 667 if (cursor == NULL) return false; |
| 698 } | 668 } |
| 699 } | 669 } |
| 700 } else { | 670 } else { |
| 701 // TODO(dcarney): This can only happen for utf8 strings | 671 // TODO(dcarney): This can only happen for utf8 strings |
| 702 // use a helper function. | 672 // use a helper function. |
| 703 for (const Char* cursor = start; cursor != end;) { | 673 for (const Char* cursor = start; cursor != end;) { |
| 704 literal->buffer.AddChar(*cursor++); | 674 literal->buffer.AddChar(*cursor++); |
| 705 } | 675 } |
| 706 } | 676 } |
| 707 literal->SetStringFromLiteralBuffer(); | 677 literal->SetStringFromLiteralBuffer(); |
| 708 return true; | 678 return true; |
| 709 } | 679 } |
| 710 | 680 |
| 711 | 681 |
| 712 template<class Char> | 682 template<class Char> |
| 713 Handle<String> Lexer<Char>::InternalizeLiteral( | 683 Handle<String> Lexer<Char>::AllocateInternalizedString( |
| 714 LiteralDesc* literal) { | 684 Isolate* isolate) { |
| 715 // Factory* factory = isolate_->factory(); | 685 Factory* factory = isolate->factory(); |
| 716 // if (literal->is_in_buffer) { | 686 LiteralDesc* literal = current_literal_; |
| 717 // return literal->is_one_byte | 687 const TokenDesc& token = current_; |
| 718 // ? factory->InternalizeOneByteString( | 688 // TODO(dcarney): handle utf8 directly. |
| 719 // Vector<const uint8_t>::cast(literal->one_byte_string)) | 689 if (source_handle_.is_null() || MustBeInBuffer(token)) { |
| 720 // : factory->InternalizeTwoByteString(literal->two_byte_string); | 690 EnsureLiteralIsValid(token, literal); |
| 721 // } | 691 return literal->is_one_byte() ? |
| 722 // if (sizeof(Char) == 1) { | 692 factory->InternalizeOneByteString(literal->one_byte_string()) : |
| 723 // SubStringKey<uint8_t> key( | 693 factory->InternalizeTwoByteString(literal->two_byte_string()); |
| 724 // source_handle_, literal->offset, literal->length); | 694 } |
| 725 // return factory->InternalizeStringWithKey(&key); | 695 int offset = 0, length = 0; |
| 726 // } else { | 696 LiteralOffsetAndLength<Char>(buffer_, token, &offset, &length); |
| 727 // SubStringKey<uint16_t> key( | 697 if (sizeof(Char) == 1) { |
| 728 // source_handle_, literal->offset, literal->length); | 698 SubStringKey<uint8_t> key(source_handle_, offset, length); |
| 729 // return factory->InternalizeStringWithKey(&key); | 699 return factory->InternalizeStringWithKey(&key); |
| 730 // } | 700 } else { |
| 731 CHECK(false); | 701 SubStringKey<uint16_t> key(source_handle_, offset, length); |
| 732 return Handle<String>(); | 702 return factory->InternalizeStringWithKey(&key); |
| 703 } |
| 733 } | 704 } |
| 734 | 705 |
| 735 | 706 |
| 736 template<> | 707 template<class Char> |
| 737 Handle<String> Lexer<uint8_t>::AllocateLiteral( | 708 Handle<String> Lexer<Char>::AllocateNextLiteralString(Isolate* isolate, |
| 738 LiteralDesc* literal, PretenureFlag pretenured) { | 709 PretenureFlag tenured) { |
| 739 // Factory* factory = isolate_->factory(); | 710 Factory* factory = isolate->factory(); |
| 740 // if (literal->is_in_buffer) { | 711 LiteralDesc* literal = next_literal_; |
| 741 // return literal->is_one_byte | 712 const TokenDesc& token = next_; |
| 742 // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured) | 713 // TODO(dcarney): handle utf8 directly. |
| 743 // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured) | 714 if (source_handle_.is_null() || MustBeInBuffer(token)) { |
| 744 // } | 715 EnsureLiteralIsValid(token, literal); |
| 745 // int from = literal->offset; | 716 return literal->is_one_byte() ? |
| 746 // int length = literal->length; | 717 factory->NewStringFromOneByte(literal->one_byte_string(), tenured) : |
| 747 // // Save the offset and the length before allocating the string as it may | 718 factory->NewStringFromTwoByte(literal->two_byte_string(), tenured); |
| 748 // // cause a GC, invalidate the literal, and move the source. | 719 } |
| 749 // Handle<String> result = factory->NewRawOneByteString(length, pretenured); | 720 int offset = 0, length = 0; |
| 750 // uint8_t* chars = SeqOneByteString::cast(*result)->GetChars(); | 721 LiteralOffsetAndLength<Char>(buffer_, token, &offset, &length); |
| 751 // String::WriteToFlat(*source_handle_, chars, from, from + length); | 722 return factory->NewSubString(source_handle_, offset, offset + length); |
| 752 // return result; | |
| 753 CHECK(false); | |
| 754 return Handle<String>(); | |
| 755 } | 723 } |
| 756 | 724 |
| 757 | 725 |
| 758 template<> | |
| 759 Handle<String> Lexer<uint16_t>::AllocateLiteral( | |
| 760 LiteralDesc* literal, PretenureFlag pretenured) { | |
| 761 // Factory* factory = isolate_->factory(); | |
| 762 // if (literal->is_in_buffer) { | |
| 763 // return literal->is_one_byte | |
| 764 // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured) | |
| 765 // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured) | |
| 766 // } | |
| 767 // // Save the offset and the length before allocating the string as it may | |
| 768 // // cause a GC, invalidate the literal, and move the source. | |
| 769 // int from = literal->offset; | |
| 770 // int length = literal->length; | |
| 771 // Handle<String> result = factory->NewRawTwoByteString(length, pretenured); | |
| 772 // uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars(); | |
| 773 // String::WriteToFlat(*source_handle_, chars, from, from + length); | |
| 774 // return result; | |
| 775 CHECK(false); | |
| 776 return Handle<String>(); | |
| 777 } | |
| 778 | |
| 779 | |
| 780 template<> | |
| 781 Handle<String> Lexer<int8_t>::AllocateLiteral( | |
| 782 LiteralDesc* literal, PretenureFlag pretenured) { | |
| 783 CHECK(false); | |
| 784 return Handle<String>(); | |
| 785 } | |
| 786 | |
| 787 | |
| 788 template class Lexer<uint8_t>; | 726 template class Lexer<uint8_t>; |
| 789 template class Lexer<uint16_t>; | 727 template class Lexer<uint16_t>; |
| 790 template class Lexer<int8_t>; | 728 template class Lexer<int8_t>; |
| 791 | 729 |
| 792 } } // v8::internal | 730 } } // v8::internal |
| OLD | NEW |