Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(235)

Side by Side Diff: src/lexer/lexer.cc

Issue 201613002: Experimental parser: allocate substrings (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/lexer/lexer.h ('k') | tools/lexer_generator/code_generator.jinja » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 446 matching lines...) Expand 10 before | Expand all | Expand 10 after
457 template <> 457 template <>
458 const uint16_t* Lexer<uint16_t>::GetNewBufferBasedOnHandle() 458 const uint16_t* Lexer<uint16_t>::GetNewBufferBasedOnHandle()
459 const { 459 const {
460 String::FlatContent content = source_handle_->GetFlatContent(); 460 String::FlatContent content = source_handle_->GetFlatContent();
461 return content.ToUC16Vector().start(); 461 return content.ToUC16Vector().start();
462 } 462 }
463 463
464 464
465 template<> 465 template<>
466 const int8_t* Lexer<int8_t>::GetNewBufferBasedOnHandle() const { 466 const int8_t* Lexer<int8_t>::GetNewBufferBasedOnHandle() const {
467 String::FlatContent content = source_handle_->GetFlatContent(); 467 UNREACHABLE();
468 return reinterpret_cast<const int8_t*>(content.ToOneByteVector().start()); 468 return NULL;
469 } 469 }
470 470
471 471
472 template<typename Char> 472 template<typename Char>
473 void Lexer<Char>::UpdateBufferBasedOnHandle() { 473 void Lexer<Char>::UpdateBufferBasedOnHandle() {
474 // We get a raw pointer from the Handle, but we also update it every time 474 // We get a raw pointer from the Handle, but we also update it every time
475 // there is a GC, so it is safe. 475 // there is a GC, so it is safe.
476 DisallowHeapAllocation no_gc; 476 DisallowHeapAllocation no_gc;
477 const Char* new_buffer = GetNewBufferBasedOnHandle(); 477 const Char* new_buffer = GetNewBufferBasedOnHandle();
478 if (new_buffer != buffer_) { 478 if (new_buffer != buffer_) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
520 one_byte_string_.Dispose(); 520 one_byte_string_.Dispose();
521 } 521 }
522 is_one_byte_string_owned_ = false; 522 is_one_byte_string_owned_ = false;
523 one_byte_string_ = buffer.one_byte_literal(); 523 one_byte_string_ = buffer.one_byte_literal();
524 } else { 524 } else {
525 two_byte_string_ = buffer.two_byte_literal(); 525 two_byte_string_ = buffer.two_byte_literal();
526 } 526 }
527 } 527 }
528 528
529 529
530 Handle<String> LexerBase::AllocateNextLiteralString(Isolate* isolate,
531 PretenureFlag tenured) {
532 if (is_next_literal_one_byte()) {
533 return isolate->factory()->NewStringFromOneByte(
534 Vector<const uint8_t>::cast(next_literal_one_byte_string()), tenured);
535 } else {
536 return isolate->factory()->NewStringFromTwoByte(
537 next_literal_two_byte_string(), tenured);
538 }
539 }
540
541
542 Handle<String> LexerBase::AllocateInternalizedString(Isolate* isolate) {
543 if (is_literal_one_byte()) {
544 return isolate->factory()->InternalizeOneByteString(
545 literal_one_byte_string());
546 } else {
547 return isolate->factory()->InternalizeTwoByteString(
548 literal_two_byte_string());
549 }
550 }
551
552
553 double LexerBase::DoubleValue() { 530 double LexerBase::DoubleValue() {
554 ASSERT(is_literal_one_byte());
555 return StringToDouble( 531 return StringToDouble(
556 unicode_cache_, Vector<const char>::cast(literal_one_byte_string()), 532 unicode_cache_, Vector<const char>::cast(literal_one_byte_string()),
557 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); 533 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
558 } 534 }
559 535
560 536
561 int LexerBase::FindNumber(DuplicateFinder* finder, int value) { 537 int LexerBase::FindNumber(DuplicateFinder* finder, int value) {
562 return finder->AddNumber(literal_one_byte_string(), value); 538 return finder->AddNumber(literal_one_byte_string(), value);
563 } 539 }
564 540
565 541
566 int LexerBase::FindSymbol(DuplicateFinder* finder, int value) { 542 int LexerBase::FindSymbol(DuplicateFinder* finder, int value) {
567 if (is_literal_one_byte()) { 543 if (is_literal_one_byte()) {
568 return finder->AddOneByteSymbol(literal_one_byte_string(), value); 544 return finder->AddOneByteSymbol(literal_one_byte_string(), value);
569 } 545 }
570 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); 546 return finder->AddTwoByteSymbol(literal_two_byte_string(), value);
571 } 547 }
572 548
573 549
574 void LexerBase::LogSymbol(ParserRecorder* log, int position) { 550 void LexerBase::LogSymbol(ParserRecorder* log, int position) {
575 if (is_literal_one_byte()) { 551 if (is_literal_one_byte()) {
576 log->LogOneByteSymbol(position, literal_one_byte_string()); 552 log->LogOneByteSymbol(position, literal_one_byte_string());
577 } else { 553 } else {
578 log->LogTwoByteSymbol(position, literal_two_byte_string()); 554 log->LogTwoByteSymbol(position, literal_two_byte_string());
579 } 555 }
580 } 556 }
581 557
582 558
583 static inline bool IsOneByte(const uint8_t* cursor, const uint8_t* end) {
584 return true;
585 }
586
587
588 static inline bool IsOneByte(const uint16_t* cursor, const uint16_t* end) {
589 uint16_t acc = 0;
590 while (cursor != end) {
591 acc |= *cursor++ >> 8;
592 }
593 return acc == 0;
594 }
595
596
597 static inline bool IsOneByte(const int8_t* cursor, const int8_t* end) {
598 int8_t acc = 0;
599 while (cursor != end) {
600 acc |= *cursor++ >> 7;
601 }
602 return acc == 0;
603 }
604
605
606 template<> 559 template<>
607 template<> 560 template<>
608 inline void Lexer<uint16_t>::SetLiteral<true>(const uint16_t* cursor, 561 inline void Lexer<uint16_t>::SetLiteral<true>(const uint16_t* cursor,
609 const uint16_t* end,
610 LiteralDesc* literal) { 562 LiteralDesc* literal) {
611 Vector<uint8_t> vector = Vector<uint8_t>::New(literal->length); 563 Vector<uint8_t> vector = Vector<uint8_t>::New(literal->length);
612 uint8_t* data = vector.start(); 564 CopyChars(vector.start(), cursor, literal->length);
613 while (cursor < end) {
614 *data++ = *cursor++;
615 }
616 literal->SetOneByteString(Vector<const uint8_t>::cast(vector), true); 565 literal->SetOneByteString(Vector<const uint8_t>::cast(vector), true);
617 } 566 }
618 567
619 568
620 template<> 569 template<>
621 template<> 570 template<>
622 inline void Lexer<uint16_t>::SetLiteral<false>(const uint16_t* start, 571 inline void Lexer<uint16_t>::SetLiteral<false>(const uint16_t* start,
623 const uint16_t* end, 572 LiteralDesc* literal) {
624 LiteralDesc* literal) {
625 literal->SetTwoByteString(Vector<const uint16_t>(start, literal->length)); 573 literal->SetTwoByteString(Vector<const uint16_t>(start, literal->length));
626 } 574 }
627 575
628 576
629 template<> 577 template<>
630 template<> 578 template<>
631 inline void Lexer<uint8_t>::SetLiteral<true>(const uint8_t* start, 579 inline void Lexer<uint8_t>::SetLiteral<true>(const uint8_t* start,
632 const uint8_t* end,
633 LiteralDesc* literal) { 580 LiteralDesc* literal) {
634 literal->SetOneByteString( 581 literal->SetOneByteString(
635 Vector<const uint8_t>(start, literal->length), false); 582 Vector<const uint8_t>(start, literal->length), false);
636 } 583 }
637 584
638 585
639 template<> 586 template<>
640 template<> 587 template<>
641 inline void Lexer<int8_t>::SetLiteral<true>(const int8_t* start, 588 inline void Lexer<int8_t>::SetLiteral<true>(const int8_t* start,
642 const int8_t* end,
643 LiteralDesc* literal) { 589 LiteralDesc* literal) {
644 const uint8_t* cast = reinterpret_cast<const uint8_t*>(start); 590 const uint8_t* cast = reinterpret_cast<const uint8_t*>(start);
645 literal->SetOneByteString( 591 literal->SetOneByteString(
646 Vector<const uint8_t>(cast, literal->length), false); 592 Vector<const uint8_t>(cast, literal->length), false);
647 } 593 }
648 594
649 595
650 template<class Char> 596 template<class Char>
597 static inline void GetStartAndEnd(const Char* buffer,
598 const LexerBase::TokenDesc& token,
599 const Char** start,
600 const Char** end) {
601 *start = buffer + token.beg_pos;
602 *end = buffer + token.end_pos;
603 if (token.token == Token::STRING) {
604 ++(*start);
605 --(*end);
606 }
607 }
608
609
610 template<class Char>
611 static inline const Char* LiteralOffsetAndLength(
612 const Char* buffer,
613 const LexerBase::TokenDesc& token,
614 int* offset,
615 int* length) {
616 ASSERT(!Lexer<Char>::MustBeInBuffer(token));
617 const Char* start = NULL;
618 const Char* end = NULL;
619 GetStartAndEnd<Char>(buffer, token, &start, &end);
620 *offset = start - buffer;
621 *length = end - start;
622 return start;
623 }
624
625
626 template<class Char>
651 bool Lexer<Char>::FillLiteral(const TokenDesc& token, LiteralDesc* literal) { 627 bool Lexer<Char>::FillLiteral(const TokenDesc& token, LiteralDesc* literal) {
652 literal->beg_pos = token.beg_pos; 628 literal->beg_pos = token.beg_pos;
653 const Char* start = buffer_ + token.beg_pos; 629 if (!MustBeInBuffer(token)) {
654 const Char* end = buffer_ + token.end_pos; 630 const Char* start = LiteralOffsetAndLength<Char>(buffer_,
655 if (token.token == Token::STRING) { 631 token,
656 ++start; 632 &literal->offset,
657 --end; 633 &literal->length);
634 if (sizeof(Char) == 1) {
635 SetLiteral<true>(start, literal);
636 } else if (token.is_in_primary_range) {
637 SetLiteral<true>(start, literal);
638 } else {
639 SetLiteral<false>(start, literal);
640 }
641 return true;
658 } 642 }
659 if (!token.has_escapes) { 643 return CopyToLiteralBuffer(token, literal);
660 bool is_one_byte = IsOneByte(start, end);
661 if (sizeof(Char) == 2 || is_one_byte) {
662 literal->offset = start - buffer_;
663 literal->length = end - start;
664 if (sizeof(Char) == 1) {
665 SetLiteral<true>(start, end, literal);
666 } else if (is_one_byte) {
667 SetLiteral<true>(start, end, literal);
668 } else {
669 SetLiteral<false>(start, end, literal);
670 }
671 return true;
672 }
673 }
674 return CopyToLiteralBuffer(start, end, token, literal);
675 } 644 }
676 645
677 646
678 template<class Char> 647 template<class Char>
679 bool Lexer<Char>::CopyToLiteralBuffer(const Char* start, 648 bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token,
680 const Char* end,
681 const TokenDesc& token,
682 LiteralDesc* literal) { 649 LiteralDesc* literal) {
683 literal->buffer.Reset(); 650 literal->buffer.Reset();
651 const Char* start = NULL;
652 const Char* end = NULL;
653 GetStartAndEnd<Char>(buffer_, token, &start, &end);
684 if (token.has_escapes) { 654 if (token.has_escapes) {
685 for (const Char* cursor = start; cursor != end;) { 655 for (const Char* cursor = start; cursor != end;) {
686 if (*cursor != '\\') { 656 if (*cursor != '\\') {
687 literal->buffer.AddChar(*cursor++); 657 literal->buffer.AddChar(*cursor++);
688 } else if (token.token == Token::IDENTIFIER) { 658 } else if (token.token == Token::IDENTIFIER) {
689 uc32 c; 659 uc32 c;
690 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c); 660 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c);
691 ASSERT(cursor != NULL); 661 ASSERT(cursor != NULL);
692 if (cursor == NULL) return false; 662 if (cursor == NULL) return false;
693 literal->buffer.AddChar(c); 663 literal->buffer.AddChar(c);
694 } else { 664 } else {
695 cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer); 665 cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer);
696 ASSERT(cursor != NULL); 666 ASSERT(cursor != NULL);
697 if (cursor == NULL) return false; 667 if (cursor == NULL) return false;
698 } 668 }
699 } 669 }
700 } else { 670 } else {
701 // TODO(dcarney): This can only happen for utf8 strings 671 // TODO(dcarney): This can only happen for utf8 strings
702 // use a helper function. 672 // use a helper function.
703 for (const Char* cursor = start; cursor != end;) { 673 for (const Char* cursor = start; cursor != end;) {
704 literal->buffer.AddChar(*cursor++); 674 literal->buffer.AddChar(*cursor++);
705 } 675 }
706 } 676 }
707 literal->SetStringFromLiteralBuffer(); 677 literal->SetStringFromLiteralBuffer();
708 return true; 678 return true;
709 } 679 }
710 680
711 681
712 template<class Char> 682 template<class Char>
713 Handle<String> Lexer<Char>::InternalizeLiteral( 683 Handle<String> Lexer<Char>::AllocateInternalizedString(
714 LiteralDesc* literal) { 684 Isolate* isolate) {
715 // Factory* factory = isolate_->factory(); 685 Factory* factory = isolate->factory();
716 // if (literal->is_in_buffer) { 686 LiteralDesc* literal = current_literal_;
717 // return literal->is_one_byte 687 const TokenDesc& token = current_;
718 // ? factory->InternalizeOneByteString( 688 // TODO(dcarney): handle utf8 directly.
719 // Vector<const uint8_t>::cast(literal->one_byte_string)) 689 if (source_handle_.is_null() || MustBeInBuffer(token)) {
720 // : factory->InternalizeTwoByteString(literal->two_byte_string); 690 EnsureLiteralIsValid(token, literal);
721 // } 691 return literal->is_one_byte() ?
722 // if (sizeof(Char) == 1) { 692 factory->InternalizeOneByteString(literal->one_byte_string()) :
723 // SubStringKey<uint8_t> key( 693 factory->InternalizeTwoByteString(literal->two_byte_string());
724 // source_handle_, literal->offset, literal->length); 694 }
725 // return factory->InternalizeStringWithKey(&key); 695 int offset = 0, length = 0;
726 // } else { 696 LiteralOffsetAndLength<Char>(buffer_, token, &offset, &length);
727 // SubStringKey<uint16_t> key( 697 if (sizeof(Char) == 1) {
728 // source_handle_, literal->offset, literal->length); 698 SubStringKey<uint8_t> key(source_handle_, offset, length);
729 // return factory->InternalizeStringWithKey(&key); 699 return factory->InternalizeStringWithKey(&key);
730 // } 700 } else {
731 CHECK(false); 701 SubStringKey<uint16_t> key(source_handle_, offset, length);
732 return Handle<String>(); 702 return factory->InternalizeStringWithKey(&key);
703 }
733 } 704 }
734 705
735 706
736 template<> 707 template<class Char>
737 Handle<String> Lexer<uint8_t>::AllocateLiteral( 708 Handle<String> Lexer<Char>::AllocateNextLiteralString(Isolate* isolate,
738 LiteralDesc* literal, PretenureFlag pretenured) { 709 PretenureFlag tenured) {
739 // Factory* factory = isolate_->factory(); 710 Factory* factory = isolate->factory();
740 // if (literal->is_in_buffer) { 711 LiteralDesc* literal = next_literal_;
741 // return literal->is_one_byte 712 const TokenDesc& token = next_;
742 // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured) 713 // TODO(dcarney): handle utf8 directly.
743 // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured) 714 if (source_handle_.is_null() || MustBeInBuffer(token)) {
744 // } 715 EnsureLiteralIsValid(token, literal);
745 // int from = literal->offset; 716 return literal->is_one_byte() ?
746 // int length = literal->length; 717 factory->NewStringFromOneByte(literal->one_byte_string(), tenured) :
747 // // Save the offset and the length before allocating the string as it may 718 factory->NewStringFromTwoByte(literal->two_byte_string(), tenured);
748 // // cause a GC, invalidate the literal, and move the source. 719 }
749 // Handle<String> result = factory->NewRawOneByteString(length, pretenured); 720 int offset = 0, length = 0;
750 // uint8_t* chars = SeqOneByteString::cast(*result)->GetChars(); 721 LiteralOffsetAndLength<Char>(buffer_, token, &offset, &length);
751 // String::WriteToFlat(*source_handle_, chars, from, from + length); 722 return factory->NewSubString(source_handle_, offset, offset + length);
752 // return result;
753 CHECK(false);
754 return Handle<String>();
755 } 723 }
756 724
757 725
758 template<>
759 Handle<String> Lexer<uint16_t>::AllocateLiteral(
760 LiteralDesc* literal, PretenureFlag pretenured) {
761 // Factory* factory = isolate_->factory();
762 // if (literal->is_in_buffer) {
763 // return literal->is_one_byte
764 // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)
765 // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured)
766 // }
767 // // Save the offset and the length before allocating the string as it may
768 // // cause a GC, invalidate the literal, and move the source.
769 // int from = literal->offset;
770 // int length = literal->length;
771 // Handle<String> result = factory->NewRawTwoByteString(length, pretenured);
772 // uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars();
773 // String::WriteToFlat(*source_handle_, chars, from, from + length);
774 // return result;
775 CHECK(false);
776 return Handle<String>();
777 }
778
779
780 template<>
781 Handle<String> Lexer<int8_t>::AllocateLiteral(
782 LiteralDesc* literal, PretenureFlag pretenured) {
783 CHECK(false);
784 return Handle<String>();
785 }
786
787
788 template class Lexer<uint8_t>; 726 template class Lexer<uint8_t>;
789 template class Lexer<uint16_t>; 727 template class Lexer<uint16_t>;
790 template class Lexer<int8_t>; 728 template class Lexer<int8_t>;
791 729
792 } } // v8::internal 730 } } // v8::internal
OLDNEW
« no previous file with comments | « src/lexer/lexer.h ('k') | tools/lexer_generator/code_generator.jinja » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698