OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "core/include/fpdfapi/fpdf_parser.h" | 7 #include "core/include/fpdfapi/fpdf_parser.h" |
8 | 8 |
9 #include <algorithm> | 9 #include <algorithm> |
10 #include <memory> | 10 #include <memory> |
(...skipping 564 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
575 return TRUE; | 575 return TRUE; |
576 } | 576 } |
577 | 577 |
578 FX_BOOL CPDF_Parser::RebuildCrossRef() { | 578 FX_BOOL CPDF_Parser::RebuildCrossRef() { |
579 m_ObjectInfo.clear(); | 579 m_ObjectInfo.clear(); |
580 m_SortedOffset.clear(); | 580 m_SortedOffset.clear(); |
581 if (m_pTrailer) { | 581 if (m_pTrailer) { |
582 m_pTrailer->Release(); | 582 m_pTrailer->Release(); |
583 m_pTrailer = NULL; | 583 m_pTrailer = NULL; |
584 } | 584 } |
585 int32_t status = 0; | 585 |
| 586 ParserState state = ParserState::kDefault; |
| 587 |
586 int32_t inside_index = 0; | 588 int32_t inside_index = 0; |
587 FX_DWORD objnum = 0; | 589 FX_DWORD objnum = 0; |
588 FX_DWORD gennum = 0; | 590 FX_DWORD gennum = 0; |
589 int32_t depth = 0; | 591 int32_t depth = 0; |
590 const FX_DWORD kBufferSize = 4096; | 592 const FX_DWORD kBufferSize = 4096; |
591 std::vector<uint8_t> buffer(kBufferSize); | 593 std::vector<uint8_t> buffer(kBufferSize); |
592 FX_FILESIZE pos = m_Syntax.m_HeaderOffset; | 594 FX_FILESIZE pos = m_Syntax.m_HeaderOffset; |
593 FX_FILESIZE start_pos = 0; | 595 FX_FILESIZE start_pos = 0; |
594 FX_FILESIZE start_pos1 = 0; | 596 FX_FILESIZE start_pos1 = 0; |
595 FX_FILESIZE last_obj = -1; | 597 FX_FILESIZE last_obj = -1; |
596 FX_FILESIZE last_xref = -1; | 598 FX_FILESIZE last_xref = -1; |
597 FX_FILESIZE last_trailer = -1; | 599 FX_FILESIZE last_trailer = -1; |
598 while (pos < m_Syntax.m_FileLen) { | 600 while (pos < m_Syntax.m_FileLen) { |
599 const FX_FILESIZE saved_pos = pos; | 601 const FX_FILESIZE saved_pos = pos; |
600 bool bOverFlow = false; | 602 bool bOverFlow = false; |
601 FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize); | 603 FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize); |
602 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size)) | 604 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size)) |
603 break; | 605 break; |
604 | 606 |
605 for (FX_DWORD i = 0; i < size; i++) { | 607 for (FX_DWORD i = 0; i < size; i++) { |
606 uint8_t byte = buffer[i]; | 608 uint8_t byte = buffer[i]; |
607 switch (status) { | 609 switch (state) { |
608 case 0: | 610 case ParserState::kDefault: |
609 if (PDFCharIsWhitespace(byte)) | 611 if (PDFCharIsWhitespace(byte)) |
610 status = 1; | 612 state = ParserState::kWhitespace; |
611 | 613 |
612 if (std::isdigit(byte)) { | 614 if (std::isdigit(byte)) { |
613 --i; | 615 --i; |
614 status = 1; | 616 state = ParserState::kWhitespace; |
615 } | 617 } |
616 | 618 |
617 if (byte == '%') { | 619 if (byte == '%') { |
618 inside_index = 0; | 620 inside_index = 0; |
619 status = 9; | 621 state = ParserState::kComment; |
620 } | 622 } |
621 | 623 |
622 if (byte == '(') { | 624 if (byte == '(') { |
623 status = 10; | 625 state = ParserState::kString; |
624 depth = 1; | 626 depth = 1; |
625 } | 627 } |
626 | 628 |
627 if (byte == '<') { | 629 if (byte == '<') { |
628 inside_index = 1; | 630 inside_index = 1; |
629 status = 11; | 631 state = ParserState::kHexString; |
630 } | 632 } |
631 | 633 |
632 if (byte == '\\') | 634 if (byte == '\\') |
633 status = 13; | 635 state = ParserState::kEscapedString; |
634 | 636 |
635 if (byte == 't') { | 637 if (byte == 't') { |
636 status = 7; | 638 state = ParserState::kTrailer; |
637 inside_index = 1; | 639 inside_index = 1; |
638 } | 640 } |
639 break; | 641 break; |
640 case 1: | 642 |
| 643 case ParserState::kWhitespace: |
641 if (PDFCharIsWhitespace(byte)) { | 644 if (PDFCharIsWhitespace(byte)) { |
642 break; | 645 break; |
643 } else if (std::isdigit(byte)) { | 646 } else if (std::isdigit(byte)) { |
644 start_pos = pos + i; | 647 start_pos = pos + i; |
645 status = 2; | 648 state = ParserState::kObjNum; |
646 objnum = FXSYS_toDecimalDigit(byte); | 649 objnum = FXSYS_toDecimalDigit(byte); |
| 650 |
647 } else if (byte == 't') { | 651 } else if (byte == 't') { |
648 status = 7; | 652 state = ParserState::kTrailer; |
649 inside_index = 1; | 653 inside_index = 1; |
| 654 |
650 } else if (byte == 'x') { | 655 } else if (byte == 'x') { |
651 status = 8; | 656 state = ParserState::kXref; |
652 inside_index = 1; | 657 inside_index = 1; |
| 658 |
653 } else { | 659 } else { |
654 --i; | 660 --i; |
655 status = 0; | 661 state = ParserState::kDefault; |
656 } | 662 } |
657 break; | 663 break; |
658 case 2: | 664 |
| 665 case ParserState::kObjNum: |
659 if (std::isdigit(byte)) { | 666 if (std::isdigit(byte)) { |
660 objnum = objnum * 10 + FXSYS_toDecimalDigit(byte); | 667 objnum = objnum * 10 + FXSYS_toDecimalDigit(byte); |
661 break; | 668 break; |
662 } else if (PDFCharIsWhitespace(byte)) { | 669 } else if (PDFCharIsWhitespace(byte)) { |
663 status = 3; | 670 state = ParserState::kPostObjNum; |
664 } else { | 671 } else { |
665 --i; | 672 --i; |
666 status = 14; | 673 state = ParserState::kEndObj; |
667 inside_index = 0; | 674 inside_index = 0; |
668 } | 675 } |
669 break; | 676 break; |
670 case 3: | 677 |
| 678 case ParserState::kPostObjNum: |
671 if (std::isdigit(byte)) { | 679 if (std::isdigit(byte)) { |
672 start_pos1 = pos + i; | 680 start_pos1 = pos + i; |
673 status = 4; | 681 state = ParserState::kGenNum; |
674 gennum = FXSYS_toDecimalDigit(byte); | 682 gennum = FXSYS_toDecimalDigit(byte); |
675 } else if (PDFCharIsWhitespace(byte)) { | 683 } else if (PDFCharIsWhitespace(byte)) { |
676 break; | 684 break; |
677 } else if (byte == 't') { | 685 } else if (byte == 't') { |
678 status = 7; | 686 state = ParserState::kTrailer; |
679 inside_index = 1; | 687 inside_index = 1; |
680 } else { | 688 } else { |
681 --i; | 689 --i; |
682 status = 0; | 690 state = ParserState::kDefault; |
683 } | 691 } |
684 break; | 692 break; |
685 case 4: | 693 |
| 694 case ParserState::kGenNum: |
686 if (std::isdigit(byte)) { | 695 if (std::isdigit(byte)) { |
687 gennum = gennum * 10 + FXSYS_toDecimalDigit(byte); | 696 gennum = gennum * 10 + FXSYS_toDecimalDigit(byte); |
688 break; | 697 break; |
689 } else if (PDFCharIsWhitespace(byte)) { | 698 } else if (PDFCharIsWhitespace(byte)) { |
690 status = 5; | 699 state = ParserState::kPostGenNum; |
691 } else { | 700 } else { |
692 --i; | 701 --i; |
693 status = 0; | 702 state = ParserState::kDefault; |
694 } | 703 } |
695 break; | 704 break; |
696 case 5: | 705 |
| 706 case ParserState::kPostGenNum: |
697 if (byte == 'o') { | 707 if (byte == 'o') { |
698 status = 6; | 708 state = ParserState::kBeginObj; |
699 inside_index = 1; | 709 inside_index = 1; |
700 } else if (PDFCharIsWhitespace(byte)) { | 710 } else if (PDFCharIsWhitespace(byte)) { |
701 break; | 711 break; |
702 } else if (std::isdigit(byte)) { | 712 } else if (std::isdigit(byte)) { |
703 objnum = gennum; | 713 objnum = gennum; |
704 gennum = FXSYS_toDecimalDigit(byte); | 714 gennum = FXSYS_toDecimalDigit(byte); |
705 start_pos = start_pos1; | 715 start_pos = start_pos1; |
706 start_pos1 = pos + i; | 716 start_pos1 = pos + i; |
707 status = 4; | 717 state = ParserState::kGenNum; |
708 } else if (byte == 't') { | 718 } else if (byte == 't') { |
709 status = 7; | 719 state = ParserState::kTrailer; |
710 inside_index = 1; | 720 inside_index = 1; |
711 } else { | 721 } else { |
712 --i; | 722 --i; |
713 status = 0; | 723 state = ParserState::kDefault; |
714 } | 724 } |
715 break; | 725 break; |
716 case 6: | 726 |
| 727 case ParserState::kBeginObj: |
717 switch (inside_index) { | 728 switch (inside_index) { |
718 case 1: | 729 case 1: |
719 if (byte != 'b') { | 730 if (byte != 'b') { |
720 --i; | 731 --i; |
721 status = 0; | 732 state = ParserState::kDefault; |
722 } else { | 733 } else { |
723 inside_index++; | 734 inside_index++; |
724 } | 735 } |
725 break; | 736 break; |
726 case 2: | 737 case 2: |
727 if (byte != 'j') { | 738 if (byte != 'j') { |
728 --i; | 739 --i; |
729 status = 0; | 740 state = ParserState::kDefault; |
730 } else { | 741 } else { |
731 inside_index++; | 742 inside_index++; |
732 } | 743 } |
733 break; | 744 break; |
734 case 3: | 745 case 3: |
735 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | 746 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { |
736 if (objnum > 0x1000000) { | 747 if (objnum > 0x1000000) { |
737 status = 0; | 748 state = ParserState::kDefault; |
738 break; | 749 break; |
739 } | 750 } |
740 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; | 751 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; |
741 m_SortedOffset.insert(obj_pos); | 752 m_SortedOffset.insert(obj_pos); |
742 last_obj = start_pos; | 753 last_obj = start_pos; |
743 FX_FILESIZE obj_end = 0; | 754 FX_FILESIZE obj_end = 0; |
744 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( | 755 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( |
745 m_pDocument, obj_pos, objnum, &obj_end); | 756 m_pDocument, obj_pos, objnum, &obj_end); |
746 if (CPDF_Stream* pStream = ToStream(pObject)) { | 757 if (CPDF_Stream* pStream = ToStream(pObject)) { |
747 if (CPDF_Dictionary* pDict = pStream->GetDict()) { | 758 if (CPDF_Dictionary* pDict = pStream->GetDict()) { |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
786 } else { | 797 } else { |
787 m_ObjectInfo[objnum].pos = obj_pos; | 798 m_ObjectInfo[objnum].pos = obj_pos; |
788 m_ObjectInfo[objnum].type = 1; | 799 m_ObjectInfo[objnum].type = 1; |
789 m_ObjectInfo[objnum].gennum = gennum; | 800 m_ObjectInfo[objnum].gennum = gennum; |
790 } | 801 } |
791 if (pObject) { | 802 if (pObject) { |
792 pObject->Release(); | 803 pObject->Release(); |
793 } | 804 } |
794 } | 805 } |
795 --i; | 806 --i; |
796 status = 0; | 807 state = ParserState::kDefault; |
797 break; | 808 break; |
798 } | 809 } |
799 break; | 810 break; |
800 case 7: | 811 |
| 812 case ParserState::kTrailer: |
801 if (inside_index == 7) { | 813 if (inside_index == 7) { |
802 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | 814 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { |
803 last_trailer = pos + i - 7; | 815 last_trailer = pos + i - 7; |
804 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); | 816 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); |
805 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true); | 817 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true); |
806 if (pObj) { | 818 if (pObj) { |
807 if (!pObj->IsDictionary() && !pObj->AsStream()) { | 819 if (!pObj->IsDictionary() && !pObj->AsStream()) { |
808 pObj->Release(); | 820 pObj->Release(); |
809 } else { | 821 } else { |
810 CPDF_Stream* pStream = pObj->AsStream(); | 822 CPDF_Stream* pStream = pObj->AsStream(); |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
853 } | 865 } |
854 m_Syntax.RestorePos(dwSavePos); | 866 m_Syntax.RestorePos(dwSavePos); |
855 } | 867 } |
856 } else { | 868 } else { |
857 pObj->Release(); | 869 pObj->Release(); |
858 } | 870 } |
859 } | 871 } |
860 } | 872 } |
861 } | 873 } |
862 --i; | 874 --i; |
863 status = 0; | 875 state = ParserState::kDefault; |
864 } else if (byte == "trailer"[inside_index]) { | 876 } else if (byte == "trailer"[inside_index]) { |
865 inside_index++; | 877 inside_index++; |
866 } else { | 878 } else { |
867 --i; | 879 --i; |
868 status = 0; | 880 state = ParserState::kDefault; |
869 } | 881 } |
870 break; | 882 break; |
871 case 8: | 883 |
| 884 case ParserState::kXref: |
872 if (inside_index == 4) { | 885 if (inside_index == 4) { |
873 last_xref = pos + i - 4; | 886 last_xref = pos + i - 4; |
874 status = 1; | 887 state = ParserState::kWhitespace; |
875 } else if (byte == "xref"[inside_index]) { | 888 } else if (byte == "xref"[inside_index]) { |
876 inside_index++; | 889 inside_index++; |
877 } else { | 890 } else { |
878 --i; | 891 --i; |
879 status = 0; | 892 state = ParserState::kDefault; |
880 } | 893 } |
881 break; | 894 break; |
882 case 9: | 895 |
| 896 case ParserState::kComment: |
883 if (byte == '\r' || byte == '\n') { | 897 if (byte == '\r' || byte == '\n') { |
884 status = 0; | 898 state = ParserState::kDefault; |
885 } | 899 } |
886 break; | 900 break; |
887 case 10: | 901 |
| 902 case ParserState::kString: |
888 if (byte == ')') { | 903 if (byte == ')') { |
889 if (depth > 0) { | 904 if (depth > 0) { |
890 depth--; | 905 depth--; |
891 } | 906 } |
892 } else if (byte == '(') { | 907 } else if (byte == '(') { |
893 depth++; | 908 depth++; |
894 } | 909 } |
895 if (!depth) { | 910 if (!depth) { |
896 status = 0; | 911 state = ParserState::kDefault; |
897 } | 912 } |
898 break; | 913 break; |
899 case 11: | 914 |
| 915 case ParserState::kHexString: |
900 if (byte == '>' || (byte == '<' && inside_index == 1)) | 916 if (byte == '>' || (byte == '<' && inside_index == 1)) |
901 status = 0; | 917 state = ParserState::kDefault; |
902 inside_index = 0; | 918 inside_index = 0; |
903 break; | 919 break; |
904 case 13: | 920 |
| 921 case ParserState::kEscapedString: |
905 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { | 922 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { |
906 --i; | 923 --i; |
907 status = 0; | 924 state = ParserState::kDefault; |
908 } | 925 } |
909 break; | 926 break; |
910 case 14: | 927 |
| 928 case ParserState::kEndObj: |
911 if (PDFCharIsWhitespace(byte)) { | 929 if (PDFCharIsWhitespace(byte)) { |
912 status = 0; | 930 state = ParserState::kDefault; |
913 } else if (byte == '%' || byte == '(' || byte == '<' || | 931 } else if (byte == '%' || byte == '(' || byte == '<' || |
914 byte == '\\') { | 932 byte == '\\') { |
915 status = 0; | 933 state = ParserState::kDefault; |
916 --i; | 934 --i; |
917 } else if (inside_index == 6) { | 935 } else if (inside_index == 6) { |
918 status = 0; | 936 state = ParserState::kDefault; |
919 --i; | 937 --i; |
920 } else if (byte == "endobj"[inside_index]) { | 938 } else if (byte == "endobj"[inside_index]) { |
921 inside_index++; | 939 inside_index++; |
922 } | 940 } |
923 break; | 941 break; |
924 } | 942 } |
925 if (bOverFlow) { | 943 if (bOverFlow) { |
926 size = 0; | 944 size = 0; |
927 break; | 945 break; |
928 } | 946 } |
(...skipping 3793 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4722 if (!m_pLinearizedDict) | 4740 if (!m_pLinearizedDict) |
4723 return -1; | 4741 return -1; |
4724 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); | 4742 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); |
4725 if (!pRange) | 4743 if (!pRange) |
4726 return -1; | 4744 return -1; |
4727 CPDF_Object* pStreamLen = pRange->GetElementValue(1); | 4745 CPDF_Object* pStreamLen = pRange->GetElementValue(1); |
4728 if (!pStreamLen) | 4746 if (!pStreamLen) |
4729 return -1; | 4747 return -1; |
4730 return pStreamLen->GetInteger(); | 4748 return pStreamLen->GetInteger(); |
4731 } | 4749 } |
OLD | NEW |