| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "core/include/fpdfapi/fpdf_parser.h" | 7 #include "core/include/fpdfapi/fpdf_parser.h" |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <memory> | 10 #include <memory> |
| (...skipping 564 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 575 return TRUE; | 575 return TRUE; |
| 576 } | 576 } |
| 577 | 577 |
| 578 FX_BOOL CPDF_Parser::RebuildCrossRef() { | 578 FX_BOOL CPDF_Parser::RebuildCrossRef() { |
| 579 m_ObjectInfo.clear(); | 579 m_ObjectInfo.clear(); |
| 580 m_SortedOffset.clear(); | 580 m_SortedOffset.clear(); |
| 581 if (m_pTrailer) { | 581 if (m_pTrailer) { |
| 582 m_pTrailer->Release(); | 582 m_pTrailer->Release(); |
| 583 m_pTrailer = NULL; | 583 m_pTrailer = NULL; |
| 584 } | 584 } |
| 585 int32_t status = 0; | 585 |
| 586 ParserState state = ParserState::kDefault; |
| 587 |
| 586 int32_t inside_index = 0; | 588 int32_t inside_index = 0; |
| 587 FX_DWORD objnum = 0; | 589 FX_DWORD objnum = 0; |
| 588 FX_DWORD gennum = 0; | 590 FX_DWORD gennum = 0; |
| 589 int32_t depth = 0; | 591 int32_t depth = 0; |
| 590 const FX_DWORD kBufferSize = 4096; | 592 const FX_DWORD kBufferSize = 4096; |
| 591 std::vector<uint8_t> buffer(kBufferSize); | 593 std::vector<uint8_t> buffer(kBufferSize); |
| 592 FX_FILESIZE pos = m_Syntax.m_HeaderOffset; | 594 FX_FILESIZE pos = m_Syntax.m_HeaderOffset; |
| 593 FX_FILESIZE start_pos = 0; | 595 FX_FILESIZE start_pos = 0; |
| 594 FX_FILESIZE start_pos1 = 0; | 596 FX_FILESIZE start_pos1 = 0; |
| 595 FX_FILESIZE last_obj = -1; | 597 FX_FILESIZE last_obj = -1; |
| 596 FX_FILESIZE last_xref = -1; | 598 FX_FILESIZE last_xref = -1; |
| 597 FX_FILESIZE last_trailer = -1; | 599 FX_FILESIZE last_trailer = -1; |
| 598 while (pos < m_Syntax.m_FileLen) { | 600 while (pos < m_Syntax.m_FileLen) { |
| 599 const FX_FILESIZE saved_pos = pos; | 601 const FX_FILESIZE saved_pos = pos; |
| 600 bool bOverFlow = false; | 602 bool bOverFlow = false; |
| 601 FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize); | 603 FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize); |
| 602 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size)) | 604 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size)) |
| 603 break; | 605 break; |
| 604 | 606 |
| 605 for (FX_DWORD i = 0; i < size; i++) { | 607 for (FX_DWORD i = 0; i < size; i++) { |
| 606 uint8_t byte = buffer[i]; | 608 uint8_t byte = buffer[i]; |
| 607 switch (status) { | 609 switch (state) { |
| 608 case 0: | 610 case ParserState::kDefault: |
| 609 if (PDFCharIsWhitespace(byte)) | 611 if (PDFCharIsWhitespace(byte)) |
| 610 status = 1; | 612 state = ParserState::kWhitespace; |
| 611 | 613 |
| 612 if (std::isdigit(byte)) { | 614 if (std::isdigit(byte)) { |
| 613 --i; | 615 --i; |
| 614 status = 1; | 616 state = ParserState::kWhitespace; |
| 615 } | 617 } |
| 616 | 618 |
| 617 if (byte == '%') { | 619 if (byte == '%') { |
| 618 inside_index = 0; | 620 inside_index = 0; |
| 619 status = 9; | 621 state = ParserState::kComment; |
| 620 } | 622 } |
| 621 | 623 |
| 622 if (byte == '(') { | 624 if (byte == '(') { |
| 623 status = 10; | 625 state = ParserState::kString; |
| 624 depth = 1; | 626 depth = 1; |
| 625 } | 627 } |
| 626 | 628 |
| 627 if (byte == '<') { | 629 if (byte == '<') { |
| 628 inside_index = 1; | 630 inside_index = 1; |
| 629 status = 11; | 631 state = ParserState::kHexString; |
| 630 } | 632 } |
| 631 | 633 |
| 632 if (byte == '\\') | 634 if (byte == '\\') |
| 633 status = 13; | 635 state = ParserState::kEscapedString; |
| 634 | 636 |
| 635 if (byte == 't') { | 637 if (byte == 't') { |
| 636 status = 7; | 638 state = ParserState::kTrailer; |
| 637 inside_index = 1; | 639 inside_index = 1; |
| 638 } | 640 } |
| 639 break; | 641 break; |
| 640 case 1: | 642 |
| 643 case ParserState::kWhitespace: |
| 641 if (PDFCharIsWhitespace(byte)) { | 644 if (PDFCharIsWhitespace(byte)) { |
| 642 break; | 645 break; |
| 643 } else if (std::isdigit(byte)) { | 646 } else if (std::isdigit(byte)) { |
| 644 start_pos = pos + i; | 647 start_pos = pos + i; |
| 645 status = 2; | 648 state = ParserState::kObjNum; |
| 646 objnum = FXSYS_toDecimalDigit(byte); | 649 objnum = FXSYS_toDecimalDigit(byte); |
| 650 |
| 647 } else if (byte == 't') { | 651 } else if (byte == 't') { |
| 648 status = 7; | 652 state = ParserState::kTrailer; |
| 649 inside_index = 1; | 653 inside_index = 1; |
| 654 |
| 650 } else if (byte == 'x') { | 655 } else if (byte == 'x') { |
| 651 status = 8; | 656 state = ParserState::kXref; |
| 652 inside_index = 1; | 657 inside_index = 1; |
| 658 |
| 653 } else { | 659 } else { |
| 654 --i; | 660 --i; |
| 655 status = 0; | 661 state = ParserState::kDefault; |
| 656 } | 662 } |
| 657 break; | 663 break; |
| 658 case 2: | 664 |
| 665 case ParserState::kObjNum: |
| 659 if (std::isdigit(byte)) { | 666 if (std::isdigit(byte)) { |
| 660 objnum = objnum * 10 + FXSYS_toDecimalDigit(byte); | 667 objnum = objnum * 10 + FXSYS_toDecimalDigit(byte); |
| 661 break; | 668 break; |
| 662 } else if (PDFCharIsWhitespace(byte)) { | 669 } else if (PDFCharIsWhitespace(byte)) { |
| 663 status = 3; | 670 state = ParserState::kPostObjNum; |
| 664 } else { | 671 } else { |
| 665 --i; | 672 --i; |
| 666 status = 14; | 673 state = ParserState::kEndObj; |
| 667 inside_index = 0; | 674 inside_index = 0; |
| 668 } | 675 } |
| 669 break; | 676 break; |
| 670 case 3: | 677 |
| 678 case ParserState::kPostObjNum: |
| 671 if (std::isdigit(byte)) { | 679 if (std::isdigit(byte)) { |
| 672 start_pos1 = pos + i; | 680 start_pos1 = pos + i; |
| 673 status = 4; | 681 state = ParserState::kGenNum; |
| 674 gennum = FXSYS_toDecimalDigit(byte); | 682 gennum = FXSYS_toDecimalDigit(byte); |
| 675 } else if (PDFCharIsWhitespace(byte)) { | 683 } else if (PDFCharIsWhitespace(byte)) { |
| 676 break; | 684 break; |
| 677 } else if (byte == 't') { | 685 } else if (byte == 't') { |
| 678 status = 7; | 686 state = ParserState::kTrailer; |
| 679 inside_index = 1; | 687 inside_index = 1; |
| 680 } else { | 688 } else { |
| 681 --i; | 689 --i; |
| 682 status = 0; | 690 state = ParserState::kDefault; |
| 683 } | 691 } |
| 684 break; | 692 break; |
| 685 case 4: | 693 |
| 694 case ParserState::kGenNum: |
| 686 if (std::isdigit(byte)) { | 695 if (std::isdigit(byte)) { |
| 687 gennum = gennum * 10 + FXSYS_toDecimalDigit(byte); | 696 gennum = gennum * 10 + FXSYS_toDecimalDigit(byte); |
| 688 break; | 697 break; |
| 689 } else if (PDFCharIsWhitespace(byte)) { | 698 } else if (PDFCharIsWhitespace(byte)) { |
| 690 status = 5; | 699 state = ParserState::kPostGenNum; |
| 691 } else { | 700 } else { |
| 692 --i; | 701 --i; |
| 693 status = 0; | 702 state = ParserState::kDefault; |
| 694 } | 703 } |
| 695 break; | 704 break; |
| 696 case 5: | 705 |
| 706 case ParserState::kPostGenNum: |
| 697 if (byte == 'o') { | 707 if (byte == 'o') { |
| 698 status = 6; | 708 state = ParserState::kBeginObj; |
| 699 inside_index = 1; | 709 inside_index = 1; |
| 700 } else if (PDFCharIsWhitespace(byte)) { | 710 } else if (PDFCharIsWhitespace(byte)) { |
| 701 break; | 711 break; |
| 702 } else if (std::isdigit(byte)) { | 712 } else if (std::isdigit(byte)) { |
| 703 objnum = gennum; | 713 objnum = gennum; |
| 704 gennum = FXSYS_toDecimalDigit(byte); | 714 gennum = FXSYS_toDecimalDigit(byte); |
| 705 start_pos = start_pos1; | 715 start_pos = start_pos1; |
| 706 start_pos1 = pos + i; | 716 start_pos1 = pos + i; |
| 707 status = 4; | 717 state = ParserState::kGenNum; |
| 708 } else if (byte == 't') { | 718 } else if (byte == 't') { |
| 709 status = 7; | 719 state = ParserState::kTrailer; |
| 710 inside_index = 1; | 720 inside_index = 1; |
| 711 } else { | 721 } else { |
| 712 --i; | 722 --i; |
| 713 status = 0; | 723 state = ParserState::kDefault; |
| 714 } | 724 } |
| 715 break; | 725 break; |
| 716 case 6: | 726 |
| 727 case ParserState::kBeginObj: |
| 717 switch (inside_index) { | 728 switch (inside_index) { |
| 718 case 1: | 729 case 1: |
| 719 if (byte != 'b') { | 730 if (byte != 'b') { |
| 720 --i; | 731 --i; |
| 721 status = 0; | 732 state = ParserState::kDefault; |
| 722 } else { | 733 } else { |
| 723 inside_index++; | 734 inside_index++; |
| 724 } | 735 } |
| 725 break; | 736 break; |
| 726 case 2: | 737 case 2: |
| 727 if (byte != 'j') { | 738 if (byte != 'j') { |
| 728 --i; | 739 --i; |
| 729 status = 0; | 740 state = ParserState::kDefault; |
| 730 } else { | 741 } else { |
| 731 inside_index++; | 742 inside_index++; |
| 732 } | 743 } |
| 733 break; | 744 break; |
| 734 case 3: | 745 case 3: |
| 735 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | 746 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { |
| 736 if (objnum > 0x1000000) { | 747 if (objnum > 0x1000000) { |
| 737 status = 0; | 748 state = ParserState::kDefault; |
| 738 break; | 749 break; |
| 739 } | 750 } |
| 740 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; | 751 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; |
| 741 m_SortedOffset.insert(obj_pos); | 752 m_SortedOffset.insert(obj_pos); |
| 742 last_obj = start_pos; | 753 last_obj = start_pos; |
| 743 FX_FILESIZE obj_end = 0; | 754 FX_FILESIZE obj_end = 0; |
| 744 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( | 755 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( |
| 745 m_pDocument, obj_pos, objnum, &obj_end); | 756 m_pDocument, obj_pos, objnum, &obj_end); |
| 746 if (CPDF_Stream* pStream = ToStream(pObject)) { | 757 if (CPDF_Stream* pStream = ToStream(pObject)) { |
| 747 if (CPDF_Dictionary* pDict = pStream->GetDict()) { | 758 if (CPDF_Dictionary* pDict = pStream->GetDict()) { |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 786 } else { | 797 } else { |
| 787 m_ObjectInfo[objnum].pos = obj_pos; | 798 m_ObjectInfo[objnum].pos = obj_pos; |
| 788 m_ObjectInfo[objnum].type = 1; | 799 m_ObjectInfo[objnum].type = 1; |
| 789 m_ObjectInfo[objnum].gennum = gennum; | 800 m_ObjectInfo[objnum].gennum = gennum; |
| 790 } | 801 } |
| 791 if (pObject) { | 802 if (pObject) { |
| 792 pObject->Release(); | 803 pObject->Release(); |
| 793 } | 804 } |
| 794 } | 805 } |
| 795 --i; | 806 --i; |
| 796 status = 0; | 807 state = ParserState::kDefault; |
| 797 break; | 808 break; |
| 798 } | 809 } |
| 799 break; | 810 break; |
| 800 case 7: | 811 |
| 812 case ParserState::kTrailer: |
| 801 if (inside_index == 7) { | 813 if (inside_index == 7) { |
| 802 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | 814 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { |
| 803 last_trailer = pos + i - 7; | 815 last_trailer = pos + i - 7; |
| 804 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); | 816 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); |
| 805 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true); | 817 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true); |
| 806 if (pObj) { | 818 if (pObj) { |
| 807 if (!pObj->IsDictionary() && !pObj->AsStream()) { | 819 if (!pObj->IsDictionary() && !pObj->AsStream()) { |
| 808 pObj->Release(); | 820 pObj->Release(); |
| 809 } else { | 821 } else { |
| 810 CPDF_Stream* pStream = pObj->AsStream(); | 822 CPDF_Stream* pStream = pObj->AsStream(); |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 853 } | 865 } |
| 854 m_Syntax.RestorePos(dwSavePos); | 866 m_Syntax.RestorePos(dwSavePos); |
| 855 } | 867 } |
| 856 } else { | 868 } else { |
| 857 pObj->Release(); | 869 pObj->Release(); |
| 858 } | 870 } |
| 859 } | 871 } |
| 860 } | 872 } |
| 861 } | 873 } |
| 862 --i; | 874 --i; |
| 863 status = 0; | 875 state = ParserState::kDefault; |
| 864 } else if (byte == "trailer"[inside_index]) { | 876 } else if (byte == "trailer"[inside_index]) { |
| 865 inside_index++; | 877 inside_index++; |
| 866 } else { | 878 } else { |
| 867 --i; | 879 --i; |
| 868 status = 0; | 880 state = ParserState::kDefault; |
| 869 } | 881 } |
| 870 break; | 882 break; |
| 871 case 8: | 883 |
| 884 case ParserState::kXref: |
| 872 if (inside_index == 4) { | 885 if (inside_index == 4) { |
| 873 last_xref = pos + i - 4; | 886 last_xref = pos + i - 4; |
| 874 status = 1; | 887 state = ParserState::kWhitespace; |
| 875 } else if (byte == "xref"[inside_index]) { | 888 } else if (byte == "xref"[inside_index]) { |
| 876 inside_index++; | 889 inside_index++; |
| 877 } else { | 890 } else { |
| 878 --i; | 891 --i; |
| 879 status = 0; | 892 state = ParserState::kDefault; |
| 880 } | 893 } |
| 881 break; | 894 break; |
| 882 case 9: | 895 |
| 896 case ParserState::kComment: |
| 883 if (byte == '\r' || byte == '\n') { | 897 if (byte == '\r' || byte == '\n') { |
| 884 status = 0; | 898 state = ParserState::kDefault; |
| 885 } | 899 } |
| 886 break; | 900 break; |
| 887 case 10: | 901 |
| 902 case ParserState::kString: |
| 888 if (byte == ')') { | 903 if (byte == ')') { |
| 889 if (depth > 0) { | 904 if (depth > 0) { |
| 890 depth--; | 905 depth--; |
| 891 } | 906 } |
| 892 } else if (byte == '(') { | 907 } else if (byte == '(') { |
| 893 depth++; | 908 depth++; |
| 894 } | 909 } |
| 895 if (!depth) { | 910 if (!depth) { |
| 896 status = 0; | 911 state = ParserState::kDefault; |
| 897 } | 912 } |
| 898 break; | 913 break; |
| 899 case 11: | 914 |
| 915 case ParserState::kHexString: |
| 900 if (byte == '>' || (byte == '<' && inside_index == 1)) | 916 if (byte == '>' || (byte == '<' && inside_index == 1)) |
| 901 status = 0; | 917 state = ParserState::kDefault; |
| 902 inside_index = 0; | 918 inside_index = 0; |
| 903 break; | 919 break; |
| 904 case 13: | 920 |
| 921 case ParserState::kEscapedString: |
| 905 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { | 922 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { |
| 906 --i; | 923 --i; |
| 907 status = 0; | 924 state = ParserState::kDefault; |
| 908 } | 925 } |
| 909 break; | 926 break; |
| 910 case 14: | 927 |
| 928 case ParserState::kEndObj: |
| 911 if (PDFCharIsWhitespace(byte)) { | 929 if (PDFCharIsWhitespace(byte)) { |
| 912 status = 0; | 930 state = ParserState::kDefault; |
| 913 } else if (byte == '%' || byte == '(' || byte == '<' || | 931 } else if (byte == '%' || byte == '(' || byte == '<' || |
| 914 byte == '\\') { | 932 byte == '\\') { |
| 915 status = 0; | 933 state = ParserState::kDefault; |
| 916 --i; | 934 --i; |
| 917 } else if (inside_index == 6) { | 935 } else if (inside_index == 6) { |
| 918 status = 0; | 936 state = ParserState::kDefault; |
| 919 --i; | 937 --i; |
| 920 } else if (byte == "endobj"[inside_index]) { | 938 } else if (byte == "endobj"[inside_index]) { |
| 921 inside_index++; | 939 inside_index++; |
| 922 } | 940 } |
| 923 break; | 941 break; |
| 924 } | 942 } |
| 925 if (bOverFlow) { | 943 if (bOverFlow) { |
| 926 size = 0; | 944 size = 0; |
| 927 break; | 945 break; |
| 928 } | 946 } |
| (...skipping 3793 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4722 if (!m_pLinearizedDict) | 4740 if (!m_pLinearizedDict) |
| 4723 return -1; | 4741 return -1; |
| 4724 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); | 4742 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); |
| 4725 if (!pRange) | 4743 if (!pRange) |
| 4726 return -1; | 4744 return -1; |
| 4727 CPDF_Object* pStreamLen = pRange->GetElementValue(1); | 4745 CPDF_Object* pStreamLen = pRange->GetElementValue(1); |
| 4728 if (!pStreamLen) | 4746 if (!pStreamLen) |
| 4729 return -1; | 4747 return -1; |
| 4730 return pStreamLen->GetInteger(); | 4748 return pStreamLen->GetInteger(); |
| 4731 } | 4749 } |
| OLD | NEW |