Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(424)

Side by Side Diff: core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp

Issue 1715483002: Name the states for fpdf_parser_parser. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/include/fpdfapi/fpdf_parser.h" 7 #include "core/include/fpdfapi/fpdf_parser.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <memory> 10 #include <memory>
(...skipping 18 matching lines...) Expand all
29 // A limit on the maximum object number in the xref table. Theoretical limits 29 // A limit on the maximum object number in the xref table. Theoretical limits
30 // are higher, but this may be large enough in practice. 30 // are higher, but this may be large enough in practice.
31 const FX_DWORD kMaxObjectNumber = 1048576; 31 const FX_DWORD kMaxObjectNumber = 1048576;
32 32
33 struct SearchTagRecord { 33 struct SearchTagRecord {
34 const char* m_pTag; 34 const char* m_pTag;
35 FX_DWORD m_Len; 35 FX_DWORD m_Len;
36 FX_DWORD m_Offset; 36 FX_DWORD m_Offset;
37 }; 37 };
38 38
39 enum class ParserState {
Tom Sepez 2016/02/18 19:27:55 Can we make ParserState an enum nested privately i…
dsinclair 2016/02/18 19:47:04 Done. Is there a benefit over having it in the an…
Tom Sepez 2016/02/18 19:49:16 Yes, it tells the reader that this state is associ…
dsinclair 2016/02/18 19:51:03 Ah, right. Too spoiled by one class per file. Wasn…
40 kDefault,
41 kComment,
Tom Sepez 2016/02/18 19:27:55 Can we keep these in the same order as before so t…
dsinclair 2016/02/18 19:47:04 I checked through the code, all uses of status are…
Tom Sepez 2016/02/18 19:49:16 Acknowledged.
42 kWhitespace,
43
Tom Sepez 2016/02/18 19:27:55 nit: not sure we need blank lines here.
dsinclair 2016/02/18 19:47:04 Done. Was an attempt at grouping and readability.
44 kString,
45 kHexString,
46 kEscapedString,
47
48 kXref,
49 kObjNum,
50 kPostObjNum,
51 kGenNum,
52 kPostGenNum,
53
54 kTrailer,
55
56 kBeginObj,
57 kEndObj
58 };
59
39 int32_t GetHeaderOffset(IFX_FileRead* pFile) { 60 int32_t GetHeaderOffset(IFX_FileRead* pFile) {
40 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025); 61 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);
41 const size_t kBufSize = 4; 62 const size_t kBufSize = 4;
42 uint8_t buf[kBufSize]; 63 uint8_t buf[kBufSize];
43 int32_t offset = 0; 64 int32_t offset = 0;
44 while (offset <= 1024) { 65 while (offset <= 1024) {
45 if (!pFile->ReadBlock(buf, offset, kBufSize)) 66 if (!pFile->ReadBlock(buf, offset, kBufSize))
46 return -1; 67 return -1;
47 68
48 if (*(FX_DWORD*)buf == tag) 69 if (*(FX_DWORD*)buf == tag)
(...skipping 526 matching lines...) Expand 10 before | Expand all | Expand 10 after
575 return TRUE; 596 return TRUE;
576 } 597 }
577 598
578 FX_BOOL CPDF_Parser::RebuildCrossRef() { 599 FX_BOOL CPDF_Parser::RebuildCrossRef() {
579 m_ObjectInfo.clear(); 600 m_ObjectInfo.clear();
580 m_SortedOffset.clear(); 601 m_SortedOffset.clear();
581 if (m_pTrailer) { 602 if (m_pTrailer) {
582 m_pTrailer->Release(); 603 m_pTrailer->Release();
583 m_pTrailer = NULL; 604 m_pTrailer = NULL;
584 } 605 }
585 int32_t status = 0; 606
607 enum ParserState status = ParserState::kDefault;
Tom Sepez 2016/02/18 19:27:55 nit: is |enum| needed? Also, can we rename this |sta…
dsinclair 2016/02/18 19:47:04 Done.
608
586 int32_t inside_index = 0; 609 int32_t inside_index = 0;
587 FX_DWORD objnum = 0; 610 FX_DWORD objnum = 0;
588 FX_DWORD gennum = 0; 611 FX_DWORD gennum = 0;
589 int32_t depth = 0; 612 int32_t depth = 0;
590 const FX_DWORD kBufferSize = 4096; 613 const FX_DWORD kBufferSize = 4096;
591 std::vector<uint8_t> buffer(kBufferSize); 614 std::vector<uint8_t> buffer(kBufferSize);
592 FX_FILESIZE pos = m_Syntax.m_HeaderOffset; 615 FX_FILESIZE pos = m_Syntax.m_HeaderOffset;
593 FX_FILESIZE start_pos = 0; 616 FX_FILESIZE start_pos = 0;
594 FX_FILESIZE start_pos1 = 0; 617 FX_FILESIZE start_pos1 = 0;
595 FX_FILESIZE last_obj = -1; 618 FX_FILESIZE last_obj = -1;
596 FX_FILESIZE last_xref = -1; 619 FX_FILESIZE last_xref = -1;
597 FX_FILESIZE last_trailer = -1; 620 FX_FILESIZE last_trailer = -1;
598 while (pos < m_Syntax.m_FileLen) { 621 while (pos < m_Syntax.m_FileLen) {
599 const FX_FILESIZE saved_pos = pos; 622 const FX_FILESIZE saved_pos = pos;
600 bool bOverFlow = false; 623 bool bOverFlow = false;
601 FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize); 624 FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize);
602 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size)) 625 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size))
603 break; 626 break;
604 627
605 for (FX_DWORD i = 0; i < size; i++) { 628 for (FX_DWORD i = 0; i < size; i++) {
606 uint8_t byte = buffer[i]; 629 uint8_t byte = buffer[i];
607 switch (status) { 630 switch (status) {
608 case 0: 631 case ParserState::kDefault:
609 if (PDFCharIsWhitespace(byte)) 632 if (PDFCharIsWhitespace(byte))
610 status = 1; 633 status = ParserState::kWhitespace;
611 634
612 if (std::isdigit(byte)) { 635 if (std::isdigit(byte)) {
613 --i; 636 --i;
614 status = 1; 637 status = ParserState::kWhitespace;
615 } 638 }
616 639
617 if (byte == '%') { 640 if (byte == '%') {
618 inside_index = 0; 641 inside_index = 0;
619 status = 9; 642 status = ParserState::kComment;
620 } 643 }
621 644
622 if (byte == '(') { 645 if (byte == '(') {
623 status = 10; 646 status = ParserState::kString;
624 depth = 1; 647 depth = 1;
625 } 648 }
626 649
627 if (byte == '<') { 650 if (byte == '<') {
628 inside_index = 1; 651 inside_index = 1;
629 status = 11; 652 status = ParserState::kHexString;
630 } 653 }
631 654
632 if (byte == '\\') 655 if (byte == '\\')
633 status = 13; 656 status = ParserState::kEscapedString;
634 657
635 if (byte == 't') { 658 if (byte == 't') {
636 status = 7; 659 status = ParserState::kTrailer;
637 inside_index = 1; 660 inside_index = 1;
638 } 661 }
639 break; 662 break;
640 case 1: 663
664 case ParserState::kWhitespace:
641 if (PDFCharIsWhitespace(byte)) { 665 if (PDFCharIsWhitespace(byte)) {
642 break; 666 break;
643 } else if (std::isdigit(byte)) { 667 } else if (std::isdigit(byte)) {
644 start_pos = pos + i; 668 start_pos = pos + i;
645 status = 2; 669 status = ParserState::kObjNum;
646 objnum = FXSYS_toDecimalDigit(byte); 670 objnum = FXSYS_toDecimalDigit(byte);
671
647 } else if (byte == 't') { 672 } else if (byte == 't') {
648 status = 7; 673 status = ParserState::kTrailer;
649 inside_index = 1; 674 inside_index = 1;
675
650 } else if (byte == 'x') { 676 } else if (byte == 'x') {
651 status = 8; 677 status = ParserState::kXref;
652 inside_index = 1; 678 inside_index = 1;
679
653 } else { 680 } else {
654 --i; 681 --i;
655 status = 0; 682 status = ParserState::kDefault;
656 } 683 }
657 break; 684 break;
658 case 2: 685
686 case ParserState::kObjNum:
659 if (std::isdigit(byte)) { 687 if (std::isdigit(byte)) {
660 objnum = objnum * 10 + FXSYS_toDecimalDigit(byte); 688 objnum = objnum * 10 + FXSYS_toDecimalDigit(byte);
661 break; 689 break;
662 } else if (PDFCharIsWhitespace(byte)) { 690 } else if (PDFCharIsWhitespace(byte)) {
663 status = 3; 691 status = ParserState::kPostObjNum;
664 } else { 692 } else {
665 --i; 693 --i;
666 status = 14; 694 status = ParserState::kEndObj;
667 inside_index = 0; 695 inside_index = 0;
668 } 696 }
669 break; 697 break;
670 case 3: 698
699 case ParserState::kPostObjNum:
671 if (std::isdigit(byte)) { 700 if (std::isdigit(byte)) {
672 start_pos1 = pos + i; 701 start_pos1 = pos + i;
673 status = 4; 702 status = ParserState::kGenNum;
674 gennum = FXSYS_toDecimalDigit(byte); 703 gennum = FXSYS_toDecimalDigit(byte);
675 } else if (PDFCharIsWhitespace(byte)) { 704 } else if (PDFCharIsWhitespace(byte)) {
676 break; 705 break;
677 } else if (byte == 't') { 706 } else if (byte == 't') {
678 status = 7; 707 status = ParserState::kTrailer;
679 inside_index = 1; 708 inside_index = 1;
680 } else { 709 } else {
681 --i; 710 --i;
682 status = 0; 711 status = ParserState::kDefault;
683 } 712 }
684 break; 713 break;
685 case 4: 714
715 case ParserState::kGenNum:
686 if (std::isdigit(byte)) { 716 if (std::isdigit(byte)) {
687 gennum = gennum * 10 + FXSYS_toDecimalDigit(byte); 717 gennum = gennum * 10 + FXSYS_toDecimalDigit(byte);
688 break; 718 break;
689 } else if (PDFCharIsWhitespace(byte)) { 719 } else if (PDFCharIsWhitespace(byte)) {
690 status = 5; 720 status = ParserState::kPostGenNum;
691 } else { 721 } else {
692 --i; 722 --i;
693 status = 0; 723 status = ParserState::kDefault;
694 } 724 }
695 break; 725 break;
696 case 5: 726
727 case ParserState::kPostGenNum:
697 if (byte == 'o') { 728 if (byte == 'o') {
698 status = 6; 729 status = ParserState::kBeginObj;
699 inside_index = 1; 730 inside_index = 1;
700 } else if (PDFCharIsWhitespace(byte)) { 731 } else if (PDFCharIsWhitespace(byte)) {
701 break; 732 break;
702 } else if (std::isdigit(byte)) { 733 } else if (std::isdigit(byte)) {
703 objnum = gennum; 734 objnum = gennum;
704 gennum = FXSYS_toDecimalDigit(byte); 735 gennum = FXSYS_toDecimalDigit(byte);
705 start_pos = start_pos1; 736 start_pos = start_pos1;
706 start_pos1 = pos + i; 737 start_pos1 = pos + i;
707 status = 4; 738 status = ParserState::kGenNum;
708 } else if (byte == 't') { 739 } else if (byte == 't') {
709 status = 7; 740 status = ParserState::kTrailer;
710 inside_index = 1; 741 inside_index = 1;
711 } else { 742 } else {
712 --i; 743 --i;
713 status = 0; 744 status = ParserState::kDefault;
714 } 745 }
715 break; 746 break;
716 case 6: 747
748 case ParserState::kBeginObj:
717 switch (inside_index) { 749 switch (inside_index) {
718 case 1: 750 case 1:
719 if (byte != 'b') { 751 if (byte != 'b') {
720 --i; 752 --i;
721 status = 0; 753 status = ParserState::kDefault;
722 } else { 754 } else {
723 inside_index++; 755 inside_index++;
724 } 756 }
725 break; 757 break;
726 case 2: 758 case 2:
727 if (byte != 'j') { 759 if (byte != 'j') {
728 --i; 760 --i;
729 status = 0; 761 status = ParserState::kDefault;
730 } else { 762 } else {
731 inside_index++; 763 inside_index++;
732 } 764 }
733 break; 765 break;
734 case 3: 766 case 3:
735 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 767 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
736 if (objnum > 0x1000000) { 768 if (objnum > 0x1000000) {
737 status = 0; 769 status = ParserState::kDefault;
738 break; 770 break;
739 } 771 }
740 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; 772 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset;
741 m_SortedOffset.insert(obj_pos); 773 m_SortedOffset.insert(obj_pos);
742 last_obj = start_pos; 774 last_obj = start_pos;
743 FX_FILESIZE obj_end = 0; 775 FX_FILESIZE obj_end = 0;
744 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( 776 CPDF_Object* pObject = ParseIndirectObjectAtByStrict(
745 m_pDocument, obj_pos, objnum, &obj_end); 777 m_pDocument, obj_pos, objnum, &obj_end);
746 if (CPDF_Stream* pStream = ToStream(pObject)) { 778 if (CPDF_Stream* pStream = ToStream(pObject)) {
747 if (CPDF_Dictionary* pDict = pStream->GetDict()) { 779 if (CPDF_Dictionary* pDict = pStream->GetDict()) {
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
786 } else { 818 } else {
787 m_ObjectInfo[objnum].pos = obj_pos; 819 m_ObjectInfo[objnum].pos = obj_pos;
788 m_ObjectInfo[objnum].type = 1; 820 m_ObjectInfo[objnum].type = 1;
789 m_ObjectInfo[objnum].gennum = gennum; 821 m_ObjectInfo[objnum].gennum = gennum;
790 } 822 }
791 if (pObject) { 823 if (pObject) {
792 pObject->Release(); 824 pObject->Release();
793 } 825 }
794 } 826 }
795 --i; 827 --i;
796 status = 0; 828 status = ParserState::kDefault;
797 break; 829 break;
798 } 830 }
799 break; 831 break;
800 case 7: 832
833 case ParserState::kTrailer:
801 if (inside_index == 7) { 834 if (inside_index == 7) {
802 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 835 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
803 last_trailer = pos + i - 7; 836 last_trailer = pos + i - 7;
804 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); 837 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);
805 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true); 838 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true);
806 if (pObj) { 839 if (pObj) {
807 if (!pObj->IsDictionary() && !pObj->AsStream()) { 840 if (!pObj->IsDictionary() && !pObj->AsStream()) {
808 pObj->Release(); 841 pObj->Release();
809 } else { 842 } else {
810 CPDF_Stream* pStream = pObj->AsStream(); 843 CPDF_Stream* pStream = pObj->AsStream();
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
853 } 886 }
854 m_Syntax.RestorePos(dwSavePos); 887 m_Syntax.RestorePos(dwSavePos);
855 } 888 }
856 } else { 889 } else {
857 pObj->Release(); 890 pObj->Release();
858 } 891 }
859 } 892 }
860 } 893 }
861 } 894 }
862 --i; 895 --i;
863 status = 0; 896 status = ParserState::kDefault;
864 } else if (byte == "trailer"[inside_index]) { 897 } else if (byte == "trailer"[inside_index]) {
865 inside_index++; 898 inside_index++;
866 } else { 899 } else {
867 --i; 900 --i;
868 status = 0; 901 status = ParserState::kDefault;
869 } 902 }
870 break; 903 break;
871 case 8: 904
905 case ParserState::kXref:
872 if (inside_index == 4) { 906 if (inside_index == 4) {
873 last_xref = pos + i - 4; 907 last_xref = pos + i - 4;
874 status = 1; 908 status = ParserState::kWhitespace;
875 } else if (byte == "xref"[inside_index]) { 909 } else if (byte == "xref"[inside_index]) {
876 inside_index++; 910 inside_index++;
877 } else { 911 } else {
878 --i; 912 --i;
879 status = 0; 913 status = ParserState::kDefault;
880 } 914 }
881 break; 915 break;
882 case 9: 916
917 case ParserState::kComment:
883 if (byte == '\r' || byte == '\n') { 918 if (byte == '\r' || byte == '\n') {
884 status = 0; 919 status = ParserState::kDefault;
885 } 920 }
886 break; 921 break;
887 case 10: 922
923 case ParserState::kString:
888 if (byte == ')') { 924 if (byte == ')') {
889 if (depth > 0) { 925 if (depth > 0) {
890 depth--; 926 depth--;
891 } 927 }
892 } else if (byte == '(') { 928 } else if (byte == '(') {
893 depth++; 929 depth++;
894 } 930 }
895 if (!depth) { 931 if (!depth) {
896 status = 0; 932 status = ParserState::kDefault;
897 } 933 }
898 break; 934 break;
899 case 11: 935
936 case ParserState::kHexString:
900 if (byte == '>' || (byte == '<' && inside_index == 1)) 937 if (byte == '>' || (byte == '<' && inside_index == 1))
901 status = 0; 938 status = ParserState::kDefault;
902 inside_index = 0; 939 inside_index = 0;
903 break; 940 break;
904 case 13: 941
Tom Sepez 2016/02/18 19:27:55 Heh. No 12.
dsinclair 2016/02/18 19:47:04 Acknowledged.
942 case ParserState::kEscapedString:
905 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { 943 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
906 --i; 944 --i;
907 status = 0; 945 status = ParserState::kDefault;
908 } 946 }
909 break; 947 break;
910 case 14: 948
949 case ParserState::kEndObj:
911 if (PDFCharIsWhitespace(byte)) { 950 if (PDFCharIsWhitespace(byte)) {
912 status = 0; 951 status = ParserState::kDefault;
913 } else if (byte == '%' || byte == '(' || byte == '<' || 952 } else if (byte == '%' || byte == '(' || byte == '<' ||
914 byte == '\\') { 953 byte == '\\') {
915 status = 0; 954 status = ParserState::kDefault;
916 --i; 955 --i;
917 } else if (inside_index == 6) { 956 } else if (inside_index == 6) {
918 status = 0; 957 status = ParserState::kDefault;
919 --i; 958 --i;
920 } else if (byte == "endobj"[inside_index]) { 959 } else if (byte == "endobj"[inside_index]) {
921 inside_index++; 960 inside_index++;
922 } 961 }
923 break; 962 break;
924 } 963 }
925 if (bOverFlow) { 964 if (bOverFlow) {
926 size = 0; 965 size = 0;
927 break; 966 break;
928 } 967 }
(...skipping 3793 matching lines...) Expand 10 before | Expand all | Expand 10 after
4722 if (!m_pLinearizedDict) 4761 if (!m_pLinearizedDict)
4723 return -1; 4762 return -1;
4724 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); 4763 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H");
4725 if (!pRange) 4764 if (!pRange)
4726 return -1; 4765 return -1;
4727 CPDF_Object* pStreamLen = pRange->GetElementValue(1); 4766 CPDF_Object* pStreamLen = pRange->GetElementValue(1);
4728 if (!pStreamLen) 4767 if (!pStreamLen)
4729 return -1; 4768 return -1;
4730 return pStreamLen->GetInteger(); 4769 return pStreamLen->GetInteger();
4731 } 4770 }
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698