Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(147)

Side by Side Diff: core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp

Issue 1715483002: Name the states for fpdf_parser_parser. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Rebase to master (created 4 years, 10 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « core/include/fpdfapi/fpdf_parser.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/include/fpdfapi/fpdf_parser.h" 7 #include "core/include/fpdfapi/fpdf_parser.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <memory> 10 #include <memory>
(...skipping 564 matching lines...) Expand 10 before | Expand all | Expand 10 after
575 return TRUE; 575 return TRUE;
576 } 576 }
577 577
578 FX_BOOL CPDF_Parser::RebuildCrossRef() { 578 FX_BOOL CPDF_Parser::RebuildCrossRef() {
579 m_ObjectInfo.clear(); 579 m_ObjectInfo.clear();
580 m_SortedOffset.clear(); 580 m_SortedOffset.clear();
581 if (m_pTrailer) { 581 if (m_pTrailer) {
582 m_pTrailer->Release(); 582 m_pTrailer->Release();
583 m_pTrailer = NULL; 583 m_pTrailer = NULL;
584 } 584 }
585 int32_t status = 0; 585
586 ParserState state = ParserState::kDefault;
587
586 int32_t inside_index = 0; 588 int32_t inside_index = 0;
587 FX_DWORD objnum = 0; 589 FX_DWORD objnum = 0;
588 FX_DWORD gennum = 0; 590 FX_DWORD gennum = 0;
589 int32_t depth = 0; 591 int32_t depth = 0;
590 const FX_DWORD kBufferSize = 4096; 592 const FX_DWORD kBufferSize = 4096;
591 std::vector<uint8_t> buffer(kBufferSize); 593 std::vector<uint8_t> buffer(kBufferSize);
592 FX_FILESIZE pos = m_Syntax.m_HeaderOffset; 594 FX_FILESIZE pos = m_Syntax.m_HeaderOffset;
593 FX_FILESIZE start_pos = 0; 595 FX_FILESIZE start_pos = 0;
594 FX_FILESIZE start_pos1 = 0; 596 FX_FILESIZE start_pos1 = 0;
595 FX_FILESIZE last_obj = -1; 597 FX_FILESIZE last_obj = -1;
596 FX_FILESIZE last_xref = -1; 598 FX_FILESIZE last_xref = -1;
597 FX_FILESIZE last_trailer = -1; 599 FX_FILESIZE last_trailer = -1;
598 while (pos < m_Syntax.m_FileLen) { 600 while (pos < m_Syntax.m_FileLen) {
599 const FX_FILESIZE saved_pos = pos; 601 const FX_FILESIZE saved_pos = pos;
600 bool bOverFlow = false; 602 bool bOverFlow = false;
601 FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize); 603 FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize);
602 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size)) 604 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size))
603 break; 605 break;
604 606
605 for (FX_DWORD i = 0; i < size; i++) { 607 for (FX_DWORD i = 0; i < size; i++) {
606 uint8_t byte = buffer[i]; 608 uint8_t byte = buffer[i];
607 switch (status) { 609 switch (state) {
608 case 0: 610 case ParserState::kDefault:
609 if (PDFCharIsWhitespace(byte)) 611 if (PDFCharIsWhitespace(byte))
610 status = 1; 612 state = ParserState::kWhitespace;
611 613
612 if (std::isdigit(byte)) { 614 if (std::isdigit(byte)) {
613 --i; 615 --i;
614 status = 1; 616 state = ParserState::kWhitespace;
615 } 617 }
616 618
617 if (byte == '%') { 619 if (byte == '%') {
618 inside_index = 0; 620 inside_index = 0;
619 status = 9; 621 state = ParserState::kComment;
620 } 622 }
621 623
622 if (byte == '(') { 624 if (byte == '(') {
623 status = 10; 625 state = ParserState::kString;
624 depth = 1; 626 depth = 1;
625 } 627 }
626 628
627 if (byte == '<') { 629 if (byte == '<') {
628 inside_index = 1; 630 inside_index = 1;
629 status = 11; 631 state = ParserState::kHexString;
630 } 632 }
631 633
632 if (byte == '\\') 634 if (byte == '\\')
633 status = 13; 635 state = ParserState::kEscapedString;
634 636
635 if (byte == 't') { 637 if (byte == 't') {
636 status = 7; 638 state = ParserState::kTrailer;
637 inside_index = 1; 639 inside_index = 1;
638 } 640 }
639 break; 641 break;
640 case 1: 642
643 case ParserState::kWhitespace:
641 if (PDFCharIsWhitespace(byte)) { 644 if (PDFCharIsWhitespace(byte)) {
642 break; 645 break;
643 } else if (std::isdigit(byte)) { 646 } else if (std::isdigit(byte)) {
644 start_pos = pos + i; 647 start_pos = pos + i;
645 status = 2; 648 state = ParserState::kObjNum;
646 objnum = FXSYS_toDecimalDigit(byte); 649 objnum = FXSYS_toDecimalDigit(byte);
650
647 } else if (byte == 't') { 651 } else if (byte == 't') {
648 status = 7; 652 state = ParserState::kTrailer;
649 inside_index = 1; 653 inside_index = 1;
654
650 } else if (byte == 'x') { 655 } else if (byte == 'x') {
651 status = 8; 656 state = ParserState::kXref;
652 inside_index = 1; 657 inside_index = 1;
658
653 } else { 659 } else {
654 --i; 660 --i;
655 status = 0; 661 state = ParserState::kDefault;
656 } 662 }
657 break; 663 break;
658 case 2: 664
665 case ParserState::kObjNum:
659 if (std::isdigit(byte)) { 666 if (std::isdigit(byte)) {
660 objnum = objnum * 10 + FXSYS_toDecimalDigit(byte); 667 objnum = objnum * 10 + FXSYS_toDecimalDigit(byte);
661 break; 668 break;
662 } else if (PDFCharIsWhitespace(byte)) { 669 } else if (PDFCharIsWhitespace(byte)) {
663 status = 3; 670 state = ParserState::kPostObjNum;
664 } else { 671 } else {
665 --i; 672 --i;
666 status = 14; 673 state = ParserState::kEndObj;
667 inside_index = 0; 674 inside_index = 0;
668 } 675 }
669 break; 676 break;
670 case 3: 677
678 case ParserState::kPostObjNum:
671 if (std::isdigit(byte)) { 679 if (std::isdigit(byte)) {
672 start_pos1 = pos + i; 680 start_pos1 = pos + i;
673 status = 4; 681 state = ParserState::kGenNum;
674 gennum = FXSYS_toDecimalDigit(byte); 682 gennum = FXSYS_toDecimalDigit(byte);
675 } else if (PDFCharIsWhitespace(byte)) { 683 } else if (PDFCharIsWhitespace(byte)) {
676 break; 684 break;
677 } else if (byte == 't') { 685 } else if (byte == 't') {
678 status = 7; 686 state = ParserState::kTrailer;
679 inside_index = 1; 687 inside_index = 1;
680 } else { 688 } else {
681 --i; 689 --i;
682 status = 0; 690 state = ParserState::kDefault;
683 } 691 }
684 break; 692 break;
685 case 4: 693
694 case ParserState::kGenNum:
686 if (std::isdigit(byte)) { 695 if (std::isdigit(byte)) {
687 gennum = gennum * 10 + FXSYS_toDecimalDigit(byte); 696 gennum = gennum * 10 + FXSYS_toDecimalDigit(byte);
688 break; 697 break;
689 } else if (PDFCharIsWhitespace(byte)) { 698 } else if (PDFCharIsWhitespace(byte)) {
690 status = 5; 699 state = ParserState::kPostGenNum;
691 } else { 700 } else {
692 --i; 701 --i;
693 status = 0; 702 state = ParserState::kDefault;
694 } 703 }
695 break; 704 break;
696 case 5: 705
706 case ParserState::kPostGenNum:
697 if (byte == 'o') { 707 if (byte == 'o') {
698 status = 6; 708 state = ParserState::kBeginObj;
699 inside_index = 1; 709 inside_index = 1;
700 } else if (PDFCharIsWhitespace(byte)) { 710 } else if (PDFCharIsWhitespace(byte)) {
701 break; 711 break;
702 } else if (std::isdigit(byte)) { 712 } else if (std::isdigit(byte)) {
703 objnum = gennum; 713 objnum = gennum;
704 gennum = FXSYS_toDecimalDigit(byte); 714 gennum = FXSYS_toDecimalDigit(byte);
705 start_pos = start_pos1; 715 start_pos = start_pos1;
706 start_pos1 = pos + i; 716 start_pos1 = pos + i;
707 status = 4; 717 state = ParserState::kGenNum;
708 } else if (byte == 't') { 718 } else if (byte == 't') {
709 status = 7; 719 state = ParserState::kTrailer;
710 inside_index = 1; 720 inside_index = 1;
711 } else { 721 } else {
712 --i; 722 --i;
713 status = 0; 723 state = ParserState::kDefault;
714 } 724 }
715 break; 725 break;
716 case 6: 726
727 case ParserState::kBeginObj:
717 switch (inside_index) { 728 switch (inside_index) {
718 case 1: 729 case 1:
719 if (byte != 'b') { 730 if (byte != 'b') {
720 --i; 731 --i;
721 status = 0; 732 state = ParserState::kDefault;
722 } else { 733 } else {
723 inside_index++; 734 inside_index++;
724 } 735 }
725 break; 736 break;
726 case 2: 737 case 2:
727 if (byte != 'j') { 738 if (byte != 'j') {
728 --i; 739 --i;
729 status = 0; 740 state = ParserState::kDefault;
730 } else { 741 } else {
731 inside_index++; 742 inside_index++;
732 } 743 }
733 break; 744 break;
734 case 3: 745 case 3:
735 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 746 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
736 if (objnum > 0x1000000) { 747 if (objnum > 0x1000000) {
737 status = 0; 748 state = ParserState::kDefault;
738 break; 749 break;
739 } 750 }
740 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; 751 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset;
741 m_SortedOffset.insert(obj_pos); 752 m_SortedOffset.insert(obj_pos);
742 last_obj = start_pos; 753 last_obj = start_pos;
743 FX_FILESIZE obj_end = 0; 754 FX_FILESIZE obj_end = 0;
744 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( 755 CPDF_Object* pObject = ParseIndirectObjectAtByStrict(
745 m_pDocument, obj_pos, objnum, &obj_end); 756 m_pDocument, obj_pos, objnum, &obj_end);
746 if (CPDF_Stream* pStream = ToStream(pObject)) { 757 if (CPDF_Stream* pStream = ToStream(pObject)) {
747 if (CPDF_Dictionary* pDict = pStream->GetDict()) { 758 if (CPDF_Dictionary* pDict = pStream->GetDict()) {
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
786 } else { 797 } else {
787 m_ObjectInfo[objnum].pos = obj_pos; 798 m_ObjectInfo[objnum].pos = obj_pos;
788 m_ObjectInfo[objnum].type = 1; 799 m_ObjectInfo[objnum].type = 1;
789 m_ObjectInfo[objnum].gennum = gennum; 800 m_ObjectInfo[objnum].gennum = gennum;
790 } 801 }
791 if (pObject) { 802 if (pObject) {
792 pObject->Release(); 803 pObject->Release();
793 } 804 }
794 } 805 }
795 --i; 806 --i;
796 status = 0; 807 state = ParserState::kDefault;
797 break; 808 break;
798 } 809 }
799 break; 810 break;
800 case 7: 811
812 case ParserState::kTrailer:
801 if (inside_index == 7) { 813 if (inside_index == 7) {
802 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 814 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
803 last_trailer = pos + i - 7; 815 last_trailer = pos + i - 7;
804 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); 816 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);
805 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true); 817 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true);
806 if (pObj) { 818 if (pObj) {
807 if (!pObj->IsDictionary() && !pObj->AsStream()) { 819 if (!pObj->IsDictionary() && !pObj->AsStream()) {
808 pObj->Release(); 820 pObj->Release();
809 } else { 821 } else {
810 CPDF_Stream* pStream = pObj->AsStream(); 822 CPDF_Stream* pStream = pObj->AsStream();
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
853 } 865 }
854 m_Syntax.RestorePos(dwSavePos); 866 m_Syntax.RestorePos(dwSavePos);
855 } 867 }
856 } else { 868 } else {
857 pObj->Release(); 869 pObj->Release();
858 } 870 }
859 } 871 }
860 } 872 }
861 } 873 }
862 --i; 874 --i;
863 status = 0; 875 state = ParserState::kDefault;
864 } else if (byte == "trailer"[inside_index]) { 876 } else if (byte == "trailer"[inside_index]) {
865 inside_index++; 877 inside_index++;
866 } else { 878 } else {
867 --i; 879 --i;
868 status = 0; 880 state = ParserState::kDefault;
869 } 881 }
870 break; 882 break;
871 case 8: 883
884 case ParserState::kXref:
872 if (inside_index == 4) { 885 if (inside_index == 4) {
873 last_xref = pos + i - 4; 886 last_xref = pos + i - 4;
874 status = 1; 887 state = ParserState::kWhitespace;
875 } else if (byte == "xref"[inside_index]) { 888 } else if (byte == "xref"[inside_index]) {
876 inside_index++; 889 inside_index++;
877 } else { 890 } else {
878 --i; 891 --i;
879 status = 0; 892 state = ParserState::kDefault;
880 } 893 }
881 break; 894 break;
882 case 9: 895
896 case ParserState::kComment:
883 if (byte == '\r' || byte == '\n') { 897 if (byte == '\r' || byte == '\n') {
884 status = 0; 898 state = ParserState::kDefault;
885 } 899 }
886 break; 900 break;
887 case 10: 901
902 case ParserState::kString:
888 if (byte == ')') { 903 if (byte == ')') {
889 if (depth > 0) { 904 if (depth > 0) {
890 depth--; 905 depth--;
891 } 906 }
892 } else if (byte == '(') { 907 } else if (byte == '(') {
893 depth++; 908 depth++;
894 } 909 }
895 if (!depth) { 910 if (!depth) {
896 status = 0; 911 state = ParserState::kDefault;
897 } 912 }
898 break; 913 break;
899 case 11: 914
915 case ParserState::kHexString:
900 if (byte == '>' || (byte == '<' && inside_index == 1)) 916 if (byte == '>' || (byte == '<' && inside_index == 1))
901 status = 0; 917 state = ParserState::kDefault;
902 inside_index = 0; 918 inside_index = 0;
903 break; 919 break;
904 case 13: 920
921 case ParserState::kEscapedString:
905 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { 922 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
906 --i; 923 --i;
907 status = 0; 924 state = ParserState::kDefault;
908 } 925 }
909 break; 926 break;
910 case 14: 927
928 case ParserState::kEndObj:
911 if (PDFCharIsWhitespace(byte)) { 929 if (PDFCharIsWhitespace(byte)) {
912 status = 0; 930 state = ParserState::kDefault;
913 } else if (byte == '%' || byte == '(' || byte == '<' || 931 } else if (byte == '%' || byte == '(' || byte == '<' ||
914 byte == '\\') { 932 byte == '\\') {
915 status = 0; 933 state = ParserState::kDefault;
916 --i; 934 --i;
917 } else if (inside_index == 6) { 935 } else if (inside_index == 6) {
918 status = 0; 936 state = ParserState::kDefault;
919 --i; 937 --i;
920 } else if (byte == "endobj"[inside_index]) { 938 } else if (byte == "endobj"[inside_index]) {
921 inside_index++; 939 inside_index++;
922 } 940 }
923 break; 941 break;
924 } 942 }
925 if (bOverFlow) { 943 if (bOverFlow) {
926 size = 0; 944 size = 0;
927 break; 945 break;
928 } 946 }
(...skipping 3793 matching lines...) Expand 10 before | Expand all | Expand 10 after
4722 if (!m_pLinearizedDict) 4740 if (!m_pLinearizedDict)
4723 return -1; 4741 return -1;
4724 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); 4742 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H");
4725 if (!pRange) 4743 if (!pRange)
4726 return -1; 4744 return -1;
4727 CPDF_Object* pStreamLen = pRange->GetElementValue(1); 4745 CPDF_Object* pStreamLen = pRange->GetElementValue(1);
4728 if (!pStreamLen) 4746 if (!pStreamLen)
4729 return -1; 4747 return -1;
4730 return pStreamLen->GetInteger(); 4748 return pStreamLen->GetInteger();
4731 } 4749 }
OLD | NEW
« no previous file with comments | « core/include/fpdfapi/fpdf_parser.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698