| OLD | NEW | 
|---|---|
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 
| 6 | 6 | 
| 7 #include "core/include/fpdfapi/fpdf_parser.h" | 7 #include "core/include/fpdfapi/fpdf_parser.h" | 
| 8 | 8 | 
| 9 #include <algorithm> | 9 #include <algorithm> | 
| 10 #include <memory> | 10 #include <memory> | 
| (...skipping 564 matching lines...) | (...skipping 564 matching lines...) | 
| 575   return TRUE; | 575   return TRUE; | 
| 576 } | 576 } | 
| 577 | 577 | 
| 578 FX_BOOL CPDF_Parser::RebuildCrossRef() { | 578 FX_BOOL CPDF_Parser::RebuildCrossRef() { | 
| 579   m_ObjectInfo.clear(); | 579   m_ObjectInfo.clear(); | 
| 580   m_SortedOffset.clear(); | 580   m_SortedOffset.clear(); | 
| 581   if (m_pTrailer) { | 581   if (m_pTrailer) { | 
| 582     m_pTrailer->Release(); | 582     m_pTrailer->Release(); | 
| 583     m_pTrailer = NULL; | 583     m_pTrailer = NULL; | 
| 584   } | 584   } | 
| 585   int32_t status = 0; | 585 | 
|  | 586   ParserState state = ParserState::kDefault; | 
|  | 587 | 
| 586   int32_t inside_index = 0; | 588   int32_t inside_index = 0; | 
| 587   FX_DWORD objnum = 0; | 589   FX_DWORD objnum = 0; | 
| 588   FX_DWORD gennum = 0; | 590   FX_DWORD gennum = 0; | 
| 589   int32_t depth = 0; | 591   int32_t depth = 0; | 
| 590   const FX_DWORD kBufferSize = 4096; | 592   const FX_DWORD kBufferSize = 4096; | 
| 591   std::vector<uint8_t> buffer(kBufferSize); | 593   std::vector<uint8_t> buffer(kBufferSize); | 
| 592   FX_FILESIZE pos = m_Syntax.m_HeaderOffset; | 594   FX_FILESIZE pos = m_Syntax.m_HeaderOffset; | 
| 593   FX_FILESIZE start_pos = 0; | 595   FX_FILESIZE start_pos = 0; | 
| 594   FX_FILESIZE start_pos1 = 0; | 596   FX_FILESIZE start_pos1 = 0; | 
| 595   FX_FILESIZE last_obj = -1; | 597   FX_FILESIZE last_obj = -1; | 
| 596   FX_FILESIZE last_xref = -1; | 598   FX_FILESIZE last_xref = -1; | 
| 597   FX_FILESIZE last_trailer = -1; | 599   FX_FILESIZE last_trailer = -1; | 
| 598   while (pos < m_Syntax.m_FileLen) { | 600   while (pos < m_Syntax.m_FileLen) { | 
| 599     const FX_FILESIZE saved_pos = pos; | 601     const FX_FILESIZE saved_pos = pos; | 
| 600     bool bOverFlow = false; | 602     bool bOverFlow = false; | 
| 601     FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize); | 603     FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize); | 
| 602     if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size)) | 604     if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size)) | 
| 603       break; | 605       break; | 
| 604 | 606 | 
| 605     for (FX_DWORD i = 0; i < size; i++) { | 607     for (FX_DWORD i = 0; i < size; i++) { | 
| 606       uint8_t byte = buffer[i]; | 608       uint8_t byte = buffer[i]; | 
| 607       switch (status) { | 609       switch (state) { | 
| 608         case 0: | 610         case ParserState::kDefault: | 
| 609           if (PDFCharIsWhitespace(byte)) | 611           if (PDFCharIsWhitespace(byte)) | 
| 610             status = 1; | 612             state = ParserState::kWhitespace; | 
| 611 | 613 | 
| 612           if (std::isdigit(byte)) { | 614           if (std::isdigit(byte)) { | 
| 613             --i; | 615             --i; | 
| 614             status = 1; | 616             state = ParserState::kWhitespace; | 
| 615           } | 617           } | 
| 616 | 618 | 
| 617           if (byte == '%') { | 619           if (byte == '%') { | 
| 618             inside_index = 0; | 620             inside_index = 0; | 
| 619             status = 9; | 621             state = ParserState::kComment; | 
| 620           } | 622           } | 
| 621 | 623 | 
| 622           if (byte == '(') { | 624           if (byte == '(') { | 
| 623             status = 10; | 625             state = ParserState::kString; | 
| 624             depth = 1; | 626             depth = 1; | 
| 625           } | 627           } | 
| 626 | 628 | 
| 627           if (byte == '<') { | 629           if (byte == '<') { | 
| 628             inside_index = 1; | 630             inside_index = 1; | 
| 629             status = 11; | 631             state = ParserState::kHexString; | 
| 630           } | 632           } | 
| 631 | 633 | 
| 632           if (byte == '\\') | 634           if (byte == '\\') | 
| 633             status = 13; | 635             state = ParserState::kEscapedString; | 
| 634 | 636 | 
| 635           if (byte == 't') { | 637           if (byte == 't') { | 
| 636             status = 7; | 638             state = ParserState::kTrailer; | 
| 637             inside_index = 1; | 639             inside_index = 1; | 
| 638           } | 640           } | 
| 639           break; | 641           break; | 
| 640         case 1: | 642 | 
|  | 643         case ParserState::kWhitespace: | 
| 641           if (PDFCharIsWhitespace(byte)) { | 644           if (PDFCharIsWhitespace(byte)) { | 
| 642             break; | 645             break; | 
| 643           } else if (std::isdigit(byte)) { | 646           } else if (std::isdigit(byte)) { | 
| 644             start_pos = pos + i; | 647             start_pos = pos + i; | 
| 645             status = 2; | 648             state = ParserState::kObjNum; | 
| 646             objnum = FXSYS_toDecimalDigit(byte); | 649             objnum = FXSYS_toDecimalDigit(byte); | 
|  | 650 | 
| 647           } else if (byte == 't') { | 651           } else if (byte == 't') { | 
| 648             status = 7; | 652             state = ParserState::kTrailer; | 
| 649             inside_index = 1; | 653             inside_index = 1; | 
|  | 654 | 
| 650           } else if (byte == 'x') { | 655           } else if (byte == 'x') { | 
| 651             status = 8; | 656             state = ParserState::kXref; | 
| 652             inside_index = 1; | 657             inside_index = 1; | 
|  | 658 | 
| 653           } else { | 659           } else { | 
| 654             --i; | 660             --i; | 
| 655             status = 0; | 661             state = ParserState::kDefault; | 
| 656           } | 662           } | 
| 657           break; | 663           break; | 
| 658         case 2: | 664 | 
|  | 665         case ParserState::kObjNum: | 
| 659           if (std::isdigit(byte)) { | 666           if (std::isdigit(byte)) { | 
| 660             objnum = objnum * 10 + FXSYS_toDecimalDigit(byte); | 667             objnum = objnum * 10 + FXSYS_toDecimalDigit(byte); | 
| 661             break; | 668             break; | 
| 662           } else if (PDFCharIsWhitespace(byte)) { | 669           } else if (PDFCharIsWhitespace(byte)) { | 
| 663             status = 3; | 670             state = ParserState::kPostObjNum; | 
| 664           } else { | 671           } else { | 
| 665             --i; | 672             --i; | 
| 666             status = 14; | 673             state = ParserState::kEndObj; | 
| 667             inside_index = 0; | 674             inside_index = 0; | 
| 668           } | 675           } | 
| 669           break; | 676           break; | 
| 670         case 3: | 677 | 
|  | 678         case ParserState::kPostObjNum: | 
| 671           if (std::isdigit(byte)) { | 679           if (std::isdigit(byte)) { | 
| 672             start_pos1 = pos + i; | 680             start_pos1 = pos + i; | 
| 673             status = 4; | 681             state = ParserState::kGenNum; | 
| 674             gennum = FXSYS_toDecimalDigit(byte); | 682             gennum = FXSYS_toDecimalDigit(byte); | 
| 675           } else if (PDFCharIsWhitespace(byte)) { | 683           } else if (PDFCharIsWhitespace(byte)) { | 
| 676             break; | 684             break; | 
| 677           } else if (byte == 't') { | 685           } else if (byte == 't') { | 
| 678             status = 7; | 686             state = ParserState::kTrailer; | 
| 679             inside_index = 1; | 687             inside_index = 1; | 
| 680           } else { | 688           } else { | 
| 681             --i; | 689             --i; | 
| 682             status = 0; | 690             state = ParserState::kDefault; | 
| 683           } | 691           } | 
| 684           break; | 692           break; | 
| 685         case 4: | 693 | 
|  | 694         case ParserState::kGenNum: | 
| 686           if (std::isdigit(byte)) { | 695           if (std::isdigit(byte)) { | 
| 687             gennum = gennum * 10 + FXSYS_toDecimalDigit(byte); | 696             gennum = gennum * 10 + FXSYS_toDecimalDigit(byte); | 
| 688             break; | 697             break; | 
| 689           } else if (PDFCharIsWhitespace(byte)) { | 698           } else if (PDFCharIsWhitespace(byte)) { | 
| 690             status = 5; | 699             state = ParserState::kPostGenNum; | 
| 691           } else { | 700           } else { | 
| 692             --i; | 701             --i; | 
| 693             status = 0; | 702             state = ParserState::kDefault; | 
| 694           } | 703           } | 
| 695           break; | 704           break; | 
| 696         case 5: | 705 | 
|  | 706         case ParserState::kPostGenNum: | 
| 697           if (byte == 'o') { | 707           if (byte == 'o') { | 
| 698             status = 6; | 708             state = ParserState::kBeginObj; | 
| 699             inside_index = 1; | 709             inside_index = 1; | 
| 700           } else if (PDFCharIsWhitespace(byte)) { | 710           } else if (PDFCharIsWhitespace(byte)) { | 
| 701             break; | 711             break; | 
| 702           } else if (std::isdigit(byte)) { | 712           } else if (std::isdigit(byte)) { | 
| 703             objnum = gennum; | 713             objnum = gennum; | 
| 704             gennum = FXSYS_toDecimalDigit(byte); | 714             gennum = FXSYS_toDecimalDigit(byte); | 
| 705             start_pos = start_pos1; | 715             start_pos = start_pos1; | 
| 706             start_pos1 = pos + i; | 716             start_pos1 = pos + i; | 
| 707             status = 4; | 717             state = ParserState::kGenNum; | 
| 708           } else if (byte == 't') { | 718           } else if (byte == 't') { | 
| 709             status = 7; | 719             state = ParserState::kTrailer; | 
| 710             inside_index = 1; | 720             inside_index = 1; | 
| 711           } else { | 721           } else { | 
| 712             --i; | 722             --i; | 
| 713             status = 0; | 723             state = ParserState::kDefault; | 
| 714           } | 724           } | 
| 715           break; | 725           break; | 
| 716         case 6: | 726 | 
|  | 727         case ParserState::kBeginObj: | 
| 717           switch (inside_index) { | 728           switch (inside_index) { | 
| 718             case 1: | 729             case 1: | 
| 719               if (byte != 'b') { | 730               if (byte != 'b') { | 
| 720                 --i; | 731                 --i; | 
| 721                 status = 0; | 732                 state = ParserState::kDefault; | 
| 722               } else { | 733               } else { | 
| 723                 inside_index++; | 734                 inside_index++; | 
| 724               } | 735               } | 
| 725               break; | 736               break; | 
| 726             case 2: | 737             case 2: | 
| 727               if (byte != 'j') { | 738               if (byte != 'j') { | 
| 728                 --i; | 739                 --i; | 
| 729                 status = 0; | 740                 state = ParserState::kDefault; | 
| 730               } else { | 741               } else { | 
| 731                 inside_index++; | 742                 inside_index++; | 
| 732               } | 743               } | 
| 733               break; | 744               break; | 
| 734             case 3: | 745             case 3: | 
| 735               if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | 746               if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | 
| 736                 if (objnum > 0x1000000) { | 747                 if (objnum > 0x1000000) { | 
| 737                   status = 0; | 748                   state = ParserState::kDefault; | 
| 738                   break; | 749                   break; | 
| 739                 } | 750                 } | 
| 740                 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; | 751                 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; | 
| 741                 m_SortedOffset.insert(obj_pos); | 752                 m_SortedOffset.insert(obj_pos); | 
| 742                 last_obj = start_pos; | 753                 last_obj = start_pos; | 
| 743                 FX_FILESIZE obj_end = 0; | 754                 FX_FILESIZE obj_end = 0; | 
| 744                 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( | 755                 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( | 
| 745                     m_pDocument, obj_pos, objnum, &obj_end); | 756                     m_pDocument, obj_pos, objnum, &obj_end); | 
| 746                 if (CPDF_Stream* pStream = ToStream(pObject)) { | 757                 if (CPDF_Stream* pStream = ToStream(pObject)) { | 
| 747                   if (CPDF_Dictionary* pDict = pStream->GetDict()) { | 758                   if (CPDF_Dictionary* pDict = pStream->GetDict()) { | 
| (...skipping 38 matching lines...) | (...skipping 38 matching lines...) | 
| 786                 } else { | 797                 } else { | 
| 787                   m_ObjectInfo[objnum].pos = obj_pos; | 798                   m_ObjectInfo[objnum].pos = obj_pos; | 
| 788                   m_ObjectInfo[objnum].type = 1; | 799                   m_ObjectInfo[objnum].type = 1; | 
| 789                   m_ObjectInfo[objnum].gennum = gennum; | 800                   m_ObjectInfo[objnum].gennum = gennum; | 
| 790                 } | 801                 } | 
| 791                 if (pObject) { | 802                 if (pObject) { | 
| 792                   pObject->Release(); | 803                   pObject->Release(); | 
| 793                 } | 804                 } | 
| 794               } | 805               } | 
| 795               --i; | 806               --i; | 
| 796               status = 0; | 807               state = ParserState::kDefault; | 
| 797               break; | 808               break; | 
| 798           } | 809           } | 
| 799           break; | 810           break; | 
| 800         case 7: | 811 | 
|  | 812         case ParserState::kTrailer: | 
| 801           if (inside_index == 7) { | 813           if (inside_index == 7) { | 
| 802             if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | 814             if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | 
| 803               last_trailer = pos + i - 7; | 815               last_trailer = pos + i - 7; | 
| 804               m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); | 816               m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); | 
| 805               CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true); | 817               CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true); | 
| 806               if (pObj) { | 818               if (pObj) { | 
| 807                 if (!pObj->IsDictionary() && !pObj->AsStream()) { | 819                 if (!pObj->IsDictionary() && !pObj->AsStream()) { | 
| 808                   pObj->Release(); | 820                   pObj->Release(); | 
| 809                 } else { | 821                 } else { | 
| 810                   CPDF_Stream* pStream = pObj->AsStream(); | 822                   CPDF_Stream* pStream = pObj->AsStream(); | 
| (...skipping 42 matching lines...) | (...skipping 42 matching lines...) | 
| 853                       } | 865                       } | 
| 854                       m_Syntax.RestorePos(dwSavePos); | 866                       m_Syntax.RestorePos(dwSavePos); | 
| 855                     } | 867                     } | 
| 856                   } else { | 868                   } else { | 
| 857                     pObj->Release(); | 869                     pObj->Release(); | 
| 858                   } | 870                   } | 
| 859                 } | 871                 } | 
| 860               } | 872               } | 
| 861             } | 873             } | 
| 862             --i; | 874             --i; | 
| 863             status = 0; | 875             state = ParserState::kDefault; | 
| 864           } else if (byte == "trailer"[inside_index]) { | 876           } else if (byte == "trailer"[inside_index]) { | 
| 865             inside_index++; | 877             inside_index++; | 
| 866           } else { | 878           } else { | 
| 867             --i; | 879             --i; | 
| 868             status = 0; | 880             state = ParserState::kDefault; | 
| 869           } | 881           } | 
| 870           break; | 882           break; | 
| 871         case 8: | 883 | 
|  | 884         case ParserState::kXref: | 
| 872           if (inside_index == 4) { | 885           if (inside_index == 4) { | 
| 873             last_xref = pos + i - 4; | 886             last_xref = pos + i - 4; | 
| 874             status = 1; | 887             state = ParserState::kWhitespace; | 
| 875           } else if (byte == "xref"[inside_index]) { | 888           } else if (byte == "xref"[inside_index]) { | 
| 876             inside_index++; | 889             inside_index++; | 
| 877           } else { | 890           } else { | 
| 878             --i; | 891             --i; | 
| 879             status = 0; | 892             state = ParserState::kDefault; | 
| 880           } | 893           } | 
| 881           break; | 894           break; | 
| 882         case 9: | 895 | 
|  | 896         case ParserState::kComment: | 
| 883           if (byte == '\r' || byte == '\n') { | 897           if (byte == '\r' || byte == '\n') { | 
| 884             status = 0; | 898             state = ParserState::kDefault; | 
| 885           } | 899           } | 
| 886           break; | 900           break; | 
| 887         case 10: | 901 | 
|  | 902         case ParserState::kString: | 
| 888           if (byte == ')') { | 903           if (byte == ')') { | 
| 889             if (depth > 0) { | 904             if (depth > 0) { | 
| 890               depth--; | 905               depth--; | 
| 891             } | 906             } | 
| 892           } else if (byte == '(') { | 907           } else if (byte == '(') { | 
| 893             depth++; | 908             depth++; | 
| 894           } | 909           } | 
| 895           if (!depth) { | 910           if (!depth) { | 
| 896             status = 0; | 911             state = ParserState::kDefault; | 
| 897           } | 912           } | 
| 898           break; | 913           break; | 
| 899         case 11: | 914 | 
|  | 915         case ParserState::kHexString: | 
| 900           if (byte == '>' || (byte == '<' && inside_index == 1)) | 916           if (byte == '>' || (byte == '<' && inside_index == 1)) | 
| 901             status = 0; | 917             state = ParserState::kDefault; | 
| 902           inside_index = 0; | 918           inside_index = 0; | 
| 903           break; | 919           break; | 
| 904         case 13: | 920 | 
|  | 921         case ParserState::kEscapedString: | 
| 905           if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { | 922           if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { | 
| 906             --i; | 923             --i; | 
| 907             status = 0; | 924             state = ParserState::kDefault; | 
| 908           } | 925           } | 
| 909           break; | 926           break; | 
| 910         case 14: | 927 | 
|  | 928         case ParserState::kEndObj: | 
| 911           if (PDFCharIsWhitespace(byte)) { | 929           if (PDFCharIsWhitespace(byte)) { | 
| 912             status = 0; | 930             state = ParserState::kDefault; | 
| 913           } else if (byte == '%' || byte == '(' || byte == '<' || | 931           } else if (byte == '%' || byte == '(' || byte == '<' || | 
| 914                      byte == '\\') { | 932                      byte == '\\') { | 
| 915             status = 0; | 933             state = ParserState::kDefault; | 
| 916             --i; | 934             --i; | 
| 917           } else if (inside_index == 6) { | 935           } else if (inside_index == 6) { | 
| 918             status = 0; | 936             state = ParserState::kDefault; | 
| 919             --i; | 937             --i; | 
| 920           } else if (byte == "endobj"[inside_index]) { | 938           } else if (byte == "endobj"[inside_index]) { | 
| 921             inside_index++; | 939             inside_index++; | 
| 922           } | 940           } | 
| 923           break; | 941           break; | 
| 924       } | 942       } | 
| 925       if (bOverFlow) { | 943       if (bOverFlow) { | 
| 926         size = 0; | 944         size = 0; | 
| 927         break; | 945         break; | 
| 928       } | 946       } | 
| (...skipping 3793 matching lines...) | (...skipping 3793 matching lines...) | 
| 4722   if (!m_pLinearizedDict) | 4740   if (!m_pLinearizedDict) | 
| 4723     return -1; | 4741     return -1; | 
| 4724   CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); | 4742   CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); | 
| 4725   if (!pRange) | 4743   if (!pRange) | 
| 4726     return -1; | 4744     return -1; | 
| 4727   CPDF_Object* pStreamLen = pRange->GetElementValue(1); | 4745   CPDF_Object* pStreamLen = pRange->GetElementValue(1); | 
| 4728   if (!pStreamLen) | 4746   if (!pStreamLen) | 
| 4729     return -1; | 4747     return -1; | 
| 4730   return pStreamLen->GetInteger(); | 4748   return pStreamLen->GetInteger(); | 
| 4731 } | 4749 } | 
| OLD | NEW | 
|---|---|