| OLD | NEW |
| (Empty) |
| 1 // Copyright 2016 PDFium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
| 6 | |
| 7 #ifndef CORE_FPDFAPI_FPDF_PARSER_CPDF_PARSER_H_ | |
| 8 #define CORE_FPDFAPI_FPDF_PARSER_CPDF_PARSER_H_ | |
| 9 | |
| 10 #include <map> | |
| 11 #include <memory> | |
| 12 #include <set> | |
| 13 | |
| 14 #include "core/fxcrt/fx_basic.h" | |
| 15 | |
| 16 class CPDF_Array; | |
| 17 class CPDF_CryptoHandler; | |
| 18 class CPDF_Dictionary; | |
| 19 class CPDF_Document; | |
| 20 class CPDF_IndirectObjectHolder; | |
| 21 class CPDF_Object; | |
| 22 class CPDF_SecurityHandler; | |
| 23 class CPDF_StreamAcc; | |
| 24 class CPDF_SyntaxParser; | |
| 25 class IFX_FileRead; | |
| 26 | |
| 27 class CPDF_Parser { | |
| 28 public: | |
| 29 enum Error { | |
| 30 SUCCESS = 0, | |
| 31 FILE_ERROR, | |
| 32 FORMAT_ERROR, | |
| 33 PASSWORD_ERROR, | |
| 34 HANDLER_ERROR | |
| 35 }; | |
| 36 | |
| 37 // A limit on the maximum object number in the xref table. Theoretical limits | |
| 38 // are higher, but this may be large enough in practice. | |
| 39 static const uint32_t kMaxObjectNumber = 1048576; | |
| 40 | |
| 41 CPDF_Parser(); | |
| 42 ~CPDF_Parser(); | |
| 43 | |
| 44 Error StartParse(IFX_FileRead* pFile, CPDF_Document* pDocument); | |
| 45 Error StartLinearizedParse(IFX_FileRead* pFile, CPDF_Document* pDocument); | |
| 46 | |
| 47 void SetPassword(const FX_CHAR* password) { m_Password = password; } | |
| 48 CFX_ByteString GetPassword() { return m_Password; } | |
| 49 CPDF_Dictionary* GetTrailer() const { return m_pTrailer; } | |
| 50 FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } | |
| 51 | |
| 52 uint32_t GetPermissions() const; | |
| 53 uint32_t GetRootObjNum(); | |
| 54 uint32_t GetInfoObjNum(); | |
| 55 CPDF_Array* GetIDArray(); | |
| 56 | |
| 57 CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; } | |
| 58 | |
| 59 CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList, | |
| 60 uint32_t objnum); | |
| 61 | |
| 62 uint32_t GetLastObjNum() const; | |
| 63 bool IsValidObjectNumber(uint32_t objnum) const; | |
| 64 FX_FILESIZE GetObjectPositionOrZero(uint32_t objnum) const; | |
| 65 uint8_t GetObjectType(uint32_t objnum) const; | |
| 66 uint16_t GetObjectGenNum(uint32_t objnum) const; | |
| 67 bool IsVersionUpdated() const { return m_bVersionUpdated; } | |
| 68 bool IsObjectFreeOrNull(uint32_t objnum) const; | |
| 69 CPDF_CryptoHandler* GetCryptoHandler(); | |
| 70 IFX_FileRead* GetFileAccess() const; | |
| 71 | |
| 72 FX_FILESIZE GetObjectOffset(uint32_t objnum) const; | |
| 73 FX_FILESIZE GetObjectSize(uint32_t objnum) const; | |
| 74 | |
| 75 void GetIndirectBinary(uint32_t objnum, uint8_t*& pBuffer, uint32_t& size); | |
| 76 int GetFileVersion() const { return m_FileVersion; } | |
| 77 FX_BOOL IsXRefStream() const { return m_bXRefStream; } | |
| 78 | |
| 79 CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList, | |
| 80 FX_FILESIZE pos, | |
| 81 uint32_t objnum); | |
| 82 | |
| 83 CPDF_Object* ParseIndirectObjectAtByStrict( | |
| 84 CPDF_IndirectObjectHolder* pObjList, | |
| 85 FX_FILESIZE pos, | |
| 86 uint32_t objnum, | |
| 87 FX_FILESIZE* pResultPos); | |
| 88 | |
| 89 uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; } | |
| 90 | |
| 91 protected: | |
| 92 struct ObjectInfo { | |
| 93 ObjectInfo() : pos(0), type(0), gennum(0) {} | |
| 94 | |
| 95 FX_FILESIZE pos; | |
| 96 uint8_t type; | |
| 97 uint16_t gennum; | |
| 98 }; | |
| 99 | |
| 100 std::unique_ptr<CPDF_SyntaxParser> m_pSyntax; | |
| 101 std::map<uint32_t, ObjectInfo> m_ObjectInfo; | |
| 102 | |
| 103 bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip); | |
| 104 FX_BOOL RebuildCrossRef(); | |
| 105 | |
| 106 private: | |
| 107 friend class CPDF_DataAvail; | |
| 108 | |
| 109 enum class ParserState { | |
| 110 kDefault, | |
| 111 kComment, | |
| 112 kWhitespace, | |
| 113 kString, | |
| 114 kHexString, | |
| 115 kEscapedString, | |
| 116 kXref, | |
| 117 kObjNum, | |
| 118 kPostObjNum, | |
| 119 kGenNum, | |
| 120 kPostGenNum, | |
| 121 kTrailer, | |
| 122 kBeginObj, | |
| 123 kEndObj | |
| 124 }; | |
| 125 | |
| 126 CPDF_Object* ParseDirect(CPDF_Object* pObj); | |
| 127 FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos); | |
| 128 FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos); | |
| 129 FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef); | |
| 130 CPDF_Dictionary* LoadTrailerV4(); | |
| 131 Error SetEncryptHandler(); | |
| 132 void ReleaseEncryptHandler(); | |
| 133 FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); | |
| 134 FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); | |
| 135 FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); | |
| 136 Error LoadLinearizedMainXRefTable(); | |
| 137 CPDF_StreamAcc* GetObjectStream(uint32_t number); | |
| 138 FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, uint32_t offset); | |
| 139 void SetEncryptDictionary(CPDF_Dictionary* pDict); | |
| 140 void ShrinkObjectMap(uint32_t size); | |
| 141 // A simple check whether the cross reference table matches with | |
| 142 // the objects. | |
| 143 bool VerifyCrossRefV4(); | |
| 144 | |
| 145 CPDF_Document* m_pDocument; // not owned | |
| 146 bool m_bHasParsed; | |
| 147 bool m_bOwnFileRead; | |
| 148 int m_FileVersion; | |
| 149 CPDF_Dictionary* m_pTrailer; | |
| 150 CPDF_Dictionary* m_pEncryptDict; | |
| 151 FX_FILESIZE m_LastXRefOffset; | |
| 152 FX_BOOL m_bXRefStream; | |
| 153 std::unique_ptr<CPDF_SecurityHandler> m_pSecurityHandler; | |
| 154 CFX_ByteString m_Password; | |
| 155 std::set<FX_FILESIZE> m_SortedOffset; | |
| 156 CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers; | |
| 157 bool m_bVersionUpdated; | |
| 158 CPDF_Object* m_pLinearized; | |
| 159 uint32_t m_dwFirstPageNo; | |
| 160 uint32_t m_dwXrefStartObjNum; | |
| 161 | |
| 162 // A map of object numbers to indirect streams. Map owns the streams. | |
| 163 std::map<uint32_t, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap; | |
| 164 | |
| 165 // Mapping of object numbers to offsets. The offsets are relative to the first | |
| 166 // object in the stream. | |
| 167 using StreamObjectCache = std::map<uint32_t, uint32_t>; | |
| 168 | |
| 169 // Mapping of streams to their object caches. This is valid as long as the | |
| 170 // streams in |m_ObjectStreamMap| are valid. | |
| 171 std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache; | |
| 172 | |
| 173 // All indirect object numbers that are being parsed. | |
| 174 std::set<uint32_t> m_ParsingObjNums; | |
| 175 }; | |
| 176 | |
| 177 #endif // CORE_FPDFAPI_FPDF_PARSER_CPDF_PARSER_H_ | |
| OLD | NEW |