| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2016 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #ifndef CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ | 7 #ifndef CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ |
| 8 #define CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ | 8 #define CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ |
| 9 | 9 |
| 10 #include <map> | 10 #include <map> |
| 11 #include <memory> | 11 #include <memory> |
| (...skipping 28 matching lines...) |
| 40 #define FPDFPERM_EXTRACT_ACCESS 0x0200 | 40 #define FPDFPERM_EXTRACT_ACCESS 0x0200 |
| 41 #define FPDFPERM_ASSEMBLE 0x0400 | 41 #define FPDFPERM_ASSEMBLE 0x0400 |
| 42 #define FPDFPERM_PRINT_HIGH 0x0800 | 42 #define FPDFPERM_PRINT_HIGH 0x0800 |
| 43 #define FPDF_PAGE_MAX_NUM 0xFFFFF | 43 #define FPDF_PAGE_MAX_NUM 0xFFFFF |
| 44 | 44 |
| 45 // TODO(thestig) Using unique_ptr with ReleaseDeleter is still not ideal. | 45 // TODO(thestig) Using unique_ptr with ReleaseDeleter is still not ideal. |
| 46 // Come up with, or wait for, something better. | 46 // Come up with, or wait for, something better. |
| 47 using ScopedFileStream = | 47 using ScopedFileStream = |
| 48 std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>; | 48 std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>; |
| 49 | 49 |
| 50 // Use the accessors below instead of directly accessing PDF_CharType. | |
| 51 extern const char PDF_CharType[256]; | |
| 52 | |
| 53 inline bool PDFCharIsWhitespace(uint8_t c) { | |
| 54 return PDF_CharType[c] == 'W'; | |
| 55 } | |
| 56 inline bool PDFCharIsNumeric(uint8_t c) { | |
| 57 return PDF_CharType[c] == 'N'; | |
| 58 } | |
| 59 inline bool PDFCharIsDelimiter(uint8_t c) { | |
| 60 return PDF_CharType[c] == 'D'; | |
| 61 } | |
| 62 inline bool PDFCharIsOther(uint8_t c) { | |
| 63 return PDF_CharType[c] == 'R'; | |
| 64 } | |
| 65 | |
| 66 inline bool PDFCharIsLineEnding(uint8_t c) { | |
| 67 return c == '\r' || c == '\n'; | |
| 68 } | |
| 69 | |
| 70 template <typename T> | 50 template <typename T> |
| 71 class ScopedSetInsertion { | 51 class ScopedSetInsertion { |
| 72 public: | 52 public: |
| 73 ScopedSetInsertion(std::set<T>* org_set, T elem) | 53 ScopedSetInsertion(std::set<T>* org_set, T elem) |
| 74 : m_Set(org_set), m_Entry(elem) { | 54 : m_Set(org_set), m_Entry(elem) { |
| 75 m_Set->insert(m_Entry); | 55 m_Set->insert(m_Entry); |
| 76 } | 56 } |
| 77 ~ScopedSetInsertion() { m_Set->erase(m_Entry); } | 57 ~ScopedSetInsertion() { m_Set->erase(m_Entry); } |
| 78 | 58 |
| 79 private: | 59 private: |
| 80 std::set<T>* const m_Set; | 60 std::set<T>* const m_Set; |
| 81 const T m_Entry; | 61 const T m_Entry; |
| 82 }; | 62 }; |
| 83 | 63 |
| 84 // Indexed by 8-bit char code; contains Unicode code points. | 64 // Indexed by 8-bit char code; contains Unicode code points. |
| 85 extern const FX_WORD PDFDocEncoding[256]; | 65 extern const FX_WORD PDFDocEncoding[256]; |
| 86 | 66 |
| 87 | |
| 88 class CPDF_SimpleParser { | |
| 89 public: | |
| 90 CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize); | |
| 91 CPDF_SimpleParser(const CFX_ByteStringC& str); | |
| 92 | |
| 93 CFX_ByteStringC GetWord(); | |
| 94 | |
| 95 // Find the token and its |nParams| parameters from the start of data, | |
| 96 // and move the current position to the start of those parameters. | |
| 97 bool FindTagParamFromStart(const CFX_ByteStringC& token, int nParams); | |
| 98 | |
| 99 // For testing only. | |
| 100 FX_DWORD GetCurPos() const { return m_dwCurPos; } | |
| 101 | |
| 102 private: | |
| 103 void ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize); | |
| 104 | |
| 105 const uint8_t* m_pData; | |
| 106 FX_DWORD m_dwSize; | |
| 107 FX_DWORD m_dwCurPos; | |
| 108 }; | |
| 109 | |
| 110 class CPDF_Parser { | |
| 111 public: | |
| 112 enum Error { | |
| 113 SUCCESS = 0, | |
| 114 FILE_ERROR, | |
| 115 FORMAT_ERROR, | |
| 116 PASSWORD_ERROR, | |
| 117 HANDLER_ERROR | |
| 118 }; | |
| 119 | |
| 120 CPDF_Parser(); | |
| 121 ~CPDF_Parser(); | |
| 122 | |
| 123 Error StartParse(IFX_FileRead* pFile); | |
| 124 FX_DWORD GetPermissions(FX_BOOL bCheckRevision = FALSE); | |
| 125 | |
| 126 void SetPassword(const FX_CHAR* password) { m_Password = password; } | |
| 127 CFX_ByteString GetPassword() { return m_Password; } | |
| 128 CPDF_Dictionary* GetTrailer() const { return m_pTrailer; } | |
| 129 FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } | |
| 130 CPDF_Document* GetDocument() const { return m_pDocument; } | |
| 131 | |
| 132 FX_DWORD GetRootObjNum(); | |
| 133 FX_DWORD GetInfoObjNum(); | |
| 134 CPDF_Array* GetIDArray(); | |
| 135 | |
| 136 CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; } | |
| 137 | |
| 138 CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList, | |
| 139 FX_DWORD objnum); | |
| 140 | |
| 141 FX_DWORD GetLastObjNum() const; | |
| 142 bool IsValidObjectNumber(FX_DWORD objnum) const; | |
| 143 FX_FILESIZE GetObjectPositionOrZero(FX_DWORD objnum) const; | |
| 144 uint8_t GetObjectType(FX_DWORD objnum) const; | |
| 145 uint16_t GetObjectGenNum(FX_DWORD objnum) const; | |
| 146 bool IsVersionUpdated() const { return m_bVersionUpdated; } | |
| 147 bool IsObjectFreeOrNull(FX_DWORD objnum) const; | |
| 148 FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm); | |
| 149 CPDF_CryptoHandler* GetCryptoHandler(); | |
| 150 IFX_FileRead* GetFileAccess() const; | |
| 151 | |
| 152 FX_FILESIZE GetObjectOffset(FX_DWORD objnum) const; | |
| 153 FX_FILESIZE GetObjectSize(FX_DWORD objnum) const; | |
| 154 | |
| 155 void GetIndirectBinary(FX_DWORD objnum, uint8_t*& pBuffer, FX_DWORD& size); | |
| 156 int GetFileVersion() const { return m_FileVersion; } | |
| 157 FX_BOOL IsXRefStream() const { return m_bXRefStream; } | |
| 158 | |
| 159 CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList, | |
| 160 FX_FILESIZE pos, | |
| 161 FX_DWORD objnum); | |
| 162 | |
| 163 CPDF_Object* ParseIndirectObjectAtByStrict( | |
| 164 CPDF_IndirectObjectHolder* pObjList, | |
| 165 FX_FILESIZE pos, | |
| 166 FX_DWORD objnum, | |
| 167 FX_FILESIZE* pResultPos); | |
| 168 | |
| 169 Error StartAsyncParse(IFX_FileRead* pFile); | |
| 170 | |
| 171 FX_DWORD GetFirstPageNo() const { return m_dwFirstPageNo; } | |
| 172 | |
| 173 protected: | |
| 174 struct ObjectInfo { | |
| 175 ObjectInfo() : pos(0), type(0), gennum(0) {} | |
| 176 | |
| 177 FX_FILESIZE pos; | |
| 178 uint8_t type; | |
| 179 uint16_t gennum; | |
| 180 }; | |
| 181 | |
| 182 void CloseParser(); | |
| 183 CPDF_Object* ParseDirect(CPDF_Object* pObj); | |
| 184 FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos); | |
| 185 FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos); | |
| 186 bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip); | |
| 187 FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef); | |
| 188 CPDF_Dictionary* LoadTrailerV4(); | |
| 189 FX_BOOL RebuildCrossRef(); | |
| 190 Error SetEncryptHandler(); | |
| 191 void ReleaseEncryptHandler(); | |
| 192 FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount); | |
| 193 FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount); | |
| 194 FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); | |
| 195 Error LoadLinearizedMainXRefTable(); | |
| 196 CPDF_StreamAcc* GetObjectStream(FX_DWORD number); | |
| 197 FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset); | |
| 198 void SetEncryptDictionary(CPDF_Dictionary* pDict); | |
| 199 void ShrinkObjectMap(FX_DWORD size); | |
| 200 | |
| 201 CPDF_Document* m_pDocument; | |
| 202 std::unique_ptr<CPDF_SyntaxParser> m_pSyntax; | |
| 203 bool m_bOwnFileRead; | |
| 204 int m_FileVersion; | |
| 205 CPDF_Dictionary* m_pTrailer; | |
| 206 CPDF_Dictionary* m_pEncryptDict; | |
| 207 FX_FILESIZE m_LastXRefOffset; | |
| 208 FX_BOOL m_bXRefStream; | |
| 209 std::unique_ptr<IPDF_SecurityHandler> m_pSecurityHandler; | |
| 210 CFX_ByteString m_bsRecipient; | |
| 211 CFX_ByteString m_FilePath; | |
| 212 CFX_ByteString m_Password; | |
| 213 std::map<FX_DWORD, ObjectInfo> m_ObjectInfo; | |
| 214 std::set<FX_FILESIZE> m_SortedOffset; | |
| 215 CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers; | |
| 216 FX_BOOL m_bVersionUpdated; | |
| 217 CPDF_Object* m_pLinearized; | |
| 218 FX_DWORD m_dwFirstPageNo; | |
| 219 FX_DWORD m_dwXrefStartObjNum; | |
| 220 | |
| 221 // A map of object numbers to indirect streams. Map owns the streams. | |
| 222 std::map<FX_DWORD, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap; | |
| 223 | |
| 224 // Mapping of object numbers to offsets. The offsets are relative to the first | |
| 225 // object in the stream. | |
| 226 using StreamObjectCache = std::map<FX_DWORD, FX_DWORD>; | |
| 227 | |
| 228 // Mapping of streams to their object caches. This is valid as long as the | |
| 229 // streams in |m_ObjectStreamMap| are valid. | |
| 230 std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache; | |
| 231 | |
| 232 // All indirect object numbers that are being parsed. | |
| 233 std::set<FX_DWORD> m_ParsingObjNums; | |
| 234 | |
| 235 friend class CPDF_DataAvail; | |
| 236 | |
| 237 private: | |
| 238 enum class ParserState { | |
| 239 kDefault, | |
| 240 kComment, | |
| 241 kWhitespace, | |
| 242 kString, | |
| 243 kHexString, | |
| 244 kEscapedString, | |
| 245 kXref, | |
| 246 kObjNum, | |
| 247 kPostObjNum, | |
| 248 kGenNum, | |
| 249 kPostGenNum, | |
| 250 kTrailer, | |
| 251 kBeginObj, | |
| 252 kEndObj | |
| 253 }; | |
| 254 }; | |
| 255 | |
| 256 #define FXCIPHER_NONE 0 | 67 #define FXCIPHER_NONE 0 |
| 257 #define FXCIPHER_RC4 1 | 68 #define FXCIPHER_RC4 1 |
| 258 #define FXCIPHER_AES 2 | 69 #define FXCIPHER_AES 2 |
| 259 #define FXCIPHER_AES2 3 | 70 #define FXCIPHER_AES2 3 |
| 260 | 71 |
| 261 class IPDF_SecurityHandler { | 72 class IPDF_SecurityHandler { |
| 262 public: | 73 public: |
| 263 virtual ~IPDF_SecurityHandler() {} | 74 virtual ~IPDF_SecurityHandler() {} |
| 264 | 75 |
| 265 virtual FX_BOOL OnInit(CPDF_Parser* pParser, | 76 virtual FX_BOOL OnInit(CPDF_Parser* pParser, |
| (...skipping 380 matching lines...) |
| 646 FX_DWORD src_size, | 457 FX_DWORD src_size, |
| 647 const CPDF_Dictionary* pDict, | 458 const CPDF_Dictionary* pDict, |
| 648 uint8_t*& dest_buf, | 459 uint8_t*& dest_buf, |
| 649 FX_DWORD& dest_size, | 460 FX_DWORD& dest_size, |
| 650 CFX_ByteString& ImageEncoding, | 461 CFX_ByteString& ImageEncoding, |
| 651 CPDF_Dictionary*& pImageParms, | 462 CPDF_Dictionary*& pImageParms, |
| 652 FX_DWORD estimated_size, | 463 FX_DWORD estimated_size, |
| 653 FX_BOOL bImageAcc); | 464 FX_BOOL bImageAcc); |
| 654 | 465 |
| 655 #endif // CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ | 466 #endif // CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ |
| OLD | NEW |