OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2016 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #ifndef CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ | 7 #ifndef CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ |
8 #define CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ | 8 #define CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ |
9 | 9 |
10 #include <map> | 10 #include <map> |
11 #include <memory> | 11 #include <memory> |
(...skipping 28 matching lines...) Expand all Loading... |
40 #define FPDFPERM_EXTRACT_ACCESS 0x0200 | 40 #define FPDFPERM_EXTRACT_ACCESS 0x0200 |
41 #define FPDFPERM_ASSEMBLE 0x0400 | 41 #define FPDFPERM_ASSEMBLE 0x0400 |
42 #define FPDFPERM_PRINT_HIGH 0x0800 | 42 #define FPDFPERM_PRINT_HIGH 0x0800 |
43 #define FPDF_PAGE_MAX_NUM 0xFFFFF | 43 #define FPDF_PAGE_MAX_NUM 0xFFFFF |
44 | 44 |
45 // TODO(thestig) Using unique_ptr with ReleaseDeleter is still not ideal. | 45 // TODO(thestig) Using unique_ptr with ReleaseDeleter is still not ideal. |
46 // Come up with, or wait for, something better. | 46 // Come up with, or wait for, something better. |
// ScopedFileStream is a std::unique_ptr to an IFX_FileStream that uses
// ReleaseDeleter<IFX_FileStream> as its deleter instead of plain `delete`.
// NOTE(review): ReleaseDeleter is declared elsewhere; presumably it calls
// Release() on the stream -- confirm against its definition.
47 using ScopedFileStream = | 47 using ScopedFileStream = |
48 std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>; | 48 std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>; |
49 | 49 |
// Character-class lookup table and its accessors. The table has 256 entries
// and is only declared here (extern); its contents are defined elsewhere.
// Each accessor takes a uint8_t, so the index is always within the table
// bounds, and compares the table entry against a single class letter.
50 // Use the accessors below instead of directly accessing PDF_CharType. | |
51 extern const char PDF_CharType[256]; | |
52 | |
// Returns true when the table entry for `c` is 'W'.
53 inline bool PDFCharIsWhitespace(uint8_t c) { | |
54 return PDF_CharType[c] == 'W'; | |
55 } | |
// Returns true when the table entry for `c` is 'N'.
56 inline bool PDFCharIsNumeric(uint8_t c) { | |
57 return PDF_CharType[c] == 'N'; | |
58 } | |
// Returns true when the table entry for `c` is 'D'.
59 inline bool PDFCharIsDelimiter(uint8_t c) { | |
60 return PDF_CharType[c] == 'D'; | |
61 } | |
// Returns true when the table entry for `c` is 'R'.
// NOTE(review): the letter is 'R' although the accessor is named "Other";
// presumably 'R' stands for "regular" characters -- confirm with the table.
62 inline bool PDFCharIsOther(uint8_t c) { | |
63 return PDF_CharType[c] == 'R'; | |
64 } | |
65 | |
// Returns true when `c` is a carriage return ('\r') or a line feed ('\n').
// Unlike the accessors above, this does not consult PDF_CharType; it compares
// the byte directly.
66 inline bool PDFCharIsLineEnding(uint8_t c) { | |
67 return c == '\r' || c == '\n'; | |
68 } | |
69 | |
// Scope guard that keeps `elem` in `*org_set` for the guard's lifetime: the
// constructor inserts a copy of `elem`, and the destructor erases it again.
//
// Things the code does NOT do, which callers must account for:
//  - `org_set` is dereferenced without a null check, so it must be non-null
//    and must outlive this object.
//  - The destructor erases unconditionally. If the element was already in the
//    set before construction, it is still removed when the guard is destroyed.
//  - No copy/move operations are declared or deleted here; the const members
//    make the implicit assignment operators unavailable, but copy construction
//    is still implicitly available.
70 template <typename T> | 50 template <typename T> |
71 class ScopedSetInsertion { | 51 class ScopedSetInsertion { |
72 public: | 52 public: |
// Stores the set pointer and a copy of `elem`, then inserts that copy.
73 ScopedSetInsertion(std::set<T>* org_set, T elem) | 53 ScopedSetInsertion(std::set<T>* org_set, T elem) |
74 : m_Set(org_set), m_Entry(elem) { | 54 : m_Set(org_set), m_Entry(elem) { |
75 m_Set->insert(m_Entry); | 55 m_Set->insert(m_Entry); |
76 } | 56 } |
// Removes the stored element from the set.
77 ~ScopedSetInsertion() { m_Set->erase(m_Entry); } | 57 ~ScopedSetInsertion() { m_Set->erase(m_Entry); } |
78 | 58 |
79 private: | 59 private:
// The pointer itself is const (cannot be reseated); the pointed-to set is
// mutable.
80 std::set<T>* const m_Set; | 60 std::set<T>* const m_Set; |
// Copy of the element inserted by the constructor.
81 const T m_Entry; | 61 const T m_Entry; |
82 }; | 62 }; |
83 | 63 |
84 // Indexed by 8-bit char code, contains unicode code points. | 64 // Indexed by 8-bit char code, contains unicode code points. |
// Declaration only (extern): the 256 FX_WORD entries are defined elsewhere.
85 extern const FX_WORD PDFDocEncoding[256]; | 65 extern const FX_WORD PDFDocEncoding[256]; |
86 | 66 |
87 | |
// Word-oriented reader over a byte buffer. The object stores a pointer to the
// data, its size, and a current position (see the private members); it does
// not declare a destructor, so nothing here indicates that it owns the buffer.
// NOTE(review): member function bodies are not visible in this view; the
// per-method notes below are limited to what the declarations show.
88 class CPDF_SimpleParser { | |
89 public: | |
// Constructs a parser over `dwSize` bytes starting at `pData`.
90 CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize); | |
// Constructs a parser from a byte-string view. This constructor is not marked
// explicit, so a CFX_ByteStringC converts implicitly to a CPDF_SimpleParser.
91 CPDF_SimpleParser(const CFX_ByteStringC& str); | |
92 | |
// Returns a CFX_ByteStringC. Non-const, so it may update parser state.
// NOTE(review): presumably returns the next word and advances the current
// position -- confirm against the implementation.
93 CFX_ByteStringC GetWord(); | |
94 | |
95 // Find the token and its |nParams| parameters from the start of data, | |
96 // and move the current position to the start of those parameters. | |
97 bool FindTagParamFromStart(const CFX_ByteStringC& token, int nParams); | |
98 | |
99 // For testing only. | |
100 FX_DWORD GetCurPos() const { return m_dwCurPos; } | |
101 | |
102 private: | |
// Both parameters are non-const references, i.e. out-parameters.
// NOTE(review): presumably receives the start and length of the parsed word.
103 void ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize); | |
104 | |
// Start of the data being parsed (read-only access).
105 const uint8_t* m_pData; | |
// Size value supplied for the data.
106 FX_DWORD m_dwSize; | |
// Current position; exposed read-only through GetCurPos().
107 FX_DWORD m_dwCurPos; | |
108 }; | |
109 | |
// Declaration of the PDF parser class. Only the declaration and a few inline
// accessors are visible here; every other member function is defined
// elsewhere, so the notes below describe signatures and members rather than
// behavior. The class mixes FX_BOOL and bool in its interface; both appear in
// the original declarations and are left as they are.
110 class CPDF_Parser { | |
111 public: | |
// Result codes. Returned by StartParse(), StartAsyncParse(),
// SetEncryptHandler() and LoadLinearizedMainXRefTable(). SUCCESS is 0 and the
// remaining enumerators take the following consecutive values.
112 enum Error { | |
113 SUCCESS = 0, | |
114 FILE_ERROR, | |
115 FORMAT_ERROR, | |
116 PASSWORD_ERROR, | |
117 HANDLER_ERROR | |
118 }; | |
119 | |
120 CPDF_Parser(); | |
121 ~CPDF_Parser(); | |
122 | |
// Entry point taking the file to read; reports the outcome as an Error.
123 Error StartParse(IFX_FileRead* pFile); | |
// `bCheckRevision` defaults to FALSE.
124 FX_DWORD GetPermissions(FX_BOOL bCheckRevision = FALSE); | |
125 | |
// Inline accessors: each one assigns or returns the named member directly.
// GetPassword() returns the string by value and is not const-qualified.
126 void SetPassword(const FX_CHAR* password) { m_Password = password; } | |
127 CFX_ByteString GetPassword() { return m_Password; } | |
128 CPDF_Dictionary* GetTrailer() const { return m_pTrailer; } | |
129 FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } | |
130 CPDF_Document* GetDocument() const { return m_pDocument; } | |
131 | |
132 FX_DWORD GetRootObjNum(); | |
133 FX_DWORD GetInfoObjNum(); | |
134 CPDF_Array* GetIDArray(); | |
135 | |
136 CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; } | |
137 | |
138 CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList, | |
139 FX_DWORD objnum); | |
140 | |
// Const queries taking an object number.
// NOTE(review): presumably answered from m_ObjectInfo -- confirm.
141 FX_DWORD GetLastObjNum() const; | |
142 bool IsValidObjectNumber(FX_DWORD objnum) const; | |
143 FX_FILESIZE GetObjectPositionOrZero(FX_DWORD objnum) const; | |
144 uint8_t GetObjectType(FX_DWORD objnum) const; | |
145 uint16_t GetObjectGenNum(FX_DWORD objnum) const; | |
146 bool IsVersionUpdated() const { return m_bVersionUpdated; } | |
147 bool IsObjectFreeOrNull(FX_DWORD objnum) const; | |
// `bForm` is a non-const reference, i.e. an out-parameter.
148 FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm); | |
149 CPDF_CryptoHandler* GetCryptoHandler(); | |
150 IFX_FileRead* GetFileAccess() const; | |
151 | |
152 FX_FILESIZE GetObjectOffset(FX_DWORD objnum) const; | |
153 FX_FILESIZE GetObjectSize(FX_DWORD objnum) const; | |
154 | |
// `pBuffer` and `size` are out-parameters (non-const references).
// NOTE(review): ownership of the returned buffer is not stated here --
// confirm who frees it.
155 void GetIndirectBinary(FX_DWORD objnum, uint8_t*& pBuffer, FX_DWORD& size); | |
156 int GetFileVersion() const { return m_FileVersion; } | |
157 FX_BOOL IsXRefStream() const { return m_bXRefStream; } | |
158 | |
159 CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList, | |
160 FX_FILESIZE pos, | |
161 FX_DWORD objnum); | |
162 | |
// Same parameters as ParseIndirectObjectAt() plus the `pResultPos`
// out-pointer.
163 CPDF_Object* ParseIndirectObjectAtByStrict( | |
164 CPDF_IndirectObjectHolder* pObjList, | |
165 FX_FILESIZE pos, | |
166 FX_DWORD objnum, | |
167 FX_FILESIZE* pResultPos); | |
168 | |
169 Error StartAsyncParse(IFX_FileRead* pFile); | |
170 | |
171 FX_DWORD GetFirstPageNo() const { return m_dwFirstPageNo; } | |
172 | |
173 protected: | |
// Value type of m_ObjectInfo. The default constructor zero-initializes all
// three fields.
174 struct ObjectInfo { | |
175 ObjectInfo() : pos(0), type(0), gennum(0) {} | |
176 | |
177 FX_FILESIZE pos; | |
178 uint8_t type; | |
179 uint16_t gennum; | |
180 }; | |
181 | |
// Internal helpers; declarations only.
182 void CloseParser(); | |
183 CPDF_Object* ParseDirect(CPDF_Object* pObj); | |
184 FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos); | |
185 FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos); | |
186 bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip); | |
// Takes `pos` by pointer, unlike the V4 loader above which takes it by value.
187 FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef); | |
188 CPDF_Dictionary* LoadTrailerV4(); | |
189 FX_BOOL RebuildCrossRef(); | |
190 Error SetEncryptHandler(); | |
191 void ReleaseEncryptHandler(); | |
192 FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount); | |
193 FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount); | |
194 FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); | |
195 Error LoadLinearizedMainXRefTable(); | |
196 CPDF_StreamAcc* GetObjectStream(FX_DWORD number); | |
197 FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset); | |
198 void SetEncryptDictionary(CPDF_Dictionary* pDict); | |
199 void ShrinkObjectMap(FX_DWORD size); | |
200 | |
// Data members. Raw pointers below carry no ownership information in this
// declaration; m_pSyntax and m_pSecurityHandler are owned via unique_ptr.
201 CPDF_Document* m_pDocument; | |
202 std::unique_ptr<CPDF_SyntaxParser> m_pSyntax; | |
203 bool m_bOwnFileRead; | |
204 int m_FileVersion; | |
205 CPDF_Dictionary* m_pTrailer; | |
206 CPDF_Dictionary* m_pEncryptDict; | |
207 FX_FILESIZE m_LastXRefOffset; | |
208 FX_BOOL m_bXRefStream; | |
209 std::unique_ptr<IPDF_SecurityHandler> m_pSecurityHandler; | |
210 CFX_ByteString m_bsRecipient; | |
211 CFX_ByteString m_FilePath; | |
212 CFX_ByteString m_Password; | |
// NOTE(review): presumably keyed by object number -- confirm.
213 std::map<FX_DWORD, ObjectInfo> m_ObjectInfo; | |
214 std::set<FX_FILESIZE> m_SortedOffset; | |
215 CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers; | |
216 FX_BOOL m_bVersionUpdated; | |
217 CPDF_Object* m_pLinearized; | |
218 FX_DWORD m_dwFirstPageNo; | |
219 FX_DWORD m_dwXrefStartObjNum; | |
220 | |
221 // A map of object numbers to indirect streams. Map owns the streams. | |
222 std::map<FX_DWORD, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap; | |
223 | |
224 // Mapping of object numbers to offsets. The offsets are relative to the first | |
225 // object in the stream. | |
226 using StreamObjectCache = std::map<FX_DWORD, FX_DWORD>; | |
227 | |
228 // Mapping of streams to their object caches. This is valid as long as the | |
229 // streams in |m_ObjectStreamMap| are valid. | |
230 std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache; | |
231 | |
232 // All indirect object numbers that are being parsed. | |
233 std::set<FX_DWORD> m_ParsingObjNums; | |
234 | |
// Grants CPDF_DataAvail access to the protected and private members above.
235 friend class CPDF_DataAvail; | |
236 | |
237 private: | |
// Scoped enum that no member of this declaration refers to.
// NOTE(review): presumably used only inside the implementation file (for
// example while scanning the file in RebuildCrossRef) -- confirm.
238 enum class ParserState { | |
239 kDefault, | |
240 kComment, | |
241 kWhitespace, | |
242 kString, | |
243 kHexString, | |
244 kEscapedString, | |
245 kXref, | |
246 kObjNum, | |
247 kPostObjNum, | |
248 kGenNum, | |
249 kPostGenNum, | |
250 kTrailer, | |
251 kBeginObj, | |
252 kEndObj | |
253 }; | |
254 }; | |
255 | |
// Integer constants 0 through 3 naming cipher kinds.
// NOTE(review): the code that consumes these values is not visible in this
// view; presumably they select the encryption algorithm used by the security
// and crypto handlers -- confirm against their implementations.
256 #define FXCIPHER_NONE 0 | 67 #define FXCIPHER_NONE 0 |
257 #define FXCIPHER_RC4 1 | 68 #define FXCIPHER_RC4 1 |
258 #define FXCIPHER_AES 2 | 69 #define FXCIPHER_AES 2 |
259 #define FXCIPHER_AES2 3 | 70 #define FXCIPHER_AES2 3 |
260 | 71 |
261 class IPDF_SecurityHandler { | 72 class IPDF_SecurityHandler { |
262 public: | 73 public: |
263 virtual ~IPDF_SecurityHandler() {} | 74 virtual ~IPDF_SecurityHandler() {} |
264 | 75 |
265 virtual FX_BOOL OnInit(CPDF_Parser* pParser, | 76 virtual FX_BOOL OnInit(CPDF_Parser* pParser, |
(...skipping 380 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
646 FX_DWORD src_size, | 457 FX_DWORD src_size, |
647 const CPDF_Dictionary* pDict, | 458 const CPDF_Dictionary* pDict, |
648 uint8_t*& dest_buf, | 459 uint8_t*& dest_buf, |
649 FX_DWORD& dest_size, | 460 FX_DWORD& dest_size, |
650 CFX_ByteString& ImageEncoding, | 461 CFX_ByteString& ImageEncoding, |
651 CPDF_Dictionary*& pImageParms, | 462 CPDF_Dictionary*& pImageParms, |
652 FX_DWORD estimated_size, | 463 FX_DWORD estimated_size, |
653 FX_BOOL bImageAcc); | 464 FX_BOOL bImageAcc); |
654 | 465 |
655 #endif // CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ | 466 #endif // CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ |
OLD | NEW |