OLD | NEW |
| (Empty) |
1 // Copyright 2016 PDFium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
6 | |
7 #ifndef CORE_FPDFAPI_FPDF_PARSER_INCLUDE_CPDF_PARSER_H_ | |
8 #define CORE_FPDFAPI_FPDF_PARSER_INCLUDE_CPDF_PARSER_H_ | |
9 | |
10 #include <map> | |
11 #include <memory> | |
12 #include <set> | |
13 | |
14 #include "core/fxcrt/include/fx_basic.h" | |
15 | |
16 class CPDF_Array; | |
17 class CPDF_CryptoHandler; | |
18 class CPDF_Dictionary; | |
19 class CPDF_Document; | |
20 class CPDF_IndirectObjectHolder; | |
21 class CPDF_Object; | |
22 class CPDF_SecurityHandler; | |
23 class CPDF_StreamAcc; | |
24 class CPDF_SyntaxParser; | |
25 class IFX_FileRead; | |
26 | |
27 class CPDF_Parser { | |
28 public: | |
29 enum Error { | |
30 SUCCESS = 0, | |
31 FILE_ERROR, | |
32 FORMAT_ERROR, | |
33 PASSWORD_ERROR, | |
34 HANDLER_ERROR | |
35 }; | |
36 | |
37 // A limit on the maximum object number in the xref table. Theoretical limits | |
38 // are higher, but this may be large enough in practice. | |
39 static const uint32_t kMaxObjectNumber = 1048576; | |
40 | |
41 CPDF_Parser(); | |
42 ~CPDF_Parser(); | |
43 | |
44 Error StartParse(IFX_FileRead* pFile, CPDF_Document* pDocument); | |
45 Error StartLinearizedParse(IFX_FileRead* pFile, CPDF_Document* pDocument); | |
46 | |
47 void SetPassword(const FX_CHAR* password) { m_Password = password; } | |
48 CFX_ByteString GetPassword() { return m_Password; } | |
49 CPDF_Dictionary* GetTrailer() const { return m_pTrailer; } | |
50 FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } | |
51 | |
52 uint32_t GetPermissions() const; | |
53 uint32_t GetRootObjNum(); | |
54 uint32_t GetInfoObjNum(); | |
55 CPDF_Array* GetIDArray(); | |
56 | |
57 CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; } | |
58 | |
59 CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList, | |
60 uint32_t objnum); | |
61 | |
62 uint32_t GetLastObjNum() const; | |
63 bool IsValidObjectNumber(uint32_t objnum) const; | |
64 FX_FILESIZE GetObjectPositionOrZero(uint32_t objnum) const; | |
65 uint8_t GetObjectType(uint32_t objnum) const; | |
66 uint16_t GetObjectGenNum(uint32_t objnum) const; | |
67 bool IsVersionUpdated() const { return m_bVersionUpdated; } | |
68 bool IsObjectFreeOrNull(uint32_t objnum) const; | |
69 CPDF_CryptoHandler* GetCryptoHandler(); | |
70 IFX_FileRead* GetFileAccess() const; | |
71 | |
72 FX_FILESIZE GetObjectOffset(uint32_t objnum) const; | |
73 FX_FILESIZE GetObjectSize(uint32_t objnum) const; | |
74 | |
75 void GetIndirectBinary(uint32_t objnum, uint8_t*& pBuffer, uint32_t& size); | |
76 int GetFileVersion() const { return m_FileVersion; } | |
77 FX_BOOL IsXRefStream() const { return m_bXRefStream; } | |
78 | |
79 CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList, | |
80 FX_FILESIZE pos, | |
81 uint32_t objnum); | |
82 | |
83 CPDF_Object* ParseIndirectObjectAtByStrict( | |
84 CPDF_IndirectObjectHolder* pObjList, | |
85 FX_FILESIZE pos, | |
86 uint32_t objnum, | |
87 FX_FILESIZE* pResultPos); | |
88 | |
89 uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; } | |
90 | |
91 protected: | |
92 struct ObjectInfo { | |
93 ObjectInfo() : pos(0), type(0), gennum(0) {} | |
94 | |
95 FX_FILESIZE pos; | |
96 uint8_t type; | |
97 uint16_t gennum; | |
98 }; | |
99 | |
100 std::unique_ptr<CPDF_SyntaxParser> m_pSyntax; | |
101 std::map<uint32_t, ObjectInfo> m_ObjectInfo; | |
102 | |
103 bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip); | |
104 FX_BOOL RebuildCrossRef(); | |
105 | |
106 private: | |
107 friend class CPDF_DataAvail; | |
108 | |
109 enum class ParserState { | |
110 kDefault, | |
111 kComment, | |
112 kWhitespace, | |
113 kString, | |
114 kHexString, | |
115 kEscapedString, | |
116 kXref, | |
117 kObjNum, | |
118 kPostObjNum, | |
119 kGenNum, | |
120 kPostGenNum, | |
121 kTrailer, | |
122 kBeginObj, | |
123 kEndObj | |
124 }; | |
125 | |
126 CPDF_Object* ParseDirect(CPDF_Object* pObj); | |
127 FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos); | |
128 FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos); | |
129 FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef); | |
130 CPDF_Dictionary* LoadTrailerV4(); | |
131 Error SetEncryptHandler(); | |
132 void ReleaseEncryptHandler(); | |
133 FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); | |
134 FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); | |
135 FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); | |
136 Error LoadLinearizedMainXRefTable(); | |
137 CPDF_StreamAcc* GetObjectStream(uint32_t number); | |
138 FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, uint32_t offset); | |
139 void SetEncryptDictionary(CPDF_Dictionary* pDict); | |
140 void ShrinkObjectMap(uint32_t size); | |
141 // A simple check whether the cross reference table matches with | |
142 // the objects. | |
143 bool VerifyCrossRefV4(); | |
144 | |
145 CPDF_Document* m_pDocument; // not owned | |
146 bool m_bHasParsed; | |
147 bool m_bOwnFileRead; | |
148 int m_FileVersion; | |
149 CPDF_Dictionary* m_pTrailer; | |
150 CPDF_Dictionary* m_pEncryptDict; | |
151 FX_FILESIZE m_LastXRefOffset; | |
152 FX_BOOL m_bXRefStream; | |
153 std::unique_ptr<CPDF_SecurityHandler> m_pSecurityHandler; | |
154 CFX_ByteString m_Password; | |
155 std::set<FX_FILESIZE> m_SortedOffset; | |
156 CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers; | |
157 bool m_bVersionUpdated; | |
158 CPDF_Object* m_pLinearized; | |
159 uint32_t m_dwFirstPageNo; | |
160 uint32_t m_dwXrefStartObjNum; | |
161 | |
162 // A map of object numbers to indirect streams. Map owns the streams. | |
163 std::map<uint32_t, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap; | |
164 | |
165 // Mapping of object numbers to offsets. The offsets are relative to the first | |
166 // object in the stream. | |
167 using StreamObjectCache = std::map<uint32_t, uint32_t>; | |
168 | |
169 // Mapping of streams to their object caches. This is valid as long as the | |
170 // streams in |m_ObjectStreamMap| are valid. | |
171 std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache; | |
172 | |
173 // All indirect object numbers that are being parsed. | |
174 std::set<uint32_t> m_ParsingObjNums; | |
175 }; | |
176 | |
177 #endif // CORE_FPDFAPI_FPDF_PARSER_INCLUDE_CPDF_PARSER_H_ | |
OLD | NEW |