Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1701)

Side by Side Diff: core/include/fpdfapi/fpdf_parser.h

Issue 1773103003: Split off CPDF_Parser and CPDF_SimpleParser into .h/.cpp files (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Then address C#3. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #ifndef CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ 7 #ifndef CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_
8 #define CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ 8 #define CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_
9 9
10 #include <map> 10 #include <map>
11 #include <memory> 11 #include <memory>
(...skipping 28 matching lines...) Expand all
40 #define FPDFPERM_EXTRACT_ACCESS 0x0200 40 #define FPDFPERM_EXTRACT_ACCESS 0x0200
41 #define FPDFPERM_ASSEMBLE 0x0400 41 #define FPDFPERM_ASSEMBLE 0x0400
42 #define FPDFPERM_PRINT_HIGH 0x0800 42 #define FPDFPERM_PRINT_HIGH 0x0800
43 #define FPDF_PAGE_MAX_NUM 0xFFFFF 43 #define FPDF_PAGE_MAX_NUM 0xFFFFF
44 44
// Owning smart-pointer alias for IFX_FileStream. Destruction is delegated to
// ReleaseDeleter<IFX_FileStream> rather than the default deleter
// (presumably it calls Release() on the stream -- confirm against the
// ReleaseDeleter definition, which is not visible here).
45 // TODO(thestig) Using unique_ptr with ReleaseDeleter is still not ideal. 45 // TODO(thestig) Using unique_ptr with ReleaseDeleter is still not ideal.
46 // Come up with or wait for something better. 46 // Come up with or wait for something better.
47 using ScopedFileStream = 47 using ScopedFileStream =
48 std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>; 48 std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>;
49 49
// Classification table with one entry per 8-bit character value (256 entries).
// The inline accessors declared after it compare an entry against the class
// letters 'W', 'N', 'D' and 'R'. The table itself is defined elsewhere.
50 // Use the accessors below instead of directly accessing PDF_CharType.
51 extern const char PDF_CharType[256];
52
// Returns true when the PDF_CharType entry for |c| is 'W'.
// |c| is a uint8_t, so the index always lies within the 256-entry table.
53 inline bool PDFCharIsWhitespace(uint8_t c) {
54 return PDF_CharType[c] == 'W';
55 }
// Returns true when the PDF_CharType entry for |c| is 'N'.
// |c| is a uint8_t, so the index always lies within the 256-entry table.
56 inline bool PDFCharIsNumeric(uint8_t c) {
57 return PDF_CharType[c] == 'N';
58 }
// Returns true when the PDF_CharType entry for |c| is 'D'.
// |c| is a uint8_t, so the index always lies within the 256-entry table.
59 inline bool PDFCharIsDelimiter(uint8_t c) {
60 return PDF_CharType[c] == 'D';
61 }
// Returns true when the PDF_CharType entry for |c| is 'R', i.e. the class
// that is none of 'W', 'N' or 'D' among the letters tested by the sibling
// accessors.
// |c| is a uint8_t, so the index always lies within the 256-entry table.
62 inline bool PDFCharIsOther(uint8_t c) {
63 return PDF_CharType[c] == 'R';
64 }
65
// Returns true when |c| is a carriage return ('\r') or a line feed ('\n').
// This check compares |c| directly and does not consult PDF_CharType.
66 inline bool PDFCharIsLineEnding(uint8_t c) {
67 return c == '\r' || c == '\n';
68 }
69
// Keeps |elem| in |*org_set| for the lifetime of this object: the constructor
// inserts a copy of |elem| and the destructor erases it again.
// Only a pointer to the set is stored, so the set must outlive this object.
// NOTE(review): the destructor erases unconditionally, so an element that was
// already present in the set before construction is also removed when this
// object is destroyed.
70 template <typename T> 50 template <typename T>
71 class ScopedSetInsertion { 51 class ScopedSetInsertion {
72 public: 52 public:
73 ScopedSetInsertion(std::set<T>* org_set, T elem) 53 ScopedSetInsertion(std::set<T>* org_set, T elem)
74 : m_Set(org_set), m_Entry(elem) { 54 : m_Set(org_set), m_Entry(elem) {
75 m_Set->insert(m_Entry); 55 m_Set->insert(m_Entry);
76 } 56 }
77 ~ScopedSetInsertion() { m_Set->erase(m_Entry); } 57 ~ScopedSetInsertion() { m_Set->erase(m_Entry); }
78 58
79 private: 59 private:
80 std::set<T>* const m_Set; 60 std::set<T>* const m_Set;
81 const T m_Entry; 61 const T m_Entry;
82 }; 62 };
83 63
84 // Indexed by 8-bit char code, contains Unicode code points. 64 // Indexed by 8-bit char code, contains Unicode code points.
85 extern const FX_WORD PDFDocEncoding[256]; 65 extern const FX_WORD PDFDocEncoding[256];
86 66
87
// Word-level reader over a caller-supplied byte buffer. The class keeps a
// pointer to the data, its size and a current position; it declares no
// destructor, so it does not free the buffer itself.
// Only declarations are visible here; the member functions are defined
// out of line.
88 class CPDF_SimpleParser {
89 public:
// Constructs a parser over |dwSize| bytes starting at |pData|.
90 CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize);
// NOTE(review): this single-argument constructor is not marked explicit, so
// a CFX_ByteStringC converts implicitly to a CPDF_SimpleParser -- confirm
// that this is intended.
91 CPDF_SimpleParser(const CFX_ByteStringC& str);
92
// Returns a word as a CFX_ByteStringC (presumably the next word at the
// current position -- confirm against the implementation).
93 CFX_ByteStringC GetWord();
94
95 // Find the token and its |nParams| parameters from the start of data,
96 // and move the current position to the start of those parameters.
97 bool FindTagParamFromStart(const CFX_ByteStringC& token, int nParams);
98
99 // For testing only.
100 FX_DWORD GetCurPos() const { return m_dwCurPos; }
101
102 private:
// Writes its results through the |pStart| and |dwSize| reference parameters.
103 void ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize);
104
105 const uint8_t* m_pData;
106 FX_DWORD m_dwSize;
107 FX_DWORD m_dwCurPos;
108 };
109
// Interface of the PDF file parser. StartParse() and StartAsyncParse() are the
// entry points and report their outcome as an Error value; the other public
// members expose the trailer, encryption dictionary, cross-reference data and
// per-object lookups by object number.
// Only declarations and trivial inline accessors are visible in this header;
// the behavior of the out-of-line members is defined elsewhere.
110 class CPDF_Parser {
111 public:
// Result codes used as the return type of StartParse(), StartAsyncParse(),
// SetEncryptHandler() and LoadLinearizedMainXRefTable(). SUCCESS is 0.
112 enum Error {
113 SUCCESS = 0,
114 FILE_ERROR,
115 FORMAT_ERROR,
116 PASSWORD_ERROR,
117 HANDLER_ERROR
118 };
119
120 CPDF_Parser();
121 ~CPDF_Parser();
122
123 Error StartParse(IFX_FileRead* pFile);
124 FX_DWORD GetPermissions(FX_BOOL bCheckRevision = FALSE);
125
// Inline accessors: each one stores into or returns the matching member.
// NOTE(review): GetPassword() returns the password by value and is not
// const-qualified, unlike the neighbouring getters.
126 void SetPassword(const FX_CHAR* password) { m_Password = password; }
127 CFX_ByteString GetPassword() { return m_Password; }
128 CPDF_Dictionary* GetTrailer() const { return m_pTrailer; }
129 FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; }
130 CPDF_Document* GetDocument() const { return m_pDocument; }
131
132 FX_DWORD GetRootObjNum();
133 FX_DWORD GetInfoObjNum();
134 CPDF_Array* GetIDArray();
135
136 CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; }
137
138 CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList,
139 FX_DWORD objnum);
140
// Per-object queries keyed by object number.
141 FX_DWORD GetLastObjNum() const;
142 bool IsValidObjectNumber(FX_DWORD objnum) const;
143 FX_FILESIZE GetObjectPositionOrZero(FX_DWORD objnum) const;
144 uint8_t GetObjectType(FX_DWORD objnum) const;
145 uint16_t GetObjectGenNum(FX_DWORD objnum) const;
146 bool IsVersionUpdated() const { return m_bVersionUpdated; }
147 bool IsObjectFreeOrNull(FX_DWORD objnum) const;
// Has both an FX_BOOL return value and an FX_BOOL out-parameter |bForm|.
148 FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm);
149 CPDF_CryptoHandler* GetCryptoHandler();
150 IFX_FileRead* GetFileAccess() const;
151
152 FX_FILESIZE GetObjectOffset(FX_DWORD objnum) const;
153 FX_FILESIZE GetObjectSize(FX_DWORD objnum) const;
154
// Results are written through the |pBuffer| and |size| reference parameters.
// NOTE(review): ownership of |pBuffer| is not visible here -- confirm who
// frees it.
155 void GetIndirectBinary(FX_DWORD objnum, uint8_t*& pBuffer, FX_DWORD& size);
156 int GetFileVersion() const { return m_FileVersion; }
157 FX_BOOL IsXRefStream() const { return m_bXRefStream; }
158
159 CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList,
160 FX_FILESIZE pos,
161 FX_DWORD objnum);
162
// Same parameters as ParseIndirectObjectAt() plus the |pResultPos|
// out-pointer.
163 CPDF_Object* ParseIndirectObjectAtByStrict(
164 CPDF_IndirectObjectHolder* pObjList,
165 FX_FILESIZE pos,
166 FX_DWORD objnum,
167 FX_FILESIZE* pResultPos);
168
169 Error StartAsyncParse(IFX_FileRead* pFile);
170
171 FX_DWORD GetFirstPageNo() const { return m_dwFirstPageNo; }
172
173 protected:
// Per-object record used as the mapped value of |m_ObjectInfo|. All three
// fields are initialized to zero by the constructor.
174 struct ObjectInfo {
175 ObjectInfo() : pos(0), type(0), gennum(0) {}
176
177 FX_FILESIZE pos;
178 uint8_t type;
179 uint16_t gennum;
180 };
181
// Internal helpers. The V4 and V5 suffixes distinguish two families of
// cross-reference loaders (presumably classic xref tables vs. xref streams --
// confirm against the implementation).
182 void CloseParser();
183 CPDF_Object* ParseDirect(CPDF_Object* pObj);
184 FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos);
185 FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos);
186 bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip);
187 FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef);
188 CPDF_Dictionary* LoadTrailerV4();
189 FX_BOOL RebuildCrossRef();
190 Error SetEncryptHandler();
191 void ReleaseEncryptHandler();
192 FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount);
193 FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount);
194 FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos);
195 Error LoadLinearizedMainXRefTable();
196 CPDF_StreamAcc* GetObjectStream(FX_DWORD number);
197 FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset);
198 void SetEncryptDictionary(CPDF_Dictionary* pDict);
199 void ShrinkObjectMap(FX_DWORD size);
200
// Parser state. |m_pSyntax| and |m_pSecurityHandler| are held through
// std::unique_ptr; the remaining pointers are raw.
// NOTE(review): ownership of the raw pointers is not visible in this header.
201 CPDF_Document* m_pDocument;
202 std::unique_ptr<CPDF_SyntaxParser> m_pSyntax;
203 bool m_bOwnFileRead;
204 int m_FileVersion;
205 CPDF_Dictionary* m_pTrailer;
206 CPDF_Dictionary* m_pEncryptDict;
207 FX_FILESIZE m_LastXRefOffset;
208 FX_BOOL m_bXRefStream;
209 std::unique_ptr<IPDF_SecurityHandler> m_pSecurityHandler;
210 CFX_ByteString m_bsRecipient;
211 CFX_ByteString m_FilePath;
212 CFX_ByteString m_Password;
// Object number -> ObjectInfo record.
213 std::map<FX_DWORD, ObjectInfo> m_ObjectInfo;
214 std::set<FX_FILESIZE> m_SortedOffset;
215 CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers;
216 FX_BOOL m_bVersionUpdated;
217 CPDF_Object* m_pLinearized;
218 FX_DWORD m_dwFirstPageNo;
219 FX_DWORD m_dwXrefStartObjNum;
220
221 // A map of object numbers to indirect streams. Map owns the streams.
222 std::map<FX_DWORD, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap;
223
224 // Mapping of object numbers to offsets. The offsets are relative to the first
225 // object in the stream.
226 using StreamObjectCache = std::map<FX_DWORD, FX_DWORD>;
227
228 // Mapping of streams to their object caches. This is valid as long as the
229 // streams in |m_ObjectStreamMap| are valid.
230 std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache;
231
232 // All indirect object numbers that are being parsed.
233 std::set<FX_DWORD> m_ParsingObjNums;
234
// CPDF_DataAvail is granted access to the protected and private members.
235 friend class CPDF_DataAvail;
236
237 private:
// NOTE(review): nothing in this class declaration refers to ParserState;
// presumably it is used only by the implementation file -- confirm.
238 enum class ParserState {
239 kDefault,
240 kComment,
241 kWhitespace,
242 kString,
243 kHexString,
244 kEscapedString,
245 kXref,
246 kObjNum,
247 kPostObjNum,
248 kGenNum,
249 kPostGenNum,
250 kTrailer,
251 kBeginObj,
252 kEndObj
253 };
254 };
255
256 #define FXCIPHER_NONE 0 67 #define FXCIPHER_NONE 0
257 #define FXCIPHER_RC4 1 68 #define FXCIPHER_RC4 1
258 #define FXCIPHER_AES 2 69 #define FXCIPHER_AES 2
259 #define FXCIPHER_AES2 3 70 #define FXCIPHER_AES2 3
260 71
261 class IPDF_SecurityHandler { 72 class IPDF_SecurityHandler {
262 public: 73 public:
263 virtual ~IPDF_SecurityHandler() {} 74 virtual ~IPDF_SecurityHandler() {}
264 75
265 virtual FX_BOOL OnInit(CPDF_Parser* pParser, 76 virtual FX_BOOL OnInit(CPDF_Parser* pParser,
(...skipping 380 matching lines...) Expand 10 before | Expand all | Expand 10 after
646 FX_DWORD src_size, 457 FX_DWORD src_size,
647 const CPDF_Dictionary* pDict, 458 const CPDF_Dictionary* pDict,
648 uint8_t*& dest_buf, 459 uint8_t*& dest_buf,
649 FX_DWORD& dest_size, 460 FX_DWORD& dest_size,
650 CFX_ByteString& ImageEncoding, 461 CFX_ByteString& ImageEncoding,
651 CPDF_Dictionary*& pImageParms, 462 CPDF_Dictionary*& pImageParms,
652 FX_DWORD estimated_size, 463 FX_DWORD estimated_size,
653 FX_BOOL bImageAcc); 464 FX_BOOL bImageAcc);
654 465
655 #endif // CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ 466 #endif // CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_
OLDNEW
« no previous file with comments | « core/include/fpdfapi/cpdf_simple_parser.h ('k') | core/src/fpdfapi/fpdf_edit/fpdf_edit_create.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698