OLD | NEW |
| (Empty) |
1 // Copyright 2016 PDFium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
6 | |
7 #include "core/fpdfapi/fpdf_parser/cpdf_parser.h" | |
8 | |
9 #include <vector> | |
10 | |
11 #include "core/fpdfapi/fpdf_parser/cpdf_array.h" | |
12 #include "core/fpdfapi/fpdf_parser/cpdf_crypto_handler.h" | |
13 #include "core/fpdfapi/fpdf_parser/cpdf_dictionary.h" | |
14 #include "core/fpdfapi/fpdf_parser/cpdf_document.h" | |
15 #include "core/fpdfapi/fpdf_parser/cpdf_number.h" | |
16 #include "core/fpdfapi/fpdf_parser/cpdf_reference.h" | |
17 #include "core/fpdfapi/fpdf_parser/cpdf_security_handler.h" | |
18 #include "core/fpdfapi/fpdf_parser/cpdf_stream.h" | |
19 #include "core/fpdfapi/fpdf_parser/cpdf_stream_acc.h" | |
20 #include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" | |
21 #include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" | |
22 #include "core/fxcrt/fx_ext.h" | |
23 #include "core/fxcrt/fx_safe_types.h" | |
24 #include "third_party/base/stl_util.h" | |
25 | |
26 namespace { | |
27 | |
// Upper bound on the size of the xref table. The format allows larger
// tables in theory, but this should be large enough in practice.
const int32_t kMaxXRefSize = 1 << 20;
31 | |
// Interprets the first |n| bytes at |p| as a big-endian unsigned integer.
// Returns 0 when |n| is not positive. With more than four bytes the high
// bytes are shifted out of the 32-bit result.
uint32_t GetVarInt(const uint8_t* p, int32_t n) {
  uint32_t value = 0;
  for (int32_t left = n; left > 0; --left)
    value = (value << 8) | *p++;
  return value;
}
38 | |
39 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) { | |
40 return pObjStream->GetDict()->GetIntegerFor("N"); | |
41 } | |
42 | |
43 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) { | |
44 return pObjStream->GetDict()->GetIntegerFor("First"); | |
45 } | |
46 | |
47 } // namespace | |
48 | |
49 CPDF_Parser::CPDF_Parser() | |
50 : m_pDocument(nullptr), | |
51 m_bHasParsed(false), | |
52 m_bOwnFileRead(true), | |
53 m_FileVersion(0), | |
54 m_pTrailer(nullptr), | |
55 m_pEncryptDict(nullptr), | |
56 m_bVersionUpdated(false), | |
57 m_pLinearized(nullptr), | |
58 m_dwFirstPageNo(0), | |
59 m_dwXrefStartObjNum(0) { | |
60 m_pSyntax.reset(new CPDF_SyntaxParser); | |
61 } | |
62 | |
63 CPDF_Parser::~CPDF_Parser() { | |
64 if (m_pTrailer) | |
65 m_pTrailer->Release(); | |
66 | |
67 ReleaseEncryptHandler(); | |
68 SetEncryptDictionary(nullptr); | |
69 | |
70 if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) { | |
71 m_pSyntax->m_pFileAccess->Release(); | |
72 m_pSyntax->m_pFileAccess = nullptr; | |
73 } | |
74 | |
75 int32_t iLen = m_Trailers.GetSize(); | |
76 for (int32_t i = 0; i < iLen; ++i) { | |
77 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i)) | |
78 trailer->Release(); | |
79 } | |
80 | |
81 if (m_pLinearized) | |
82 m_pLinearized->Release(); | |
83 } | |
84 | |
85 uint32_t CPDF_Parser::GetLastObjNum() const { | |
86 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first; | |
87 } | |
88 | |
89 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const { | |
90 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first; | |
91 } | |
92 | |
93 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const { | |
94 auto it = m_ObjectInfo.find(objnum); | |
95 return it != m_ObjectInfo.end() ? it->second.pos : 0; | |
96 } | |
97 | |
98 uint8_t CPDF_Parser::GetObjectType(uint32_t objnum) const { | |
99 ASSERT(IsValidObjectNumber(objnum)); | |
100 auto it = m_ObjectInfo.find(objnum); | |
101 return it != m_ObjectInfo.end() ? it->second.type : 0; | |
102 } | |
103 | |
104 uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const { | |
105 ASSERT(IsValidObjectNumber(objnum)); | |
106 auto it = m_ObjectInfo.find(objnum); | |
107 return it != m_ObjectInfo.end() ? it->second.gennum : 0; | |
108 } | |
109 | |
110 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const { | |
111 uint8_t type = GetObjectType(objnum); | |
112 return type == 0 || type == 255; | |
113 } | |
114 | |
115 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) { | |
116 m_pEncryptDict = pDict; | |
117 } | |
118 | |
119 CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() { | |
120 return m_pSyntax->m_pCryptoHandler.get(); | |
121 } | |
122 | |
123 IFX_FileRead* CPDF_Parser::GetFileAccess() const { | |
124 return m_pSyntax->m_pFileAccess; | |
125 } | |
126 | |
127 void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) { | |
128 if (objnum == 0) { | |
129 m_ObjectInfo.clear(); | |
130 return; | |
131 } | |
132 | |
133 auto it = m_ObjectInfo.lower_bound(objnum); | |
134 while (it != m_ObjectInfo.end()) { | |
135 auto saved_it = it++; | |
136 m_ObjectInfo.erase(saved_it); | |
137 } | |
138 | |
139 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) | |
140 m_ObjectInfo[objnum - 1].pos = 0; | |
141 } | |
142 | |
143 CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess, | |
144 CPDF_Document* pDocument) { | |
145 ASSERT(!m_bHasParsed); | |
146 m_bHasParsed = true; | |
147 | |
148 m_bXRefStream = FALSE; | |
149 m_LastXRefOffset = 0; | |
150 m_bOwnFileRead = true; | |
151 | |
152 int32_t offset = GetHeaderOffset(pFileAccess); | |
153 if (offset == -1) { | |
154 if (pFileAccess) | |
155 pFileAccess->Release(); | |
156 return FORMAT_ERROR; | |
157 } | |
158 m_pSyntax->InitParser(pFileAccess, offset); | |
159 | |
160 uint8_t ch; | |
161 if (!m_pSyntax->GetCharAt(5, ch)) | |
162 return FORMAT_ERROR; | |
163 if (std::isdigit(ch)) | |
164 m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10; | |
165 | |
166 if (!m_pSyntax->GetCharAt(7, ch)) | |
167 return FORMAT_ERROR; | |
168 if (std::isdigit(ch)) | |
169 m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
170 | |
171 if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9) | |
172 return FORMAT_ERROR; | |
173 | |
174 m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9); | |
175 m_pDocument = pDocument; | |
176 | |
177 FX_BOOL bXRefRebuilt = FALSE; | |
178 if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) { | |
179 m_SortedOffset.insert(m_pSyntax->SavePos()); | |
180 m_pSyntax->GetKeyword(); | |
181 | |
182 bool bNumber; | |
183 CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber); | |
184 if (!bNumber) | |
185 return FORMAT_ERROR; | |
186 | |
187 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str()); | |
188 if (!LoadAllCrossRefV4(m_LastXRefOffset) && | |
189 !LoadAllCrossRefV5(m_LastXRefOffset)) { | |
190 if (!RebuildCrossRef()) | |
191 return FORMAT_ERROR; | |
192 | |
193 bXRefRebuilt = TRUE; | |
194 m_LastXRefOffset = 0; | |
195 } | |
196 } else { | |
197 if (!RebuildCrossRef()) | |
198 return FORMAT_ERROR; | |
199 | |
200 bXRefRebuilt = TRUE; | |
201 } | |
202 Error eRet = SetEncryptHandler(); | |
203 if (eRet != SUCCESS) | |
204 return eRet; | |
205 | |
206 m_pDocument->LoadDoc(); | |
207 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { | |
208 if (bXRefRebuilt) | |
209 return FORMAT_ERROR; | |
210 | |
211 ReleaseEncryptHandler(); | |
212 if (!RebuildCrossRef()) | |
213 return FORMAT_ERROR; | |
214 | |
215 eRet = SetEncryptHandler(); | |
216 if (eRet != SUCCESS) | |
217 return eRet; | |
218 | |
219 m_pDocument->LoadDoc(); | |
220 if (!m_pDocument->GetRoot()) | |
221 return FORMAT_ERROR; | |
222 } | |
223 if (GetRootObjNum() == 0) { | |
224 ReleaseEncryptHandler(); | |
225 if (!RebuildCrossRef() || GetRootObjNum() == 0) | |
226 return FORMAT_ERROR; | |
227 | |
228 eRet = SetEncryptHandler(); | |
229 if (eRet != SUCCESS) | |
230 return eRet; | |
231 } | |
232 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { | |
233 CPDF_Reference* pMetadata = | |
234 ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")); | |
235 if (pMetadata) | |
236 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); | |
237 } | |
238 return SUCCESS; | |
239 } | |
240 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { | |
241 ReleaseEncryptHandler(); | |
242 SetEncryptDictionary(nullptr); | |
243 | |
244 if (!m_pTrailer) | |
245 return FORMAT_ERROR; | |
246 | |
247 CPDF_Object* pEncryptObj = m_pTrailer->GetObjectFor("Encrypt"); | |
248 if (pEncryptObj) { | |
249 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) { | |
250 SetEncryptDictionary(pEncryptDict); | |
251 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) { | |
252 pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum()); | |
253 if (pEncryptObj) | |
254 SetEncryptDictionary(pEncryptObj->GetDict()); | |
255 } | |
256 } | |
257 | |
258 if (m_pEncryptDict) { | |
259 CFX_ByteString filter = m_pEncryptDict->GetStringFor("Filter"); | |
260 std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler; | |
261 Error err = HANDLER_ERROR; | |
262 if (filter == "Standard") { | |
263 pSecurityHandler.reset(new CPDF_SecurityHandler); | |
264 err = PASSWORD_ERROR; | |
265 } | |
266 if (!pSecurityHandler) | |
267 return HANDLER_ERROR; | |
268 | |
269 if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) | |
270 return err; | |
271 | |
272 m_pSecurityHandler = std::move(pSecurityHandler); | |
273 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler( | |
274 m_pSecurityHandler->CreateCryptoHandler()); | |
275 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) | |
276 return HANDLER_ERROR; | |
277 m_pSyntax->SetEncrypt(std::move(pCryptoHandler)); | |
278 } | |
279 return SUCCESS; | |
280 } | |
281 | |
282 void CPDF_Parser::ReleaseEncryptHandler() { | |
283 m_pSyntax->m_pCryptoHandler.reset(); | |
284 m_pSecurityHandler.reset(); | |
285 } | |
286 | |
287 FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const { | |
288 if (!IsValidObjectNumber(objnum)) | |
289 return 0; | |
290 | |
291 if (GetObjectType(objnum) == 1) | |
292 return GetObjectPositionOrZero(objnum); | |
293 | |
294 if (GetObjectType(objnum) == 2) { | |
295 FX_FILESIZE pos = GetObjectPositionOrZero(objnum); | |
296 return GetObjectPositionOrZero(pos); | |
297 } | |
298 return 0; | |
299 } | |
300 | |
301 // Ideally, all the cross reference entries should be verified. | |
302 // In reality, we rarely see well-formed cross references don't match | |
303 // with the objects. crbug/602650 showed a case where object numbers | |
304 // in the cross reference table are all off by one. | |
305 bool CPDF_Parser::VerifyCrossRefV4() { | |
306 for (const auto& it : m_ObjectInfo) { | |
307 if (it.second.pos == 0) | |
308 continue; | |
309 // Find the first non-zero position. | |
310 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
311 m_pSyntax->RestorePos(it.second.pos); | |
312 bool is_num = false; | |
313 CFX_ByteString num_str = m_pSyntax->GetNextWord(&is_num); | |
314 m_pSyntax->RestorePos(SavedPos); | |
315 if (!is_num || num_str.IsEmpty() || | |
316 FXSYS_atoui(num_str.c_str()) != it.first) { | |
317 // If the object number read doesn't match the one stored, | |
318 // something is wrong with the cross reference table. | |
319 return false; | |
320 } else { | |
321 return true; | |
322 } | |
323 } | |
324 return true; | |
325 } | |
326 | |
327 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { | |
328 if (!LoadCrossRefV4(xrefpos, 0, TRUE)) | |
329 return FALSE; | |
330 | |
331 m_pTrailer = LoadTrailerV4(); | |
332 if (!m_pTrailer) | |
333 return FALSE; | |
334 | |
335 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); | |
336 if (xrefsize > 0 && xrefsize <= kMaxXRefSize) | |
337 ShrinkObjectMap(xrefsize); | |
338 | |
339 std::vector<FX_FILESIZE> CrossRefList; | |
340 std::vector<FX_FILESIZE> XRefStreamList; | |
341 std::set<FX_FILESIZE> seen_xrefpos; | |
342 | |
343 CrossRefList.push_back(xrefpos); | |
344 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); | |
345 seen_xrefpos.insert(xrefpos); | |
346 | |
347 // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not | |
348 // numerical, GetDirectInteger() returns 0. Loading will end. | |
349 xrefpos = GetDirectInteger(m_pTrailer, "Prev"); | |
350 while (xrefpos) { | |
351 // Check for circular references. | |
352 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
353 return FALSE; | |
354 | |
355 seen_xrefpos.insert(xrefpos); | |
356 | |
357 // SLOW ... | |
358 CrossRefList.insert(CrossRefList.begin(), xrefpos); | |
359 LoadCrossRefV4(xrefpos, 0, TRUE); | |
360 | |
361 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
362 LoadTrailerV4()); | |
363 if (!pDict) | |
364 return FALSE; | |
365 | |
366 xrefpos = GetDirectInteger(pDict.get(), "Prev"); | |
367 | |
368 // SLOW ... | |
369 XRefStreamList.insert(XRefStreamList.begin(), | |
370 pDict->GetIntegerFor("XRefStm")); | |
371 m_Trailers.Add(pDict.release()); | |
372 } | |
373 | |
374 for (size_t i = 0; i < CrossRefList.size(); ++i) { | |
375 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) | |
376 return FALSE; | |
377 if (i == 0 && !VerifyCrossRefV4()) | |
378 return FALSE; | |
379 } | |
380 return TRUE; | |
381 } | |
382 | |
383 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, | |
384 uint32_t dwObjCount) { | |
385 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) | |
386 return FALSE; | |
387 | |
388 m_pTrailer = LoadTrailerV4(); | |
389 if (!m_pTrailer) | |
390 return FALSE; | |
391 | |
392 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); | |
393 if (xrefsize == 0) | |
394 return FALSE; | |
395 | |
396 std::vector<FX_FILESIZE> CrossRefList; | |
397 std::vector<FX_FILESIZE> XRefStreamList; | |
398 std::set<FX_FILESIZE> seen_xrefpos; | |
399 | |
400 CrossRefList.push_back(xrefpos); | |
401 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); | |
402 seen_xrefpos.insert(xrefpos); | |
403 | |
404 xrefpos = GetDirectInteger(m_pTrailer, "Prev"); | |
405 while (xrefpos) { | |
406 // Check for circular references. | |
407 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
408 return FALSE; | |
409 | |
410 seen_xrefpos.insert(xrefpos); | |
411 | |
412 // SLOW ... | |
413 CrossRefList.insert(CrossRefList.begin(), xrefpos); | |
414 LoadCrossRefV4(xrefpos, 0, TRUE); | |
415 | |
416 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
417 LoadTrailerV4()); | |
418 if (!pDict) | |
419 return FALSE; | |
420 | |
421 xrefpos = GetDirectInteger(pDict.get(), "Prev"); | |
422 | |
423 // SLOW ... | |
424 XRefStreamList.insert(XRefStreamList.begin(), | |
425 pDict->GetIntegerFor("XRefStm")); | |
426 m_Trailers.Add(pDict.release()); | |
427 } | |
428 | |
429 for (size_t i = 1; i < CrossRefList.size(); ++i) { | |
430 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) | |
431 return FALSE; | |
432 } | |
433 return TRUE; | |
434 } | |
435 | |
436 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, | |
437 uint32_t dwObjCount) { | |
438 FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset; | |
439 | |
440 m_pSyntax->RestorePos(dwStartPos); | |
441 m_SortedOffset.insert(pos); | |
442 | |
443 uint32_t start_objnum = 0; | |
444 uint32_t count = dwObjCount; | |
445 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
446 | |
447 const int32_t recordsize = 20; | |
448 std::vector<char> buf(1024 * recordsize + 1); | |
449 buf[1024 * recordsize] = '\0'; | |
450 | |
451 int32_t nBlocks = count / 1024 + 1; | |
452 for (int32_t block = 0; block < nBlocks; block++) { | |
453 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; | |
454 uint32_t dwReadSize = block_size * recordsize; | |
455 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen) | |
456 return FALSE; | |
457 | |
458 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), | |
459 dwReadSize)) { | |
460 return FALSE; | |
461 } | |
462 | |
463 for (int32_t i = 0; i < block_size; i++) { | |
464 uint32_t objnum = start_objnum + block * 1024 + i; | |
465 char* pEntry = &buf[i * recordsize]; | |
466 if (pEntry[17] == 'f') { | |
467 m_ObjectInfo[objnum].pos = 0; | |
468 m_ObjectInfo[objnum].type = 0; | |
469 } else { | |
470 int32_t offset = FXSYS_atoi(pEntry); | |
471 if (offset == 0) { | |
472 for (int32_t c = 0; c < 10; c++) { | |
473 if (!std::isdigit(pEntry[c])) | |
474 return FALSE; | |
475 } | |
476 } | |
477 | |
478 m_ObjectInfo[objnum].pos = offset; | |
479 int32_t version = FXSYS_atoi(pEntry + 11); | |
480 if (version >= 1) | |
481 m_bVersionUpdated = true; | |
482 | |
483 m_ObjectInfo[objnum].gennum = version; | |
484 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) | |
485 m_SortedOffset.insert(m_ObjectInfo[objnum].pos); | |
486 | |
487 m_ObjectInfo[objnum].type = 1; | |
488 } | |
489 } | |
490 } | |
491 m_pSyntax->RestorePos(SavedPos + count * recordsize); | |
492 return TRUE; | |
493 } | |
494 | |
495 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, | |
496 FX_FILESIZE streampos, | |
497 FX_BOOL bSkip) { | |
498 m_pSyntax->RestorePos(pos); | |
499 if (m_pSyntax->GetKeyword() != "xref") | |
500 return false; | |
501 | |
502 m_SortedOffset.insert(pos); | |
503 if (streampos) | |
504 m_SortedOffset.insert(streampos); | |
505 | |
506 while (1) { | |
507 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
508 bool bIsNumber; | |
509 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
510 if (word.IsEmpty()) | |
511 return false; | |
512 | |
513 if (!bIsNumber) { | |
514 m_pSyntax->RestorePos(SavedPos); | |
515 break; | |
516 } | |
517 | |
518 uint32_t start_objnum = FXSYS_atoui(word.c_str()); | |
519 if (start_objnum >= kMaxObjectNumber) | |
520 return false; | |
521 | |
522 uint32_t count = m_pSyntax->GetDirectNum(); | |
523 m_pSyntax->ToNextWord(); | |
524 SavedPos = m_pSyntax->SavePos(); | |
525 const int32_t recordsize = 20; | |
526 | |
527 m_dwXrefStartObjNum = start_objnum; | |
528 if (!bSkip) { | |
529 std::vector<char> buf(1024 * recordsize + 1); | |
530 buf[1024 * recordsize] = '\0'; | |
531 | |
532 int32_t nBlocks = count / 1024 + 1; | |
533 for (int32_t block = 0; block < nBlocks; block++) { | |
534 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; | |
535 m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), | |
536 block_size * recordsize); | |
537 | |
538 for (int32_t i = 0; i < block_size; i++) { | |
539 uint32_t objnum = start_objnum + block * 1024 + i; | |
540 char* pEntry = &buf[i * recordsize]; | |
541 if (pEntry[17] == 'f') { | |
542 m_ObjectInfo[objnum].pos = 0; | |
543 m_ObjectInfo[objnum].type = 0; | |
544 } else { | |
545 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry); | |
546 if (offset == 0) { | |
547 for (int32_t c = 0; c < 10; c++) { | |
548 if (!std::isdigit(pEntry[c])) | |
549 return false; | |
550 } | |
551 } | |
552 | |
553 m_ObjectInfo[objnum].pos = offset; | |
554 int32_t version = FXSYS_atoi(pEntry + 11); | |
555 if (version >= 1) | |
556 m_bVersionUpdated = true; | |
557 | |
558 m_ObjectInfo[objnum].gennum = version; | |
559 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) | |
560 m_SortedOffset.insert(m_ObjectInfo[objnum].pos); | |
561 | |
562 m_ObjectInfo[objnum].type = 1; | |
563 } | |
564 } | |
565 } | |
566 } | |
567 m_pSyntax->RestorePos(SavedPos + count * recordsize); | |
568 } | |
569 return !streampos || LoadCrossRefV5(&streampos, FALSE); | |
570 } | |
571 | |
572 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { | |
573 if (!LoadCrossRefV5(&xrefpos, TRUE)) | |
574 return FALSE; | |
575 | |
576 std::set<FX_FILESIZE> seen_xrefpos; | |
577 while (xrefpos) { | |
578 seen_xrefpos.insert(xrefpos); | |
579 if (!LoadCrossRefV5(&xrefpos, FALSE)) | |
580 return FALSE; | |
581 | |
582 // Check for circular references. | |
583 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
584 return FALSE; | |
585 } | |
586 m_ObjectStreamMap.clear(); | |
587 m_bXRefStream = TRUE; | |
588 return TRUE; | |
589 } | |
590 | |
591 FX_BOOL CPDF_Parser::RebuildCrossRef() { | |
592 m_ObjectInfo.clear(); | |
593 m_SortedOffset.clear(); | |
594 if (m_pTrailer) { | |
595 m_pTrailer->Release(); | |
596 m_pTrailer = nullptr; | |
597 } | |
598 | |
599 ParserState state = ParserState::kDefault; | |
600 | |
601 int32_t inside_index = 0; | |
602 uint32_t objnum = 0; | |
603 uint32_t gennum = 0; | |
604 int32_t depth = 0; | |
605 | |
606 const uint32_t kBufferSize = 4096; | |
607 std::vector<uint8_t> buffer(kBufferSize); | |
608 | |
609 FX_FILESIZE pos = m_pSyntax->m_HeaderOffset; | |
610 FX_FILESIZE start_pos = 0; | |
611 FX_FILESIZE start_pos1 = 0; | |
612 FX_FILESIZE last_obj = -1; | |
613 FX_FILESIZE last_xref = -1; | |
614 FX_FILESIZE last_trailer = -1; | |
615 | |
616 while (pos < m_pSyntax->m_FileLen) { | |
617 const FX_FILESIZE saved_pos = pos; | |
618 bool bOverFlow = false; | |
619 uint32_t size = | |
620 std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize); | |
621 if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size)) | |
622 break; | |
623 | |
624 for (uint32_t i = 0; i < size; i++) { | |
625 uint8_t byte = buffer[i]; | |
626 switch (state) { | |
627 case ParserState::kDefault: | |
628 if (PDFCharIsWhitespace(byte)) { | |
629 state = ParserState::kWhitespace; | |
630 } else if (std::isdigit(byte)) { | |
631 --i; | |
632 state = ParserState::kWhitespace; | |
633 } else if (byte == '%') { | |
634 inside_index = 0; | |
635 state = ParserState::kComment; | |
636 } else if (byte == '(') { | |
637 state = ParserState::kString; | |
638 depth = 1; | |
639 } else if (byte == '<') { | |
640 inside_index = 1; | |
641 state = ParserState::kHexString; | |
642 } else if (byte == '\\') { | |
643 state = ParserState::kEscapedString; | |
644 } else if (byte == 't') { | |
645 state = ParserState::kTrailer; | |
646 inside_index = 1; | |
647 } | |
648 break; | |
649 | |
650 case ParserState::kWhitespace: | |
651 if (std::isdigit(byte)) { | |
652 start_pos = pos + i; | |
653 state = ParserState::kObjNum; | |
654 objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
655 } else if (byte == 't') { | |
656 state = ParserState::kTrailer; | |
657 inside_index = 1; | |
658 } else if (byte == 'x') { | |
659 state = ParserState::kXref; | |
660 inside_index = 1; | |
661 } else if (!PDFCharIsWhitespace(byte)) { | |
662 --i; | |
663 state = ParserState::kDefault; | |
664 } | |
665 break; | |
666 | |
667 case ParserState::kObjNum: | |
668 if (std::isdigit(byte)) { | |
669 objnum = | |
670 objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
671 } else if (PDFCharIsWhitespace(byte)) { | |
672 state = ParserState::kPostObjNum; | |
673 } else { | |
674 --i; | |
675 state = ParserState::kEndObj; | |
676 inside_index = 0; | |
677 } | |
678 break; | |
679 | |
680 case ParserState::kPostObjNum: | |
681 if (std::isdigit(byte)) { | |
682 start_pos1 = pos + i; | |
683 state = ParserState::kGenNum; | |
684 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
685 } else if (byte == 't') { | |
686 state = ParserState::kTrailer; | |
687 inside_index = 1; | |
688 } else if (!PDFCharIsWhitespace(byte)) { | |
689 --i; | |
690 state = ParserState::kDefault; | |
691 } | |
692 break; | |
693 | |
694 case ParserState::kGenNum: | |
695 if (std::isdigit(byte)) { | |
696 gennum = | |
697 gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
698 } else if (PDFCharIsWhitespace(byte)) { | |
699 state = ParserState::kPostGenNum; | |
700 } else { | |
701 --i; | |
702 state = ParserState::kDefault; | |
703 } | |
704 break; | |
705 | |
706 case ParserState::kPostGenNum: | |
707 if (byte == 'o') { | |
708 state = ParserState::kBeginObj; | |
709 inside_index = 1; | |
710 } else if (std::isdigit(byte)) { | |
711 objnum = gennum; | |
712 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
713 start_pos = start_pos1; | |
714 start_pos1 = pos + i; | |
715 state = ParserState::kGenNum; | |
716 } else if (byte == 't') { | |
717 state = ParserState::kTrailer; | |
718 inside_index = 1; | |
719 } else if (!PDFCharIsWhitespace(byte)) { | |
720 --i; | |
721 state = ParserState::kDefault; | |
722 } | |
723 break; | |
724 | |
725 case ParserState::kBeginObj: | |
726 switch (inside_index) { | |
727 case 1: | |
728 if (byte != 'b') { | |
729 --i; | |
730 state = ParserState::kDefault; | |
731 } else { | |
732 inside_index++; | |
733 } | |
734 break; | |
735 case 2: | |
736 if (byte != 'j') { | |
737 --i; | |
738 state = ParserState::kDefault; | |
739 } else { | |
740 inside_index++; | |
741 } | |
742 break; | |
743 case 3: | |
744 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | |
745 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset; | |
746 m_SortedOffset.insert(obj_pos); | |
747 last_obj = start_pos; | |
748 FX_FILESIZE obj_end = 0; | |
749 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( | |
750 m_pDocument, obj_pos, objnum, &obj_end); | |
751 if (CPDF_Stream* pStream = ToStream(pObject)) { | |
752 if (CPDF_Dictionary* pDict = pStream->GetDict()) { | |
753 if ((pDict->KeyExist("Type")) && | |
754 (pDict->GetStringFor("Type") == "XRef" && | |
755 pDict->KeyExist("Size"))) { | |
756 CPDF_Object* pRoot = pDict->GetObjectFor("Root"); | |
757 if (pRoot && pRoot->GetDict() && | |
758 pRoot->GetDict()->GetObjectFor("Pages")) { | |
759 if (m_pTrailer) | |
760 m_pTrailer->Release(); | |
761 m_pTrailer = ToDictionary(pDict->Clone()); | |
762 } | |
763 } | |
764 } | |
765 } | |
766 | |
767 FX_FILESIZE offset = 0; | |
768 m_pSyntax->RestorePos(obj_pos); | |
769 offset = m_pSyntax->FindTag("obj", 0); | |
770 if (offset == -1) | |
771 offset = 0; | |
772 else | |
773 offset += 3; | |
774 | |
775 FX_FILESIZE nLen = obj_end - obj_pos - offset; | |
776 if ((uint32_t)nLen > size - i) { | |
777 pos = obj_end + m_pSyntax->m_HeaderOffset; | |
778 bOverFlow = true; | |
779 } else { | |
780 i += (uint32_t)nLen; | |
781 } | |
782 | |
783 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) && | |
784 m_ObjectInfo[objnum].pos) { | |
785 if (pObject) { | |
786 uint32_t oldgen = GetObjectGenNum(objnum); | |
787 m_ObjectInfo[objnum].pos = obj_pos; | |
788 m_ObjectInfo[objnum].gennum = gennum; | |
789 if (oldgen != gennum) | |
790 m_bVersionUpdated = true; | |
791 } | |
792 } else { | |
793 m_ObjectInfo[objnum].pos = obj_pos; | |
794 m_ObjectInfo[objnum].type = 1; | |
795 m_ObjectInfo[objnum].gennum = gennum; | |
796 } | |
797 | |
798 if (pObject) | |
799 pObject->Release(); | |
800 } | |
801 --i; | |
802 state = ParserState::kDefault; | |
803 break; | |
804 } | |
805 break; | |
806 | |
807 case ParserState::kTrailer: | |
808 if (inside_index == 7) { | |
809 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | |
810 last_trailer = pos + i - 7; | |
811 m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset); | |
812 | |
813 CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true); | |
814 if (pObj) { | |
815 if (!pObj->IsDictionary() && !pObj->AsStream()) { | |
816 pObj->Release(); | |
817 } else { | |
818 CPDF_Stream* pStream = pObj->AsStream(); | |
819 if (CPDF_Dictionary* pTrailer = | |
820 pStream ? pStream->GetDict() : pObj->AsDictionary()) { | |
821 if (m_pTrailer) { | |
822 CPDF_Object* pRoot = pTrailer->GetObjectFor("Root"); | |
823 CPDF_Reference* pRef = ToReference(pRoot); | |
824 if (!pRoot || | |
825 (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) && | |
826 m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) { | |
827 auto it = pTrailer->begin(); | |
828 while (it != pTrailer->end()) { | |
829 const CFX_ByteString& key = it->first; | |
830 CPDF_Object* pElement = it->second; | |
831 ++it; | |
832 uint32_t dwObjNum = | |
833 pElement ? pElement->GetObjNum() : 0; | |
834 if (dwObjNum) { | |
835 m_pTrailer->SetReferenceFor(key, m_pDocument, | |
836 dwObjNum); | |
837 } else { | |
838 m_pTrailer->SetFor(key, pElement->Clone()); | |
839 } | |
840 } | |
841 } | |
842 pObj->Release(); | |
843 } else { | |
844 if (pObj->IsStream()) { | |
845 m_pTrailer = ToDictionary(pTrailer->Clone()); | |
846 pObj->Release(); | |
847 } else { | |
848 m_pTrailer = pTrailer; | |
849 } | |
850 | |
851 FX_FILESIZE dwSavePos = m_pSyntax->SavePos(); | |
852 CFX_ByteString strWord = m_pSyntax->GetKeyword(); | |
853 if (!strWord.Compare("startxref")) { | |
854 bool bNumber; | |
855 CFX_ByteString bsOffset = | |
856 m_pSyntax->GetNextWord(&bNumber); | |
857 if (bNumber) | |
858 m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str()); | |
859 } | |
860 m_pSyntax->RestorePos(dwSavePos); | |
861 } | |
862 } else { | |
863 pObj->Release(); | |
864 } | |
865 } | |
866 } | |
867 } | |
868 --i; | |
869 state = ParserState::kDefault; | |
870 } else if (byte == "trailer"[inside_index]) { | |
871 inside_index++; | |
872 } else { | |
873 --i; | |
874 state = ParserState::kDefault; | |
875 } | |
876 break; | |
877 | |
878 case ParserState::kXref: | |
879 if (inside_index == 4) { | |
880 last_xref = pos + i - 4; | |
881 state = ParserState::kWhitespace; | |
882 } else if (byte == "xref"[inside_index]) { | |
883 inside_index++; | |
884 } else { | |
885 --i; | |
886 state = ParserState::kDefault; | |
887 } | |
888 break; | |
889 | |
890 case ParserState::kComment: | |
891 if (PDFCharIsLineEnding(byte)) | |
892 state = ParserState::kDefault; | |
893 break; | |
894 | |
895 case ParserState::kString: | |
896 if (byte == ')') { | |
897 if (depth > 0) | |
898 depth--; | |
899 } else if (byte == '(') { | |
900 depth++; | |
901 } | |
902 | |
903 if (!depth) | |
904 state = ParserState::kDefault; | |
905 break; | |
906 | |
907 case ParserState::kHexString: | |
908 if (byte == '>' || (byte == '<' && inside_index == 1)) | |
909 state = ParserState::kDefault; | |
910 inside_index = 0; | |
911 break; | |
912 | |
913 case ParserState::kEscapedString: | |
914 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { | |
915 --i; | |
916 state = ParserState::kDefault; | |
917 } | |
918 break; | |
919 | |
920 case ParserState::kEndObj: | |
921 if (PDFCharIsWhitespace(byte)) { | |
922 state = ParserState::kDefault; | |
923 } else if (byte == '%' || byte == '(' || byte == '<' || | |
924 byte == '\\') { | |
925 state = ParserState::kDefault; | |
926 --i; | |
927 } else if (inside_index == 6) { | |
928 state = ParserState::kDefault; | |
929 --i; | |
930 } else if (byte == "endobj"[inside_index]) { | |
931 inside_index++; | |
932 } | |
933 break; | |
934 } | |
935 | |
936 if (bOverFlow) { | |
937 size = 0; | |
938 break; | |
939 } | |
940 } | |
941 pos += size; | |
942 | |
943 // If the position has not changed at all or went backwards in a loop | |
944 // iteration, then break out to prevent infinite looping. | |
945 if (pos <= saved_pos) | |
946 break; | |
947 } | |
948 | |
949 if (last_xref != -1 && last_xref > last_obj) | |
950 last_trailer = last_xref; | |
951 else if (last_trailer == -1 || last_xref < last_obj) | |
952 last_trailer = m_pSyntax->m_FileLen; | |
953 | |
954 m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset); | |
955 return m_pTrailer && !m_ObjectInfo.empty(); | |
956 } | |
957 | |
958 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) { | |
959 CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0); | |
960 if (!pObject) | |
961 return FALSE; | |
962 | |
963 if (m_pDocument) { | |
964 CPDF_Dictionary* pRootDict = m_pDocument->GetRoot(); | |
965 if (pRootDict && pRootDict->GetObjNum() == pObject->m_ObjNum) { | |
966 // If |pObject| has an objnum assigned then this will leak as Release() | |
967 // will early exit. | |
968 if (pObject->IsStream()) | |
969 pObject->Release(); | |
970 return FALSE; | |
971 } | |
972 if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration(pObject->m_ObjNum, | |
973 pObject)) { | |
974 return FALSE; | |
975 } | |
976 } | |
977 | |
978 CPDF_Stream* pStream = pObject->AsStream(); | |
979 if (!pStream) | |
980 return FALSE; | |
981 | |
982 CPDF_Dictionary* pDict = pStream->GetDict(); | |
983 *pos = pDict->GetIntegerFor("Prev"); | |
984 int32_t size = pDict->GetIntegerFor("Size"); | |
985 if (size < 0) { | |
986 pStream->Release(); | |
987 return FALSE; | |
988 } | |
989 | |
990 CPDF_Dictionary* pNewTrailer = ToDictionary(pDict->Clone()); | |
991 if (bMainXRef) { | |
992 m_pTrailer = pNewTrailer; | |
993 ShrinkObjectMap(size); | |
994 for (auto& it : m_ObjectInfo) | |
995 it.second.type = 0; | |
996 } else { | |
997 m_Trailers.Add(pNewTrailer); | |
998 } | |
999 | |
1000 std::vector<std::pair<int32_t, int32_t>> arrIndex; | |
1001 CPDF_Array* pArray = pDict->GetArrayFor("Index"); | |
1002 if (pArray) { | |
1003 for (size_t i = 0; i < pArray->GetCount() / 2; i++) { | |
1004 CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2); | |
1005 CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1); | |
1006 | |
1007 if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) { | |
1008 int nStartNum = pStartNumObj->GetInteger(); | |
1009 int nCount = pCountObj->GetInteger(); | |
1010 if (nStartNum >= 0 && nCount > 0) | |
1011 arrIndex.push_back(std::make_pair(nStartNum, nCount)); | |
1012 } | |
1013 } | |
1014 } | |
1015 | |
1016 if (arrIndex.size() == 0) | |
1017 arrIndex.push_back(std::make_pair(0, size)); | |
1018 | |
1019 pArray = pDict->GetArrayFor("W"); | |
1020 if (!pArray) { | |
1021 pStream->Release(); | |
1022 return FALSE; | |
1023 } | |
1024 | |
1025 CFX_ArrayTemplate<uint32_t> WidthArray; | |
1026 FX_SAFE_UINT32 dwAccWidth = 0; | |
1027 for (size_t i = 0; i < pArray->GetCount(); ++i) { | |
1028 WidthArray.Add(pArray->GetIntegerAt(i)); | |
1029 dwAccWidth += WidthArray[i]; | |
1030 } | |
1031 | |
1032 if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) { | |
1033 pStream->Release(); | |
1034 return FALSE; | |
1035 } | |
1036 | |
1037 uint32_t totalWidth = dwAccWidth.ValueOrDie(); | |
1038 CPDF_StreamAcc acc; | |
1039 acc.LoadAllData(pStream); | |
1040 | |
1041 const uint8_t* pData = acc.GetData(); | |
1042 uint32_t dwTotalSize = acc.GetSize(); | |
1043 uint32_t segindex = 0; | |
1044 for (uint32_t i = 0; i < arrIndex.size(); i++) { | |
1045 int32_t startnum = arrIndex[i].first; | |
1046 if (startnum < 0) | |
1047 continue; | |
1048 | |
1049 m_dwXrefStartObjNum = | |
1050 pdfium::base::checked_cast<uint32_t, int32_t>(startnum); | |
1051 uint32_t count = | |
1052 pdfium::base::checked_cast<uint32_t, int32_t>(arrIndex[i].second); | |
1053 FX_SAFE_UINT32 dwCaculatedSize = segindex; | |
1054 dwCaculatedSize += count; | |
1055 dwCaculatedSize *= totalWidth; | |
1056 if (!dwCaculatedSize.IsValid() || | |
1057 dwCaculatedSize.ValueOrDie() > dwTotalSize) { | |
1058 continue; | |
1059 } | |
1060 | |
1061 const uint8_t* segstart = pData + segindex * totalWidth; | |
1062 FX_SAFE_UINT32 dwMaxObjNum = startnum; | |
1063 dwMaxObjNum += count; | |
1064 uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1; | |
1065 if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) | |
1066 continue; | |
1067 | |
1068 for (uint32_t j = 0; j < count; j++) { | |
1069 int32_t type = 1; | |
1070 const uint8_t* entrystart = segstart + j * totalWidth; | |
1071 if (WidthArray[0]) | |
1072 type = GetVarInt(entrystart, WidthArray[0]); | |
1073 | |
1074 if (GetObjectType(startnum + j) == 255) { | |
1075 FX_FILESIZE offset = | |
1076 GetVarInt(entrystart + WidthArray[0], WidthArray[1]); | |
1077 m_ObjectInfo[startnum + j].pos = offset; | |
1078 m_SortedOffset.insert(offset); | |
1079 continue; | |
1080 } | |
1081 | |
1082 if (GetObjectType(startnum + j)) | |
1083 continue; | |
1084 | |
1085 m_ObjectInfo[startnum + j].type = type; | |
1086 if (type == 0) { | |
1087 m_ObjectInfo[startnum + j].pos = 0; | |
1088 } else { | |
1089 FX_FILESIZE offset = | |
1090 GetVarInt(entrystart + WidthArray[0], WidthArray[1]); | |
1091 m_ObjectInfo[startnum + j].pos = offset; | |
1092 if (type == 1) { | |
1093 m_SortedOffset.insert(offset); | |
1094 } else { | |
1095 if (offset < 0 || !IsValidObjectNumber(offset)) { | |
1096 pStream->Release(); | |
1097 return FALSE; | |
1098 } | |
1099 m_ObjectInfo[offset].type = 255; | |
1100 } | |
1101 } | |
1102 } | |
1103 segindex += count; | |
1104 } | |
1105 pStream->Release(); | |
1106 return TRUE; | |
1107 } | |
1108 | |
1109 CPDF_Array* CPDF_Parser::GetIDArray() { | |
1110 CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetObjectFor("ID") : nullptr; | |
1111 if (!pID) | |
1112 return nullptr; | |
1113 | |
1114 if (CPDF_Reference* pRef = pID->AsReference()) { | |
1115 pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum()); | |
1116 m_pTrailer->SetFor("ID", pID); | |
1117 } | |
1118 return ToArray(pID); | |
1119 } | |
1120 | |
1121 uint32_t CPDF_Parser::GetRootObjNum() { | |
1122 CPDF_Reference* pRef = | |
1123 ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Root") : nullptr); | |
1124 return pRef ? pRef->GetRefObjNum() : 0; | |
1125 } | |
1126 | |
1127 uint32_t CPDF_Parser::GetInfoObjNum() { | |
1128 CPDF_Reference* pRef = | |
1129 ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Info") : nullptr); | |
1130 return pRef ? pRef->GetRefObjNum() : 0; | |
1131 } | |
1132 | |
1133 CPDF_Object* CPDF_Parser::ParseIndirectObject( | |
1134 CPDF_IndirectObjectHolder* pObjList, | |
1135 uint32_t objnum) { | |
1136 if (!IsValidObjectNumber(objnum)) | |
1137 return nullptr; | |
1138 | |
1139 // Prevent circular parsing the same object. | |
1140 if (pdfium::ContainsKey(m_ParsingObjNums, objnum)) | |
1141 return nullptr; | |
1142 | |
1143 pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum); | |
1144 if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) { | |
1145 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; | |
1146 if (pos <= 0) | |
1147 return nullptr; | |
1148 return ParseIndirectObjectAt(pObjList, pos, objnum); | |
1149 } | |
1150 if (GetObjectType(objnum) != 2) | |
1151 return nullptr; | |
1152 | |
1153 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); | |
1154 if (!pObjStream) | |
1155 return nullptr; | |
1156 | |
1157 ScopedFileStream file(FX_CreateMemoryStream( | |
1158 (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE)); | |
1159 CPDF_SyntaxParser syntax; | |
1160 syntax.InitParser(file.get(), 0); | |
1161 const int32_t offset = GetStreamFirst(pObjStream); | |
1162 | |
1163 // Read object numbers from |pObjStream| into a cache. | |
1164 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) { | |
1165 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { | |
1166 uint32_t thisnum = syntax.GetDirectNum(); | |
1167 uint32_t thisoff = syntax.GetDirectNum(); | |
1168 m_ObjCache[pObjStream][thisnum] = thisoff; | |
1169 } | |
1170 } | |
1171 | |
1172 const auto it = m_ObjCache[pObjStream].find(objnum); | |
1173 if (it == m_ObjCache[pObjStream].end()) | |
1174 return nullptr; | |
1175 | |
1176 syntax.RestorePos(offset + it->second); | |
1177 return syntax.GetObject(pObjList, 0, 0, true); | |
1178 } | |
1179 | |
1180 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(uint32_t objnum) { | |
1181 auto it = m_ObjectStreamMap.find(objnum); | |
1182 if (it != m_ObjectStreamMap.end()) | |
1183 return it->second.get(); | |
1184 | |
1185 if (!m_pDocument) | |
1186 return nullptr; | |
1187 | |
1188 const CPDF_Stream* pStream = | |
1189 ToStream(m_pDocument->GetOrParseIndirectObject(objnum)); | |
1190 if (!pStream) | |
1191 return nullptr; | |
1192 | |
1193 CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc; | |
1194 pStreamAcc->LoadAllData(pStream); | |
1195 m_ObjectStreamMap[objnum].reset(pStreamAcc); | |
1196 return pStreamAcc; | |
1197 } | |
1198 | |
1199 FX_FILESIZE CPDF_Parser::GetObjectSize(uint32_t objnum) const { | |
1200 if (!IsValidObjectNumber(objnum)) | |
1201 return 0; | |
1202 | |
1203 if (GetObjectType(objnum) == 2) | |
1204 objnum = GetObjectPositionOrZero(objnum); | |
1205 | |
1206 if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255) | |
1207 return 0; | |
1208 | |
1209 FX_FILESIZE offset = GetObjectPositionOrZero(objnum); | |
1210 if (offset == 0) | |
1211 return 0; | |
1212 | |
1213 auto it = m_SortedOffset.find(offset); | |
1214 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) | |
1215 return 0; | |
1216 | |
1217 return *it - offset; | |
1218 } | |
1219 | |
1220 void CPDF_Parser::GetIndirectBinary(uint32_t objnum, | |
1221 uint8_t*& pBuffer, | |
1222 uint32_t& size) { | |
1223 pBuffer = nullptr; | |
1224 size = 0; | |
1225 if (!IsValidObjectNumber(objnum)) | |
1226 return; | |
1227 | |
1228 if (GetObjectType(objnum) == 2) { | |
1229 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); | |
1230 if (!pObjStream) | |
1231 return; | |
1232 | |
1233 int32_t offset = GetStreamFirst(pObjStream); | |
1234 const uint8_t* pData = pObjStream->GetData(); | |
1235 uint32_t totalsize = pObjStream->GetSize(); | |
1236 ScopedFileStream file( | |
1237 FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE)); | |
1238 | |
1239 CPDF_SyntaxParser syntax; | |
1240 syntax.InitParser(file.get(), 0); | |
1241 for (int i = GetStreamNCount(pObjStream); i > 0; --i) { | |
1242 uint32_t thisnum = syntax.GetDirectNum(); | |
1243 uint32_t thisoff = syntax.GetDirectNum(); | |
1244 if (thisnum != objnum) | |
1245 continue; | |
1246 | |
1247 if (i == 1) { | |
1248 size = totalsize - (thisoff + offset); | |
1249 } else { | |
1250 syntax.GetDirectNum(); // Skip nextnum. | |
1251 uint32_t nextoff = syntax.GetDirectNum(); | |
1252 size = nextoff - thisoff; | |
1253 } | |
1254 | |
1255 pBuffer = FX_Alloc(uint8_t, size); | |
1256 FXSYS_memcpy(pBuffer, pData + thisoff + offset, size); | |
1257 return; | |
1258 } | |
1259 return; | |
1260 } | |
1261 | |
1262 if (GetObjectType(objnum) != 1) | |
1263 return; | |
1264 | |
1265 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; | |
1266 if (pos == 0) | |
1267 return; | |
1268 | |
1269 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
1270 m_pSyntax->RestorePos(pos); | |
1271 | |
1272 bool bIsNumber; | |
1273 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
1274 if (!bIsNumber) { | |
1275 m_pSyntax->RestorePos(SavedPos); | |
1276 return; | |
1277 } | |
1278 | |
1279 uint32_t parser_objnum = FXSYS_atoui(word.c_str()); | |
1280 if (parser_objnum && parser_objnum != objnum) { | |
1281 m_pSyntax->RestorePos(SavedPos); | |
1282 return; | |
1283 } | |
1284 | |
1285 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1286 if (!bIsNumber) { | |
1287 m_pSyntax->RestorePos(SavedPos); | |
1288 return; | |
1289 } | |
1290 | |
1291 if (m_pSyntax->GetKeyword() != "obj") { | |
1292 m_pSyntax->RestorePos(SavedPos); | |
1293 return; | |
1294 } | |
1295 | |
1296 auto it = m_SortedOffset.find(pos); | |
1297 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) { | |
1298 m_pSyntax->RestorePos(SavedPos); | |
1299 return; | |
1300 } | |
1301 | |
1302 FX_FILESIZE nextoff = *it; | |
1303 FX_BOOL bNextOffValid = FALSE; | |
1304 if (nextoff != pos) { | |
1305 m_pSyntax->RestorePos(nextoff); | |
1306 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1307 if (word == "xref") { | |
1308 bNextOffValid = TRUE; | |
1309 } else if (bIsNumber) { | |
1310 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1311 if (bIsNumber && m_pSyntax->GetKeyword() == "obj") { | |
1312 bNextOffValid = TRUE; | |
1313 } | |
1314 } | |
1315 } | |
1316 | |
1317 if (!bNextOffValid) { | |
1318 m_pSyntax->RestorePos(pos); | |
1319 while (1) { | |
1320 if (m_pSyntax->GetKeyword() == "endobj") | |
1321 break; | |
1322 | |
1323 if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen) | |
1324 break; | |
1325 } | |
1326 nextoff = m_pSyntax->SavePos(); | |
1327 } | |
1328 | |
1329 size = (uint32_t)(nextoff - pos); | |
1330 pBuffer = FX_Alloc(uint8_t, size); | |
1331 m_pSyntax->RestorePos(pos); | |
1332 m_pSyntax->ReadBlock(pBuffer, size); | |
1333 m_pSyntax->RestorePos(SavedPos); | |
1334 } | |
1335 | |
1336 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( | |
1337 CPDF_IndirectObjectHolder* pObjList, | |
1338 FX_FILESIZE pos, | |
1339 uint32_t objnum) { | |
1340 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
1341 m_pSyntax->RestorePos(pos); | |
1342 bool bIsNumber; | |
1343 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
1344 if (!bIsNumber) { | |
1345 m_pSyntax->RestorePos(SavedPos); | |
1346 return nullptr; | |
1347 } | |
1348 | |
1349 FX_FILESIZE objOffset = m_pSyntax->SavePos(); | |
1350 objOffset -= word.GetLength(); | |
1351 uint32_t parser_objnum = FXSYS_atoui(word.c_str()); | |
1352 if (objnum && parser_objnum != objnum) { | |
1353 m_pSyntax->RestorePos(SavedPos); | |
1354 return nullptr; | |
1355 } | |
1356 | |
1357 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1358 if (!bIsNumber) { | |
1359 m_pSyntax->RestorePos(SavedPos); | |
1360 return nullptr; | |
1361 } | |
1362 | |
1363 uint32_t parser_gennum = FXSYS_atoui(word.c_str()); | |
1364 if (m_pSyntax->GetKeyword() != "obj") { | |
1365 m_pSyntax->RestorePos(SavedPos); | |
1366 return nullptr; | |
1367 } | |
1368 | |
1369 CPDF_Object* pObj = | |
1370 m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true); | |
1371 m_pSyntax->SavePos(); | |
1372 | |
1373 CFX_ByteString bsWord = m_pSyntax->GetKeyword(); | |
1374 if (bsWord == "endobj") | |
1375 m_pSyntax->SavePos(); | |
1376 | |
1377 m_pSyntax->RestorePos(SavedPos); | |
1378 if (pObj) { | |
1379 if (!objnum) | |
1380 pObj->m_ObjNum = parser_objnum; | |
1381 pObj->m_GenNum = parser_gennum; | |
1382 } | |
1383 return pObj; | |
1384 } | |
1385 | |
1386 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( | |
1387 CPDF_IndirectObjectHolder* pObjList, | |
1388 FX_FILESIZE pos, | |
1389 uint32_t objnum, | |
1390 FX_FILESIZE* pResultPos) { | |
1391 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
1392 m_pSyntax->RestorePos(pos); | |
1393 | |
1394 bool bIsNumber; | |
1395 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
1396 if (!bIsNumber) { | |
1397 m_pSyntax->RestorePos(SavedPos); | |
1398 return nullptr; | |
1399 } | |
1400 | |
1401 uint32_t parser_objnum = FXSYS_atoui(word.c_str()); | |
1402 if (objnum && parser_objnum != objnum) { | |
1403 m_pSyntax->RestorePos(SavedPos); | |
1404 return nullptr; | |
1405 } | |
1406 | |
1407 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1408 if (!bIsNumber) { | |
1409 m_pSyntax->RestorePos(SavedPos); | |
1410 return nullptr; | |
1411 } | |
1412 | |
1413 uint32_t gennum = FXSYS_atoui(word.c_str()); | |
1414 if (m_pSyntax->GetKeyword() != "obj") { | |
1415 m_pSyntax->RestorePos(SavedPos); | |
1416 return nullptr; | |
1417 } | |
1418 | |
1419 CPDF_Object* pObj = m_pSyntax->GetObjectForStrict(pObjList, objnum, gennum); | |
1420 if (pResultPos) | |
1421 *pResultPos = m_pSyntax->m_Pos; | |
1422 | |
1423 m_pSyntax->RestorePos(SavedPos); | |
1424 return pObj; | |
1425 } | |
1426 | |
1427 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() { | |
1428 if (m_pSyntax->GetKeyword() != "trailer") | |
1429 return nullptr; | |
1430 | |
1431 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj( | |
1432 m_pSyntax->GetObject(m_pDocument, 0, 0, true)); | |
1433 if (!ToDictionary(pObj.get())) | |
1434 return nullptr; | |
1435 return pObj.release()->AsDictionary(); | |
1436 } | |
1437 | |
1438 uint32_t CPDF_Parser::GetPermissions() const { | |
1439 if (!m_pSecurityHandler) | |
1440 return 0xFFFFFFFF; | |
1441 | |
1442 uint32_t dwPermission = m_pSecurityHandler->GetPermissions(); | |
1443 if (m_pEncryptDict && m_pEncryptDict->GetStringFor("Filter") == "Standard") { | |
1444 // See PDF Reference 1.7, page 123, table 3.20. | |
1445 dwPermission &= 0xFFFFFFFC; | |
1446 dwPermission |= 0xFFFFF0C0; | |
1447 } | |
1448 return dwPermission; | |
1449 } | |
1450 | |
1451 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, | |
1452 uint32_t offset) { | |
1453 m_pSyntax->InitParser(pFileAccess, offset); | |
1454 m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9); | |
1455 | |
1456 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
1457 bool bIsNumber; | |
1458 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
1459 if (!bIsNumber) | |
1460 return FALSE; | |
1461 | |
1462 uint32_t objnum = FXSYS_atoui(word.c_str()); | |
1463 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1464 if (!bIsNumber) | |
1465 return FALSE; | |
1466 | |
1467 uint32_t gennum = FXSYS_atoui(word.c_str()); | |
1468 if (m_pSyntax->GetKeyword() != "obj") { | |
1469 m_pSyntax->RestorePos(SavedPos); | |
1470 return FALSE; | |
1471 } | |
1472 | |
1473 m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true); | |
1474 if (!m_pLinearized) | |
1475 return FALSE; | |
1476 | |
1477 CPDF_Dictionary* pDict = m_pLinearized->GetDict(); | |
1478 if (pDict && pDict->GetObjectFor("Linearized")) { | |
1479 m_pSyntax->GetNextWord(nullptr); | |
1480 | |
1481 CPDF_Object* pLen = pDict->GetObjectFor("L"); | |
1482 if (!pLen) { | |
1483 m_pLinearized->Release(); | |
1484 m_pLinearized = nullptr; | |
1485 return FALSE; | |
1486 } | |
1487 | |
1488 if (pLen->GetInteger() != (int)pFileAccess->GetSize()) | |
1489 return FALSE; | |
1490 | |
1491 if (CPDF_Number* pNo = ToNumber(pDict->GetObjectFor("P"))) | |
1492 m_dwFirstPageNo = pNo->GetInteger(); | |
1493 | |
1494 if (CPDF_Number* pTable = ToNumber(pDict->GetObjectFor("T"))) | |
1495 m_LastXRefOffset = pTable->GetInteger(); | |
1496 | |
1497 return TRUE; | |
1498 } | |
1499 m_pLinearized->Release(); | |
1500 m_pLinearized = nullptr; | |
1501 return FALSE; | |
1502 } | |
1503 | |
1504 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(IFX_FileRead* pFileAccess, | |
1505 CPDF_Document* pDocument) { | |
1506 ASSERT(!m_bHasParsed); | |
1507 | |
1508 m_bXRefStream = FALSE; | |
1509 m_LastXRefOffset = 0; | |
1510 m_bOwnFileRead = true; | |
1511 | |
1512 int32_t offset = GetHeaderOffset(pFileAccess); | |
1513 if (offset == -1) | |
1514 return FORMAT_ERROR; | |
1515 | |
1516 if (!IsLinearizedFile(pFileAccess, offset)) { | |
1517 m_pSyntax->m_pFileAccess = nullptr; | |
1518 return StartParse(pFileAccess, std::move(pDocument)); | |
1519 } | |
1520 m_bHasParsed = true; | |
1521 m_pDocument = pDocument; | |
1522 | |
1523 FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos(); | |
1524 | |
1525 FX_BOOL bXRefRebuilt = FALSE; | |
1526 FX_BOOL bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE); | |
1527 if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) { | |
1528 if (!RebuildCrossRef()) | |
1529 return FORMAT_ERROR; | |
1530 | |
1531 bXRefRebuilt = TRUE; | |
1532 m_LastXRefOffset = 0; | |
1533 } | |
1534 | |
1535 if (bLoadV4) { | |
1536 m_pTrailer = LoadTrailerV4(); | |
1537 if (!m_pTrailer) | |
1538 return SUCCESS; | |
1539 | |
1540 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); | |
1541 if (xrefsize > 0) | |
1542 ShrinkObjectMap(xrefsize); | |
1543 } | |
1544 | |
1545 Error eRet = SetEncryptHandler(); | |
1546 if (eRet != SUCCESS) | |
1547 return eRet; | |
1548 | |
1549 m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict()); | |
1550 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { | |
1551 if (bXRefRebuilt) | |
1552 return FORMAT_ERROR; | |
1553 | |
1554 ReleaseEncryptHandler(); | |
1555 if (!RebuildCrossRef()) | |
1556 return FORMAT_ERROR; | |
1557 | |
1558 eRet = SetEncryptHandler(); | |
1559 if (eRet != SUCCESS) | |
1560 return eRet; | |
1561 | |
1562 m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict()); | |
1563 if (!m_pDocument->GetRoot()) | |
1564 return FORMAT_ERROR; | |
1565 } | |
1566 | |
1567 if (GetRootObjNum() == 0) { | |
1568 ReleaseEncryptHandler(); | |
1569 if (!RebuildCrossRef() || GetRootObjNum() == 0) | |
1570 return FORMAT_ERROR; | |
1571 | |
1572 eRet = SetEncryptHandler(); | |
1573 if (eRet != SUCCESS) | |
1574 return eRet; | |
1575 } | |
1576 | |
1577 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { | |
1578 if (CPDF_Reference* pMetadata = | |
1579 ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata"))) | |
1580 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); | |
1581 } | |
1582 return SUCCESS; | |
1583 } | |
1584 | |
1585 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { | |
1586 if (!LoadCrossRefV5(&xrefpos, FALSE)) | |
1587 return FALSE; | |
1588 | |
1589 std::set<FX_FILESIZE> seen_xrefpos; | |
1590 while (xrefpos) { | |
1591 seen_xrefpos.insert(xrefpos); | |
1592 if (!LoadCrossRefV5(&xrefpos, FALSE)) | |
1593 return FALSE; | |
1594 | |
1595 // Check for circular references. | |
1596 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
1597 return FALSE; | |
1598 } | |
1599 m_ObjectStreamMap.clear(); | |
1600 m_bXRefStream = TRUE; | |
1601 return TRUE; | |
1602 } | |
1603 | |
1604 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { | |
1605 uint32_t dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum; | |
1606 m_pSyntax->m_MetadataObjnum = 0; | |
1607 if (m_pTrailer) { | |
1608 m_pTrailer->Release(); | |
1609 m_pTrailer = nullptr; | |
1610 } | |
1611 | |
1612 m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset); | |
1613 uint8_t ch = 0; | |
1614 uint32_t dwCount = 0; | |
1615 m_pSyntax->GetNextChar(ch); | |
1616 while (PDFCharIsWhitespace(ch)) { | |
1617 ++dwCount; | |
1618 if (m_pSyntax->m_FileLen >= | |
1619 (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) { | |
1620 break; | |
1621 } | |
1622 m_pSyntax->GetNextChar(ch); | |
1623 } | |
1624 m_LastXRefOffset += dwCount; | |
1625 m_ObjectStreamMap.clear(); | |
1626 m_ObjCache.clear(); | |
1627 | |
1628 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && | |
1629 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { | |
1630 m_LastXRefOffset = 0; | |
1631 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; | |
1632 return FORMAT_ERROR; | |
1633 } | |
1634 | |
1635 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; | |
1636 return SUCCESS; | |
1637 } | |
OLD | NEW |