OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 PDFium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
6 | |
7 #include "core/include/fpdfapi/cpdf_parser.h" | |
8 | |
9 #include "core/include/fxcrt/fx_safe_types.h" | |
10 #include "core/include/fpdfapi/cpdf_document.h" | |
dsinclair
2016/03/09 15:24:57
nit: Sorting
Tom Sepez
2016/03/09 18:39:29
Done.
| |
11 #include "core/include/fpdfapi/fpdf_parser.h" | |
12 #include "core/include/fxcrt/fx_ext.h" | |
13 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" | |
14 #include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" | |
15 #include "third_party/base/stl_util.h" | |
16 | |
17 namespace { | |
18 | |
19 // A limit on the size of the xref table. Theoretical limits are higher, but | |
20 // this may be large enough in practice. | |
21 const int32_t kMaxXRefSize = 1048576; | |
22 | |
23 // A limit on the maximum object number in the xref table. Theoretical limits | |
24 // are higher, but this may be large enough in practice. | |
25 const FX_DWORD kMaxObjectNumber = 1048576; | |
26 | |
27 FX_DWORD GetVarInt(const uint8_t* p, int32_t n) { | |
28 FX_DWORD result = 0; | |
29 for (int32_t i = 0; i < n; ++i) | |
30 result = result * 256 + p[i]; | |
31 return result; | |
32 } | |
33 | |
34 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) { | |
35 return pObjStream->GetDict()->GetIntegerBy("N"); | |
36 } | |
37 | |
38 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) { | |
39 return pObjStream->GetDict()->GetIntegerBy("First"); | |
40 } | |
41 | |
42 } // namespace | |
43 | |
44 CPDF_Parser::CPDF_Parser() | |
45 : m_pDocument(nullptr), | |
46 m_bOwnFileRead(true), | |
47 m_FileVersion(0), | |
48 m_pTrailer(nullptr), | |
49 m_pEncryptDict(nullptr), | |
50 m_pLinearized(nullptr), | |
51 m_dwFirstPageNo(0), | |
52 m_dwXrefStartObjNum(0) { | |
53 m_pSyntax.reset(new CPDF_SyntaxParser); | |
54 } | |
55 | |
56 CPDF_Parser::~CPDF_Parser() { | |
57 CloseParser(); | |
58 } | |
59 | |
60 FX_DWORD CPDF_Parser::GetLastObjNum() const { | |
61 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first; | |
62 } | |
63 | |
64 bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const { | |
65 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first; | |
66 } | |
67 | |
68 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const { | |
69 auto it = m_ObjectInfo.find(objnum); | |
70 return it != m_ObjectInfo.end() ? it->second.pos : 0; | |
71 } | |
72 | |
73 uint8_t CPDF_Parser::GetObjectType(FX_DWORD objnum) const { | |
74 ASSERT(IsValidObjectNumber(objnum)); | |
75 auto it = m_ObjectInfo.find(objnum); | |
76 return it != m_ObjectInfo.end() ? it->second.type : 0; | |
77 } | |
78 | |
79 uint16_t CPDF_Parser::GetObjectGenNum(FX_DWORD objnum) const { | |
80 ASSERT(IsValidObjectNumber(objnum)); | |
81 auto it = m_ObjectInfo.find(objnum); | |
82 return it != m_ObjectInfo.end() ? it->second.gennum : 0; | |
83 } | |
84 | |
85 bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const { | |
86 uint8_t type = GetObjectType(objnum); | |
87 return type == 0 || type == 255; | |
88 } | |
89 | |
90 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) { | |
91 m_pEncryptDict = pDict; | |
92 } | |
93 | |
94 CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() { | |
95 return m_pSyntax->m_pCryptoHandler.get(); | |
96 } | |
97 | |
98 IFX_FileRead* CPDF_Parser::GetFileAccess() const { | |
99 return m_pSyntax->m_pFileAccess; | |
100 } | |
101 | |
102 void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) { | |
103 if (objnum == 0) { | |
104 m_ObjectInfo.clear(); | |
105 return; | |
106 } | |
107 | |
108 auto it = m_ObjectInfo.lower_bound(objnum); | |
109 while (it != m_ObjectInfo.end()) { | |
110 auto saved_it = it++; | |
111 m_ObjectInfo.erase(saved_it); | |
112 } | |
113 | |
114 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) | |
115 m_ObjectInfo[objnum - 1].pos = 0; | |
116 } | |
117 | |
118 void CPDF_Parser::CloseParser() { | |
119 m_bVersionUpdated = FALSE; | |
120 delete m_pDocument; | |
121 m_pDocument = nullptr; | |
122 | |
123 if (m_pTrailer) { | |
124 m_pTrailer->Release(); | |
125 m_pTrailer = nullptr; | |
126 } | |
127 ReleaseEncryptHandler(); | |
128 SetEncryptDictionary(nullptr); | |
129 | |
130 if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) { | |
131 m_pSyntax->m_pFileAccess->Release(); | |
132 m_pSyntax->m_pFileAccess = nullptr; | |
133 } | |
134 | |
135 m_ObjectStreamMap.clear(); | |
136 m_ObjCache.clear(); | |
137 m_SortedOffset.clear(); | |
138 m_ObjectInfo.clear(); | |
139 | |
140 int32_t iLen = m_Trailers.GetSize(); | |
141 for (int32_t i = 0; i < iLen; ++i) { | |
142 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i)) | |
143 trailer->Release(); | |
144 } | |
145 m_Trailers.RemoveAll(); | |
146 | |
147 if (m_pLinearized) { | |
148 m_pLinearized->Release(); | |
149 m_pLinearized = nullptr; | |
150 } | |
151 } | |
152 | |
153 CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) { | |
154 CloseParser(); | |
155 | |
156 m_bXRefStream = FALSE; | |
157 m_LastXRefOffset = 0; | |
158 m_bOwnFileRead = true; | |
159 | |
160 int32_t offset = GetHeaderOffset(pFileAccess); | |
161 if (offset == -1) { | |
162 if (pFileAccess) | |
163 pFileAccess->Release(); | |
164 return FORMAT_ERROR; | |
165 } | |
166 m_pSyntax->InitParser(pFileAccess, offset); | |
167 | |
168 uint8_t ch; | |
169 if (!m_pSyntax->GetCharAt(5, ch)) | |
170 return FORMAT_ERROR; | |
171 if (std::isdigit(ch)) | |
172 m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10; | |
173 | |
174 if (!m_pSyntax->GetCharAt(7, ch)) | |
175 return FORMAT_ERROR; | |
176 if (std::isdigit(ch)) | |
177 m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
178 | |
179 if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9) | |
180 return FORMAT_ERROR; | |
181 | |
182 m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9); | |
183 m_pDocument = new CPDF_Document(this); | |
184 | |
185 FX_BOOL bXRefRebuilt = FALSE; | |
186 if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) { | |
187 m_SortedOffset.insert(m_pSyntax->SavePos()); | |
188 m_pSyntax->GetKeyword(); | |
189 | |
190 bool bNumber; | |
191 CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber); | |
192 if (!bNumber) | |
193 return FORMAT_ERROR; | |
194 | |
195 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str); | |
196 if (!LoadAllCrossRefV4(m_LastXRefOffset) && | |
197 !LoadAllCrossRefV5(m_LastXRefOffset)) { | |
198 if (!RebuildCrossRef()) | |
199 return FORMAT_ERROR; | |
200 | |
201 bXRefRebuilt = TRUE; | |
202 m_LastXRefOffset = 0; | |
203 } | |
204 } else { | |
205 if (!RebuildCrossRef()) | |
206 return FORMAT_ERROR; | |
207 | |
208 bXRefRebuilt = TRUE; | |
209 } | |
210 Error eRet = SetEncryptHandler(); | |
211 if (eRet != SUCCESS) | |
212 return eRet; | |
213 | |
214 m_pDocument->LoadDoc(); | |
215 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { | |
216 if (bXRefRebuilt) | |
217 return FORMAT_ERROR; | |
218 | |
219 ReleaseEncryptHandler(); | |
220 if (!RebuildCrossRef()) | |
221 return FORMAT_ERROR; | |
222 | |
223 eRet = SetEncryptHandler(); | |
224 if (eRet != SUCCESS) | |
225 return eRet; | |
226 | |
227 m_pDocument->LoadDoc(); | |
228 if (!m_pDocument->GetRoot()) | |
229 return FORMAT_ERROR; | |
230 } | |
231 if (GetRootObjNum() == 0) { | |
232 ReleaseEncryptHandler(); | |
233 if (!RebuildCrossRef() || GetRootObjNum() == 0) | |
234 return FORMAT_ERROR; | |
235 | |
236 eRet = SetEncryptHandler(); | |
237 if (eRet != SUCCESS) | |
238 return eRet; | |
239 } | |
240 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { | |
241 CPDF_Reference* pMetadata = | |
242 ToReference(m_pDocument->GetRoot()->GetElement("Metadata")); | |
243 if (pMetadata) | |
244 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); | |
245 } | |
246 return SUCCESS; | |
247 } | |
248 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { | |
249 ReleaseEncryptHandler(); | |
250 SetEncryptDictionary(nullptr); | |
251 | |
252 if (!m_pTrailer) | |
253 return FORMAT_ERROR; | |
254 | |
255 CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt"); | |
256 if (pEncryptObj) { | |
257 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) { | |
258 SetEncryptDictionary(pEncryptDict); | |
259 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) { | |
260 pEncryptObj = m_pDocument->GetIndirectObject(pRef->GetRefObjNum()); | |
261 if (pEncryptObj) | |
262 SetEncryptDictionary(pEncryptObj->GetDict()); | |
263 } | |
264 } | |
265 | |
266 if (m_pEncryptDict) { | |
267 CFX_ByteString filter = m_pEncryptDict->GetStringBy("Filter"); | |
268 std::unique_ptr<IPDF_SecurityHandler> pSecurityHandler; | |
269 Error err = HANDLER_ERROR; | |
270 if (filter == "Standard") { | |
271 pSecurityHandler.reset(new CPDF_StandardSecurityHandler); | |
272 err = PASSWORD_ERROR; | |
273 } | |
274 if (!pSecurityHandler) | |
275 return HANDLER_ERROR; | |
276 | |
277 if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) | |
278 return err; | |
279 | |
280 m_pSecurityHandler = std::move(pSecurityHandler); | |
281 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler( | |
282 m_pSecurityHandler->CreateCryptoHandler()); | |
283 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) | |
284 return HANDLER_ERROR; | |
285 m_pSyntax->SetEncrypt(std::move(pCryptoHandler)); | |
286 } | |
287 return SUCCESS; | |
288 } | |
289 | |
290 void CPDF_Parser::ReleaseEncryptHandler() { | |
291 m_pSyntax->m_pCryptoHandler.reset(); | |
292 m_pSecurityHandler.reset(); | |
293 } | |
294 | |
295 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const { | |
296 if (!IsValidObjectNumber(objnum)) | |
297 return 0; | |
298 | |
299 if (GetObjectType(objnum) == 1) | |
300 return GetObjectPositionOrZero(objnum); | |
301 | |
302 if (GetObjectType(objnum) == 2) { | |
303 FX_FILESIZE pos = GetObjectPositionOrZero(objnum); | |
304 return GetObjectPositionOrZero(pos); | |
305 } | |
306 return 0; | |
307 } | |
308 | |
309 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { | |
310 if (!LoadCrossRefV4(xrefpos, 0, TRUE)) | |
311 return FALSE; | |
312 | |
313 m_pTrailer = LoadTrailerV4(); | |
314 if (!m_pTrailer) | |
315 return FALSE; | |
316 | |
317 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); | |
318 if (xrefsize > 0 && xrefsize <= kMaxXRefSize) | |
319 ShrinkObjectMap(xrefsize); | |
320 | |
321 std::vector<FX_FILESIZE> CrossRefList; | |
322 std::vector<FX_FILESIZE> XRefStreamList; | |
323 std::set<FX_FILESIZE> seen_xrefpos; | |
324 | |
325 CrossRefList.push_back(xrefpos); | |
326 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); | |
327 seen_xrefpos.insert(xrefpos); | |
328 | |
329 // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not | |
330 // numerical, GetDirectInteger() returns 0. Loading will end. | |
331 xrefpos = GetDirectInteger(m_pTrailer, "Prev"); | |
332 while (xrefpos) { | |
333 // Check for circular references. | |
334 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
335 return FALSE; | |
336 | |
337 seen_xrefpos.insert(xrefpos); | |
338 | |
339 // SLOW ... | |
340 CrossRefList.insert(CrossRefList.begin(), xrefpos); | |
341 LoadCrossRefV4(xrefpos, 0, TRUE); | |
342 | |
343 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
344 LoadTrailerV4()); | |
345 if (!pDict) | |
346 return FALSE; | |
347 | |
348 xrefpos = GetDirectInteger(pDict.get(), "Prev"); | |
349 | |
350 // SLOW ... | |
351 XRefStreamList.insert(XRefStreamList.begin(), | |
352 pDict->GetIntegerBy("XRefStm")); | |
353 m_Trailers.Add(pDict.release()); | |
354 } | |
355 | |
356 for (size_t i = 0; i < CrossRefList.size(); ++i) { | |
357 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) | |
358 return FALSE; | |
359 } | |
360 return TRUE; | |
361 } | |
362 | |
363 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, | |
364 FX_DWORD dwObjCount) { | |
365 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) | |
366 return FALSE; | |
367 | |
368 m_pTrailer = LoadTrailerV4(); | |
369 if (!m_pTrailer) | |
370 return FALSE; | |
371 | |
372 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); | |
373 if (xrefsize == 0) | |
374 return FALSE; | |
375 | |
376 std::vector<FX_FILESIZE> CrossRefList; | |
377 std::vector<FX_FILESIZE> XRefStreamList; | |
378 std::set<FX_FILESIZE> seen_xrefpos; | |
379 | |
380 CrossRefList.push_back(xrefpos); | |
381 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); | |
382 seen_xrefpos.insert(xrefpos); | |
383 | |
384 xrefpos = GetDirectInteger(m_pTrailer, "Prev"); | |
385 while (xrefpos) { | |
386 // Check for circular references. | |
387 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
388 return FALSE; | |
389 | |
390 seen_xrefpos.insert(xrefpos); | |
391 | |
392 // SLOW ... | |
393 CrossRefList.insert(CrossRefList.begin(), xrefpos); | |
394 LoadCrossRefV4(xrefpos, 0, TRUE); | |
395 | |
396 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
397 LoadTrailerV4()); | |
398 if (!pDict) | |
399 return FALSE; | |
400 | |
401 xrefpos = GetDirectInteger(pDict.get(), "Prev"); | |
402 | |
403 // SLOW ... | |
404 XRefStreamList.insert(XRefStreamList.begin(), | |
405 pDict->GetIntegerBy("XRefStm")); | |
406 m_Trailers.Add(pDict.release()); | |
407 } | |
408 | |
409 for (size_t i = 1; i < CrossRefList.size(); ++i) { | |
410 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) | |
411 return FALSE; | |
412 } | |
413 return TRUE; | |
414 } | |
415 | |
416 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, | |
417 FX_DWORD dwObjCount) { | |
418 FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset; | |
419 | |
420 m_pSyntax->RestorePos(dwStartPos); | |
421 m_SortedOffset.insert(pos); | |
422 | |
423 FX_DWORD start_objnum = 0; | |
424 FX_DWORD count = dwObjCount; | |
425 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
426 | |
427 const int32_t recordsize = 20; | |
428 std::vector<char> buf(1024 * recordsize + 1); | |
429 buf[1024 * recordsize] = '\0'; | |
430 | |
431 int32_t nBlocks = count / 1024 + 1; | |
432 for (int32_t block = 0; block < nBlocks; block++) { | |
433 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; | |
434 FX_DWORD dwReadSize = block_size * recordsize; | |
435 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen) | |
436 return FALSE; | |
437 | |
438 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), | |
439 dwReadSize)) { | |
440 return FALSE; | |
441 } | |
442 | |
443 for (int32_t i = 0; i < block_size; i++) { | |
444 FX_DWORD objnum = start_objnum + block * 1024 + i; | |
445 char* pEntry = &buf[i * recordsize]; | |
446 if (pEntry[17] == 'f') { | |
447 m_ObjectInfo[objnum].pos = 0; | |
448 m_ObjectInfo[objnum].type = 0; | |
449 } else { | |
450 int32_t offset = FXSYS_atoi(pEntry); | |
451 if (offset == 0) { | |
452 for (int32_t c = 0; c < 10; c++) { | |
453 if (!std::isdigit(pEntry[c])) | |
454 return FALSE; | |
455 } | |
456 } | |
457 | |
458 m_ObjectInfo[objnum].pos = offset; | |
459 int32_t version = FXSYS_atoi(pEntry + 11); | |
460 if (version >= 1) | |
461 m_bVersionUpdated = TRUE; | |
462 | |
463 m_ObjectInfo[objnum].gennum = version; | |
464 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) | |
465 m_SortedOffset.insert(m_ObjectInfo[objnum].pos); | |
466 | |
467 m_ObjectInfo[objnum].type = 1; | |
468 } | |
469 } | |
470 } | |
471 m_pSyntax->RestorePos(SavedPos + count * recordsize); | |
472 return TRUE; | |
473 } | |
474 | |
475 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, | |
476 FX_FILESIZE streampos, | |
477 FX_BOOL bSkip) { | |
478 m_pSyntax->RestorePos(pos); | |
479 if (m_pSyntax->GetKeyword() != "xref") | |
480 return false; | |
481 | |
482 m_SortedOffset.insert(pos); | |
483 if (streampos) | |
484 m_SortedOffset.insert(streampos); | |
485 | |
486 while (1) { | |
487 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
488 bool bIsNumber; | |
489 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
490 if (word.IsEmpty()) | |
491 return false; | |
492 | |
493 if (!bIsNumber) { | |
494 m_pSyntax->RestorePos(SavedPos); | |
495 break; | |
496 } | |
497 | |
498 FX_DWORD start_objnum = FXSYS_atoui(word); | |
499 if (start_objnum >= kMaxObjectNumber) | |
500 return false; | |
501 | |
502 FX_DWORD count = m_pSyntax->GetDirectNum(); | |
503 m_pSyntax->ToNextWord(); | |
504 SavedPos = m_pSyntax->SavePos(); | |
505 const int32_t recordsize = 20; | |
506 | |
507 m_dwXrefStartObjNum = start_objnum; | |
508 if (!bSkip) { | |
509 std::vector<char> buf(1024 * recordsize + 1); | |
510 buf[1024 * recordsize] = '\0'; | |
511 | |
512 int32_t nBlocks = count / 1024 + 1; | |
513 for (int32_t block = 0; block < nBlocks; block++) { | |
514 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; | |
515 m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), | |
516 block_size * recordsize); | |
517 | |
518 for (int32_t i = 0; i < block_size; i++) { | |
519 FX_DWORD objnum = start_objnum + block * 1024 + i; | |
520 char* pEntry = &buf[i * recordsize]; | |
521 if (pEntry[17] == 'f') { | |
522 m_ObjectInfo[objnum].pos = 0; | |
523 m_ObjectInfo[objnum].type = 0; | |
524 } else { | |
525 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry); | |
526 if (offset == 0) { | |
527 for (int32_t c = 0; c < 10; c++) { | |
528 if (!std::isdigit(pEntry[c])) | |
529 return false; | |
530 } | |
531 } | |
532 | |
533 m_ObjectInfo[objnum].pos = offset; | |
534 int32_t version = FXSYS_atoi(pEntry + 11); | |
535 if (version >= 1) | |
536 m_bVersionUpdated = TRUE; | |
537 | |
538 m_ObjectInfo[objnum].gennum = version; | |
539 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) | |
540 m_SortedOffset.insert(m_ObjectInfo[objnum].pos); | |
541 | |
542 m_ObjectInfo[objnum].type = 1; | |
543 } | |
544 } | |
545 } | |
546 } | |
547 m_pSyntax->RestorePos(SavedPos + count * recordsize); | |
548 } | |
549 return !streampos || LoadCrossRefV5(&streampos, FALSE); | |
550 } | |
551 | |
552 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { | |
553 if (!LoadCrossRefV5(&xrefpos, TRUE)) | |
554 return FALSE; | |
555 | |
556 std::set<FX_FILESIZE> seen_xrefpos; | |
557 while (xrefpos) { | |
558 seen_xrefpos.insert(xrefpos); | |
559 if (!LoadCrossRefV5(&xrefpos, FALSE)) | |
560 return FALSE; | |
561 | |
562 // Check for circular references. | |
563 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
564 return FALSE; | |
565 } | |
566 m_ObjectStreamMap.clear(); | |
567 m_bXRefStream = TRUE; | |
568 return TRUE; | |
569 } | |
570 | |
571 FX_BOOL CPDF_Parser::RebuildCrossRef() { | |
572 m_ObjectInfo.clear(); | |
573 m_SortedOffset.clear(); | |
574 if (m_pTrailer) { | |
575 m_pTrailer->Release(); | |
576 m_pTrailer = nullptr; | |
577 } | |
578 | |
579 ParserState state = ParserState::kDefault; | |
580 | |
581 int32_t inside_index = 0; | |
582 FX_DWORD objnum = 0; | |
583 FX_DWORD gennum = 0; | |
584 int32_t depth = 0; | |
585 | |
586 const FX_DWORD kBufferSize = 4096; | |
587 std::vector<uint8_t> buffer(kBufferSize); | |
588 | |
589 FX_FILESIZE pos = m_pSyntax->m_HeaderOffset; | |
590 FX_FILESIZE start_pos = 0; | |
591 FX_FILESIZE start_pos1 = 0; | |
592 FX_FILESIZE last_obj = -1; | |
593 FX_FILESIZE last_xref = -1; | |
594 FX_FILESIZE last_trailer = -1; | |
595 | |
596 while (pos < m_pSyntax->m_FileLen) { | |
597 const FX_FILESIZE saved_pos = pos; | |
598 bool bOverFlow = false; | |
599 FX_DWORD size = | |
600 std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize); | |
601 if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size)) | |
602 break; | |
603 | |
604 for (FX_DWORD i = 0; i < size; i++) { | |
605 uint8_t byte = buffer[i]; | |
606 switch (state) { | |
607 case ParserState::kDefault: | |
608 if (PDFCharIsWhitespace(byte)) { | |
609 state = ParserState::kWhitespace; | |
610 } else if (std::isdigit(byte)) { | |
611 --i; | |
612 state = ParserState::kWhitespace; | |
613 } else if (byte == '%') { | |
614 inside_index = 0; | |
615 state = ParserState::kComment; | |
616 } else if (byte == '(') { | |
617 state = ParserState::kString; | |
618 depth = 1; | |
619 } else if (byte == '<') { | |
620 inside_index = 1; | |
621 state = ParserState::kHexString; | |
622 } else if (byte == '\\') { | |
623 state = ParserState::kEscapedString; | |
624 } else if (byte == 't') { | |
625 state = ParserState::kTrailer; | |
626 inside_index = 1; | |
627 } | |
628 break; | |
629 | |
630 case ParserState::kWhitespace: | |
631 if (std::isdigit(byte)) { | |
632 start_pos = pos + i; | |
633 state = ParserState::kObjNum; | |
634 objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
635 } else if (byte == 't') { | |
636 state = ParserState::kTrailer; | |
637 inside_index = 1; | |
638 } else if (byte == 'x') { | |
639 state = ParserState::kXref; | |
640 inside_index = 1; | |
641 } else if (!PDFCharIsWhitespace(byte)) { | |
642 --i; | |
643 state = ParserState::kDefault; | |
644 } | |
645 break; | |
646 | |
647 case ParserState::kObjNum: | |
648 if (std::isdigit(byte)) { | |
649 objnum = | |
650 objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
651 } else if (PDFCharIsWhitespace(byte)) { | |
652 state = ParserState::kPostObjNum; | |
653 } else { | |
654 --i; | |
655 state = ParserState::kEndObj; | |
656 inside_index = 0; | |
657 } | |
658 break; | |
659 | |
660 case ParserState::kPostObjNum: | |
661 if (std::isdigit(byte)) { | |
662 start_pos1 = pos + i; | |
663 state = ParserState::kGenNum; | |
664 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
665 } else if (byte == 't') { | |
666 state = ParserState::kTrailer; | |
667 inside_index = 1; | |
668 } else if (!PDFCharIsWhitespace(byte)) { | |
669 --i; | |
670 state = ParserState::kDefault; | |
671 } | |
672 break; | |
673 | |
674 case ParserState::kGenNum: | |
675 if (std::isdigit(byte)) { | |
676 gennum = | |
677 gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
678 } else if (PDFCharIsWhitespace(byte)) { | |
679 state = ParserState::kPostGenNum; | |
680 } else { | |
681 --i; | |
682 state = ParserState::kDefault; | |
683 } | |
684 break; | |
685 | |
686 case ParserState::kPostGenNum: | |
687 if (byte == 'o') { | |
688 state = ParserState::kBeginObj; | |
689 inside_index = 1; | |
690 } else if (std::isdigit(byte)) { | |
691 objnum = gennum; | |
692 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
693 start_pos = start_pos1; | |
694 start_pos1 = pos + i; | |
695 state = ParserState::kGenNum; | |
696 } else if (byte == 't') { | |
697 state = ParserState::kTrailer; | |
698 inside_index = 1; | |
699 } else if (!PDFCharIsWhitespace(byte)) { | |
700 --i; | |
701 state = ParserState::kDefault; | |
702 } | |
703 break; | |
704 | |
705 case ParserState::kBeginObj: | |
706 switch (inside_index) { | |
707 case 1: | |
708 if (byte != 'b') { | |
709 --i; | |
710 state = ParserState::kDefault; | |
711 } else { | |
712 inside_index++; | |
713 } | |
714 break; | |
715 case 2: | |
716 if (byte != 'j') { | |
717 --i; | |
718 state = ParserState::kDefault; | |
719 } else { | |
720 inside_index++; | |
721 } | |
722 break; | |
723 case 3: | |
724 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | |
725 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset; | |
726 m_SortedOffset.insert(obj_pos); | |
727 last_obj = start_pos; | |
728 FX_FILESIZE obj_end = 0; | |
729 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( | |
730 m_pDocument, obj_pos, objnum, &obj_end); | |
731 if (CPDF_Stream* pStream = ToStream(pObject)) { | |
732 if (CPDF_Dictionary* pDict = pStream->GetDict()) { | |
733 if ((pDict->KeyExist("Type")) && | |
734 (pDict->GetStringBy("Type") == "XRef" && | |
735 pDict->KeyExist("Size"))) { | |
736 CPDF_Object* pRoot = pDict->GetElement("Root"); | |
737 if (pRoot && pRoot->GetDict() && | |
738 pRoot->GetDict()->GetElement("Pages")) { | |
739 if (m_pTrailer) | |
740 m_pTrailer->Release(); | |
741 m_pTrailer = ToDictionary(pDict->Clone()); | |
742 } | |
743 } | |
744 } | |
745 } | |
746 | |
747 FX_FILESIZE offset = 0; | |
748 m_pSyntax->RestorePos(obj_pos); | |
749 offset = m_pSyntax->FindTag("obj", 0); | |
750 if (offset == -1) | |
751 offset = 0; | |
752 else | |
753 offset += 3; | |
754 | |
755 FX_FILESIZE nLen = obj_end - obj_pos - offset; | |
756 if ((FX_DWORD)nLen > size - i) { | |
757 pos = obj_end + m_pSyntax->m_HeaderOffset; | |
758 bOverFlow = true; | |
759 } else { | |
760 i += (FX_DWORD)nLen; | |
761 } | |
762 | |
763 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) && | |
764 m_ObjectInfo[objnum].pos) { | |
765 if (pObject) { | |
766 FX_DWORD oldgen = GetObjectGenNum(objnum); | |
767 m_ObjectInfo[objnum].pos = obj_pos; | |
768 m_ObjectInfo[objnum].gennum = gennum; | |
769 if (oldgen != gennum) | |
770 m_bVersionUpdated = TRUE; | |
771 } | |
772 } else { | |
773 m_ObjectInfo[objnum].pos = obj_pos; | |
774 m_ObjectInfo[objnum].type = 1; | |
775 m_ObjectInfo[objnum].gennum = gennum; | |
776 } | |
777 | |
778 if (pObject) | |
779 pObject->Release(); | |
780 } | |
781 --i; | |
782 state = ParserState::kDefault; | |
783 break; | |
784 } | |
785 break; | |
786 | |
787 case ParserState::kTrailer: | |
788 if (inside_index == 7) { | |
789 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | |
790 last_trailer = pos + i - 7; | |
791 m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset); | |
792 | |
793 CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true); | |
794 if (pObj) { | |
795 if (!pObj->IsDictionary() && !pObj->AsStream()) { | |
796 pObj->Release(); | |
797 } else { | |
798 CPDF_Stream* pStream = pObj->AsStream(); | |
799 if (CPDF_Dictionary* pTrailer = | |
800 pStream ? pStream->GetDict() : pObj->AsDictionary()) { | |
801 if (m_pTrailer) { | |
802 CPDF_Object* pRoot = pTrailer->GetElement("Root"); | |
803 CPDF_Reference* pRef = ToReference(pRoot); | |
804 if (!pRoot || | |
805 (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) && | |
806 m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) { | |
807 auto it = pTrailer->begin(); | |
808 while (it != pTrailer->end()) { | |
809 const CFX_ByteString& key = it->first; | |
810 CPDF_Object* pElement = it->second; | |
811 ++it; | |
812 FX_DWORD dwObjNum = | |
813 pElement ? pElement->GetObjNum() : 0; | |
814 if (dwObjNum) { | |
815 m_pTrailer->SetAtReference(key, m_pDocument, | |
816 dwObjNum); | |
817 } else { | |
818 m_pTrailer->SetAt(key, pElement->Clone()); | |
819 } | |
820 } | |
821 } | |
822 pObj->Release(); | |
823 } else { | |
824 if (pObj->IsStream()) { | |
825 m_pTrailer = ToDictionary(pTrailer->Clone()); | |
826 pObj->Release(); | |
827 } else { | |
828 m_pTrailer = pTrailer; | |
829 } | |
830 | |
831 FX_FILESIZE dwSavePos = m_pSyntax->SavePos(); | |
832 CFX_ByteString strWord = m_pSyntax->GetKeyword(); | |
833 if (!strWord.Compare("startxref")) { | |
834 bool bNumber; | |
835 CFX_ByteString bsOffset = | |
836 m_pSyntax->GetNextWord(&bNumber); | |
837 if (bNumber) | |
838 m_LastXRefOffset = FXSYS_atoi(bsOffset); | |
839 } | |
840 m_pSyntax->RestorePos(dwSavePos); | |
841 } | |
842 } else { | |
843 pObj->Release(); | |
844 } | |
845 } | |
846 } | |
847 } | |
848 --i; | |
849 state = ParserState::kDefault; | |
850 } else if (byte == "trailer"[inside_index]) { | |
851 inside_index++; | |
852 } else { | |
853 --i; | |
854 state = ParserState::kDefault; | |
855 } | |
856 break; | |
857 | |
858 case ParserState::kXref: | |
859 if (inside_index == 4) { | |
860 last_xref = pos + i - 4; | |
861 state = ParserState::kWhitespace; | |
862 } else if (byte == "xref"[inside_index]) { | |
863 inside_index++; | |
864 } else { | |
865 --i; | |
866 state = ParserState::kDefault; | |
867 } | |
868 break; | |
869 | |
870 case ParserState::kComment: | |
871 if (byte == '\r' || byte == '\n') | |
dsinclair
2016/03/09 15:24:57
PDFCharIsLineEnding(byte)
Tom Sepez
2016/03/09 18:39:29
Much better. Done.
| |
872 state = ParserState::kDefault; | |
873 break; | |
874 | |
875 case ParserState::kString: | |
876 if (byte == ')') { | |
877 if (depth > 0) | |
878 depth--; | |
879 } else if (byte == '(') { | |
880 depth++; | |
881 } | |
882 | |
883 if (!depth) | |
884 state = ParserState::kDefault; | |
885 break; | |
886 | |
887 case ParserState::kHexString: | |
888 if (byte == '>' || (byte == '<' && inside_index == 1)) | |
889 state = ParserState::kDefault; | |
890 inside_index = 0; | |
891 break; | |
892 | |
893 case ParserState::kEscapedString: | |
894 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { | |
895 --i; | |
896 state = ParserState::kDefault; | |
897 } | |
898 break; | |
899 | |
900 case ParserState::kEndObj: | |
901 if (PDFCharIsWhitespace(byte)) { | |
902 state = ParserState::kDefault; | |
903 } else if (byte == '%' || byte == '(' || byte == '<' || | |
904 byte == '\\') { | |
905 state = ParserState::kDefault; | |
906 --i; | |
907 } else if (inside_index == 6) { | |
908 state = ParserState::kDefault; | |
909 --i; | |
910 } else if (byte == "endobj"[inside_index]) { | |
911 inside_index++; | |
912 } | |
913 break; | |
914 } | |
915 | |
916 if (bOverFlow) { | |
917 size = 0; | |
918 break; | |
919 } | |
920 } | |
921 pos += size; | |
922 | |
923 // If the position has not changed at all in a loop iteration, then break | |
924 // out to prevent infinite looping. | |
925 if (pos == saved_pos) | |
926 break; | |
927 } | |
928 | |
929 if (last_xref != -1 && last_xref > last_obj) | |
930 last_trailer = last_xref; | |
931 else if (last_trailer == -1 || last_xref < last_obj) | |
932 last_trailer = m_pSyntax->m_FileLen; | |
933 | |
934 m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset); | |
935 return m_pTrailer && !m_ObjectInfo.empty(); | |
936 } | |
937 | |
938 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) { | |
939 CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0); | |
940 if (!pObject) | |
941 return FALSE; | |
942 | |
943 if (m_pDocument) { | |
944 FX_BOOL bInserted = FALSE; | |
945 CPDF_Dictionary* pDict = m_pDocument->GetRoot(); | |
946 if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) { | |
947 bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject); | |
948 } else { | |
949 if (pObject->IsStream()) | |
950 pObject->Release(); | |
951 } | |
952 | |
953 if (!bInserted) | |
954 return FALSE; | |
955 } | |
956 | |
957 CPDF_Stream* pStream = pObject->AsStream(); | |
958 if (!pStream) | |
959 return FALSE; | |
960 | |
961 *pos = pStream->GetDict()->GetIntegerBy("Prev"); | |
962 int32_t size = pStream->GetDict()->GetIntegerBy("Size"); | |
963 if (size < 0) { | |
964 pStream->Release(); | |
965 return FALSE; | |
966 } | |
967 | |
968 if (bMainXRef) { | |
969 m_pTrailer = ToDictionary(pStream->GetDict()->Clone()); | |
970 ShrinkObjectMap(size); | |
971 for (auto& it : m_ObjectInfo) | |
972 it.second.type = 0; | |
973 } else { | |
974 m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone())); | |
975 } | |
976 | |
977 std::vector<std::pair<int32_t, int32_t>> arrIndex; | |
978 CPDF_Array* pArray = pStream->GetDict()->GetArrayBy("Index"); | |
979 if (pArray) { | |
980 FX_DWORD nPairSize = pArray->GetCount() / 2; | |
981 for (FX_DWORD i = 0; i < nPairSize; i++) { | |
982 CPDF_Object* pStartNumObj = pArray->GetElement(i * 2); | |
983 CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1); | |
984 | |
985 if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) { | |
986 int nStartNum = pStartNumObj->GetInteger(); | |
987 int nCount = pCountObj->GetInteger(); | |
988 if (nStartNum >= 0 && nCount > 0) | |
989 arrIndex.push_back(std::make_pair(nStartNum, nCount)); | |
990 } | |
991 } | |
992 } | |
993 | |
994 if (arrIndex.size() == 0) | |
995 arrIndex.push_back(std::make_pair(0, size)); | |
996 | |
997 pArray = pStream->GetDict()->GetArrayBy("W"); | |
998 if (!pArray) { | |
999 pStream->Release(); | |
1000 return FALSE; | |
1001 } | |
1002 | |
1003 CFX_DWordArray WidthArray; | |
1004 FX_SAFE_DWORD dwAccWidth = 0; | |
1005 for (FX_DWORD i = 0; i < pArray->GetCount(); i++) { | |
1006 WidthArray.Add(pArray->GetIntegerAt(i)); | |
1007 dwAccWidth += WidthArray[i]; | |
1008 } | |
1009 | |
1010 if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) { | |
1011 pStream->Release(); | |
1012 return FALSE; | |
1013 } | |
1014 | |
1015 FX_DWORD totalWidth = dwAccWidth.ValueOrDie(); | |
1016 CPDF_StreamAcc acc; | |
1017 acc.LoadAllData(pStream); | |
1018 | |
1019 const uint8_t* pData = acc.GetData(); | |
1020 FX_DWORD dwTotalSize = acc.GetSize(); | |
1021 FX_DWORD segindex = 0; | |
1022 for (FX_DWORD i = 0; i < arrIndex.size(); i++) { | |
1023 int32_t startnum = arrIndex[i].first; | |
1024 if (startnum < 0) | |
1025 continue; | |
1026 | |
1027 m_dwXrefStartObjNum = | |
1028 pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum); | |
1029 FX_DWORD count = | |
1030 pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second); | |
1031 FX_SAFE_DWORD dwCaculatedSize = segindex; | |
1032 dwCaculatedSize += count; | |
1033 dwCaculatedSize *= totalWidth; | |
1034 if (!dwCaculatedSize.IsValid() || | |
1035 dwCaculatedSize.ValueOrDie() > dwTotalSize) { | |
1036 continue; | |
1037 } | |
1038 | |
1039 const uint8_t* segstart = pData + segindex * totalWidth; | |
1040 FX_SAFE_DWORD dwMaxObjNum = startnum; | |
1041 dwMaxObjNum += count; | |
1042 FX_DWORD dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1; | |
1043 if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) | |
1044 continue; | |
1045 | |
1046 for (FX_DWORD j = 0; j < count; j++) { | |
1047 int32_t type = 1; | |
1048 const uint8_t* entrystart = segstart + j * totalWidth; | |
1049 if (WidthArray[0]) | |
1050 type = GetVarInt(entrystart, WidthArray[0]); | |
1051 | |
1052 if (GetObjectType(startnum + j) == 255) { | |
1053 FX_FILESIZE offset = | |
1054 GetVarInt(entrystart + WidthArray[0], WidthArray[1]); | |
1055 m_ObjectInfo[startnum + j].pos = offset; | |
1056 m_SortedOffset.insert(offset); | |
1057 continue; | |
1058 } | |
1059 | |
1060 if (GetObjectType(startnum + j)) | |
1061 continue; | |
1062 | |
1063 m_ObjectInfo[startnum + j].type = type; | |
1064 if (type == 0) { | |
1065 m_ObjectInfo[startnum + j].pos = 0; | |
1066 } else { | |
1067 FX_FILESIZE offset = | |
1068 GetVarInt(entrystart + WidthArray[0], WidthArray[1]); | |
1069 m_ObjectInfo[startnum + j].pos = offset; | |
1070 if (type == 1) { | |
1071 m_SortedOffset.insert(offset); | |
1072 } else { | |
1073 if (offset < 0 || !IsValidObjectNumber(offset)) { | |
1074 pStream->Release(); | |
1075 return FALSE; | |
1076 } | |
1077 m_ObjectInfo[offset].type = 255; | |
1078 } | |
1079 } | |
1080 } | |
1081 segindex += count; | |
1082 } | |
1083 pStream->Release(); | |
1084 return TRUE; | |
1085 } | |
1086 | |
1087 CPDF_Array* CPDF_Parser::GetIDArray() { | |
1088 CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement("ID") : nullptr; | |
1089 if (!pID) | |
1090 return nullptr; | |
1091 | |
1092 if (CPDF_Reference* pRef = pID->AsReference()) { | |
1093 pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum()); | |
1094 m_pTrailer->SetAt("ID", pID); | |
1095 } | |
1096 return ToArray(pID); | |
1097 } | |
1098 | |
1099 FX_DWORD CPDF_Parser::GetRootObjNum() { | |
1100 CPDF_Reference* pRef = | |
1101 ToReference(m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr); | |
1102 return pRef ? pRef->GetRefObjNum() : 0; | |
1103 } | |
1104 | |
1105 FX_DWORD CPDF_Parser::GetInfoObjNum() { | |
1106 CPDF_Reference* pRef = | |
1107 ToReference(m_pTrailer ? m_pTrailer->GetElement("Info") : nullptr); | |
1108 return pRef ? pRef->GetRefObjNum() : 0; | |
1109 } | |
1110 | |
1111 FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) { | |
1112 bForm = FALSE; | |
1113 if (!IsValidObjectNumber(objnum)) | |
1114 return TRUE; | |
1115 | |
1116 if (GetObjectType(objnum) == 0) | |
1117 return TRUE; | |
1118 | |
1119 if (GetObjectType(objnum) == 2) | |
1120 return TRUE; | |
1121 | |
1122 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; | |
1123 auto it = m_SortedOffset.find(pos); | |
1124 if (it == m_SortedOffset.end()) | |
1125 return TRUE; | |
1126 | |
1127 if (++it == m_SortedOffset.end()) | |
1128 return FALSE; | |
1129 | |
1130 FX_FILESIZE size = *it - pos; | |
1131 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
1132 m_pSyntax->RestorePos(pos); | |
1133 | |
1134 const char kFormStream[] = "/Form\0stream"; | |
1135 const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1); | |
1136 bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, size) == 0; | |
1137 m_pSyntax->RestorePos(SavedPos); | |
1138 return TRUE; | |
1139 } | |
1140 | |
1141 CPDF_Object* CPDF_Parser::ParseIndirectObject( | |
1142 CPDF_IndirectObjectHolder* pObjList, | |
1143 FX_DWORD objnum) { | |
1144 if (!IsValidObjectNumber(objnum)) | |
1145 return nullptr; | |
1146 | |
1147 // Prevent circular parsing the same object. | |
1148 if (pdfium::ContainsKey(m_ParsingObjNums, objnum)) | |
1149 return nullptr; | |
1150 ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum); | |
1151 | |
1152 if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) { | |
1153 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; | |
1154 if (pos <= 0) | |
1155 return nullptr; | |
1156 return ParseIndirectObjectAt(pObjList, pos, objnum); | |
1157 } | |
1158 if (GetObjectType(objnum) != 2) | |
1159 return nullptr; | |
1160 | |
1161 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); | |
1162 if (!pObjStream) | |
1163 return nullptr; | |
1164 | |
1165 ScopedFileStream file(FX_CreateMemoryStream( | |
1166 (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE)); | |
1167 CPDF_SyntaxParser syntax; | |
1168 syntax.InitParser(file.get(), 0); | |
1169 const int32_t offset = GetStreamFirst(pObjStream); | |
1170 | |
1171 // Read object numbers from |pObjStream| into a cache. | |
1172 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) { | |
1173 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { | |
1174 FX_DWORD thisnum = syntax.GetDirectNum(); | |
1175 FX_DWORD thisoff = syntax.GetDirectNum(); | |
1176 m_ObjCache[pObjStream][thisnum] = thisoff; | |
1177 } | |
1178 } | |
1179 | |
1180 const auto it = m_ObjCache[pObjStream].find(objnum); | |
1181 if (it == m_ObjCache[pObjStream].end()) | |
1182 return nullptr; | |
1183 | |
1184 syntax.RestorePos(offset + it->second); | |
1185 return syntax.GetObject(pObjList, 0, 0, true); | |
1186 } | |
1187 | |
1188 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) { | |
1189 auto it = m_ObjectStreamMap.find(objnum); | |
1190 if (it != m_ObjectStreamMap.end()) | |
1191 return it->second.get(); | |
1192 | |
1193 if (!m_pDocument) | |
1194 return nullptr; | |
1195 | |
1196 const CPDF_Stream* pStream = ToStream(m_pDocument->GetIndirectObject(objnum)); | |
1197 if (!pStream) | |
1198 return nullptr; | |
1199 | |
1200 CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc; | |
1201 pStreamAcc->LoadAllData(pStream); | |
1202 m_ObjectStreamMap[objnum].reset(pStreamAcc); | |
1203 return pStreamAcc; | |
1204 } | |
1205 | |
1206 FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const { | |
1207 if (!IsValidObjectNumber(objnum)) | |
1208 return 0; | |
1209 | |
1210 if (GetObjectType(objnum) == 2) | |
1211 objnum = GetObjectPositionOrZero(objnum); | |
1212 | |
1213 if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255) | |
1214 return 0; | |
1215 | |
1216 FX_FILESIZE offset = GetObjectPositionOrZero(objnum); | |
1217 if (offset == 0) | |
1218 return 0; | |
1219 | |
1220 auto it = m_SortedOffset.find(offset); | |
1221 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) | |
1222 return 0; | |
1223 | |
1224 return *it - offset; | |
1225 } | |
1226 | |
1227 void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, | |
1228 uint8_t*& pBuffer, | |
1229 FX_DWORD& size) { | |
1230 pBuffer = nullptr; | |
1231 size = 0; | |
1232 if (!IsValidObjectNumber(objnum)) | |
1233 return; | |
1234 | |
1235 if (GetObjectType(objnum) == 2) { | |
1236 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); | |
1237 if (!pObjStream) | |
1238 return; | |
1239 | |
1240 int32_t offset = GetStreamFirst(pObjStream); | |
1241 const uint8_t* pData = pObjStream->GetData(); | |
1242 FX_DWORD totalsize = pObjStream->GetSize(); | |
1243 ScopedFileStream file( | |
1244 FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE)); | |
1245 | |
1246 CPDF_SyntaxParser syntax; | |
1247 syntax.InitParser(file.get(), 0); | |
1248 for (int i = GetStreamNCount(pObjStream); i > 0; --i) { | |
1249 FX_DWORD thisnum = syntax.GetDirectNum(); | |
1250 FX_DWORD thisoff = syntax.GetDirectNum(); | |
1251 if (thisnum != objnum) | |
1252 continue; | |
1253 | |
1254 if (i == 1) { | |
1255 size = totalsize - (thisoff + offset); | |
1256 } else { | |
1257 syntax.GetDirectNum(); // Skip nextnum. | |
1258 FX_DWORD nextoff = syntax.GetDirectNum(); | |
1259 size = nextoff - thisoff; | |
1260 } | |
1261 | |
1262 pBuffer = FX_Alloc(uint8_t, size); | |
1263 FXSYS_memcpy(pBuffer, pData + thisoff + offset, size); | |
1264 return; | |
1265 } | |
1266 return; | |
1267 } | |
1268 | |
1269 if (GetObjectType(objnum) != 1) | |
1270 return; | |
1271 | |
1272 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; | |
1273 if (pos == 0) | |
1274 return; | |
1275 | |
1276 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
1277 m_pSyntax->RestorePos(pos); | |
1278 | |
1279 bool bIsNumber; | |
1280 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
1281 if (!bIsNumber) { | |
1282 m_pSyntax->RestorePos(SavedPos); | |
1283 return; | |
1284 } | |
1285 | |
1286 FX_DWORD parser_objnum = FXSYS_atoui(word); | |
1287 if (parser_objnum && parser_objnum != objnum) { | |
1288 m_pSyntax->RestorePos(SavedPos); | |
1289 return; | |
1290 } | |
1291 | |
1292 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1293 if (!bIsNumber) { | |
1294 m_pSyntax->RestorePos(SavedPos); | |
1295 return; | |
1296 } | |
1297 | |
1298 if (m_pSyntax->GetKeyword() != "obj") { | |
1299 m_pSyntax->RestorePos(SavedPos); | |
1300 return; | |
1301 } | |
1302 | |
1303 auto it = m_SortedOffset.find(pos); | |
1304 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) { | |
1305 m_pSyntax->RestorePos(SavedPos); | |
1306 return; | |
1307 } | |
1308 | |
1309 FX_FILESIZE nextoff = *it; | |
1310 FX_BOOL bNextOffValid = FALSE; | |
1311 if (nextoff != pos) { | |
1312 m_pSyntax->RestorePos(nextoff); | |
1313 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1314 if (word == "xref") { | |
1315 bNextOffValid = TRUE; | |
1316 } else if (bIsNumber) { | |
1317 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1318 if (bIsNumber && m_pSyntax->GetKeyword() == "obj") { | |
1319 bNextOffValid = TRUE; | |
1320 } | |
1321 } | |
1322 } | |
1323 | |
1324 if (!bNextOffValid) { | |
1325 m_pSyntax->RestorePos(pos); | |
1326 while (1) { | |
1327 if (m_pSyntax->GetKeyword() == "endobj") | |
1328 break; | |
1329 | |
1330 if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen) | |
1331 break; | |
1332 } | |
1333 nextoff = m_pSyntax->SavePos(); | |
1334 } | |
1335 | |
1336 size = (FX_DWORD)(nextoff - pos); | |
1337 pBuffer = FX_Alloc(uint8_t, size); | |
1338 m_pSyntax->RestorePos(pos); | |
1339 m_pSyntax->ReadBlock(pBuffer, size); | |
1340 m_pSyntax->RestorePos(SavedPos); | |
1341 } | |
1342 | |
1343 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( | |
1344 CPDF_IndirectObjectHolder* pObjList, | |
1345 FX_FILESIZE pos, | |
1346 FX_DWORD objnum) { | |
1347 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
1348 m_pSyntax->RestorePos(pos); | |
1349 bool bIsNumber; | |
1350 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
1351 if (!bIsNumber) { | |
1352 m_pSyntax->RestorePos(SavedPos); | |
1353 return nullptr; | |
1354 } | |
1355 | |
1356 FX_FILESIZE objOffset = m_pSyntax->SavePos(); | |
1357 objOffset -= word.GetLength(); | |
1358 FX_DWORD parser_objnum = FXSYS_atoui(word); | |
1359 if (objnum && parser_objnum != objnum) { | |
1360 m_pSyntax->RestorePos(SavedPos); | |
1361 return nullptr; | |
1362 } | |
1363 | |
1364 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1365 if (!bIsNumber) { | |
1366 m_pSyntax->RestorePos(SavedPos); | |
1367 return nullptr; | |
1368 } | |
1369 | |
1370 FX_DWORD parser_gennum = FXSYS_atoui(word); | |
1371 if (m_pSyntax->GetKeyword() != "obj") { | |
1372 m_pSyntax->RestorePos(SavedPos); | |
1373 return nullptr; | |
1374 } | |
1375 | |
1376 CPDF_Object* pObj = | |
1377 m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true); | |
1378 m_pSyntax->SavePos(); | |
1379 | |
1380 CFX_ByteString bsWord = m_pSyntax->GetKeyword(); | |
1381 if (bsWord == "endobj") | |
1382 m_pSyntax->SavePos(); | |
1383 | |
1384 m_pSyntax->RestorePos(SavedPos); | |
1385 if (pObj) { | |
1386 if (!objnum) | |
1387 pObj->m_ObjNum = parser_objnum; | |
1388 pObj->m_GenNum = parser_gennum; | |
1389 } | |
1390 return pObj; | |
1391 } | |
1392 | |
1393 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( | |
1394 CPDF_IndirectObjectHolder* pObjList, | |
1395 FX_FILESIZE pos, | |
1396 FX_DWORD objnum, | |
1397 FX_FILESIZE* pResultPos) { | |
1398 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
1399 m_pSyntax->RestorePos(pos); | |
1400 | |
1401 bool bIsNumber; | |
1402 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
1403 if (!bIsNumber) { | |
1404 m_pSyntax->RestorePos(SavedPos); | |
1405 return nullptr; | |
1406 } | |
1407 | |
1408 FX_DWORD parser_objnum = FXSYS_atoui(word); | |
1409 if (objnum && parser_objnum != objnum) { | |
1410 m_pSyntax->RestorePos(SavedPos); | |
1411 return nullptr; | |
1412 } | |
1413 | |
1414 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1415 if (!bIsNumber) { | |
1416 m_pSyntax->RestorePos(SavedPos); | |
1417 return nullptr; | |
1418 } | |
1419 | |
1420 FX_DWORD gennum = FXSYS_atoui(word); | |
1421 if (m_pSyntax->GetKeyword() != "obj") { | |
1422 m_pSyntax->RestorePos(SavedPos); | |
1423 return nullptr; | |
1424 } | |
1425 | |
1426 CPDF_Object* pObj = m_pSyntax->GetObjectByStrict(pObjList, objnum, gennum); | |
1427 if (pResultPos) | |
1428 *pResultPos = m_pSyntax->m_Pos; | |
1429 | |
1430 m_pSyntax->RestorePos(SavedPos); | |
1431 return pObj; | |
1432 } | |
1433 | |
1434 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() { | |
1435 if (m_pSyntax->GetKeyword() != "trailer") | |
1436 return nullptr; | |
1437 | |
1438 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj( | |
1439 m_pSyntax->GetObject(m_pDocument, 0, 0, true)); | |
1440 if (!ToDictionary(pObj.get())) | |
1441 return nullptr; | |
1442 return pObj.release()->AsDictionary(); | |
1443 } | |
1444 | |
1445 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) { | |
1446 if (!m_pSecurityHandler) | |
1447 return (FX_DWORD)-1; | |
1448 | |
1449 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions(); | |
1450 if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") { | |
1451 dwPermission &= 0xFFFFFFFC; | |
1452 dwPermission |= 0xFFFFF0C0; | |
1453 if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2) | |
1454 dwPermission &= 0xFFFFF0FF; | |
1455 } | |
1456 return dwPermission; | |
1457 } | |
1458 | |
1459 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, | |
1460 FX_DWORD offset) { | |
1461 m_pSyntax->InitParser(pFileAccess, offset); | |
1462 m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9); | |
1463 | |
1464 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
1465 bool bIsNumber; | |
1466 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
1467 if (!bIsNumber) | |
1468 return FALSE; | |
1469 | |
1470 FX_DWORD objnum = FXSYS_atoui(word); | |
1471 word = m_pSyntax->GetNextWord(&bIsNumber); | |
1472 if (!bIsNumber) | |
1473 return FALSE; | |
1474 | |
1475 FX_DWORD gennum = FXSYS_atoui(word); | |
1476 if (m_pSyntax->GetKeyword() != "obj") { | |
1477 m_pSyntax->RestorePos(SavedPos); | |
1478 return FALSE; | |
1479 } | |
1480 | |
1481 m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true); | |
1482 if (!m_pLinearized) | |
1483 return FALSE; | |
1484 | |
1485 CPDF_Dictionary* pDict = m_pLinearized->GetDict(); | |
1486 if (pDict && pDict->GetElement("Linearized")) { | |
1487 m_pSyntax->GetNextWord(nullptr); | |
1488 | |
1489 CPDF_Object* pLen = pDict->GetElement("L"); | |
1490 if (!pLen) { | |
1491 m_pLinearized->Release(); | |
1492 m_pLinearized = nullptr; | |
1493 return FALSE; | |
1494 } | |
1495 | |
1496 if (pLen->GetInteger() != (int)pFileAccess->GetSize()) | |
1497 return FALSE; | |
1498 | |
1499 if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P"))) | |
1500 m_dwFirstPageNo = pNo->GetInteger(); | |
1501 | |
1502 if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T"))) | |
1503 m_LastXRefOffset = pTable->GetInteger(); | |
1504 | |
1505 return TRUE; | |
1506 } | |
1507 m_pLinearized->Release(); | |
1508 m_pLinearized = nullptr; | |
1509 return FALSE; | |
1510 } | |
1511 | |
1512 CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) { | |
1513 CloseParser(); | |
1514 m_bXRefStream = FALSE; | |
1515 m_LastXRefOffset = 0; | |
1516 m_bOwnFileRead = true; | |
1517 | |
1518 int32_t offset = GetHeaderOffset(pFileAccess); | |
1519 if (offset == -1) | |
1520 return FORMAT_ERROR; | |
1521 | |
1522 if (!IsLinearizedFile(pFileAccess, offset)) { | |
1523 m_pSyntax->m_pFileAccess = nullptr; | |
1524 return StartParse(pFileAccess); | |
1525 } | |
1526 | |
1527 m_pDocument = new CPDF_Document(this); | |
1528 FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos(); | |
1529 | |
1530 FX_BOOL bXRefRebuilt = FALSE; | |
1531 FX_BOOL bLoadV4 = FALSE; | |
1532 if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) && | |
1533 !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) { | |
1534 if (!RebuildCrossRef()) | |
1535 return FORMAT_ERROR; | |
1536 | |
1537 bXRefRebuilt = TRUE; | |
1538 m_LastXRefOffset = 0; | |
1539 } | |
1540 | |
1541 if (bLoadV4) { | |
1542 m_pTrailer = LoadTrailerV4(); | |
1543 if (!m_pTrailer) | |
1544 return SUCCESS; | |
1545 | |
1546 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); | |
1547 if (xrefsize > 0) | |
1548 ShrinkObjectMap(xrefsize); | |
1549 } | |
1550 | |
1551 Error eRet = SetEncryptHandler(); | |
1552 if (eRet != SUCCESS) | |
1553 return eRet; | |
1554 | |
1555 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); | |
1556 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { | |
1557 if (bXRefRebuilt) | |
1558 return FORMAT_ERROR; | |
1559 | |
1560 ReleaseEncryptHandler(); | |
1561 if (!RebuildCrossRef()) | |
1562 return FORMAT_ERROR; | |
1563 | |
1564 eRet = SetEncryptHandler(); | |
1565 if (eRet != SUCCESS) | |
1566 return eRet; | |
1567 | |
1568 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); | |
1569 if (!m_pDocument->GetRoot()) | |
1570 return FORMAT_ERROR; | |
1571 } | |
1572 | |
1573 if (GetRootObjNum() == 0) { | |
1574 ReleaseEncryptHandler(); | |
1575 if (!RebuildCrossRef() || GetRootObjNum() == 0) | |
1576 return FORMAT_ERROR; | |
1577 | |
1578 eRet = SetEncryptHandler(); | |
1579 if (eRet != SUCCESS) | |
1580 return eRet; | |
1581 } | |
1582 | |
1583 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { | |
1584 if (CPDF_Reference* pMetadata = | |
1585 ToReference(m_pDocument->GetRoot()->GetElement("Metadata"))) | |
1586 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); | |
1587 } | |
1588 return SUCCESS; | |
1589 } | |
1590 | |
1591 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { | |
1592 if (!LoadCrossRefV5(&xrefpos, FALSE)) | |
1593 return FALSE; | |
1594 | |
1595 std::set<FX_FILESIZE> seen_xrefpos; | |
1596 while (xrefpos) { | |
1597 seen_xrefpos.insert(xrefpos); | |
1598 if (!LoadCrossRefV5(&xrefpos, FALSE)) | |
1599 return FALSE; | |
1600 | |
1601 // Check for circular references. | |
1602 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
1603 return FALSE; | |
1604 } | |
1605 m_ObjectStreamMap.clear(); | |
1606 m_bXRefStream = TRUE; | |
1607 return TRUE; | |
1608 } | |
1609 | |
1610 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { | |
1611 FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum; | |
1612 m_pSyntax->m_MetadataObjnum = 0; | |
1613 if (m_pTrailer) { | |
1614 m_pTrailer->Release(); | |
1615 m_pTrailer = nullptr; | |
1616 } | |
1617 | |
1618 m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset); | |
1619 uint8_t ch = 0; | |
1620 FX_DWORD dwCount = 0; | |
1621 m_pSyntax->GetNextChar(ch); | |
1622 while (PDFCharIsWhitespace(ch)) { | |
1623 ++dwCount; | |
1624 if (m_pSyntax->m_FileLen >= | |
1625 (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) { | |
1626 break; | |
1627 } | |
1628 m_pSyntax->GetNextChar(ch); | |
1629 } | |
1630 m_LastXRefOffset += dwCount; | |
1631 m_ObjectStreamMap.clear(); | |
1632 m_ObjCache.clear(); | |
1633 | |
1634 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && | |
1635 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { | |
1636 m_LastXRefOffset = 0; | |
1637 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; | |
1638 return FORMAT_ERROR; | |
1639 } | |
1640 | |
1641 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; | |
1642 return SUCCESS; | |
1643 } | |
OLD | NEW |