| OLD | NEW |
| (Empty) |
| 1 // Copyright 2016 PDFium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
| 6 | |
| 7 #include "core/fpdfapi/fpdf_parser/cpdf_parser.h" | |
| 8 | |
| 9 #include <vector> | |
| 10 | |
| 11 #include "core/fpdfapi/fpdf_parser/cpdf_array.h" | |
| 12 #include "core/fpdfapi/fpdf_parser/cpdf_crypto_handler.h" | |
| 13 #include "core/fpdfapi/fpdf_parser/cpdf_dictionary.h" | |
| 14 #include "core/fpdfapi/fpdf_parser/cpdf_document.h" | |
| 15 #include "core/fpdfapi/fpdf_parser/cpdf_number.h" | |
| 16 #include "core/fpdfapi/fpdf_parser/cpdf_reference.h" | |
| 17 #include "core/fpdfapi/fpdf_parser/cpdf_security_handler.h" | |
| 18 #include "core/fpdfapi/fpdf_parser/cpdf_stream.h" | |
| 19 #include "core/fpdfapi/fpdf_parser/cpdf_stream_acc.h" | |
| 20 #include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" | |
| 21 #include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" | |
| 22 #include "core/fxcrt/fx_ext.h" | |
| 23 #include "core/fxcrt/fx_safe_types.h" | |
| 24 #include "third_party/base/stl_util.h" | |
| 25 | |
| 26 namespace { | |
| 27 | |
// Upper bound on the xref table size this parser will accept. The format
// allows larger tables in theory, but this should be ample in practice.
constexpr int32_t kMaxXRefSize = 1 << 20;
| 31 | |
// Decodes the first |n| bytes at |p| as a big-endian unsigned integer.
// Only the low 32 bits of the accumulated value are kept. Returns 0 when
// |n| is not positive.
uint32_t GetVarInt(const uint8_t* p, int32_t n) {
  uint32_t value = 0;
  int32_t index = 0;
  while (index < n) {
    // Shifting by one byte and or-ing in the next one is the same as
    // multiplying by 256 and adding, because the low byte is zero.
    value = (value << 8) | p[index];
    ++index;
  }
  return value;
}
| 38 | |
| 39 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) { | |
| 40 return pObjStream->GetDict()->GetIntegerFor("N"); | |
| 41 } | |
| 42 | |
| 43 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) { | |
| 44 return pObjStream->GetDict()->GetIntegerFor("First"); | |
| 45 } | |
| 46 | |
| 47 } // namespace | |
| 48 | |
| 49 CPDF_Parser::CPDF_Parser() | |
| 50 : m_pDocument(nullptr), | |
| 51 m_bHasParsed(false), | |
| 52 m_bOwnFileRead(true), | |
| 53 m_FileVersion(0), | |
| 54 m_pTrailer(nullptr), | |
| 55 m_pEncryptDict(nullptr), | |
| 56 m_bVersionUpdated(false), | |
| 57 m_pLinearized(nullptr), | |
| 58 m_dwFirstPageNo(0), | |
| 59 m_dwXrefStartObjNum(0) { | |
| 60 m_pSyntax.reset(new CPDF_SyntaxParser); | |
| 61 } | |
| 62 | |
| 63 CPDF_Parser::~CPDF_Parser() { | |
| 64 if (m_pTrailer) | |
| 65 m_pTrailer->Release(); | |
| 66 | |
| 67 ReleaseEncryptHandler(); | |
| 68 SetEncryptDictionary(nullptr); | |
| 69 | |
| 70 if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) { | |
| 71 m_pSyntax->m_pFileAccess->Release(); | |
| 72 m_pSyntax->m_pFileAccess = nullptr; | |
| 73 } | |
| 74 | |
| 75 int32_t iLen = m_Trailers.GetSize(); | |
| 76 for (int32_t i = 0; i < iLen; ++i) { | |
| 77 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i)) | |
| 78 trailer->Release(); | |
| 79 } | |
| 80 | |
| 81 if (m_pLinearized) | |
| 82 m_pLinearized->Release(); | |
| 83 } | |
| 84 | |
| 85 uint32_t CPDF_Parser::GetLastObjNum() const { | |
| 86 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first; | |
| 87 } | |
| 88 | |
| 89 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const { | |
| 90 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first; | |
| 91 } | |
| 92 | |
| 93 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const { | |
| 94 auto it = m_ObjectInfo.find(objnum); | |
| 95 return it != m_ObjectInfo.end() ? it->second.pos : 0; | |
| 96 } | |
| 97 | |
| 98 uint8_t CPDF_Parser::GetObjectType(uint32_t objnum) const { | |
| 99 ASSERT(IsValidObjectNumber(objnum)); | |
| 100 auto it = m_ObjectInfo.find(objnum); | |
| 101 return it != m_ObjectInfo.end() ? it->second.type : 0; | |
| 102 } | |
| 103 | |
| 104 uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const { | |
| 105 ASSERT(IsValidObjectNumber(objnum)); | |
| 106 auto it = m_ObjectInfo.find(objnum); | |
| 107 return it != m_ObjectInfo.end() ? it->second.gennum : 0; | |
| 108 } | |
| 109 | |
| 110 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const { | |
| 111 uint8_t type = GetObjectType(objnum); | |
| 112 return type == 0 || type == 255; | |
| 113 } | |
| 114 | |
| 115 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) { | |
| 116 m_pEncryptDict = pDict; | |
| 117 } | |
| 118 | |
| 119 CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() { | |
| 120 return m_pSyntax->m_pCryptoHandler.get(); | |
| 121 } | |
| 122 | |
| 123 IFX_FileRead* CPDF_Parser::GetFileAccess() const { | |
| 124 return m_pSyntax->m_pFileAccess; | |
| 125 } | |
| 126 | |
| 127 void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) { | |
| 128 if (objnum == 0) { | |
| 129 m_ObjectInfo.clear(); | |
| 130 return; | |
| 131 } | |
| 132 | |
| 133 auto it = m_ObjectInfo.lower_bound(objnum); | |
| 134 while (it != m_ObjectInfo.end()) { | |
| 135 auto saved_it = it++; | |
| 136 m_ObjectInfo.erase(saved_it); | |
| 137 } | |
| 138 | |
| 139 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) | |
| 140 m_ObjectInfo[objnum - 1].pos = 0; | |
| 141 } | |
| 142 | |
| 143 CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess, | |
| 144 CPDF_Document* pDocument) { | |
| 145 ASSERT(!m_bHasParsed); | |
| 146 m_bHasParsed = true; | |
| 147 | |
| 148 m_bXRefStream = FALSE; | |
| 149 m_LastXRefOffset = 0; | |
| 150 m_bOwnFileRead = true; | |
| 151 | |
| 152 int32_t offset = GetHeaderOffset(pFileAccess); | |
| 153 if (offset == -1) { | |
| 154 if (pFileAccess) | |
| 155 pFileAccess->Release(); | |
| 156 return FORMAT_ERROR; | |
| 157 } | |
| 158 m_pSyntax->InitParser(pFileAccess, offset); | |
| 159 | |
| 160 uint8_t ch; | |
| 161 if (!m_pSyntax->GetCharAt(5, ch)) | |
| 162 return FORMAT_ERROR; | |
| 163 if (std::isdigit(ch)) | |
| 164 m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10; | |
| 165 | |
| 166 if (!m_pSyntax->GetCharAt(7, ch)) | |
| 167 return FORMAT_ERROR; | |
| 168 if (std::isdigit(ch)) | |
| 169 m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
| 170 | |
| 171 if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9) | |
| 172 return FORMAT_ERROR; | |
| 173 | |
| 174 m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9); | |
| 175 m_pDocument = pDocument; | |
| 176 | |
| 177 FX_BOOL bXRefRebuilt = FALSE; | |
| 178 if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) { | |
| 179 m_SortedOffset.insert(m_pSyntax->SavePos()); | |
| 180 m_pSyntax->GetKeyword(); | |
| 181 | |
| 182 bool bNumber; | |
| 183 CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber); | |
| 184 if (!bNumber) | |
| 185 return FORMAT_ERROR; | |
| 186 | |
| 187 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str()); | |
| 188 if (!LoadAllCrossRefV4(m_LastXRefOffset) && | |
| 189 !LoadAllCrossRefV5(m_LastXRefOffset)) { | |
| 190 if (!RebuildCrossRef()) | |
| 191 return FORMAT_ERROR; | |
| 192 | |
| 193 bXRefRebuilt = TRUE; | |
| 194 m_LastXRefOffset = 0; | |
| 195 } | |
| 196 } else { | |
| 197 if (!RebuildCrossRef()) | |
| 198 return FORMAT_ERROR; | |
| 199 | |
| 200 bXRefRebuilt = TRUE; | |
| 201 } | |
| 202 Error eRet = SetEncryptHandler(); | |
| 203 if (eRet != SUCCESS) | |
| 204 return eRet; | |
| 205 | |
| 206 m_pDocument->LoadDoc(); | |
| 207 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { | |
| 208 if (bXRefRebuilt) | |
| 209 return FORMAT_ERROR; | |
| 210 | |
| 211 ReleaseEncryptHandler(); | |
| 212 if (!RebuildCrossRef()) | |
| 213 return FORMAT_ERROR; | |
| 214 | |
| 215 eRet = SetEncryptHandler(); | |
| 216 if (eRet != SUCCESS) | |
| 217 return eRet; | |
| 218 | |
| 219 m_pDocument->LoadDoc(); | |
| 220 if (!m_pDocument->GetRoot()) | |
| 221 return FORMAT_ERROR; | |
| 222 } | |
| 223 if (GetRootObjNum() == 0) { | |
| 224 ReleaseEncryptHandler(); | |
| 225 if (!RebuildCrossRef() || GetRootObjNum() == 0) | |
| 226 return FORMAT_ERROR; | |
| 227 | |
| 228 eRet = SetEncryptHandler(); | |
| 229 if (eRet != SUCCESS) | |
| 230 return eRet; | |
| 231 } | |
| 232 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { | |
| 233 CPDF_Reference* pMetadata = | |
| 234 ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")); | |
| 235 if (pMetadata) | |
| 236 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); | |
| 237 } | |
| 238 return SUCCESS; | |
| 239 } | |
| 240 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { | |
| 241 ReleaseEncryptHandler(); | |
| 242 SetEncryptDictionary(nullptr); | |
| 243 | |
| 244 if (!m_pTrailer) | |
| 245 return FORMAT_ERROR; | |
| 246 | |
| 247 CPDF_Object* pEncryptObj = m_pTrailer->GetObjectFor("Encrypt"); | |
| 248 if (pEncryptObj) { | |
| 249 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) { | |
| 250 SetEncryptDictionary(pEncryptDict); | |
| 251 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) { | |
| 252 pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum()); | |
| 253 if (pEncryptObj) | |
| 254 SetEncryptDictionary(pEncryptObj->GetDict()); | |
| 255 } | |
| 256 } | |
| 257 | |
| 258 if (m_pEncryptDict) { | |
| 259 CFX_ByteString filter = m_pEncryptDict->GetStringFor("Filter"); | |
| 260 std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler; | |
| 261 Error err = HANDLER_ERROR; | |
| 262 if (filter == "Standard") { | |
| 263 pSecurityHandler.reset(new CPDF_SecurityHandler); | |
| 264 err = PASSWORD_ERROR; | |
| 265 } | |
| 266 if (!pSecurityHandler) | |
| 267 return HANDLER_ERROR; | |
| 268 | |
| 269 if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) | |
| 270 return err; | |
| 271 | |
| 272 m_pSecurityHandler = std::move(pSecurityHandler); | |
| 273 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler( | |
| 274 m_pSecurityHandler->CreateCryptoHandler()); | |
| 275 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) | |
| 276 return HANDLER_ERROR; | |
| 277 m_pSyntax->SetEncrypt(std::move(pCryptoHandler)); | |
| 278 } | |
| 279 return SUCCESS; | |
| 280 } | |
| 281 | |
| 282 void CPDF_Parser::ReleaseEncryptHandler() { | |
| 283 m_pSyntax->m_pCryptoHandler.reset(); | |
| 284 m_pSecurityHandler.reset(); | |
| 285 } | |
| 286 | |
| 287 FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const { | |
| 288 if (!IsValidObjectNumber(objnum)) | |
| 289 return 0; | |
| 290 | |
| 291 if (GetObjectType(objnum) == 1) | |
| 292 return GetObjectPositionOrZero(objnum); | |
| 293 | |
| 294 if (GetObjectType(objnum) == 2) { | |
| 295 FX_FILESIZE pos = GetObjectPositionOrZero(objnum); | |
| 296 return GetObjectPositionOrZero(pos); | |
| 297 } | |
| 298 return 0; | |
| 299 } | |
| 300 | |
| 301 // Ideally, all the cross reference entries should be verified. | |
| 302 // In reality, we rarely see well-formed cross references don't match | |
| 303 // with the objects. crbug/602650 showed a case where object numbers | |
| 304 // in the cross reference table are all off by one. | |
| 305 bool CPDF_Parser::VerifyCrossRefV4() { | |
| 306 for (const auto& it : m_ObjectInfo) { | |
| 307 if (it.second.pos == 0) | |
| 308 continue; | |
| 309 // Find the first non-zero position. | |
| 310 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
| 311 m_pSyntax->RestorePos(it.second.pos); | |
| 312 bool is_num = false; | |
| 313 CFX_ByteString num_str = m_pSyntax->GetNextWord(&is_num); | |
| 314 m_pSyntax->RestorePos(SavedPos); | |
| 315 if (!is_num || num_str.IsEmpty() || | |
| 316 FXSYS_atoui(num_str.c_str()) != it.first) { | |
| 317 // If the object number read doesn't match the one stored, | |
| 318 // something is wrong with the cross reference table. | |
| 319 return false; | |
| 320 } else { | |
| 321 return true; | |
| 322 } | |
| 323 } | |
| 324 return true; | |
| 325 } | |
| 326 | |
| 327 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { | |
| 328 if (!LoadCrossRefV4(xrefpos, 0, TRUE)) | |
| 329 return FALSE; | |
| 330 | |
| 331 m_pTrailer = LoadTrailerV4(); | |
| 332 if (!m_pTrailer) | |
| 333 return FALSE; | |
| 334 | |
| 335 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); | |
| 336 if (xrefsize > 0 && xrefsize <= kMaxXRefSize) | |
| 337 ShrinkObjectMap(xrefsize); | |
| 338 | |
| 339 std::vector<FX_FILESIZE> CrossRefList; | |
| 340 std::vector<FX_FILESIZE> XRefStreamList; | |
| 341 std::set<FX_FILESIZE> seen_xrefpos; | |
| 342 | |
| 343 CrossRefList.push_back(xrefpos); | |
| 344 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); | |
| 345 seen_xrefpos.insert(xrefpos); | |
| 346 | |
| 347 // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not | |
| 348 // numerical, GetDirectInteger() returns 0. Loading will end. | |
| 349 xrefpos = GetDirectInteger(m_pTrailer, "Prev"); | |
| 350 while (xrefpos) { | |
| 351 // Check for circular references. | |
| 352 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
| 353 return FALSE; | |
| 354 | |
| 355 seen_xrefpos.insert(xrefpos); | |
| 356 | |
| 357 // SLOW ... | |
| 358 CrossRefList.insert(CrossRefList.begin(), xrefpos); | |
| 359 LoadCrossRefV4(xrefpos, 0, TRUE); | |
| 360 | |
| 361 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
| 362 LoadTrailerV4()); | |
| 363 if (!pDict) | |
| 364 return FALSE; | |
| 365 | |
| 366 xrefpos = GetDirectInteger(pDict.get(), "Prev"); | |
| 367 | |
| 368 // SLOW ... | |
| 369 XRefStreamList.insert(XRefStreamList.begin(), | |
| 370 pDict->GetIntegerFor("XRefStm")); | |
| 371 m_Trailers.Add(pDict.release()); | |
| 372 } | |
| 373 | |
| 374 for (size_t i = 0; i < CrossRefList.size(); ++i) { | |
| 375 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) | |
| 376 return FALSE; | |
| 377 if (i == 0 && !VerifyCrossRefV4()) | |
| 378 return FALSE; | |
| 379 } | |
| 380 return TRUE; | |
| 381 } | |
| 382 | |
| 383 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, | |
| 384 uint32_t dwObjCount) { | |
| 385 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) | |
| 386 return FALSE; | |
| 387 | |
| 388 m_pTrailer = LoadTrailerV4(); | |
| 389 if (!m_pTrailer) | |
| 390 return FALSE; | |
| 391 | |
| 392 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); | |
| 393 if (xrefsize == 0) | |
| 394 return FALSE; | |
| 395 | |
| 396 std::vector<FX_FILESIZE> CrossRefList; | |
| 397 std::vector<FX_FILESIZE> XRefStreamList; | |
| 398 std::set<FX_FILESIZE> seen_xrefpos; | |
| 399 | |
| 400 CrossRefList.push_back(xrefpos); | |
| 401 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); | |
| 402 seen_xrefpos.insert(xrefpos); | |
| 403 | |
| 404 xrefpos = GetDirectInteger(m_pTrailer, "Prev"); | |
| 405 while (xrefpos) { | |
| 406 // Check for circular references. | |
| 407 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
| 408 return FALSE; | |
| 409 | |
| 410 seen_xrefpos.insert(xrefpos); | |
| 411 | |
| 412 // SLOW ... | |
| 413 CrossRefList.insert(CrossRefList.begin(), xrefpos); | |
| 414 LoadCrossRefV4(xrefpos, 0, TRUE); | |
| 415 | |
| 416 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
| 417 LoadTrailerV4()); | |
| 418 if (!pDict) | |
| 419 return FALSE; | |
| 420 | |
| 421 xrefpos = GetDirectInteger(pDict.get(), "Prev"); | |
| 422 | |
| 423 // SLOW ... | |
| 424 XRefStreamList.insert(XRefStreamList.begin(), | |
| 425 pDict->GetIntegerFor("XRefStm")); | |
| 426 m_Trailers.Add(pDict.release()); | |
| 427 } | |
| 428 | |
| 429 for (size_t i = 1; i < CrossRefList.size(); ++i) { | |
| 430 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) | |
| 431 return FALSE; | |
| 432 } | |
| 433 return TRUE; | |
| 434 } | |
| 435 | |
| 436 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, | |
| 437 uint32_t dwObjCount) { | |
| 438 FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset; | |
| 439 | |
| 440 m_pSyntax->RestorePos(dwStartPos); | |
| 441 m_SortedOffset.insert(pos); | |
| 442 | |
| 443 uint32_t start_objnum = 0; | |
| 444 uint32_t count = dwObjCount; | |
| 445 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
| 446 | |
| 447 const int32_t recordsize = 20; | |
| 448 std::vector<char> buf(1024 * recordsize + 1); | |
| 449 buf[1024 * recordsize] = '\0'; | |
| 450 | |
| 451 int32_t nBlocks = count / 1024 + 1; | |
| 452 for (int32_t block = 0; block < nBlocks; block++) { | |
| 453 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; | |
| 454 uint32_t dwReadSize = block_size * recordsize; | |
| 455 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen) | |
| 456 return FALSE; | |
| 457 | |
| 458 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), | |
| 459 dwReadSize)) { | |
| 460 return FALSE; | |
| 461 } | |
| 462 | |
| 463 for (int32_t i = 0; i < block_size; i++) { | |
| 464 uint32_t objnum = start_objnum + block * 1024 + i; | |
| 465 char* pEntry = &buf[i * recordsize]; | |
| 466 if (pEntry[17] == 'f') { | |
| 467 m_ObjectInfo[objnum].pos = 0; | |
| 468 m_ObjectInfo[objnum].type = 0; | |
| 469 } else { | |
| 470 int32_t offset = FXSYS_atoi(pEntry); | |
| 471 if (offset == 0) { | |
| 472 for (int32_t c = 0; c < 10; c++) { | |
| 473 if (!std::isdigit(pEntry[c])) | |
| 474 return FALSE; | |
| 475 } | |
| 476 } | |
| 477 | |
| 478 m_ObjectInfo[objnum].pos = offset; | |
| 479 int32_t version = FXSYS_atoi(pEntry + 11); | |
| 480 if (version >= 1) | |
| 481 m_bVersionUpdated = true; | |
| 482 | |
| 483 m_ObjectInfo[objnum].gennum = version; | |
| 484 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) | |
| 485 m_SortedOffset.insert(m_ObjectInfo[objnum].pos); | |
| 486 | |
| 487 m_ObjectInfo[objnum].type = 1; | |
| 488 } | |
| 489 } | |
| 490 } | |
| 491 m_pSyntax->RestorePos(SavedPos + count * recordsize); | |
| 492 return TRUE; | |
| 493 } | |
| 494 | |
| 495 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, | |
| 496 FX_FILESIZE streampos, | |
| 497 FX_BOOL bSkip) { | |
| 498 m_pSyntax->RestorePos(pos); | |
| 499 if (m_pSyntax->GetKeyword() != "xref") | |
| 500 return false; | |
| 501 | |
| 502 m_SortedOffset.insert(pos); | |
| 503 if (streampos) | |
| 504 m_SortedOffset.insert(streampos); | |
| 505 | |
| 506 while (1) { | |
| 507 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
| 508 bool bIsNumber; | |
| 509 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
| 510 if (word.IsEmpty()) | |
| 511 return false; | |
| 512 | |
| 513 if (!bIsNumber) { | |
| 514 m_pSyntax->RestorePos(SavedPos); | |
| 515 break; | |
| 516 } | |
| 517 | |
| 518 uint32_t start_objnum = FXSYS_atoui(word.c_str()); | |
| 519 if (start_objnum >= kMaxObjectNumber) | |
| 520 return false; | |
| 521 | |
| 522 uint32_t count = m_pSyntax->GetDirectNum(); | |
| 523 m_pSyntax->ToNextWord(); | |
| 524 SavedPos = m_pSyntax->SavePos(); | |
| 525 const int32_t recordsize = 20; | |
| 526 | |
| 527 m_dwXrefStartObjNum = start_objnum; | |
| 528 if (!bSkip) { | |
| 529 std::vector<char> buf(1024 * recordsize + 1); | |
| 530 buf[1024 * recordsize] = '\0'; | |
| 531 | |
| 532 int32_t nBlocks = count / 1024 + 1; | |
| 533 for (int32_t block = 0; block < nBlocks; block++) { | |
| 534 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; | |
| 535 m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), | |
| 536 block_size * recordsize); | |
| 537 | |
| 538 for (int32_t i = 0; i < block_size; i++) { | |
| 539 uint32_t objnum = start_objnum + block * 1024 + i; | |
| 540 char* pEntry = &buf[i * recordsize]; | |
| 541 if (pEntry[17] == 'f') { | |
| 542 m_ObjectInfo[objnum].pos = 0; | |
| 543 m_ObjectInfo[objnum].type = 0; | |
| 544 } else { | |
| 545 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry); | |
| 546 if (offset == 0) { | |
| 547 for (int32_t c = 0; c < 10; c++) { | |
| 548 if (!std::isdigit(pEntry[c])) | |
| 549 return false; | |
| 550 } | |
| 551 } | |
| 552 | |
| 553 m_ObjectInfo[objnum].pos = offset; | |
| 554 int32_t version = FXSYS_atoi(pEntry + 11); | |
| 555 if (version >= 1) | |
| 556 m_bVersionUpdated = true; | |
| 557 | |
| 558 m_ObjectInfo[objnum].gennum = version; | |
| 559 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) | |
| 560 m_SortedOffset.insert(m_ObjectInfo[objnum].pos); | |
| 561 | |
| 562 m_ObjectInfo[objnum].type = 1; | |
| 563 } | |
| 564 } | |
| 565 } | |
| 566 } | |
| 567 m_pSyntax->RestorePos(SavedPos + count * recordsize); | |
| 568 } | |
| 569 return !streampos || LoadCrossRefV5(&streampos, FALSE); | |
| 570 } | |
| 571 | |
| 572 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { | |
| 573 if (!LoadCrossRefV5(&xrefpos, TRUE)) | |
| 574 return FALSE; | |
| 575 | |
| 576 std::set<FX_FILESIZE> seen_xrefpos; | |
| 577 while (xrefpos) { | |
| 578 seen_xrefpos.insert(xrefpos); | |
| 579 if (!LoadCrossRefV5(&xrefpos, FALSE)) | |
| 580 return FALSE; | |
| 581 | |
| 582 // Check for circular references. | |
| 583 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
| 584 return FALSE; | |
| 585 } | |
| 586 m_ObjectStreamMap.clear(); | |
| 587 m_bXRefStream = TRUE; | |
| 588 return TRUE; | |
| 589 } | |
| 590 | |
| 591 FX_BOOL CPDF_Parser::RebuildCrossRef() { | |
| 592 m_ObjectInfo.clear(); | |
| 593 m_SortedOffset.clear(); | |
| 594 if (m_pTrailer) { | |
| 595 m_pTrailer->Release(); | |
| 596 m_pTrailer = nullptr; | |
| 597 } | |
| 598 | |
| 599 ParserState state = ParserState::kDefault; | |
| 600 | |
| 601 int32_t inside_index = 0; | |
| 602 uint32_t objnum = 0; | |
| 603 uint32_t gennum = 0; | |
| 604 int32_t depth = 0; | |
| 605 | |
| 606 const uint32_t kBufferSize = 4096; | |
| 607 std::vector<uint8_t> buffer(kBufferSize); | |
| 608 | |
| 609 FX_FILESIZE pos = m_pSyntax->m_HeaderOffset; | |
| 610 FX_FILESIZE start_pos = 0; | |
| 611 FX_FILESIZE start_pos1 = 0; | |
| 612 FX_FILESIZE last_obj = -1; | |
| 613 FX_FILESIZE last_xref = -1; | |
| 614 FX_FILESIZE last_trailer = -1; | |
| 615 | |
| 616 while (pos < m_pSyntax->m_FileLen) { | |
| 617 const FX_FILESIZE saved_pos = pos; | |
| 618 bool bOverFlow = false; | |
| 619 uint32_t size = | |
| 620 std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize); | |
| 621 if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size)) | |
| 622 break; | |
| 623 | |
| 624 for (uint32_t i = 0; i < size; i++) { | |
| 625 uint8_t byte = buffer[i]; | |
| 626 switch (state) { | |
| 627 case ParserState::kDefault: | |
| 628 if (PDFCharIsWhitespace(byte)) { | |
| 629 state = ParserState::kWhitespace; | |
| 630 } else if (std::isdigit(byte)) { | |
| 631 --i; | |
| 632 state = ParserState::kWhitespace; | |
| 633 } else if (byte == '%') { | |
| 634 inside_index = 0; | |
| 635 state = ParserState::kComment; | |
| 636 } else if (byte == '(') { | |
| 637 state = ParserState::kString; | |
| 638 depth = 1; | |
| 639 } else if (byte == '<') { | |
| 640 inside_index = 1; | |
| 641 state = ParserState::kHexString; | |
| 642 } else if (byte == '\\') { | |
| 643 state = ParserState::kEscapedString; | |
| 644 } else if (byte == 't') { | |
| 645 state = ParserState::kTrailer; | |
| 646 inside_index = 1; | |
| 647 } | |
| 648 break; | |
| 649 | |
| 650 case ParserState::kWhitespace: | |
| 651 if (std::isdigit(byte)) { | |
| 652 start_pos = pos + i; | |
| 653 state = ParserState::kObjNum; | |
| 654 objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
| 655 } else if (byte == 't') { | |
| 656 state = ParserState::kTrailer; | |
| 657 inside_index = 1; | |
| 658 } else if (byte == 'x') { | |
| 659 state = ParserState::kXref; | |
| 660 inside_index = 1; | |
| 661 } else if (!PDFCharIsWhitespace(byte)) { | |
| 662 --i; | |
| 663 state = ParserState::kDefault; | |
| 664 } | |
| 665 break; | |
| 666 | |
| 667 case ParserState::kObjNum: | |
| 668 if (std::isdigit(byte)) { | |
| 669 objnum = | |
| 670 objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
| 671 } else if (PDFCharIsWhitespace(byte)) { | |
| 672 state = ParserState::kPostObjNum; | |
| 673 } else { | |
| 674 --i; | |
| 675 state = ParserState::kEndObj; | |
| 676 inside_index = 0; | |
| 677 } | |
| 678 break; | |
| 679 | |
| 680 case ParserState::kPostObjNum: | |
| 681 if (std::isdigit(byte)) { | |
| 682 start_pos1 = pos + i; | |
| 683 state = ParserState::kGenNum; | |
| 684 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
| 685 } else if (byte == 't') { | |
| 686 state = ParserState::kTrailer; | |
| 687 inside_index = 1; | |
| 688 } else if (!PDFCharIsWhitespace(byte)) { | |
| 689 --i; | |
| 690 state = ParserState::kDefault; | |
| 691 } | |
| 692 break; | |
| 693 | |
| 694 case ParserState::kGenNum: | |
| 695 if (std::isdigit(byte)) { | |
| 696 gennum = | |
| 697 gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
| 698 } else if (PDFCharIsWhitespace(byte)) { | |
| 699 state = ParserState::kPostGenNum; | |
| 700 } else { | |
| 701 --i; | |
| 702 state = ParserState::kDefault; | |
| 703 } | |
| 704 break; | |
| 705 | |
| 706 case ParserState::kPostGenNum: | |
| 707 if (byte == 'o') { | |
| 708 state = ParserState::kBeginObj; | |
| 709 inside_index = 1; | |
| 710 } else if (std::isdigit(byte)) { | |
| 711 objnum = gennum; | |
| 712 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); | |
| 713 start_pos = start_pos1; | |
| 714 start_pos1 = pos + i; | |
| 715 state = ParserState::kGenNum; | |
| 716 } else if (byte == 't') { | |
| 717 state = ParserState::kTrailer; | |
| 718 inside_index = 1; | |
| 719 } else if (!PDFCharIsWhitespace(byte)) { | |
| 720 --i; | |
| 721 state = ParserState::kDefault; | |
| 722 } | |
| 723 break; | |
| 724 | |
| 725 case ParserState::kBeginObj: | |
| 726 switch (inside_index) { | |
| 727 case 1: | |
| 728 if (byte != 'b') { | |
| 729 --i; | |
| 730 state = ParserState::kDefault; | |
| 731 } else { | |
| 732 inside_index++; | |
| 733 } | |
| 734 break; | |
| 735 case 2: | |
| 736 if (byte != 'j') { | |
| 737 --i; | |
| 738 state = ParserState::kDefault; | |
| 739 } else { | |
| 740 inside_index++; | |
| 741 } | |
| 742 break; | |
| 743 case 3: | |
| 744 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | |
| 745 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset; | |
| 746 m_SortedOffset.insert(obj_pos); | |
| 747 last_obj = start_pos; | |
| 748 FX_FILESIZE obj_end = 0; | |
| 749 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( | |
| 750 m_pDocument, obj_pos, objnum, &obj_end); | |
| 751 if (CPDF_Stream* pStream = ToStream(pObject)) { | |
| 752 if (CPDF_Dictionary* pDict = pStream->GetDict()) { | |
| 753 if ((pDict->KeyExist("Type")) && | |
| 754 (pDict->GetStringFor("Type") == "XRef" && | |
| 755 pDict->KeyExist("Size"))) { | |
| 756 CPDF_Object* pRoot = pDict->GetObjectFor("Root"); | |
| 757 if (pRoot && pRoot->GetDict() && | |
| 758 pRoot->GetDict()->GetObjectFor("Pages")) { | |
| 759 if (m_pTrailer) | |
| 760 m_pTrailer->Release(); | |
| 761 m_pTrailer = ToDictionary(pDict->Clone()); | |
| 762 } | |
| 763 } | |
| 764 } | |
| 765 } | |
| 766 | |
| 767 FX_FILESIZE offset = 0; | |
| 768 m_pSyntax->RestorePos(obj_pos); | |
| 769 offset = m_pSyntax->FindTag("obj", 0); | |
| 770 if (offset == -1) | |
| 771 offset = 0; | |
| 772 else | |
| 773 offset += 3; | |
| 774 | |
| 775 FX_FILESIZE nLen = obj_end - obj_pos - offset; | |
| 776 if ((uint32_t)nLen > size - i) { | |
| 777 pos = obj_end + m_pSyntax->m_HeaderOffset; | |
| 778 bOverFlow = true; | |
| 779 } else { | |
| 780 i += (uint32_t)nLen; | |
| 781 } | |
| 782 | |
| 783 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) && | |
| 784 m_ObjectInfo[objnum].pos) { | |
| 785 if (pObject) { | |
| 786 uint32_t oldgen = GetObjectGenNum(objnum); | |
| 787 m_ObjectInfo[objnum].pos = obj_pos; | |
| 788 m_ObjectInfo[objnum].gennum = gennum; | |
| 789 if (oldgen != gennum) | |
| 790 m_bVersionUpdated = true; | |
| 791 } | |
| 792 } else { | |
| 793 m_ObjectInfo[objnum].pos = obj_pos; | |
| 794 m_ObjectInfo[objnum].type = 1; | |
| 795 m_ObjectInfo[objnum].gennum = gennum; | |
| 796 } | |
| 797 | |
| 798 if (pObject) | |
| 799 pObject->Release(); | |
| 800 } | |
| 801 --i; | |
| 802 state = ParserState::kDefault; | |
| 803 break; | |
| 804 } | |
| 805 break; | |
| 806 | |
| 807 case ParserState::kTrailer: | |
| 808 if (inside_index == 7) { | |
| 809 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { | |
| 810 last_trailer = pos + i - 7; | |
| 811 m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset); | |
| 812 | |
| 813 CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true); | |
| 814 if (pObj) { | |
| 815 if (!pObj->IsDictionary() && !pObj->AsStream()) { | |
| 816 pObj->Release(); | |
| 817 } else { | |
| 818 CPDF_Stream* pStream = pObj->AsStream(); | |
| 819 if (CPDF_Dictionary* pTrailer = | |
| 820 pStream ? pStream->GetDict() : pObj->AsDictionary()) { | |
| 821 if (m_pTrailer) { | |
| 822 CPDF_Object* pRoot = pTrailer->GetObjectFor("Root"); | |
| 823 CPDF_Reference* pRef = ToReference(pRoot); | |
| 824 if (!pRoot || | |
| 825 (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) && | |
| 826 m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) { | |
| 827 auto it = pTrailer->begin(); | |
| 828 while (it != pTrailer->end()) { | |
| 829 const CFX_ByteString& key = it->first; | |
| 830 CPDF_Object* pElement = it->second; | |
| 831 ++it; | |
| 832 uint32_t dwObjNum = | |
| 833 pElement ? pElement->GetObjNum() : 0; | |
| 834 if (dwObjNum) { | |
| 835 m_pTrailer->SetReferenceFor(key, m_pDocument, | |
| 836 dwObjNum); | |
| 837 } else { | |
| 838 m_pTrailer->SetFor(key, pElement->Clone()); | |
| 839 } | |
| 840 } | |
| 841 } | |
| 842 pObj->Release(); | |
| 843 } else { | |
| 844 if (pObj->IsStream()) { | |
| 845 m_pTrailer = ToDictionary(pTrailer->Clone()); | |
| 846 pObj->Release(); | |
| 847 } else { | |
| 848 m_pTrailer = pTrailer; | |
| 849 } | |
| 850 | |
| 851 FX_FILESIZE dwSavePos = m_pSyntax->SavePos(); | |
| 852 CFX_ByteString strWord = m_pSyntax->GetKeyword(); | |
| 853 if (!strWord.Compare("startxref")) { | |
| 854 bool bNumber; | |
| 855 CFX_ByteString bsOffset = | |
| 856 m_pSyntax->GetNextWord(&bNumber); | |
| 857 if (bNumber) | |
| 858 m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str()); | |
| 859 } | |
| 860 m_pSyntax->RestorePos(dwSavePos); | |
| 861 } | |
| 862 } else { | |
| 863 pObj->Release(); | |
| 864 } | |
| 865 } | |
| 866 } | |
| 867 } | |
| 868 --i; | |
| 869 state = ParserState::kDefault; | |
| 870 } else if (byte == "trailer"[inside_index]) { | |
| 871 inside_index++; | |
| 872 } else { | |
| 873 --i; | |
| 874 state = ParserState::kDefault; | |
| 875 } | |
| 876 break; | |
| 877 | |
| 878 case ParserState::kXref: | |
| 879 if (inside_index == 4) { | |
| 880 last_xref = pos + i - 4; | |
| 881 state = ParserState::kWhitespace; | |
| 882 } else if (byte == "xref"[inside_index]) { | |
| 883 inside_index++; | |
| 884 } else { | |
| 885 --i; | |
| 886 state = ParserState::kDefault; | |
| 887 } | |
| 888 break; | |
| 889 | |
| 890 case ParserState::kComment: | |
| 891 if (PDFCharIsLineEnding(byte)) | |
| 892 state = ParserState::kDefault; | |
| 893 break; | |
| 894 | |
| 895 case ParserState::kString: | |
| 896 if (byte == ')') { | |
| 897 if (depth > 0) | |
| 898 depth--; | |
| 899 } else if (byte == '(') { | |
| 900 depth++; | |
| 901 } | |
| 902 | |
| 903 if (!depth) | |
| 904 state = ParserState::kDefault; | |
| 905 break; | |
| 906 | |
| 907 case ParserState::kHexString: | |
| 908 if (byte == '>' || (byte == '<' && inside_index == 1)) | |
| 909 state = ParserState::kDefault; | |
| 910 inside_index = 0; | |
| 911 break; | |
| 912 | |
| 913 case ParserState::kEscapedString: | |
| 914 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { | |
| 915 --i; | |
| 916 state = ParserState::kDefault; | |
| 917 } | |
| 918 break; | |
| 919 | |
| 920 case ParserState::kEndObj: | |
| 921 if (PDFCharIsWhitespace(byte)) { | |
| 922 state = ParserState::kDefault; | |
| 923 } else if (byte == '%' || byte == '(' || byte == '<' || | |
| 924 byte == '\\') { | |
| 925 state = ParserState::kDefault; | |
| 926 --i; | |
| 927 } else if (inside_index == 6) { | |
| 928 state = ParserState::kDefault; | |
| 929 --i; | |
| 930 } else if (byte == "endobj"[inside_index]) { | |
| 931 inside_index++; | |
| 932 } | |
| 933 break; | |
| 934 } | |
| 935 | |
| 936 if (bOverFlow) { | |
| 937 size = 0; | |
| 938 break; | |
| 939 } | |
| 940 } | |
| 941 pos += size; | |
| 942 | |
| 943 // If the position has not changed at all or went backwards in a loop | |
| 944 // iteration, then break out to prevent infinite looping. | |
| 945 if (pos <= saved_pos) | |
| 946 break; | |
| 947 } | |
| 948 | |
| 949 if (last_xref != -1 && last_xref > last_obj) | |
| 950 last_trailer = last_xref; | |
| 951 else if (last_trailer == -1 || last_xref < last_obj) | |
| 952 last_trailer = m_pSyntax->m_FileLen; | |
| 953 | |
| 954 m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset); | |
| 955 return m_pTrailer && !m_ObjectInfo.empty(); | |
| 956 } | |
| 957 | |
// Loads the cross-reference stream object located at |*pos| and merges its
// entries into |m_ObjectInfo| and |m_SortedOffset|.
//
// |pos|       in: offset of the xref stream object. out: once the stream
//             dictionary is available it is overwritten with the dictionary's
//             "Prev" value, so callers can follow the chain of xref streams.
// |bMainXRef| when set, a clone of the stream dictionary becomes |m_pTrailer|,
//             the object map is shrunk to "Size" and every known entry's type
//             is reset to 0; otherwise the clone is appended to |m_Trailers|.
//
// Returns FALSE when the object cannot be parsed, collides with the document
// root, is rejected by the document, is not a stream, has a negative "Size",
// lacks a usable "W" array, or contains a type-2 entry that names an invalid
// object number. Returns TRUE otherwise.
FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {
  CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0);
  if (!pObject)
    return FALSE;

  if (m_pDocument) {
    CPDF_Dictionary* pRootDict = m_pDocument->GetRoot();
    if (pRootDict && pRootDict->GetObjNum() == pObject->m_ObjNum) {
      // If |pObject| has an objnum assigned then this will leak as Release()
      // will early exit.
      if (pObject->IsStream())
        pObject->Release();
      return FALSE;
    }
    // NOTE(review): presumably hands |pObject| over to the document on
    // success; ownership on failure is not visible from here.
    if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration(pObject->m_ObjNum,
                                                              pObject)) {
      return FALSE;
    }
  }

  CPDF_Stream* pStream = pObject->AsStream();
  if (!pStream)
    return FALSE;

  CPDF_Dictionary* pDict = pStream->GetDict();
  // Report the previous xref section to the caller before any validation.
  *pos = pDict->GetIntegerFor("Prev");
  int32_t size = pDict->GetIntegerFor("Size");
  if (size < 0) {
    pStream->Release();
    return FALSE;
  }

  CPDF_Dictionary* pNewTrailer = ToDictionary(pDict->Clone());
  if (bMainXRef) {
    m_pTrailer = pNewTrailer;
    ShrinkObjectMap(size);
    for (auto& it : m_ObjectInfo)
      it.second.type = 0;
  } else {
    m_Trailers.Add(pNewTrailer);
  }

  // Collect the (first object number, entry count) pairs from "Index".
  // Pairs that are non-numeric, have a negative start or a non-positive count
  // are dropped.
  std::vector<std::pair<int32_t, int32_t>> arrIndex;
  CPDF_Array* pArray = pDict->GetArrayFor("Index");
  if (pArray) {
    for (size_t i = 0; i < pArray->GetCount() / 2; i++) {
      CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2);
      CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1);

      if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
        int nStartNum = pStartNumObj->GetInteger();
        int nCount = pCountObj->GetInteger();
        if (nStartNum >= 0 && nCount > 0)
          arrIndex.push_back(std::make_pair(nStartNum, nCount));
      }
    }
  }

  // With no usable "Index" pairs, fall back to one section covering
  // [0, Size).
  if (arrIndex.size() == 0)
    arrIndex.push_back(std::make_pair(0, size));

  pArray = pDict->GetArrayFor("W");
  if (!pArray) {
    pStream->Release();
    return FALSE;
  }

  // "W" gives the byte width of each field of an entry. The widths are summed
  // with overflow checking to obtain the size of one entry.
  CFX_ArrayTemplate<uint32_t> WidthArray;
  FX_SAFE_UINT32 dwAccWidth = 0;
  for (size_t i = 0; i < pArray->GetCount(); ++i) {
    WidthArray.Add(pArray->GetIntegerAt(i));
    dwAccWidth += WidthArray[i];
  }

  if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) {
    pStream->Release();
    return FALSE;
  }

  uint32_t totalWidth = dwAccWidth.ValueOrDie();
  CPDF_StreamAcc acc;
  acc.LoadAllData(pStream);

  const uint8_t* pData = acc.GetData();
  uint32_t dwTotalSize = acc.GetSize();
  // Number of entries consumed from the stream data by earlier sections.
  uint32_t segindex = 0;
  for (uint32_t i = 0; i < arrIndex.size(); i++) {
    int32_t startnum = arrIndex[i].first;
    if (startnum < 0)
      continue;

    m_dwXrefStartObjNum =
        pdfium::base::checked_cast<uint32_t, int32_t>(startnum);
    uint32_t count =
        pdfium::base::checked_cast<uint32_t, int32_t>(arrIndex[i].second);
    // Skip the section if its entries would extend past the decoded data.
    FX_SAFE_UINT32 dwCaculatedSize = segindex;
    dwCaculatedSize += count;
    dwCaculatedSize *= totalWidth;
    if (!dwCaculatedSize.IsValid() ||
        dwCaculatedSize.ValueOrDie() > dwTotalSize) {
      continue;
    }

    const uint8_t* segstart = pData + segindex * totalWidth;
    // Skip the section if it names object numbers beyond the current object
    // map.
    FX_SAFE_UINT32 dwMaxObjNum = startnum;
    dwMaxObjNum += count;
    uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
    if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
      continue;

    for (uint32_t j = 0; j < count; j++) {
      // The entry type defaults to 1 when the first field has zero width.
      int32_t type = 1;
      const uint8_t* entrystart = segstart + j * totalWidth;
      if (WidthArray[0])
        type = GetVarInt(entrystart, WidthArray[0]);

      // Type 255 is assigned below to objects named as the container of a
      // type-2 entry; for those only the file offset is recorded.
      if (GetObjectType(startnum + j) == 255) {
        FX_FILESIZE offset =
            GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
        m_ObjectInfo[startnum + j].pos = offset;
        m_SortedOffset.insert(offset);
        continue;
      }

      // An entry that already has a non-zero type is left untouched.
      if (GetObjectType(startnum + j))
        continue;

      m_ObjectInfo[startnum + j].type = type;
      if (type == 0) {
        m_ObjectInfo[startnum + j].pos = 0;
      } else {
        FX_FILESIZE offset =
            GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
        m_ObjectInfo[startnum + j].pos = offset;
        if (type == 1) {
          m_SortedOffset.insert(offset);
        } else {
          // For other types the second field is used as an object number
          // rather than a file offset; that object is marked as type 255.
          if (offset < 0 || !IsValidObjectNumber(offset)) {
            pStream->Release();
            return FALSE;
          }
          m_ObjectInfo[offset].type = 255;
        }
      }
    }
    segindex += count;
  }
  pStream->Release();
  return TRUE;
}
| 1108 | |
| 1109 CPDF_Array* CPDF_Parser::GetIDArray() { | |
| 1110 CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetObjectFor("ID") : nullptr; | |
| 1111 if (!pID) | |
| 1112 return nullptr; | |
| 1113 | |
| 1114 if (CPDF_Reference* pRef = pID->AsReference()) { | |
| 1115 pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum()); | |
| 1116 m_pTrailer->SetFor("ID", pID); | |
| 1117 } | |
| 1118 return ToArray(pID); | |
| 1119 } | |
| 1120 | |
| 1121 uint32_t CPDF_Parser::GetRootObjNum() { | |
| 1122 CPDF_Reference* pRef = | |
| 1123 ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Root") : nullptr); | |
| 1124 return pRef ? pRef->GetRefObjNum() : 0; | |
| 1125 } | |
| 1126 | |
| 1127 uint32_t CPDF_Parser::GetInfoObjNum() { | |
| 1128 CPDF_Reference* pRef = | |
| 1129 ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Info") : nullptr); | |
| 1130 return pRef ? pRef->GetRefObjNum() : 0; | |
| 1131 } | |
| 1132 | |
| 1133 CPDF_Object* CPDF_Parser::ParseIndirectObject( | |
| 1134 CPDF_IndirectObjectHolder* pObjList, | |
| 1135 uint32_t objnum) { | |
| 1136 if (!IsValidObjectNumber(objnum)) | |
| 1137 return nullptr; | |
| 1138 | |
| 1139 // Prevent circular parsing the same object. | |
| 1140 if (pdfium::ContainsKey(m_ParsingObjNums, objnum)) | |
| 1141 return nullptr; | |
| 1142 | |
| 1143 pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum); | |
| 1144 if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) { | |
| 1145 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; | |
| 1146 if (pos <= 0) | |
| 1147 return nullptr; | |
| 1148 return ParseIndirectObjectAt(pObjList, pos, objnum); | |
| 1149 } | |
| 1150 if (GetObjectType(objnum) != 2) | |
| 1151 return nullptr; | |
| 1152 | |
| 1153 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); | |
| 1154 if (!pObjStream) | |
| 1155 return nullptr; | |
| 1156 | |
| 1157 ScopedFileStream file(FX_CreateMemoryStream( | |
| 1158 (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE)); | |
| 1159 CPDF_SyntaxParser syntax; | |
| 1160 syntax.InitParser(file.get(), 0); | |
| 1161 const int32_t offset = GetStreamFirst(pObjStream); | |
| 1162 | |
| 1163 // Read object numbers from |pObjStream| into a cache. | |
| 1164 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) { | |
| 1165 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { | |
| 1166 uint32_t thisnum = syntax.GetDirectNum(); | |
| 1167 uint32_t thisoff = syntax.GetDirectNum(); | |
| 1168 m_ObjCache[pObjStream][thisnum] = thisoff; | |
| 1169 } | |
| 1170 } | |
| 1171 | |
| 1172 const auto it = m_ObjCache[pObjStream].find(objnum); | |
| 1173 if (it == m_ObjCache[pObjStream].end()) | |
| 1174 return nullptr; | |
| 1175 | |
| 1176 syntax.RestorePos(offset + it->second); | |
| 1177 return syntax.GetObject(pObjList, 0, 0, true); | |
| 1178 } | |
| 1179 | |
| 1180 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(uint32_t objnum) { | |
| 1181 auto it = m_ObjectStreamMap.find(objnum); | |
| 1182 if (it != m_ObjectStreamMap.end()) | |
| 1183 return it->second.get(); | |
| 1184 | |
| 1185 if (!m_pDocument) | |
| 1186 return nullptr; | |
| 1187 | |
| 1188 const CPDF_Stream* pStream = | |
| 1189 ToStream(m_pDocument->GetOrParseIndirectObject(objnum)); | |
| 1190 if (!pStream) | |
| 1191 return nullptr; | |
| 1192 | |
| 1193 CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc; | |
| 1194 pStreamAcc->LoadAllData(pStream); | |
| 1195 m_ObjectStreamMap[objnum].reset(pStreamAcc); | |
| 1196 return pStreamAcc; | |
| 1197 } | |
| 1198 | |
| 1199 FX_FILESIZE CPDF_Parser::GetObjectSize(uint32_t objnum) const { | |
| 1200 if (!IsValidObjectNumber(objnum)) | |
| 1201 return 0; | |
| 1202 | |
| 1203 if (GetObjectType(objnum) == 2) | |
| 1204 objnum = GetObjectPositionOrZero(objnum); | |
| 1205 | |
| 1206 if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255) | |
| 1207 return 0; | |
| 1208 | |
| 1209 FX_FILESIZE offset = GetObjectPositionOrZero(objnum); | |
| 1210 if (offset == 0) | |
| 1211 return 0; | |
| 1212 | |
| 1213 auto it = m_SortedOffset.find(offset); | |
| 1214 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) | |
| 1215 return 0; | |
| 1216 | |
| 1217 return *it - offset; | |
| 1218 } | |
| 1219 | |
| 1220 void CPDF_Parser::GetIndirectBinary(uint32_t objnum, | |
| 1221 uint8_t*& pBuffer, | |
| 1222 uint32_t& size) { | |
| 1223 pBuffer = nullptr; | |
| 1224 size = 0; | |
| 1225 if (!IsValidObjectNumber(objnum)) | |
| 1226 return; | |
| 1227 | |
| 1228 if (GetObjectType(objnum) == 2) { | |
| 1229 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); | |
| 1230 if (!pObjStream) | |
| 1231 return; | |
| 1232 | |
| 1233 int32_t offset = GetStreamFirst(pObjStream); | |
| 1234 const uint8_t* pData = pObjStream->GetData(); | |
| 1235 uint32_t totalsize = pObjStream->GetSize(); | |
| 1236 ScopedFileStream file( | |
| 1237 FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE)); | |
| 1238 | |
| 1239 CPDF_SyntaxParser syntax; | |
| 1240 syntax.InitParser(file.get(), 0); | |
| 1241 for (int i = GetStreamNCount(pObjStream); i > 0; --i) { | |
| 1242 uint32_t thisnum = syntax.GetDirectNum(); | |
| 1243 uint32_t thisoff = syntax.GetDirectNum(); | |
| 1244 if (thisnum != objnum) | |
| 1245 continue; | |
| 1246 | |
| 1247 if (i == 1) { | |
| 1248 size = totalsize - (thisoff + offset); | |
| 1249 } else { | |
| 1250 syntax.GetDirectNum(); // Skip nextnum. | |
| 1251 uint32_t nextoff = syntax.GetDirectNum(); | |
| 1252 size = nextoff - thisoff; | |
| 1253 } | |
| 1254 | |
| 1255 pBuffer = FX_Alloc(uint8_t, size); | |
| 1256 FXSYS_memcpy(pBuffer, pData + thisoff + offset, size); | |
| 1257 return; | |
| 1258 } | |
| 1259 return; | |
| 1260 } | |
| 1261 | |
| 1262 if (GetObjectType(objnum) != 1) | |
| 1263 return; | |
| 1264 | |
| 1265 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; | |
| 1266 if (pos == 0) | |
| 1267 return; | |
| 1268 | |
| 1269 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
| 1270 m_pSyntax->RestorePos(pos); | |
| 1271 | |
| 1272 bool bIsNumber; | |
| 1273 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
| 1274 if (!bIsNumber) { | |
| 1275 m_pSyntax->RestorePos(SavedPos); | |
| 1276 return; | |
| 1277 } | |
| 1278 | |
| 1279 uint32_t parser_objnum = FXSYS_atoui(word.c_str()); | |
| 1280 if (parser_objnum && parser_objnum != objnum) { | |
| 1281 m_pSyntax->RestorePos(SavedPos); | |
| 1282 return; | |
| 1283 } | |
| 1284 | |
| 1285 word = m_pSyntax->GetNextWord(&bIsNumber); | |
| 1286 if (!bIsNumber) { | |
| 1287 m_pSyntax->RestorePos(SavedPos); | |
| 1288 return; | |
| 1289 } | |
| 1290 | |
| 1291 if (m_pSyntax->GetKeyword() != "obj") { | |
| 1292 m_pSyntax->RestorePos(SavedPos); | |
| 1293 return; | |
| 1294 } | |
| 1295 | |
| 1296 auto it = m_SortedOffset.find(pos); | |
| 1297 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) { | |
| 1298 m_pSyntax->RestorePos(SavedPos); | |
| 1299 return; | |
| 1300 } | |
| 1301 | |
| 1302 FX_FILESIZE nextoff = *it; | |
| 1303 FX_BOOL bNextOffValid = FALSE; | |
| 1304 if (nextoff != pos) { | |
| 1305 m_pSyntax->RestorePos(nextoff); | |
| 1306 word = m_pSyntax->GetNextWord(&bIsNumber); | |
| 1307 if (word == "xref") { | |
| 1308 bNextOffValid = TRUE; | |
| 1309 } else if (bIsNumber) { | |
| 1310 word = m_pSyntax->GetNextWord(&bIsNumber); | |
| 1311 if (bIsNumber && m_pSyntax->GetKeyword() == "obj") { | |
| 1312 bNextOffValid = TRUE; | |
| 1313 } | |
| 1314 } | |
| 1315 } | |
| 1316 | |
| 1317 if (!bNextOffValid) { | |
| 1318 m_pSyntax->RestorePos(pos); | |
| 1319 while (1) { | |
| 1320 if (m_pSyntax->GetKeyword() == "endobj") | |
| 1321 break; | |
| 1322 | |
| 1323 if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen) | |
| 1324 break; | |
| 1325 } | |
| 1326 nextoff = m_pSyntax->SavePos(); | |
| 1327 } | |
| 1328 | |
| 1329 size = (uint32_t)(nextoff - pos); | |
| 1330 pBuffer = FX_Alloc(uint8_t, size); | |
| 1331 m_pSyntax->RestorePos(pos); | |
| 1332 m_pSyntax->ReadBlock(pBuffer, size); | |
| 1333 m_pSyntax->RestorePos(SavedPos); | |
| 1334 } | |
| 1335 | |
| 1336 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( | |
| 1337 CPDF_IndirectObjectHolder* pObjList, | |
| 1338 FX_FILESIZE pos, | |
| 1339 uint32_t objnum) { | |
| 1340 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
| 1341 m_pSyntax->RestorePos(pos); | |
| 1342 bool bIsNumber; | |
| 1343 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
| 1344 if (!bIsNumber) { | |
| 1345 m_pSyntax->RestorePos(SavedPos); | |
| 1346 return nullptr; | |
| 1347 } | |
| 1348 | |
| 1349 FX_FILESIZE objOffset = m_pSyntax->SavePos(); | |
| 1350 objOffset -= word.GetLength(); | |
| 1351 uint32_t parser_objnum = FXSYS_atoui(word.c_str()); | |
| 1352 if (objnum && parser_objnum != objnum) { | |
| 1353 m_pSyntax->RestorePos(SavedPos); | |
| 1354 return nullptr; | |
| 1355 } | |
| 1356 | |
| 1357 word = m_pSyntax->GetNextWord(&bIsNumber); | |
| 1358 if (!bIsNumber) { | |
| 1359 m_pSyntax->RestorePos(SavedPos); | |
| 1360 return nullptr; | |
| 1361 } | |
| 1362 | |
| 1363 uint32_t parser_gennum = FXSYS_atoui(word.c_str()); | |
| 1364 if (m_pSyntax->GetKeyword() != "obj") { | |
| 1365 m_pSyntax->RestorePos(SavedPos); | |
| 1366 return nullptr; | |
| 1367 } | |
| 1368 | |
| 1369 CPDF_Object* pObj = | |
| 1370 m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true); | |
| 1371 m_pSyntax->SavePos(); | |
| 1372 | |
| 1373 CFX_ByteString bsWord = m_pSyntax->GetKeyword(); | |
| 1374 if (bsWord == "endobj") | |
| 1375 m_pSyntax->SavePos(); | |
| 1376 | |
| 1377 m_pSyntax->RestorePos(SavedPos); | |
| 1378 if (pObj) { | |
| 1379 if (!objnum) | |
| 1380 pObj->m_ObjNum = parser_objnum; | |
| 1381 pObj->m_GenNum = parser_gennum; | |
| 1382 } | |
| 1383 return pObj; | |
| 1384 } | |
| 1385 | |
| 1386 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( | |
| 1387 CPDF_IndirectObjectHolder* pObjList, | |
| 1388 FX_FILESIZE pos, | |
| 1389 uint32_t objnum, | |
| 1390 FX_FILESIZE* pResultPos) { | |
| 1391 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
| 1392 m_pSyntax->RestorePos(pos); | |
| 1393 | |
| 1394 bool bIsNumber; | |
| 1395 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
| 1396 if (!bIsNumber) { | |
| 1397 m_pSyntax->RestorePos(SavedPos); | |
| 1398 return nullptr; | |
| 1399 } | |
| 1400 | |
| 1401 uint32_t parser_objnum = FXSYS_atoui(word.c_str()); | |
| 1402 if (objnum && parser_objnum != objnum) { | |
| 1403 m_pSyntax->RestorePos(SavedPos); | |
| 1404 return nullptr; | |
| 1405 } | |
| 1406 | |
| 1407 word = m_pSyntax->GetNextWord(&bIsNumber); | |
| 1408 if (!bIsNumber) { | |
| 1409 m_pSyntax->RestorePos(SavedPos); | |
| 1410 return nullptr; | |
| 1411 } | |
| 1412 | |
| 1413 uint32_t gennum = FXSYS_atoui(word.c_str()); | |
| 1414 if (m_pSyntax->GetKeyword() != "obj") { | |
| 1415 m_pSyntax->RestorePos(SavedPos); | |
| 1416 return nullptr; | |
| 1417 } | |
| 1418 | |
| 1419 CPDF_Object* pObj = m_pSyntax->GetObjectForStrict(pObjList, objnum, gennum); | |
| 1420 if (pResultPos) | |
| 1421 *pResultPos = m_pSyntax->m_Pos; | |
| 1422 | |
| 1423 m_pSyntax->RestorePos(SavedPos); | |
| 1424 return pObj; | |
| 1425 } | |
| 1426 | |
| 1427 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() { | |
| 1428 if (m_pSyntax->GetKeyword() != "trailer") | |
| 1429 return nullptr; | |
| 1430 | |
| 1431 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj( | |
| 1432 m_pSyntax->GetObject(m_pDocument, 0, 0, true)); | |
| 1433 if (!ToDictionary(pObj.get())) | |
| 1434 return nullptr; | |
| 1435 return pObj.release()->AsDictionary(); | |
| 1436 } | |
| 1437 | |
| 1438 uint32_t CPDF_Parser::GetPermissions() const { | |
| 1439 if (!m_pSecurityHandler) | |
| 1440 return 0xFFFFFFFF; | |
| 1441 | |
| 1442 uint32_t dwPermission = m_pSecurityHandler->GetPermissions(); | |
| 1443 if (m_pEncryptDict && m_pEncryptDict->GetStringFor("Filter") == "Standard") { | |
| 1444 // See PDF Reference 1.7, page 123, table 3.20. | |
| 1445 dwPermission &= 0xFFFFFFFC; | |
| 1446 dwPermission |= 0xFFFFF0C0; | |
| 1447 } | |
| 1448 return dwPermission; | |
| 1449 } | |
| 1450 | |
| 1451 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, | |
| 1452 uint32_t offset) { | |
| 1453 m_pSyntax->InitParser(pFileAccess, offset); | |
| 1454 m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9); | |
| 1455 | |
| 1456 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); | |
| 1457 bool bIsNumber; | |
| 1458 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); | |
| 1459 if (!bIsNumber) | |
| 1460 return FALSE; | |
| 1461 | |
| 1462 uint32_t objnum = FXSYS_atoui(word.c_str()); | |
| 1463 word = m_pSyntax->GetNextWord(&bIsNumber); | |
| 1464 if (!bIsNumber) | |
| 1465 return FALSE; | |
| 1466 | |
| 1467 uint32_t gennum = FXSYS_atoui(word.c_str()); | |
| 1468 if (m_pSyntax->GetKeyword() != "obj") { | |
| 1469 m_pSyntax->RestorePos(SavedPos); | |
| 1470 return FALSE; | |
| 1471 } | |
| 1472 | |
| 1473 m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true); | |
| 1474 if (!m_pLinearized) | |
| 1475 return FALSE; | |
| 1476 | |
| 1477 CPDF_Dictionary* pDict = m_pLinearized->GetDict(); | |
| 1478 if (pDict && pDict->GetObjectFor("Linearized")) { | |
| 1479 m_pSyntax->GetNextWord(nullptr); | |
| 1480 | |
| 1481 CPDF_Object* pLen = pDict->GetObjectFor("L"); | |
| 1482 if (!pLen) { | |
| 1483 m_pLinearized->Release(); | |
| 1484 m_pLinearized = nullptr; | |
| 1485 return FALSE; | |
| 1486 } | |
| 1487 | |
| 1488 if (pLen->GetInteger() != (int)pFileAccess->GetSize()) | |
| 1489 return FALSE; | |
| 1490 | |
| 1491 if (CPDF_Number* pNo = ToNumber(pDict->GetObjectFor("P"))) | |
| 1492 m_dwFirstPageNo = pNo->GetInteger(); | |
| 1493 | |
| 1494 if (CPDF_Number* pTable = ToNumber(pDict->GetObjectFor("T"))) | |
| 1495 m_LastXRefOffset = pTable->GetInteger(); | |
| 1496 | |
| 1497 return TRUE; | |
| 1498 } | |
| 1499 m_pLinearized->Release(); | |
| 1500 m_pLinearized = nullptr; | |
| 1501 return FALSE; | |
| 1502 } | |
| 1503 | |
// Begins parsing |pFileAccess| as a linearized PDF and attaches the result to
// |pDocument|.
//
// If no header is found FORMAT_ERROR is returned. If the file is not
// linearized (per IsLinearizedFile()), parsing is delegated to StartParse().
// Otherwise the first-page cross-reference data that follows the
// linearization dictionary is loaded (table form first, then stream form),
// with RebuildCrossRef() as a fallback, the encryption handler is set up, and
// the document is loaded via LoadLinearizedDoc().
//
// Returns SUCCESS, FORMAT_ERROR, or whatever error SetEncryptHandler()
// reports.
CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(IFX_FileRead* pFileAccess,
                                                     CPDF_Document* pDocument) {
  ASSERT(!m_bHasParsed);

  m_bXRefStream = FALSE;
  m_LastXRefOffset = 0;
  m_bOwnFileRead = true;

  int32_t offset = GetHeaderOffset(pFileAccess);
  if (offset == -1)
    return FORMAT_ERROR;

  if (!IsLinearizedFile(pFileAccess, offset)) {
    // Not linearized: detach the file from the syntax parser and let the
    // regular entry point start over.
    m_pSyntax->m_pFileAccess = nullptr;
    return StartParse(pFileAccess, std::move(pDocument));
  }
  m_bHasParsed = true;
  m_pDocument = pDocument;

  // IsLinearizedFile() leaves the syntax parser just after the linearization
  // object; the first xref section is expected to start there.
  FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();

  FX_BOOL bXRefRebuilt = FALSE;
  FX_BOOL bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE);
  if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {
    if (!RebuildCrossRef())
      return FORMAT_ERROR;

    bXRefRebuilt = TRUE;
    m_LastXRefOffset = 0;
  }

  if (bLoadV4) {
    m_pTrailer = LoadTrailerV4();
    // NOTE(review): a missing trailer after a table-form xref is reported as
    // SUCCESS here — confirm this is intentional.
    if (!m_pTrailer)
      return SUCCESS;

    int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
    if (xrefsize > 0)
      ShrinkObjectMap(xrefsize);
  }

  Error eRet = SetEncryptHandler();
  if (eRet != SUCCESS)
    return eRet;

  m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict());
  if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
    // The loaded xref data did not yield a usable document. Rebuild it once,
    // unless it was already rebuilt above.
    if (bXRefRebuilt)
      return FORMAT_ERROR;

    ReleaseEncryptHandler();
    if (!RebuildCrossRef())
      return FORMAT_ERROR;

    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;

    m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict());
    if (!m_pDocument->GetRoot())
      return FORMAT_ERROR;
  }

  // The trailer must reference a root object; rebuild as a last resort.
  if (GetRootObjNum() == 0) {
    ReleaseEncryptHandler();
    if (!RebuildCrossRef() || GetRootObjNum() == 0)
      return FORMAT_ERROR;

    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;
  }

  // When metadata is encrypted, record the metadata object number on the
  // syntax parser.
  if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
    if (CPDF_Reference* pMetadata =
            ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")))
      m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
  }
  return SUCCESS;
}
| 1584 | |
| 1585 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { | |
| 1586 if (!LoadCrossRefV5(&xrefpos, FALSE)) | |
| 1587 return FALSE; | |
| 1588 | |
| 1589 std::set<FX_FILESIZE> seen_xrefpos; | |
| 1590 while (xrefpos) { | |
| 1591 seen_xrefpos.insert(xrefpos); | |
| 1592 if (!LoadCrossRefV5(&xrefpos, FALSE)) | |
| 1593 return FALSE; | |
| 1594 | |
| 1595 // Check for circular references. | |
| 1596 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) | |
| 1597 return FALSE; | |
| 1598 } | |
| 1599 m_ObjectStreamMap.clear(); | |
| 1600 m_bXRefStream = TRUE; | |
| 1601 return TRUE; | |
| 1602 } | |
| 1603 | |
| 1604 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { | |
| 1605 uint32_t dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum; | |
| 1606 m_pSyntax->m_MetadataObjnum = 0; | |
| 1607 if (m_pTrailer) { | |
| 1608 m_pTrailer->Release(); | |
| 1609 m_pTrailer = nullptr; | |
| 1610 } | |
| 1611 | |
| 1612 m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset); | |
| 1613 uint8_t ch = 0; | |
| 1614 uint32_t dwCount = 0; | |
| 1615 m_pSyntax->GetNextChar(ch); | |
| 1616 while (PDFCharIsWhitespace(ch)) { | |
| 1617 ++dwCount; | |
| 1618 if (m_pSyntax->m_FileLen >= | |
| 1619 (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) { | |
| 1620 break; | |
| 1621 } | |
| 1622 m_pSyntax->GetNextChar(ch); | |
| 1623 } | |
| 1624 m_LastXRefOffset += dwCount; | |
| 1625 m_ObjectStreamMap.clear(); | |
| 1626 m_ObjCache.clear(); | |
| 1627 | |
| 1628 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && | |
| 1629 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { | |
| 1630 m_LastXRefOffset = 0; | |
| 1631 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; | |
| 1632 return FORMAT_ERROR; | |
| 1633 } | |
| 1634 | |
| 1635 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; | |
| 1636 return SUCCESS; | |
| 1637 } | |
| OLD | NEW |