Index: core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp |
diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp |
new file mode 100644 |
index 0000000000000000000000000000000000000000..1871467458ecd9822693d7c6cb2b75b4ebf8cc3f |
--- /dev/null |
+++ b/core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp |
@@ -0,0 +1,1643 @@ |
+// Copyright 2016 PDFium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
+ |
+#include "core/include/fpdfapi/cpdf_parser.h" |
+ |
+#include "core/include/fpdfapi/cpdf_document.h" |
+#include "core/include/fpdfapi/fpdf_parser.h" |
+#include "core/include/fxcrt/fx_ext.h" |
+#include "core/include/fxcrt/fx_safe_types.h" |
+#include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" |
+#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" |
+#include "third_party/base/stl_util.h" |
+ |
+namespace { |
+ |
+// A limit on the size of the xref table. Theoretical limits are higher, but |
+// this may be large enough in practice. |
+const int32_t kMaxXRefSize = 1048576; |
+ |
+// A limit on the maximum object number in the xref table. Theoretical limits |
+// are higher, but this may be large enough in practice. |
+const FX_DWORD kMaxObjectNumber = 1048576; |
+ |
+FX_DWORD GetVarInt(const uint8_t* p, int32_t n) { |
+ FX_DWORD result = 0; |
+ for (int32_t i = 0; i < n; ++i) |
+ result = result * 256 + p[i]; |
+ return result; |
+} |
+ |
+int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) { |
+ return pObjStream->GetDict()->GetIntegerBy("N"); |
+} |
+ |
+int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) { |
+ return pObjStream->GetDict()->GetIntegerBy("First"); |
+} |
+ |
+} // namespace |
+ |
+CPDF_Parser::CPDF_Parser() |
+ : m_pDocument(nullptr), |
+ m_bOwnFileRead(true), |
+ m_FileVersion(0), |
+ m_pTrailer(nullptr), |
+ m_pEncryptDict(nullptr), |
+ m_pLinearized(nullptr), |
+ m_dwFirstPageNo(0), |
+ m_dwXrefStartObjNum(0) { |
+ m_pSyntax.reset(new CPDF_SyntaxParser); |
+} |
+ |
+CPDF_Parser::~CPDF_Parser() { |
+ CloseParser(); |
+} |
+ |
+FX_DWORD CPDF_Parser::GetLastObjNum() const { |
+ return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first; |
+} |
+ |
+bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const { |
+ return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first; |
+} |
+ |
+FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const { |
+ auto it = m_ObjectInfo.find(objnum); |
+ return it != m_ObjectInfo.end() ? it->second.pos : 0; |
+} |
+ |
+uint8_t CPDF_Parser::GetObjectType(FX_DWORD objnum) const { |
+ ASSERT(IsValidObjectNumber(objnum)); |
+ auto it = m_ObjectInfo.find(objnum); |
+ return it != m_ObjectInfo.end() ? it->second.type : 0; |
+} |
+ |
+uint16_t CPDF_Parser::GetObjectGenNum(FX_DWORD objnum) const { |
+ ASSERT(IsValidObjectNumber(objnum)); |
+ auto it = m_ObjectInfo.find(objnum); |
+ return it != m_ObjectInfo.end() ? it->second.gennum : 0; |
+} |
+ |
+bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const { |
+ uint8_t type = GetObjectType(objnum); |
+ return type == 0 || type == 255; |
+} |
+ |
+void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) { |
+ m_pEncryptDict = pDict; |
+} |
+ |
+CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() { |
+ return m_pSyntax->m_pCryptoHandler.get(); |
+} |
+ |
+IFX_FileRead* CPDF_Parser::GetFileAccess() const { |
+ return m_pSyntax->m_pFileAccess; |
+} |
+ |
+void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) { |
+ if (objnum == 0) { |
+ m_ObjectInfo.clear(); |
+ return; |
+ } |
+ |
+ auto it = m_ObjectInfo.lower_bound(objnum); |
+ while (it != m_ObjectInfo.end()) { |
+ auto saved_it = it++; |
+ m_ObjectInfo.erase(saved_it); |
+ } |
+ |
+ if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) |
+ m_ObjectInfo[objnum - 1].pos = 0; |
+} |
+ |
+void CPDF_Parser::CloseParser() { |
+ m_bVersionUpdated = FALSE; |
+ delete m_pDocument; |
+ m_pDocument = nullptr; |
+ |
+ if (m_pTrailer) { |
+ m_pTrailer->Release(); |
+ m_pTrailer = nullptr; |
+ } |
+ ReleaseEncryptHandler(); |
+ SetEncryptDictionary(nullptr); |
+ |
+ if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) { |
+ m_pSyntax->m_pFileAccess->Release(); |
+ m_pSyntax->m_pFileAccess = nullptr; |
+ } |
+ |
+ m_ObjectStreamMap.clear(); |
+ m_ObjCache.clear(); |
+ m_SortedOffset.clear(); |
+ m_ObjectInfo.clear(); |
+ |
+ int32_t iLen = m_Trailers.GetSize(); |
+ for (int32_t i = 0; i < iLen; ++i) { |
+ if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i)) |
+ trailer->Release(); |
+ } |
+ m_Trailers.RemoveAll(); |
+ |
+ if (m_pLinearized) { |
+ m_pLinearized->Release(); |
+ m_pLinearized = nullptr; |
+ } |
+} |
+ |
+CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) { |
+ CloseParser(); |
+ |
+ m_bXRefStream = FALSE; |
+ m_LastXRefOffset = 0; |
+ m_bOwnFileRead = true; |
+ |
+ int32_t offset = GetHeaderOffset(pFileAccess); |
+ if (offset == -1) { |
+ if (pFileAccess) |
+ pFileAccess->Release(); |
+ return FORMAT_ERROR; |
+ } |
+ m_pSyntax->InitParser(pFileAccess, offset); |
+ |
+ uint8_t ch; |
+ if (!m_pSyntax->GetCharAt(5, ch)) |
+ return FORMAT_ERROR; |
+ if (std::isdigit(ch)) |
+ m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10; |
+ |
+ if (!m_pSyntax->GetCharAt(7, ch)) |
+ return FORMAT_ERROR; |
+ if (std::isdigit(ch)) |
+ m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); |
+ |
+ if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9) |
+ return FORMAT_ERROR; |
+ |
+ m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9); |
+ m_pDocument = new CPDF_Document(this); |
+ |
+ FX_BOOL bXRefRebuilt = FALSE; |
+ if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) { |
+ m_SortedOffset.insert(m_pSyntax->SavePos()); |
+ m_pSyntax->GetKeyword(); |
+ |
+ bool bNumber; |
+ CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber); |
+ if (!bNumber) |
+ return FORMAT_ERROR; |
+ |
+ m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str); |
+ if (!LoadAllCrossRefV4(m_LastXRefOffset) && |
+ !LoadAllCrossRefV5(m_LastXRefOffset)) { |
+ if (!RebuildCrossRef()) |
+ return FORMAT_ERROR; |
+ |
+ bXRefRebuilt = TRUE; |
+ m_LastXRefOffset = 0; |
+ } |
+ } else { |
+ if (!RebuildCrossRef()) |
+ return FORMAT_ERROR; |
+ |
+ bXRefRebuilt = TRUE; |
+ } |
+ Error eRet = SetEncryptHandler(); |
+ if (eRet != SUCCESS) |
+ return eRet; |
+ |
+ m_pDocument->LoadDoc(); |
+ if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { |
+ if (bXRefRebuilt) |
+ return FORMAT_ERROR; |
+ |
+ ReleaseEncryptHandler(); |
+ if (!RebuildCrossRef()) |
+ return FORMAT_ERROR; |
+ |
+ eRet = SetEncryptHandler(); |
+ if (eRet != SUCCESS) |
+ return eRet; |
+ |
+ m_pDocument->LoadDoc(); |
+ if (!m_pDocument->GetRoot()) |
+ return FORMAT_ERROR; |
+ } |
+ if (GetRootObjNum() == 0) { |
+ ReleaseEncryptHandler(); |
+ if (!RebuildCrossRef() || GetRootObjNum() == 0) |
+ return FORMAT_ERROR; |
+ |
+ eRet = SetEncryptHandler(); |
+ if (eRet != SUCCESS) |
+ return eRet; |
+ } |
+ if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { |
+ CPDF_Reference* pMetadata = |
+ ToReference(m_pDocument->GetRoot()->GetElement("Metadata")); |
+ if (pMetadata) |
+ m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); |
+ } |
+ return SUCCESS; |
+} |
+CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { |
+ ReleaseEncryptHandler(); |
+ SetEncryptDictionary(nullptr); |
+ |
+ if (!m_pTrailer) |
+ return FORMAT_ERROR; |
+ |
+ CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt"); |
+ if (pEncryptObj) { |
+ if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) { |
+ SetEncryptDictionary(pEncryptDict); |
+ } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) { |
+ pEncryptObj = m_pDocument->GetIndirectObject(pRef->GetRefObjNum()); |
+ if (pEncryptObj) |
+ SetEncryptDictionary(pEncryptObj->GetDict()); |
+ } |
+ } |
+ |
+ if (m_pEncryptDict) { |
+ CFX_ByteString filter = m_pEncryptDict->GetStringBy("Filter"); |
+ std::unique_ptr<IPDF_SecurityHandler> pSecurityHandler; |
+ Error err = HANDLER_ERROR; |
+ if (filter == "Standard") { |
+ pSecurityHandler.reset(new CPDF_StandardSecurityHandler); |
+ err = PASSWORD_ERROR; |
+ } |
+ if (!pSecurityHandler) |
+ return HANDLER_ERROR; |
+ |
+ if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) |
+ return err; |
+ |
+ m_pSecurityHandler = std::move(pSecurityHandler); |
+ std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler( |
+ m_pSecurityHandler->CreateCryptoHandler()); |
+ if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) |
+ return HANDLER_ERROR; |
+ m_pSyntax->SetEncrypt(std::move(pCryptoHandler)); |
+ } |
+ return SUCCESS; |
+} |
+ |
+void CPDF_Parser::ReleaseEncryptHandler() { |
+ m_pSyntax->m_pCryptoHandler.reset(); |
+ m_pSecurityHandler.reset(); |
+} |
+ |
+FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const { |
+ if (!IsValidObjectNumber(objnum)) |
+ return 0; |
+ |
+ if (GetObjectType(objnum) == 1) |
+ return GetObjectPositionOrZero(objnum); |
+ |
+ if (GetObjectType(objnum) == 2) { |
+ FX_FILESIZE pos = GetObjectPositionOrZero(objnum); |
+ return GetObjectPositionOrZero(pos); |
+ } |
+ return 0; |
+} |
+ |
+FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { |
+ if (!LoadCrossRefV4(xrefpos, 0, TRUE)) |
+ return FALSE; |
+ |
+ m_pTrailer = LoadTrailerV4(); |
+ if (!m_pTrailer) |
+ return FALSE; |
+ |
+ int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); |
+ if (xrefsize > 0 && xrefsize <= kMaxXRefSize) |
+ ShrinkObjectMap(xrefsize); |
+ |
+ std::vector<FX_FILESIZE> CrossRefList; |
+ std::vector<FX_FILESIZE> XRefStreamList; |
+ std::set<FX_FILESIZE> seen_xrefpos; |
+ |
+ CrossRefList.push_back(xrefpos); |
+ XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); |
+ seen_xrefpos.insert(xrefpos); |
+ |
+ // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not |
+ // numerical, GetDirectInteger() returns 0. Loading will end. |
+ xrefpos = GetDirectInteger(m_pTrailer, "Prev"); |
+ while (xrefpos) { |
+ // Check for circular references. |
+ if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) |
+ return FALSE; |
+ |
+ seen_xrefpos.insert(xrefpos); |
+ |
+ // SLOW ... |
+ CrossRefList.insert(CrossRefList.begin(), xrefpos); |
+ LoadCrossRefV4(xrefpos, 0, TRUE); |
+ |
+ std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( |
+ LoadTrailerV4()); |
+ if (!pDict) |
+ return FALSE; |
+ |
+ xrefpos = GetDirectInteger(pDict.get(), "Prev"); |
+ |
+ // SLOW ... |
+ XRefStreamList.insert(XRefStreamList.begin(), |
+ pDict->GetIntegerBy("XRefStm")); |
+ m_Trailers.Add(pDict.release()); |
+ } |
+ |
+ for (size_t i = 0; i < CrossRefList.size(); ++i) { |
+ if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) |
+ return FALSE; |
+ } |
+ return TRUE; |
+} |
+ |
+FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, |
+ FX_DWORD dwObjCount) { |
+ if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) |
+ return FALSE; |
+ |
+ m_pTrailer = LoadTrailerV4(); |
+ if (!m_pTrailer) |
+ return FALSE; |
+ |
+ int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); |
+ if (xrefsize == 0) |
+ return FALSE; |
+ |
+ std::vector<FX_FILESIZE> CrossRefList; |
+ std::vector<FX_FILESIZE> XRefStreamList; |
+ std::set<FX_FILESIZE> seen_xrefpos; |
+ |
+ CrossRefList.push_back(xrefpos); |
+ XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); |
+ seen_xrefpos.insert(xrefpos); |
+ |
+ xrefpos = GetDirectInteger(m_pTrailer, "Prev"); |
+ while (xrefpos) { |
+ // Check for circular references. |
+ if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) |
+ return FALSE; |
+ |
+ seen_xrefpos.insert(xrefpos); |
+ |
+ // SLOW ... |
+ CrossRefList.insert(CrossRefList.begin(), xrefpos); |
+ LoadCrossRefV4(xrefpos, 0, TRUE); |
+ |
+ std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( |
+ LoadTrailerV4()); |
+ if (!pDict) |
+ return FALSE; |
+ |
+ xrefpos = GetDirectInteger(pDict.get(), "Prev"); |
+ |
+ // SLOW ... |
+ XRefStreamList.insert(XRefStreamList.begin(), |
+ pDict->GetIntegerBy("XRefStm")); |
+ m_Trailers.Add(pDict.release()); |
+ } |
+ |
+ for (size_t i = 1; i < CrossRefList.size(); ++i) { |
+ if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) |
+ return FALSE; |
+ } |
+ return TRUE; |
+} |
+ |
+FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, |
+ FX_DWORD dwObjCount) { |
+ FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset; |
+ |
+ m_pSyntax->RestorePos(dwStartPos); |
+ m_SortedOffset.insert(pos); |
+ |
+ FX_DWORD start_objnum = 0; |
+ FX_DWORD count = dwObjCount; |
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos(); |
+ |
+ const int32_t recordsize = 20; |
+ std::vector<char> buf(1024 * recordsize + 1); |
+ buf[1024 * recordsize] = '\0'; |
+ |
+ int32_t nBlocks = count / 1024 + 1; |
+ for (int32_t block = 0; block < nBlocks; block++) { |
+ int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; |
+ FX_DWORD dwReadSize = block_size * recordsize; |
+ if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen) |
+ return FALSE; |
+ |
+ if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), |
+ dwReadSize)) { |
+ return FALSE; |
+ } |
+ |
+ for (int32_t i = 0; i < block_size; i++) { |
+ FX_DWORD objnum = start_objnum + block * 1024 + i; |
+ char* pEntry = &buf[i * recordsize]; |
+ if (pEntry[17] == 'f') { |
+ m_ObjectInfo[objnum].pos = 0; |
+ m_ObjectInfo[objnum].type = 0; |
+ } else { |
+ int32_t offset = FXSYS_atoi(pEntry); |
+ if (offset == 0) { |
+ for (int32_t c = 0; c < 10; c++) { |
+ if (!std::isdigit(pEntry[c])) |
+ return FALSE; |
+ } |
+ } |
+ |
+ m_ObjectInfo[objnum].pos = offset; |
+ int32_t version = FXSYS_atoi(pEntry + 11); |
+ if (version >= 1) |
+ m_bVersionUpdated = TRUE; |
+ |
+ m_ObjectInfo[objnum].gennum = version; |
+ if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) |
+ m_SortedOffset.insert(m_ObjectInfo[objnum].pos); |
+ |
+ m_ObjectInfo[objnum].type = 1; |
+ } |
+ } |
+ } |
+ m_pSyntax->RestorePos(SavedPos + count * recordsize); |
+ return TRUE; |
+} |
+ |
+bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, |
+ FX_FILESIZE streampos, |
+ FX_BOOL bSkip) { |
+ m_pSyntax->RestorePos(pos); |
+ if (m_pSyntax->GetKeyword() != "xref") |
+ return false; |
+ |
+ m_SortedOffset.insert(pos); |
+ if (streampos) |
+ m_SortedOffset.insert(streampos); |
+ |
+ while (1) { |
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos(); |
+ bool bIsNumber; |
+ CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); |
+ if (word.IsEmpty()) |
+ return false; |
+ |
+ if (!bIsNumber) { |
+ m_pSyntax->RestorePos(SavedPos); |
+ break; |
+ } |
+ |
+ FX_DWORD start_objnum = FXSYS_atoui(word); |
+ if (start_objnum >= kMaxObjectNumber) |
+ return false; |
+ |
+ FX_DWORD count = m_pSyntax->GetDirectNum(); |
+ m_pSyntax->ToNextWord(); |
+ SavedPos = m_pSyntax->SavePos(); |
+ const int32_t recordsize = 20; |
+ |
+ m_dwXrefStartObjNum = start_objnum; |
+ if (!bSkip) { |
+ std::vector<char> buf(1024 * recordsize + 1); |
+ buf[1024 * recordsize] = '\0'; |
+ |
+ int32_t nBlocks = count / 1024 + 1; |
+ for (int32_t block = 0; block < nBlocks; block++) { |
+ int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; |
+ m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), |
+ block_size * recordsize); |
+ |
+ for (int32_t i = 0; i < block_size; i++) { |
+ FX_DWORD objnum = start_objnum + block * 1024 + i; |
+ char* pEntry = &buf[i * recordsize]; |
+ if (pEntry[17] == 'f') { |
+ m_ObjectInfo[objnum].pos = 0; |
+ m_ObjectInfo[objnum].type = 0; |
+ } else { |
+ FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry); |
+ if (offset == 0) { |
+ for (int32_t c = 0; c < 10; c++) { |
+ if (!std::isdigit(pEntry[c])) |
+ return false; |
+ } |
+ } |
+ |
+ m_ObjectInfo[objnum].pos = offset; |
+ int32_t version = FXSYS_atoi(pEntry + 11); |
+ if (version >= 1) |
+ m_bVersionUpdated = TRUE; |
+ |
+ m_ObjectInfo[objnum].gennum = version; |
+ if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) |
+ m_SortedOffset.insert(m_ObjectInfo[objnum].pos); |
+ |
+ m_ObjectInfo[objnum].type = 1; |
+ } |
+ } |
+ } |
+ } |
+ m_pSyntax->RestorePos(SavedPos + count * recordsize); |
+ } |
+ return !streampos || LoadCrossRefV5(&streampos, FALSE); |
+} |
+ |
+FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { |
+ if (!LoadCrossRefV5(&xrefpos, TRUE)) |
+ return FALSE; |
+ |
+ std::set<FX_FILESIZE> seen_xrefpos; |
+ while (xrefpos) { |
+ seen_xrefpos.insert(xrefpos); |
+ if (!LoadCrossRefV5(&xrefpos, FALSE)) |
+ return FALSE; |
+ |
+ // Check for circular references. |
+ if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) |
+ return FALSE; |
+ } |
+ m_ObjectStreamMap.clear(); |
+ m_bXRefStream = TRUE; |
+ return TRUE; |
+} |
+ |
+FX_BOOL CPDF_Parser::RebuildCrossRef() { |
+ m_ObjectInfo.clear(); |
+ m_SortedOffset.clear(); |
+ if (m_pTrailer) { |
+ m_pTrailer->Release(); |
+ m_pTrailer = nullptr; |
+ } |
+ |
+ ParserState state = ParserState::kDefault; |
+ |
+ int32_t inside_index = 0; |
+ FX_DWORD objnum = 0; |
+ FX_DWORD gennum = 0; |
+ int32_t depth = 0; |
+ |
+ const FX_DWORD kBufferSize = 4096; |
+ std::vector<uint8_t> buffer(kBufferSize); |
+ |
+ FX_FILESIZE pos = m_pSyntax->m_HeaderOffset; |
+ FX_FILESIZE start_pos = 0; |
+ FX_FILESIZE start_pos1 = 0; |
+ FX_FILESIZE last_obj = -1; |
+ FX_FILESIZE last_xref = -1; |
+ FX_FILESIZE last_trailer = -1; |
+ |
+ while (pos < m_pSyntax->m_FileLen) { |
+ const FX_FILESIZE saved_pos = pos; |
+ bool bOverFlow = false; |
+ FX_DWORD size = |
+ std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize); |
+ if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size)) |
+ break; |
+ |
+ for (FX_DWORD i = 0; i < size; i++) { |
+ uint8_t byte = buffer[i]; |
+ switch (state) { |
+ case ParserState::kDefault: |
+ if (PDFCharIsWhitespace(byte)) { |
+ state = ParserState::kWhitespace; |
+ } else if (std::isdigit(byte)) { |
+ --i; |
+ state = ParserState::kWhitespace; |
+ } else if (byte == '%') { |
+ inside_index = 0; |
+ state = ParserState::kComment; |
+ } else if (byte == '(') { |
+ state = ParserState::kString; |
+ depth = 1; |
+ } else if (byte == '<') { |
+ inside_index = 1; |
+ state = ParserState::kHexString; |
+ } else if (byte == '\\') { |
+ state = ParserState::kEscapedString; |
+ } else if (byte == 't') { |
+ state = ParserState::kTrailer; |
+ inside_index = 1; |
+ } |
+ break; |
+ |
+ case ParserState::kWhitespace: |
+ if (std::isdigit(byte)) { |
+ start_pos = pos + i; |
+ state = ParserState::kObjNum; |
+ objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); |
+ } else if (byte == 't') { |
+ state = ParserState::kTrailer; |
+ inside_index = 1; |
+ } else if (byte == 'x') { |
+ state = ParserState::kXref; |
+ inside_index = 1; |
+ } else if (!PDFCharIsWhitespace(byte)) { |
+ --i; |
+ state = ParserState::kDefault; |
+ } |
+ break; |
+ |
+ case ParserState::kObjNum: |
+ if (std::isdigit(byte)) { |
+ objnum = |
+ objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); |
+ } else if (PDFCharIsWhitespace(byte)) { |
+ state = ParserState::kPostObjNum; |
+ } else { |
+ --i; |
+ state = ParserState::kEndObj; |
+ inside_index = 0; |
+ } |
+ break; |
+ |
+ case ParserState::kPostObjNum: |
+ if (std::isdigit(byte)) { |
+ start_pos1 = pos + i; |
+ state = ParserState::kGenNum; |
+ gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); |
+ } else if (byte == 't') { |
+ state = ParserState::kTrailer; |
+ inside_index = 1; |
+ } else if (!PDFCharIsWhitespace(byte)) { |
+ --i; |
+ state = ParserState::kDefault; |
+ } |
+ break; |
+ |
+ case ParserState::kGenNum: |
+ if (std::isdigit(byte)) { |
+ gennum = |
+ gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); |
+ } else if (PDFCharIsWhitespace(byte)) { |
+ state = ParserState::kPostGenNum; |
+ } else { |
+ --i; |
+ state = ParserState::kDefault; |
+ } |
+ break; |
+ |
+ case ParserState::kPostGenNum: |
+ if (byte == 'o') { |
+ state = ParserState::kBeginObj; |
+ inside_index = 1; |
+ } else if (std::isdigit(byte)) { |
+ objnum = gennum; |
+ gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); |
+ start_pos = start_pos1; |
+ start_pos1 = pos + i; |
+ state = ParserState::kGenNum; |
+ } else if (byte == 't') { |
+ state = ParserState::kTrailer; |
+ inside_index = 1; |
+ } else if (!PDFCharIsWhitespace(byte)) { |
+ --i; |
+ state = ParserState::kDefault; |
+ } |
+ break; |
+ |
+ case ParserState::kBeginObj: |
+ switch (inside_index) { |
+ case 1: |
+ if (byte != 'b') { |
+ --i; |
+ state = ParserState::kDefault; |
+ } else { |
+ inside_index++; |
+ } |
+ break; |
+ case 2: |
+ if (byte != 'j') { |
+ --i; |
+ state = ParserState::kDefault; |
+ } else { |
+ inside_index++; |
+ } |
+ break; |
+ case 3: |
+ if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { |
+ FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset; |
+ m_SortedOffset.insert(obj_pos); |
+ last_obj = start_pos; |
+ FX_FILESIZE obj_end = 0; |
+ CPDF_Object* pObject = ParseIndirectObjectAtByStrict( |
+ m_pDocument, obj_pos, objnum, &obj_end); |
+ if (CPDF_Stream* pStream = ToStream(pObject)) { |
+ if (CPDF_Dictionary* pDict = pStream->GetDict()) { |
+ if ((pDict->KeyExist("Type")) && |
+ (pDict->GetStringBy("Type") == "XRef" && |
+ pDict->KeyExist("Size"))) { |
+ CPDF_Object* pRoot = pDict->GetElement("Root"); |
+ if (pRoot && pRoot->GetDict() && |
+ pRoot->GetDict()->GetElement("Pages")) { |
+ if (m_pTrailer) |
+ m_pTrailer->Release(); |
+ m_pTrailer = ToDictionary(pDict->Clone()); |
+ } |
+ } |
+ } |
+ } |
+ |
+ FX_FILESIZE offset = 0; |
+ m_pSyntax->RestorePos(obj_pos); |
+ offset = m_pSyntax->FindTag("obj", 0); |
+ if (offset == -1) |
+ offset = 0; |
+ else |
+ offset += 3; |
+ |
+ FX_FILESIZE nLen = obj_end - obj_pos - offset; |
+ if ((FX_DWORD)nLen > size - i) { |
+ pos = obj_end + m_pSyntax->m_HeaderOffset; |
+ bOverFlow = true; |
+ } else { |
+ i += (FX_DWORD)nLen; |
+ } |
+ |
+ if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) && |
+ m_ObjectInfo[objnum].pos) { |
+ if (pObject) { |
+ FX_DWORD oldgen = GetObjectGenNum(objnum); |
+ m_ObjectInfo[objnum].pos = obj_pos; |
+ m_ObjectInfo[objnum].gennum = gennum; |
+ if (oldgen != gennum) |
+ m_bVersionUpdated = TRUE; |
+ } |
+ } else { |
+ m_ObjectInfo[objnum].pos = obj_pos; |
+ m_ObjectInfo[objnum].type = 1; |
+ m_ObjectInfo[objnum].gennum = gennum; |
+ } |
+ |
+ if (pObject) |
+ pObject->Release(); |
+ } |
+ --i; |
+ state = ParserState::kDefault; |
+ break; |
+ } |
+ break; |
+ |
+ case ParserState::kTrailer: |
+ if (inside_index == 7) { |
+ if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { |
+ last_trailer = pos + i - 7; |
+ m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset); |
+ |
+ CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true); |
+ if (pObj) { |
+ if (!pObj->IsDictionary() && !pObj->AsStream()) { |
+ pObj->Release(); |
+ } else { |
+ CPDF_Stream* pStream = pObj->AsStream(); |
+ if (CPDF_Dictionary* pTrailer = |
+ pStream ? pStream->GetDict() : pObj->AsDictionary()) { |
+ if (m_pTrailer) { |
+ CPDF_Object* pRoot = pTrailer->GetElement("Root"); |
+ CPDF_Reference* pRef = ToReference(pRoot); |
+ if (!pRoot || |
+ (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) && |
+ m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) { |
+ auto it = pTrailer->begin(); |
+ while (it != pTrailer->end()) { |
+ const CFX_ByteString& key = it->first; |
+ CPDF_Object* pElement = it->second; |
+ ++it; |
+ FX_DWORD dwObjNum = |
+ pElement ? pElement->GetObjNum() : 0; |
+ if (dwObjNum) { |
+ m_pTrailer->SetAtReference(key, m_pDocument, |
+ dwObjNum); |
+ } else { |
+ m_pTrailer->SetAt(key, pElement->Clone()); |
+ } |
+ } |
+ } |
+ pObj->Release(); |
+ } else { |
+ if (pObj->IsStream()) { |
+ m_pTrailer = ToDictionary(pTrailer->Clone()); |
+ pObj->Release(); |
+ } else { |
+ m_pTrailer = pTrailer; |
+ } |
+ |
+ FX_FILESIZE dwSavePos = m_pSyntax->SavePos(); |
+ CFX_ByteString strWord = m_pSyntax->GetKeyword(); |
+ if (!strWord.Compare("startxref")) { |
+ bool bNumber; |
+ CFX_ByteString bsOffset = |
+ m_pSyntax->GetNextWord(&bNumber); |
+ if (bNumber) |
+ m_LastXRefOffset = FXSYS_atoi(bsOffset); |
+ } |
+ m_pSyntax->RestorePos(dwSavePos); |
+ } |
+ } else { |
+ pObj->Release(); |
+ } |
+ } |
+ } |
+ } |
+ --i; |
+ state = ParserState::kDefault; |
+ } else if (byte == "trailer"[inside_index]) { |
+ inside_index++; |
+ } else { |
+ --i; |
+ state = ParserState::kDefault; |
+ } |
+ break; |
+ |
+ case ParserState::kXref: |
+ if (inside_index == 4) { |
+ last_xref = pos + i - 4; |
+ state = ParserState::kWhitespace; |
+ } else if (byte == "xref"[inside_index]) { |
+ inside_index++; |
+ } else { |
+ --i; |
+ state = ParserState::kDefault; |
+ } |
+ break; |
+ |
+ case ParserState::kComment: |
+ if (PDFCharIsLineEnding(byte)) |
+ state = ParserState::kDefault; |
+ break; |
+ |
+ case ParserState::kString: |
+ if (byte == ')') { |
+ if (depth > 0) |
+ depth--; |
+ } else if (byte == '(') { |
+ depth++; |
+ } |
+ |
+ if (!depth) |
+ state = ParserState::kDefault; |
+ break; |
+ |
+ case ParserState::kHexString: |
+ if (byte == '>' || (byte == '<' && inside_index == 1)) |
+ state = ParserState::kDefault; |
+ inside_index = 0; |
+ break; |
+ |
+ case ParserState::kEscapedString: |
+ if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { |
+ --i; |
+ state = ParserState::kDefault; |
+ } |
+ break; |
+ |
+ case ParserState::kEndObj: |
+ if (PDFCharIsWhitespace(byte)) { |
+ state = ParserState::kDefault; |
+ } else if (byte == '%' || byte == '(' || byte == '<' || |
+ byte == '\\') { |
+ state = ParserState::kDefault; |
+ --i; |
+ } else if (inside_index == 6) { |
+ state = ParserState::kDefault; |
+ --i; |
+ } else if (byte == "endobj"[inside_index]) { |
+ inside_index++; |
+ } |
+ break; |
+ } |
+ |
+ if (bOverFlow) { |
+ size = 0; |
+ break; |
+ } |
+ } |
+ pos += size; |
+ |
+ // If the position has not changed at all in a loop iteration, then break |
+ // out to prevent infinite looping. |
+ if (pos == saved_pos) |
+ break; |
+ } |
+ |
+ if (last_xref != -1 && last_xref > last_obj) |
+ last_trailer = last_xref; |
+ else if (last_trailer == -1 || last_xref < last_obj) |
+ last_trailer = m_pSyntax->m_FileLen; |
+ |
+ m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset); |
+ return m_pTrailer && !m_ObjectInfo.empty(); |
+} |
+ |
+FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) { |
+ CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0); |
+ if (!pObject) |
+ return FALSE; |
+ |
+ if (m_pDocument) { |
+ FX_BOOL bInserted = FALSE; |
+ CPDF_Dictionary* pDict = m_pDocument->GetRoot(); |
+ if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) { |
+ bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject); |
+ } else { |
+ if (pObject->IsStream()) |
+ pObject->Release(); |
+ } |
+ |
+ if (!bInserted) |
+ return FALSE; |
+ } |
+ |
+ CPDF_Stream* pStream = pObject->AsStream(); |
+ if (!pStream) |
+ return FALSE; |
+ |
+ *pos = pStream->GetDict()->GetIntegerBy("Prev"); |
+ int32_t size = pStream->GetDict()->GetIntegerBy("Size"); |
+ if (size < 0) { |
+ pStream->Release(); |
+ return FALSE; |
+ } |
+ |
+ if (bMainXRef) { |
+ m_pTrailer = ToDictionary(pStream->GetDict()->Clone()); |
+ ShrinkObjectMap(size); |
+ for (auto& it : m_ObjectInfo) |
+ it.second.type = 0; |
+ } else { |
+ m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone())); |
+ } |
+ |
+ std::vector<std::pair<int32_t, int32_t>> arrIndex; |
+ CPDF_Array* pArray = pStream->GetDict()->GetArrayBy("Index"); |
+ if (pArray) { |
+ FX_DWORD nPairSize = pArray->GetCount() / 2; |
+ for (FX_DWORD i = 0; i < nPairSize; i++) { |
+ CPDF_Object* pStartNumObj = pArray->GetElement(i * 2); |
+ CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1); |
+ |
+ if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) { |
+ int nStartNum = pStartNumObj->GetInteger(); |
+ int nCount = pCountObj->GetInteger(); |
+ if (nStartNum >= 0 && nCount > 0) |
+ arrIndex.push_back(std::make_pair(nStartNum, nCount)); |
+ } |
+ } |
+ } |
+ |
+ if (arrIndex.size() == 0) |
+ arrIndex.push_back(std::make_pair(0, size)); |
+ |
+ pArray = pStream->GetDict()->GetArrayBy("W"); |
+ if (!pArray) { |
+ pStream->Release(); |
+ return FALSE; |
+ } |
+ |
+ CFX_DWordArray WidthArray; |
+ FX_SAFE_DWORD dwAccWidth = 0; |
+ for (FX_DWORD i = 0; i < pArray->GetCount(); i++) { |
+ WidthArray.Add(pArray->GetIntegerAt(i)); |
+ dwAccWidth += WidthArray[i]; |
+ } |
+ |
+ if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) { |
+ pStream->Release(); |
+ return FALSE; |
+ } |
+ |
+ FX_DWORD totalWidth = dwAccWidth.ValueOrDie(); |
+ CPDF_StreamAcc acc; |
+ acc.LoadAllData(pStream); |
+ |
+ const uint8_t* pData = acc.GetData(); |
+ FX_DWORD dwTotalSize = acc.GetSize(); |
+ FX_DWORD segindex = 0; |
+ for (FX_DWORD i = 0; i < arrIndex.size(); i++) { |
+ int32_t startnum = arrIndex[i].first; |
+ if (startnum < 0) |
+ continue; |
+ |
+ m_dwXrefStartObjNum = |
+ pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum); |
+ FX_DWORD count = |
+ pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second); |
+ FX_SAFE_DWORD dwCaculatedSize = segindex; |
+ dwCaculatedSize += count; |
+ dwCaculatedSize *= totalWidth; |
+ if (!dwCaculatedSize.IsValid() || |
+ dwCaculatedSize.ValueOrDie() > dwTotalSize) { |
+ continue; |
+ } |
+ |
+ const uint8_t* segstart = pData + segindex * totalWidth; |
+ FX_SAFE_DWORD dwMaxObjNum = startnum; |
+ dwMaxObjNum += count; |
+ FX_DWORD dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1; |
+ if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) |
+ continue; |
+ |
+ for (FX_DWORD j = 0; j < count; j++) { |
+ int32_t type = 1; |
+ const uint8_t* entrystart = segstart + j * totalWidth; |
+ if (WidthArray[0]) |
+ type = GetVarInt(entrystart, WidthArray[0]); |
+ |
+ if (GetObjectType(startnum + j) == 255) { |
+ FX_FILESIZE offset = |
+ GetVarInt(entrystart + WidthArray[0], WidthArray[1]); |
+ m_ObjectInfo[startnum + j].pos = offset; |
+ m_SortedOffset.insert(offset); |
+ continue; |
+ } |
+ |
+ if (GetObjectType(startnum + j)) |
+ continue; |
+ |
+ m_ObjectInfo[startnum + j].type = type; |
+ if (type == 0) { |
+ m_ObjectInfo[startnum + j].pos = 0; |
+ } else { |
+ FX_FILESIZE offset = |
+ GetVarInt(entrystart + WidthArray[0], WidthArray[1]); |
+ m_ObjectInfo[startnum + j].pos = offset; |
+ if (type == 1) { |
+ m_SortedOffset.insert(offset); |
+ } else { |
+ if (offset < 0 || !IsValidObjectNumber(offset)) { |
+ pStream->Release(); |
+ return FALSE; |
+ } |
+ m_ObjectInfo[offset].type = 255; |
+ } |
+ } |
+ } |
+ segindex += count; |
+ } |
+ pStream->Release(); |
+ return TRUE; |
+} |
+ |
+CPDF_Array* CPDF_Parser::GetIDArray() { |
+ CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement("ID") : nullptr; |
+ if (!pID) |
+ return nullptr; |
+ |
+ if (CPDF_Reference* pRef = pID->AsReference()) { |
+ pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum()); |
+ m_pTrailer->SetAt("ID", pID); |
+ } |
+ return ToArray(pID); |
+} |
+ |
+FX_DWORD CPDF_Parser::GetRootObjNum() { |
+ CPDF_Reference* pRef = |
+ ToReference(m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr); |
+ return pRef ? pRef->GetRefObjNum() : 0; |
+} |
+ |
+FX_DWORD CPDF_Parser::GetInfoObjNum() { |
+ CPDF_Reference* pRef = |
+ ToReference(m_pTrailer ? m_pTrailer->GetElement("Info") : nullptr); |
+ return pRef ? pRef->GetRefObjNum() : 0; |
+} |
+ |
+FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) { |
+ bForm = FALSE; |
+ if (!IsValidObjectNumber(objnum)) |
+ return TRUE; |
+ |
+ if (GetObjectType(objnum) == 0) |
+ return TRUE; |
+ |
+ if (GetObjectType(objnum) == 2) |
+ return TRUE; |
+ |
+ FX_FILESIZE pos = m_ObjectInfo[objnum].pos; |
+ auto it = m_SortedOffset.find(pos); |
+ if (it == m_SortedOffset.end()) |
+ return TRUE; |
+ |
+ if (++it == m_SortedOffset.end()) |
+ return FALSE; |
+ |
+ FX_FILESIZE size = *it - pos; |
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos(); |
+ m_pSyntax->RestorePos(pos); |
+ |
+ const char kFormStream[] = "/Form\0stream"; |
+ const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1); |
+ bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, size) == 0; |
+ m_pSyntax->RestorePos(SavedPos); |
+ return TRUE; |
+} |
+ |
+CPDF_Object* CPDF_Parser::ParseIndirectObject( |
+ CPDF_IndirectObjectHolder* pObjList, |
+ FX_DWORD objnum) { |
+ if (!IsValidObjectNumber(objnum)) |
+ return nullptr; |
+ |
+ // Prevent circular parsing the same object. |
+ if (pdfium::ContainsKey(m_ParsingObjNums, objnum)) |
+ return nullptr; |
+ ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum); |
+ |
+ if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) { |
+ FX_FILESIZE pos = m_ObjectInfo[objnum].pos; |
+ if (pos <= 0) |
+ return nullptr; |
+ return ParseIndirectObjectAt(pObjList, pos, objnum); |
+ } |
+ if (GetObjectType(objnum) != 2) |
+ return nullptr; |
+ |
+ CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); |
+ if (!pObjStream) |
+ return nullptr; |
+ |
+ ScopedFileStream file(FX_CreateMemoryStream( |
+ (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE)); |
+ CPDF_SyntaxParser syntax; |
+ syntax.InitParser(file.get(), 0); |
+ const int32_t offset = GetStreamFirst(pObjStream); |
+ |
+ // Read object numbers from |pObjStream| into a cache. |
+ if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) { |
+ for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { |
+ FX_DWORD thisnum = syntax.GetDirectNum(); |
+ FX_DWORD thisoff = syntax.GetDirectNum(); |
+ m_ObjCache[pObjStream][thisnum] = thisoff; |
+ } |
+ } |
+ |
+ const auto it = m_ObjCache[pObjStream].find(objnum); |
+ if (it == m_ObjCache[pObjStream].end()) |
+ return nullptr; |
+ |
+ syntax.RestorePos(offset + it->second); |
+ return syntax.GetObject(pObjList, 0, 0, true); |
+} |
+ |
+CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) { |
+ auto it = m_ObjectStreamMap.find(objnum); |
+ if (it != m_ObjectStreamMap.end()) |
+ return it->second.get(); |
+ |
+ if (!m_pDocument) |
+ return nullptr; |
+ |
+ const CPDF_Stream* pStream = ToStream(m_pDocument->GetIndirectObject(objnum)); |
+ if (!pStream) |
+ return nullptr; |
+ |
+ CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc; |
+ pStreamAcc->LoadAllData(pStream); |
+ m_ObjectStreamMap[objnum].reset(pStreamAcc); |
+ return pStreamAcc; |
+} |
+ |
+FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const { |
+ if (!IsValidObjectNumber(objnum)) |
+ return 0; |
+ |
+ if (GetObjectType(objnum) == 2) |
+ objnum = GetObjectPositionOrZero(objnum); |
+ |
+ if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255) |
+ return 0; |
+ |
+ FX_FILESIZE offset = GetObjectPositionOrZero(objnum); |
+ if (offset == 0) |
+ return 0; |
+ |
+ auto it = m_SortedOffset.find(offset); |
+ if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) |
+ return 0; |
+ |
+ return *it - offset; |
+} |
+ |
+void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, |
+ uint8_t*& pBuffer, |
+ FX_DWORD& size) { |
+ pBuffer = nullptr; |
+ size = 0; |
+ if (!IsValidObjectNumber(objnum)) |
+ return; |
+ |
+ if (GetObjectType(objnum) == 2) { |
+ CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); |
+ if (!pObjStream) |
+ return; |
+ |
+ int32_t offset = GetStreamFirst(pObjStream); |
+ const uint8_t* pData = pObjStream->GetData(); |
+ FX_DWORD totalsize = pObjStream->GetSize(); |
+ ScopedFileStream file( |
+ FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE)); |
+ |
+ CPDF_SyntaxParser syntax; |
+ syntax.InitParser(file.get(), 0); |
+ for (int i = GetStreamNCount(pObjStream); i > 0; --i) { |
+ FX_DWORD thisnum = syntax.GetDirectNum(); |
+ FX_DWORD thisoff = syntax.GetDirectNum(); |
+ if (thisnum != objnum) |
+ continue; |
+ |
+ if (i == 1) { |
+ size = totalsize - (thisoff + offset); |
+ } else { |
+ syntax.GetDirectNum(); // Skip nextnum. |
+ FX_DWORD nextoff = syntax.GetDirectNum(); |
+ size = nextoff - thisoff; |
+ } |
+ |
+ pBuffer = FX_Alloc(uint8_t, size); |
+ FXSYS_memcpy(pBuffer, pData + thisoff + offset, size); |
+ return; |
+ } |
+ return; |
+ } |
+ |
+ if (GetObjectType(objnum) != 1) |
+ return; |
+ |
+ FX_FILESIZE pos = m_ObjectInfo[objnum].pos; |
+ if (pos == 0) |
+ return; |
+ |
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos(); |
+ m_pSyntax->RestorePos(pos); |
+ |
+ bool bIsNumber; |
+ CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); |
+ if (!bIsNumber) { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return; |
+ } |
+ |
+ FX_DWORD parser_objnum = FXSYS_atoui(word); |
+ if (parser_objnum && parser_objnum != objnum) { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return; |
+ } |
+ |
+ word = m_pSyntax->GetNextWord(&bIsNumber); |
+ if (!bIsNumber) { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return; |
+ } |
+ |
+ if (m_pSyntax->GetKeyword() != "obj") { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return; |
+ } |
+ |
+ auto it = m_SortedOffset.find(pos); |
+ if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return; |
+ } |
+ |
+ FX_FILESIZE nextoff = *it; |
+ FX_BOOL bNextOffValid = FALSE; |
+ if (nextoff != pos) { |
+ m_pSyntax->RestorePos(nextoff); |
+ word = m_pSyntax->GetNextWord(&bIsNumber); |
+ if (word == "xref") { |
+ bNextOffValid = TRUE; |
+ } else if (bIsNumber) { |
+ word = m_pSyntax->GetNextWord(&bIsNumber); |
+ if (bIsNumber && m_pSyntax->GetKeyword() == "obj") { |
+ bNextOffValid = TRUE; |
+ } |
+ } |
+ } |
+ |
+ if (!bNextOffValid) { |
+ m_pSyntax->RestorePos(pos); |
+ while (1) { |
+ if (m_pSyntax->GetKeyword() == "endobj") |
+ break; |
+ |
+ if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen) |
+ break; |
+ } |
+ nextoff = m_pSyntax->SavePos(); |
+ } |
+ |
+ size = (FX_DWORD)(nextoff - pos); |
+ pBuffer = FX_Alloc(uint8_t, size); |
+ m_pSyntax->RestorePos(pos); |
+ m_pSyntax->ReadBlock(pBuffer, size); |
+ m_pSyntax->RestorePos(SavedPos); |
+} |
+ |
+CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( |
+ CPDF_IndirectObjectHolder* pObjList, |
+ FX_FILESIZE pos, |
+ FX_DWORD objnum) { |
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos(); |
+ m_pSyntax->RestorePos(pos); |
+ bool bIsNumber; |
+ CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); |
+ if (!bIsNumber) { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return nullptr; |
+ } |
+ |
+ FX_FILESIZE objOffset = m_pSyntax->SavePos(); |
+ objOffset -= word.GetLength(); |
+ FX_DWORD parser_objnum = FXSYS_atoui(word); |
+ if (objnum && parser_objnum != objnum) { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return nullptr; |
+ } |
+ |
+ word = m_pSyntax->GetNextWord(&bIsNumber); |
+ if (!bIsNumber) { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return nullptr; |
+ } |
+ |
+ FX_DWORD parser_gennum = FXSYS_atoui(word); |
+ if (m_pSyntax->GetKeyword() != "obj") { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return nullptr; |
+ } |
+ |
+ CPDF_Object* pObj = |
+ m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true); |
+ m_pSyntax->SavePos(); |
+ |
+ CFX_ByteString bsWord = m_pSyntax->GetKeyword(); |
+ if (bsWord == "endobj") |
+ m_pSyntax->SavePos(); |
+ |
+ m_pSyntax->RestorePos(SavedPos); |
+ if (pObj) { |
+ if (!objnum) |
+ pObj->m_ObjNum = parser_objnum; |
+ pObj->m_GenNum = parser_gennum; |
+ } |
+ return pObj; |
+} |
+ |
+CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( |
+ CPDF_IndirectObjectHolder* pObjList, |
+ FX_FILESIZE pos, |
+ FX_DWORD objnum, |
+ FX_FILESIZE* pResultPos) { |
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos(); |
+ m_pSyntax->RestorePos(pos); |
+ |
+ bool bIsNumber; |
+ CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); |
+ if (!bIsNumber) { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return nullptr; |
+ } |
+ |
+ FX_DWORD parser_objnum = FXSYS_atoui(word); |
+ if (objnum && parser_objnum != objnum) { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return nullptr; |
+ } |
+ |
+ word = m_pSyntax->GetNextWord(&bIsNumber); |
+ if (!bIsNumber) { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return nullptr; |
+ } |
+ |
+ FX_DWORD gennum = FXSYS_atoui(word); |
+ if (m_pSyntax->GetKeyword() != "obj") { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return nullptr; |
+ } |
+ |
+ CPDF_Object* pObj = m_pSyntax->GetObjectByStrict(pObjList, objnum, gennum); |
+ if (pResultPos) |
+ *pResultPos = m_pSyntax->m_Pos; |
+ |
+ m_pSyntax->RestorePos(SavedPos); |
+ return pObj; |
+} |
+ |
+CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() { |
+ if (m_pSyntax->GetKeyword() != "trailer") |
+ return nullptr; |
+ |
+ std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj( |
+ m_pSyntax->GetObject(m_pDocument, 0, 0, true)); |
+ if (!ToDictionary(pObj.get())) |
+ return nullptr; |
+ return pObj.release()->AsDictionary(); |
+} |
+ |
+FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) { |
+ if (!m_pSecurityHandler) |
+ return (FX_DWORD)-1; |
+ |
+ FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions(); |
+ if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") { |
+ dwPermission &= 0xFFFFFFFC; |
+ dwPermission |= 0xFFFFF0C0; |
+ if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2) |
+ dwPermission &= 0xFFFFF0FF; |
+ } |
+ return dwPermission; |
+} |
+ |
+FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, |
+ FX_DWORD offset) { |
+ m_pSyntax->InitParser(pFileAccess, offset); |
+ m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9); |
+ |
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos(); |
+ bool bIsNumber; |
+ CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); |
+ if (!bIsNumber) |
+ return FALSE; |
+ |
+ FX_DWORD objnum = FXSYS_atoui(word); |
+ word = m_pSyntax->GetNextWord(&bIsNumber); |
+ if (!bIsNumber) |
+ return FALSE; |
+ |
+ FX_DWORD gennum = FXSYS_atoui(word); |
+ if (m_pSyntax->GetKeyword() != "obj") { |
+ m_pSyntax->RestorePos(SavedPos); |
+ return FALSE; |
+ } |
+ |
+ m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true); |
+ if (!m_pLinearized) |
+ return FALSE; |
+ |
+ CPDF_Dictionary* pDict = m_pLinearized->GetDict(); |
+ if (pDict && pDict->GetElement("Linearized")) { |
+ m_pSyntax->GetNextWord(nullptr); |
+ |
+ CPDF_Object* pLen = pDict->GetElement("L"); |
+ if (!pLen) { |
+ m_pLinearized->Release(); |
+ m_pLinearized = nullptr; |
+ return FALSE; |
+ } |
+ |
+ if (pLen->GetInteger() != (int)pFileAccess->GetSize()) |
+ return FALSE; |
+ |
+ if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P"))) |
+ m_dwFirstPageNo = pNo->GetInteger(); |
+ |
+ if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T"))) |
+ m_LastXRefOffset = pTable->GetInteger(); |
+ |
+ return TRUE; |
+ } |
+ m_pLinearized->Release(); |
+ m_pLinearized = nullptr; |
+ return FALSE; |
+} |
+ |
+CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) { |
+ CloseParser(); |
+ m_bXRefStream = FALSE; |
+ m_LastXRefOffset = 0; |
+ m_bOwnFileRead = true; |
+ |
+ int32_t offset = GetHeaderOffset(pFileAccess); |
+ if (offset == -1) |
+ return FORMAT_ERROR; |
+ |
+ if (!IsLinearizedFile(pFileAccess, offset)) { |
+ m_pSyntax->m_pFileAccess = nullptr; |
+ return StartParse(pFileAccess); |
+ } |
+ |
+ m_pDocument = new CPDF_Document(this); |
+ FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos(); |
+ |
+ FX_BOOL bXRefRebuilt = FALSE; |
+ FX_BOOL bLoadV4 = FALSE; |
+ if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) && |
+ !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) { |
+ if (!RebuildCrossRef()) |
+ return FORMAT_ERROR; |
+ |
+ bXRefRebuilt = TRUE; |
+ m_LastXRefOffset = 0; |
+ } |
+ |
+ if (bLoadV4) { |
+ m_pTrailer = LoadTrailerV4(); |
+ if (!m_pTrailer) |
+ return SUCCESS; |
+ |
+ int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); |
+ if (xrefsize > 0) |
+ ShrinkObjectMap(xrefsize); |
+ } |
+ |
+ Error eRet = SetEncryptHandler(); |
+ if (eRet != SUCCESS) |
+ return eRet; |
+ |
+ m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); |
+ if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { |
+ if (bXRefRebuilt) |
+ return FORMAT_ERROR; |
+ |
+ ReleaseEncryptHandler(); |
+ if (!RebuildCrossRef()) |
+ return FORMAT_ERROR; |
+ |
+ eRet = SetEncryptHandler(); |
+ if (eRet != SUCCESS) |
+ return eRet; |
+ |
+ m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); |
+ if (!m_pDocument->GetRoot()) |
+ return FORMAT_ERROR; |
+ } |
+ |
+ if (GetRootObjNum() == 0) { |
+ ReleaseEncryptHandler(); |
+ if (!RebuildCrossRef() || GetRootObjNum() == 0) |
+ return FORMAT_ERROR; |
+ |
+ eRet = SetEncryptHandler(); |
+ if (eRet != SUCCESS) |
+ return eRet; |
+ } |
+ |
+ if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { |
+ if (CPDF_Reference* pMetadata = |
+ ToReference(m_pDocument->GetRoot()->GetElement("Metadata"))) |
+ m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); |
+ } |
+ return SUCCESS; |
+} |
+ |
+FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { |
+ if (!LoadCrossRefV5(&xrefpos, FALSE)) |
+ return FALSE; |
+ |
+ std::set<FX_FILESIZE> seen_xrefpos; |
+ while (xrefpos) { |
+ seen_xrefpos.insert(xrefpos); |
+ if (!LoadCrossRefV5(&xrefpos, FALSE)) |
+ return FALSE; |
+ |
+ // Check for circular references. |
+ if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) |
+ return FALSE; |
+ } |
+ m_ObjectStreamMap.clear(); |
+ m_bXRefStream = TRUE; |
+ return TRUE; |
+} |
+ |
+CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { |
+ FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum; |
+ m_pSyntax->m_MetadataObjnum = 0; |
+ if (m_pTrailer) { |
+ m_pTrailer->Release(); |
+ m_pTrailer = nullptr; |
+ } |
+ |
+ m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset); |
+ uint8_t ch = 0; |
+ FX_DWORD dwCount = 0; |
+ m_pSyntax->GetNextChar(ch); |
+ while (PDFCharIsWhitespace(ch)) { |
+ ++dwCount; |
+ if (m_pSyntax->m_FileLen >= |
+ (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) { |
+ break; |
+ } |
+ m_pSyntax->GetNextChar(ch); |
+ } |
+ m_LastXRefOffset += dwCount; |
+ m_ObjectStreamMap.clear(); |
+ m_ObjCache.clear(); |
+ |
+ if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && |
+ !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { |
+ m_LastXRefOffset = 0; |
+ m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; |
+ return FORMAT_ERROR; |
+ } |
+ |
+ m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; |
+ return SUCCESS; |
+} |