core/fpdfapi/parser/cpdf_document.cpp - Issue 2470803003: Traverse PDF page tree only once in CPDF_Document Try 3

Unified Diff: core/fpdfapi/parser/cpdf_document.cpp

Issue 2470803003: Traverse PDF page tree only once in CPDF_Document Try 3 (Closed)

Patch Set: Rebase and test Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: core/fpdfapi/parser/cpdf_document.cpp

diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp

index c5f64a790ca1d31f6cb4357f5a01b18d6b0585e1..da28277167a9b10b23d4fae5b4b3ddb0f0009837 100644

--- a/core/fpdfapi/parser/cpdf_document.cpp

+++ b/core/fpdfapi/parser/cpdf_document.cpp

@@ -240,83 +240,6 @@ void InsertWidthArray1(CFX_Font* pFont,

InsertWidthArrayImpl(widths, size, pWidthArray);

}

-int InsertDeletePDFPage(CPDF_Document* pDoc,

- CPDF_Dictionary* pPages,

- int nPagesToGo,

- CPDF_Dictionary* pPage,

- FX_BOOL bInsert,

- std::set<CPDF_Dictionary*>* pVisited) {

- CPDF_Array* pKidList = pPages->GetArrayFor("Kids");

- if (!pKidList)

- return -1;

- for (size_t i = 0; i < pKidList->GetCount(); i++) {

- CPDF_Dictionary* pKid = pKidList->GetDictAt(i);

- if (pKid->GetStringFor("Type") == "Page") {

- if (nPagesToGo == 0) {

- if (bInsert) {

- pKidList->InsertAt(i, new CPDF_Reference(pDoc, pPage->GetObjNum()));

- pPage->SetReferenceFor("Parent", pDoc, pPages->GetObjNum());

- } else {

- pKidList->RemoveAt(i);

- }

- pPages->SetIntegerFor(

- "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));

- return 1;

- }

- nPagesToGo--;

- } else {

- int nPages = pKid->GetIntegerFor("Count");

- if (nPagesToGo < nPages) {

- if (pdfium::ContainsKey(*pVisited, pKid))

- return -1;

- pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid);

- if (InsertDeletePDFPage(pDoc, pKid, nPagesToGo, pPage, bInsert,

- pVisited) < 0) {

- return -1;

- }

- pPages->SetIntegerFor(

- "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));

- return 1;

- }

- nPagesToGo -= nPages;

- }

- return 0;

-int InsertNewPage(CPDF_Document* pDoc,

- int iPage,

- CPDF_Dictionary* pPageDict,

- CFX_ArrayTemplate<uint32_t>& pageList) {

- CPDF_Dictionary* pRoot = pDoc->GetRoot();

- CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr;

- if (!pPages)

- return -1;

- int nPages = pDoc->GetPageCount();

- if (iPage < 0 || iPage > nPages)

- return -1;

- if (iPage == nPages) {

- CPDF_Array* pPagesList = pPages->GetArrayFor("Kids");

- if (!pPagesList) {

- pPagesList = new CPDF_Array;

- pPages->SetFor("Kids", pPagesList);

- }

- pPagesList->Add(new CPDF_Reference(pDoc, pPageDict->GetObjNum()));

- pPages->SetIntegerFor("Count", nPages + 1);

- pPageDict->SetReferenceFor("Parent", pDoc, pPages->GetObjNum());

- } else {

- std::set<CPDF_Dictionary*> stack = {pPages};

- if (InsertDeletePDFPage(pDoc, pPages, iPage, pPageDict, TRUE, &stack) < 0)

- return -1;

- }

- pageList.InsertAt(iPage, pPageDict->GetObjNum());

- return iPage;

int CountPages(CPDF_Dictionary* pPages,

std::set<CPDF_Dictionary*>* visited_pages) {

int count = pPages->GetIntegerFor("Count");

@@ -413,6 +336,7 @@ CPDF_Document::CPDF_Document(std::unique_ptr<CPDF_Parser> pParser)

m_pParser(std::move(pParser)),

m_pRootDict(nullptr),

m_pInfoDict(nullptr),

+ m_iLastPageTraversed(-1),

Tom Sepez 2016/11/02 17:37:37 nit: without actually looking, my hunch is that it

npm 2016/11/03 00:18:25 If that makes the code clearer, I can do that. It

m_bLinearized(false),

m_iFirstPageNo(0),

m_dwFirstPageObjNum(0),

@@ -477,40 +401,67 @@ void CPDF_Document::LoadPages() {

m_PageList.SetSize(RetrievePageCount());

}

-CPDF_Dictionary* CPDF_Document::FindPDFPage(CPDF_Dictionary* pPages,

- int iPage,

- int nPagesToGo,

- int level) {

+// When this is called, m_pTreeTraversal[level] exists

+CPDF_Dictionary* CPDF_Document::TraversePDFPages(int iPage,

+ int& nPagesToGo,

Tom Sepez 2016/11/02 17:37:37 I can't help but think this might be easier if we

npm 2016/11/03 00:18:25 If I understand correctly what you mean by current

+ int level) {

+ CPDF_Dictionary* pPages = m_pTreeTraversal[level].first;

CPDF_Array* pKidList = pPages->GetArrayFor("Kids");

- if (!pKidList)

- return nPagesToGo == 0 ? pPages : nullptr;

+ if (!pKidList) {

+ if (nPagesToGo != 1)

+ return nullptr;

+ m_PageList.SetAt(iPage, pPages->GetObjNum());

+ return pPages;

+ }

- if (level >= FX_MAX_PAGE_LEVEL)

+ if (level >= FX_MAX_PAGE_LEVEL) {

+ m_pTreeTraversal.pop_back();

return nullptr;

+ }

- for (size_t i = 0; i < pKidList->GetCount(); i++) {

+ CPDF_Dictionary* page = nullptr;

+ for (size_t i = m_pTreeTraversal[level].second; i < pKidList->GetCount();

+ i++) {

CPDF_Dictionary* pKid = pKidList->GetDictAt(i);

if (!pKid) {

nPagesToGo--;

Tom Sepez 2016/11/02 17:37:37 I worry about this going negative here ...

npm 2016/11/03 00:18:25 Yes, this can go negative. This wouldn't be a prob

+ m_pTreeTraversal[level].second++;

continue;

}

- if (pKid == pPages)

+ if (pKid == pPages) {

+ m_pTreeTraversal[level].second++;

continue;

+ }

if (!pKid->KeyExist("Kids")) {

- if (nPagesToGo == 0)

- return pKid;

- m_PageList.SetAt(iPage - nPagesToGo, pKid->GetObjNum());

+ m_PageList.SetAt(iPage - nPagesToGo + 1, pKid->GetObjNum());

nPagesToGo--;

+ m_pTreeTraversal[level].second++;

+ if (nPagesToGo == 0) {

+ page = pKid;

+ break;

+ }

} else {

- int nPages = pKid->GetIntegerFor("Count");

- if (nPagesToGo < nPages)

- return FindPDFPage(pKid, iPage, nPagesToGo, level + 1);

- nPagesToGo -= nPages;

+ // If the vector has size level+1, the child is not in yet

+ if (static_cast<int>(m_pTreeTraversal.size()) == level + 1)

+ m_pTreeTraversal.push_back(std::make_pair(pKid, 0));

+ // Now m_pTreeTraversal[level+1] should exist and be equal to pKid.

+ CPDF_Dictionary* pageKid = TraversePDFPages(iPage, nPagesToGo, level + 1);

+ // Check if child was completely processed, i.e. it popped itself out

+ if (static_cast<int>(m_pTreeTraversal.size()) == level + 1)

+ m_pTreeTraversal[level].second++;

+ // If child did not finish or if no pages to go, we are done

+ if (static_cast<int>(m_pTreeTraversal.size()) != level + 1 ||

+ nPagesToGo <= 0) {

+ page = pageKid;

+ break;

+ }

}

- return nullptr;

+ if (m_pTreeTraversal[level].second ==

+ static_cast<int>(pKidList->GetCount())) {

+ m_pTreeTraversal.pop_back();

+ }

+ return page;

}

CPDF_Dictionary* CPDF_Document::GetPagesDict() const {

@@ -537,17 +488,18 @@ CPDF_Dictionary* CPDF_Document::GetPage(int iPage) {

if (objnum) {

if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum)))

return pDict;

+ return nullptr;

}

CPDF_Dictionary* pPages = GetPagesDict();

if (!pPages)

return nullptr;

- CPDF_Dictionary* pPage = FindPDFPage(pPages, iPage, iPage, 0);

- if (!pPage)

- return nullptr;

- m_PageList.SetAt(iPage, pPage->GetObjNum());

+ if (m_pTreeTraversal.empty())

+ m_pTreeTraversal.push_back(std::make_pair(pPages, 0));

+ int nPagesToGo = iPage - m_iLastPageTraversed;

+ CPDF_Dictionary* pPage = TraversePDFPages(iPage, nPagesToGo, 0);

+ m_iLastPageTraversed = iPage;

return pPage;

}

@@ -710,13 +662,94 @@ CPDF_Dictionary* CPDF_Document::CreateNewPage(int iPage) {

CPDF_Dictionary* pDict = new CPDF_Dictionary(m_pByteStringPool);

pDict->SetNameFor("Type", "Page");

uint32_t dwObjNum = AddIndirectObject(pDict);

- if (InsertNewPage(this, iPage, pDict, m_PageList) < 0) {

+ if (InsertNewPage(iPage, pDict, m_PageList) < 0) {

ReleaseIndirectObject(dwObjNum);

return nullptr;

}

return pDict;

}

+int CPDF_Document::InsertDeletePDFPage(CPDF_Dictionary* pPages,

Tom Sepez 2016/11/02 17:37:37 can we do this refactoring first as a separate CL?

npm 2016/11/03 00:18:25 Sounds good, will do.

+ int nPagesToGo,

+ CPDF_Dictionary* pPage,

+ FX_BOOL bInsert,

+ std::set<CPDF_Dictionary*>* pVisited) {

+ CPDF_Array* pKidList = pPages->GetArrayFor("Kids");

+ if (!pKidList)

+ return -1;

+ for (size_t i = 0; i < pKidList->GetCount(); i++) {

+ CPDF_Dictionary* pKid = pKidList->GetDictAt(i);

+ if (pKid->GetStringFor("Type") == "Page") {

+ if (nPagesToGo == 0) {

+ if (bInsert) {

+ pKidList->InsertAt(i, new CPDF_Reference(this, pPage->GetObjNum()));

+ pPage->SetReferenceFor("Parent", this, pPages->GetObjNum());

+ } else {

+ pKidList->RemoveAt(i);

+ }

+ pPages->SetIntegerFor(

+ "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));

+ // Tree will change, so reset tree transversal variables

+ m_iLastPageTraversed = -1;

+ m_pTreeTraversal.clear();

+ return 1;

+ }

+ nPagesToGo--;

+ } else {

+ int nPages = pKid->GetIntegerFor("Count");

+ if (nPagesToGo < nPages) {

+ if (pdfium::ContainsKey(*pVisited, pKid))

+ return -1;

+ pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid);

+ if (InsertDeletePDFPage(pKid, nPagesToGo, pPage, bInsert, pVisited) <

+ 0) {

+ return -1;

+ }

+ pPages->SetIntegerFor(

+ "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));

+ return 1;

+ }

+ nPagesToGo -= nPages;

+ }

+ return 0;

+int CPDF_Document::InsertNewPage(int iPage,

+ CPDF_Dictionary* pPageDict,

+ CFX_ArrayTemplate<uint32_t>& pageList) {

+ CPDF_Dictionary* pRoot = GetRoot();

+ CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr;

+ if (!pPages)

+ return -1;

+ int nPages = GetPageCount();

+ if (iPage < 0 || iPage > nPages)

+ return -1;

+ if (iPage == nPages) {

+ CPDF_Array* pPagesList = pPages->GetArrayFor("Kids");

+ if (!pPagesList) {

+ pPagesList = new CPDF_Array;

+ pPages->SetFor("Kids", pPagesList);

+ }

+ pPagesList->Add(new CPDF_Reference(this, pPageDict->GetObjNum()));

+ pPages->SetIntegerFor("Count", nPages + 1);

+ pPageDict->SetReferenceFor("Parent", this, pPages->GetObjNum());

+ // Reset tree transversal variables

+ m_iLastPageTraversed = -1;

+ m_pTreeTraversal.clear();

+ } else {

+ std::set<CPDF_Dictionary*> stack = {pPages};

+ if (InsertDeletePDFPage(pPages, iPage, pPageDict, TRUE, &stack) < 0)

+ return -1;

+ }

+ pageList.InsertAt(iPage, pPageDict->GetObjNum());

+ return iPage;

void CPDF_Document::DeletePage(int iPage) {

CPDF_Dictionary* pPages = GetPagesDict();

if (!pPages)

@@ -727,7 +760,7 @@ void CPDF_Document::DeletePage(int iPage) {

return;

std::set<CPDF_Dictionary*> stack = {pPages};

- if (InsertDeletePDFPage(this, pPages, iPage, nullptr, FALSE, &stack) < 0)

+ if (InsertDeletePDFPage(pPages, iPage, nullptr, FALSE, &stack) < 0)

return;

m_PageList.RemoveAt(iPage);

« no previous file with comments | « core/fpdfapi/parser/cpdf_document.h ('k') | core/fpdfapi/parser/cpdf_document_unittest.cpp » ('j') | no next file with comments »