Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10346)

Unified Diff: core/fpdfapi/parser/cpdf_document.cpp

Issue 2470803003: Traverse PDF page tree only once in CPDF_Document Try 3 (Closed)
Patch Set: Rebase and test Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « core/fpdfapi/parser/cpdf_document.h ('k') | core/fpdfapi/parser/cpdf_document_unittest.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: core/fpdfapi/parser/cpdf_document.cpp
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index c5f64a790ca1d31f6cb4357f5a01b18d6b0585e1..da28277167a9b10b23d4fae5b4b3ddb0f0009837 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -240,83 +240,6 @@ void InsertWidthArray1(CFX_Font* pFont,
InsertWidthArrayImpl(widths, size, pWidthArray);
}
-int InsertDeletePDFPage(CPDF_Document* pDoc,
- CPDF_Dictionary* pPages,
- int nPagesToGo,
- CPDF_Dictionary* pPage,
- FX_BOOL bInsert,
- std::set<CPDF_Dictionary*>* pVisited) {
- CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
- if (!pKidList)
- return -1;
-
- for (size_t i = 0; i < pKidList->GetCount(); i++) {
- CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
- if (pKid->GetStringFor("Type") == "Page") {
- if (nPagesToGo == 0) {
- if (bInsert) {
- pKidList->InsertAt(i, new CPDF_Reference(pDoc, pPage->GetObjNum()));
- pPage->SetReferenceFor("Parent", pDoc, pPages->GetObjNum());
- } else {
- pKidList->RemoveAt(i);
- }
- pPages->SetIntegerFor(
- "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
- return 1;
- }
- nPagesToGo--;
- } else {
- int nPages = pKid->GetIntegerFor("Count");
- if (nPagesToGo < nPages) {
- if (pdfium::ContainsKey(*pVisited, pKid))
- return -1;
-
- pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid);
- if (InsertDeletePDFPage(pDoc, pKid, nPagesToGo, pPage, bInsert,
- pVisited) < 0) {
- return -1;
- }
- pPages->SetIntegerFor(
- "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
- return 1;
- }
- nPagesToGo -= nPages;
- }
- }
- return 0;
-}
-
-int InsertNewPage(CPDF_Document* pDoc,
- int iPage,
- CPDF_Dictionary* pPageDict,
- CFX_ArrayTemplate<uint32_t>& pageList) {
- CPDF_Dictionary* pRoot = pDoc->GetRoot();
- CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr;
- if (!pPages)
- return -1;
-
- int nPages = pDoc->GetPageCount();
- if (iPage < 0 || iPage > nPages)
- return -1;
-
- if (iPage == nPages) {
- CPDF_Array* pPagesList = pPages->GetArrayFor("Kids");
- if (!pPagesList) {
- pPagesList = new CPDF_Array;
- pPages->SetFor("Kids", pPagesList);
- }
- pPagesList->Add(new CPDF_Reference(pDoc, pPageDict->GetObjNum()));
- pPages->SetIntegerFor("Count", nPages + 1);
- pPageDict->SetReferenceFor("Parent", pDoc, pPages->GetObjNum());
- } else {
- std::set<CPDF_Dictionary*> stack = {pPages};
- if (InsertDeletePDFPage(pDoc, pPages, iPage, pPageDict, TRUE, &stack) < 0)
- return -1;
- }
- pageList.InsertAt(iPage, pPageDict->GetObjNum());
- return iPage;
-}
-
int CountPages(CPDF_Dictionary* pPages,
std::set<CPDF_Dictionary*>* visited_pages) {
int count = pPages->GetIntegerFor("Count");
@@ -413,6 +336,7 @@ CPDF_Document::CPDF_Document(std::unique_ptr<CPDF_Parser> pParser)
m_pParser(std::move(pParser)),
m_pRootDict(nullptr),
m_pInfoDict(nullptr),
+ m_iLastPageTraversed(-1),
Tom Sepez 2016/11/02 17:37:37 nit: without actually looking, my hunch is that it
npm 2016/11/03 00:18:25 If that makes the code clearer, I can do that. It
m_bLinearized(false),
m_iFirstPageNo(0),
m_dwFirstPageObjNum(0),
@@ -477,40 +401,67 @@ void CPDF_Document::LoadPages() {
m_PageList.SetSize(RetrievePageCount());
}
-CPDF_Dictionary* CPDF_Document::FindPDFPage(CPDF_Dictionary* pPages,
- int iPage,
- int nPagesToGo,
- int level) {
+// When this is called, m_pTreeTraversal[level] exists
+CPDF_Dictionary* CPDF_Document::TraversePDFPages(int iPage,
+ int& nPagesToGo,
Tom Sepez 2016/11/02 17:37:37 I can't help but think this might be easier if we
npm 2016/11/03 00:18:25 If I understand correctly what you mean by current
+ int level) {
+ CPDF_Dictionary* pPages = m_pTreeTraversal[level].first;
CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
- if (!pKidList)
- return nPagesToGo == 0 ? pPages : nullptr;
+ if (!pKidList) {
+ if (nPagesToGo != 1)
+ return nullptr;
+ m_PageList.SetAt(iPage, pPages->GetObjNum());
+ return pPages;
+ }
- if (level >= FX_MAX_PAGE_LEVEL)
+ if (level >= FX_MAX_PAGE_LEVEL) {
+ m_pTreeTraversal.pop_back();
return nullptr;
+ }
- for (size_t i = 0; i < pKidList->GetCount(); i++) {
+ CPDF_Dictionary* page = nullptr;
+ for (size_t i = m_pTreeTraversal[level].second; i < pKidList->GetCount();
+ i++) {
CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
if (!pKid) {
nPagesToGo--;
Tom Sepez 2016/11/02 17:37:37 I worry about this going negative here ...
npm 2016/11/03 00:18:25 Yes, this can go negative. This wouldn't be a prob
+ m_pTreeTraversal[level].second++;
continue;
}
- if (pKid == pPages)
+ if (pKid == pPages) {
+ m_pTreeTraversal[level].second++;
continue;
+ }
if (!pKid->KeyExist("Kids")) {
- if (nPagesToGo == 0)
- return pKid;
-
- m_PageList.SetAt(iPage - nPagesToGo, pKid->GetObjNum());
+ m_PageList.SetAt(iPage - nPagesToGo + 1, pKid->GetObjNum());
nPagesToGo--;
+ m_pTreeTraversal[level].second++;
+ if (nPagesToGo == 0) {
+ page = pKid;
+ break;
+ }
} else {
- int nPages = pKid->GetIntegerFor("Count");
- if (nPagesToGo < nPages)
- return FindPDFPage(pKid, iPage, nPagesToGo, level + 1);
-
- nPagesToGo -= nPages;
+ // If the vector has size level+1, the child is not in yet
+ if (static_cast<int>(m_pTreeTraversal.size()) == level + 1)
+ m_pTreeTraversal.push_back(std::make_pair(pKid, 0));
+ // Now m_pTreeTraversal[level+1] should exist and be equal to pKid.
+ CPDF_Dictionary* pageKid = TraversePDFPages(iPage, nPagesToGo, level + 1);
+ // Check if child was completely processed, i.e. it popped itself out
+ if (static_cast<int>(m_pTreeTraversal.size()) == level + 1)
+ m_pTreeTraversal[level].second++;
+ // If child did not finish or if no pages to go, we are done
+ if (static_cast<int>(m_pTreeTraversal.size()) != level + 1 ||
+ nPagesToGo <= 0) {
+ page = pageKid;
+ break;
+ }
}
}
- return nullptr;
+ if (m_pTreeTraversal[level].second ==
+ static_cast<int>(pKidList->GetCount())) {
+ m_pTreeTraversal.pop_back();
+ }
+ return page;
}
CPDF_Dictionary* CPDF_Document::GetPagesDict() const {
@@ -537,17 +488,18 @@ CPDF_Dictionary* CPDF_Document::GetPage(int iPage) {
if (objnum) {
if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum)))
return pDict;
+ return nullptr;
}
CPDF_Dictionary* pPages = GetPagesDict();
if (!pPages)
return nullptr;
- CPDF_Dictionary* pPage = FindPDFPage(pPages, iPage, iPage, 0);
- if (!pPage)
- return nullptr;
-
- m_PageList.SetAt(iPage, pPage->GetObjNum());
+ if (m_pTreeTraversal.empty())
+ m_pTreeTraversal.push_back(std::make_pair(pPages, 0));
+ int nPagesToGo = iPage - m_iLastPageTraversed;
+ CPDF_Dictionary* pPage = TraversePDFPages(iPage, nPagesToGo, 0);
+ m_iLastPageTraversed = iPage;
return pPage;
}
@@ -710,13 +662,94 @@ CPDF_Dictionary* CPDF_Document::CreateNewPage(int iPage) {
CPDF_Dictionary* pDict = new CPDF_Dictionary(m_pByteStringPool);
pDict->SetNameFor("Type", "Page");
uint32_t dwObjNum = AddIndirectObject(pDict);
- if (InsertNewPage(this, iPage, pDict, m_PageList) < 0) {
+ if (InsertNewPage(iPage, pDict, m_PageList) < 0) {
ReleaseIndirectObject(dwObjNum);
return nullptr;
}
return pDict;
}
+int CPDF_Document::InsertDeletePDFPage(CPDF_Dictionary* pPages,
Tom Sepez 2016/11/02 17:37:37 can we do this refactoring first as a separate CL?
npm 2016/11/03 00:18:25 Sounds good, will do.
+ int nPagesToGo,
+ CPDF_Dictionary* pPage,
+ FX_BOOL bInsert,
+ std::set<CPDF_Dictionary*>* pVisited) {
+ CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
+ if (!pKidList)
+ return -1;
+
+ for (size_t i = 0; i < pKidList->GetCount(); i++) {
+ CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
+ if (pKid->GetStringFor("Type") == "Page") {
+ if (nPagesToGo == 0) {
+ if (bInsert) {
+ pKidList->InsertAt(i, new CPDF_Reference(this, pPage->GetObjNum()));
+ pPage->SetReferenceFor("Parent", this, pPages->GetObjNum());
+ } else {
+ pKidList->RemoveAt(i);
+ }
+ pPages->SetIntegerFor(
+ "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
+ // Tree will change, so reset tree transversal variables
+ m_iLastPageTraversed = -1;
+ m_pTreeTraversal.clear();
+ return 1;
+ }
+ nPagesToGo--;
+ } else {
+ int nPages = pKid->GetIntegerFor("Count");
+ if (nPagesToGo < nPages) {
+ if (pdfium::ContainsKey(*pVisited, pKid))
+ return -1;
+
+ pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid);
+ if (InsertDeletePDFPage(pKid, nPagesToGo, pPage, bInsert, pVisited) <
+ 0) {
+ return -1;
+ }
+ pPages->SetIntegerFor(
+ "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
+ return 1;
+ }
+ nPagesToGo -= nPages;
+ }
+ }
+ return 0;
+}
+
+int CPDF_Document::InsertNewPage(int iPage,
+ CPDF_Dictionary* pPageDict,
+ CFX_ArrayTemplate<uint32_t>& pageList) {
+ CPDF_Dictionary* pRoot = GetRoot();
+ CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr;
+ if (!pPages)
+ return -1;
+
+ int nPages = GetPageCount();
+ if (iPage < 0 || iPage > nPages)
+ return -1;
+
+ if (iPage == nPages) {
+ CPDF_Array* pPagesList = pPages->GetArrayFor("Kids");
+ if (!pPagesList) {
+ pPagesList = new CPDF_Array;
+ pPages->SetFor("Kids", pPagesList);
+ }
+ pPagesList->Add(new CPDF_Reference(this, pPageDict->GetObjNum()));
+ pPages->SetIntegerFor("Count", nPages + 1);
+ pPageDict->SetReferenceFor("Parent", this, pPages->GetObjNum());
+ // Reset tree transversal variables
+ m_iLastPageTraversed = -1;
+ m_pTreeTraversal.clear();
+ } else {
+ std::set<CPDF_Dictionary*> stack = {pPages};
+ if (InsertDeletePDFPage(pPages, iPage, pPageDict, TRUE, &stack) < 0)
+ return -1;
+ }
+ pageList.InsertAt(iPage, pPageDict->GetObjNum());
+ return iPage;
+}
+
void CPDF_Document::DeletePage(int iPage) {
CPDF_Dictionary* pPages = GetPagesDict();
if (!pPages)
@@ -727,7 +760,7 @@ void CPDF_Document::DeletePage(int iPage) {
return;
std::set<CPDF_Dictionary*> stack = {pPages};
- if (InsertDeletePDFPage(this, pPages, iPage, nullptr, FALSE, &stack) < 0)
+ if (InsertDeletePDFPage(pPages, iPage, nullptr, FALSE, &stack) < 0)
return;
m_PageList.RemoveAt(iPage);
« no previous file with comments | « core/fpdfapi/parser/cpdf_document.h ('k') | core/fpdfapi/parser/cpdf_document_unittest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698