Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(66)

Side by Side Diff: core/fpdfapi/parser/cpdf_data_avail.cpp

Issue 2483633002: Do not load main cross refs for first page in linearized pdf. (Closed)
Patch Set: Fix XFA pages availability. Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 PDFium Authors. All rights reserved. 1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/fpdfapi/parser/cpdf_data_avail.h" 7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <memory> 10 #include <memory>
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after
244 m_arrayAcroforms.push_back(m_pAcroForm); 244 m_arrayAcroforms.push_back(m_pAcroForm);
245 m_docStatus = PDF_DATAAVAIL_PAGETREE; 245 m_docStatus = PDF_DATAAVAIL_PAGETREE;
246 return true; 246 return true;
247 } 247 }
248 248
249 bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) { 249 bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) {
250 switch (m_docStatus) { 250 switch (m_docStatus) {
251 case PDF_DATAAVAIL_HEADER: 251 case PDF_DATAAVAIL_HEADER:
252 return CheckHeader(pHints); 252 return CheckHeader(pHints);
253 case PDF_DATAAVAIL_FIRSTPAGE: 253 case PDF_DATAAVAIL_FIRSTPAGE:
254 case PDF_DATAAVAIL_FIRSTPAGE_PREPARE:
255 return CheckFirstPage(pHints); 254 return CheckFirstPage(pHints);
256 case PDF_DATAAVAIL_HINTTABLE: 255 case PDF_DATAAVAIL_HINTTABLE:
257 return CheckHintTables(pHints); 256 return CheckHintTables(pHints);
258 case PDF_DATAAVAIL_END: 257 case PDF_DATAAVAIL_END:
259 return CheckEnd(pHints); 258 return CheckEnd(pHints);
260 case PDF_DATAAVAIL_CROSSREF: 259 case PDF_DATAAVAIL_CROSSREF:
261 return CheckCrossRef(pHints); 260 return CheckCrossRef(pHints);
262 case PDF_DATAAVAIL_CROSSREF_ITEM: 261 case PDF_DATAAVAIL_CROSSREF_ITEM:
263 return CheckCrossRefItem(pHints); 262 return CheckCrossRefItem(pHints);
264 case PDF_DATAAVAIL_CROSSREF_STREAM: 263 case PDF_DATAAVAIL_CROSSREF_STREAM:
(...skipping 322 matching lines...) Expand 10 before | Expand all | Expand 10 after
587 return false; 586 return false;
588 } 587 }
589 588
590 bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) { 589 bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) {
591 if (!m_pLinearized->GetFirstPageEndOffset() || 590 if (!m_pLinearized->GetFirstPageEndOffset() ||
592 !m_pLinearized->GetFileSize() || !m_pLinearized->GetLastXRefOffset()) { 591 !m_pLinearized->GetFileSize() || !m_pLinearized->GetLastXRefOffset()) {
593 m_docStatus = PDF_DATAAVAIL_ERROR; 592 m_docStatus = PDF_DATAAVAIL_ERROR;
594 return false; 593 return false;
595 } 594 }
596 595
597 bool bNeedDownLoad = false;
598 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset(); 596 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
599 dwEnd += 512; 597 dwEnd += 512;
600 if ((FX_FILESIZE)dwEnd > m_dwFileLen) 598 if ((FX_FILESIZE)dwEnd > m_dwFileLen)
601 dwEnd = (uint32_t)m_dwFileLen; 599 dwEnd = (uint32_t)m_dwFileLen;
602 600
603 int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen); 601 int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
604 int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; 602 int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
605 if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { 603 if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
606 pHints->AddSegment(iStartPos, iSize); 604 pHints->AddSegment(iStartPos, iSize);
607 bNeedDownLoad = true;
608 }
609
610 m_dwLastXRefOffset = m_pLinearized->GetLastXRefOffset();
611 FX_FILESIZE dwFileLen = m_pLinearized->GetFileSize();
612 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset,
613 (uint32_t)(dwFileLen - m_dwLastXRefOffset))) {
614 if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) {
615 uint32_t dwSize = (uint32_t)(dwFileLen - m_dwLastXRefOffset);
616 FX_FILESIZE offset = m_dwLastXRefOffset;
617 if (dwSize < 512 && dwFileLen > 512) {
618 dwSize = 512;
619 offset = dwFileLen - 512;
620 }
621 pHints->AddSegment(offset, dwSize);
622 }
623 } else {
624 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
625 }
626
627 if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) {
628 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
629 return false; 605 return false;
630 } 606 }
631 607
632 m_docStatus = 608 m_docStatus =
633 m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE; 609 m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
634 return true; 610 return true;
635 } 611 }
636 612
637 bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, 613 bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset,
638 uint32_t size, 614 uint32_t size,
639 DownloadHints* pHints) { 615 DownloadHints* pHints) {
640 if (offset < 0 || offset > m_dwFileLen) 616 if (offset < 0 || offset > m_dwFileLen)
641 return true; 617 return true;
642 618
643 FX_SAFE_FILESIZE safeSize = offset; 619 FX_SAFE_FILESIZE safeSize = offset;
644 safeSize += size; 620 safeSize += size;
645 safeSize += 512; 621 safeSize += 512;
646 if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen) 622 if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen)
647 size = m_dwFileLen - offset; 623 size = m_dwFileLen - offset;
648 else 624 else
649 size += 512; 625 size += 512;
650 626
651 if (!m_pFileAvail->IsDataAvail(offset, size)) { 627 if (!m_pFileAvail->IsDataAvail(offset, size)) {
652 pHints->AddSegment(offset, size); 628 if (pHints)
629 pHints->AddSegment(offset, size);
653 return false; 630 return false;
654 } 631 }
655 return true; 632 return true;
656 } 633 }
657 634
658 bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) { 635 bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) {
659 if (m_pLinearized->GetPageCount() <= 1) { 636 if (m_pLinearized->GetPageCount() <= 1) {
660 m_docStatus = PDF_DATAAVAIL_DONE; 637 m_docStatus = PDF_DATAAVAIL_DONE;
661 return true; 638 return true;
662 } 639 }
(...skipping 702 matching lines...) Expand 10 before | Expand all | Expand 10 after
1365 return true; 1342 return true;
1366 1343
1367 m_pDocument->LoadPages(); 1344 m_pDocument->LoadPages();
1368 return false; 1345 return false;
1369 } 1346 }
1370 1347
1371 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData( 1348 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData(
1372 DownloadHints* pHints) { 1349 DownloadHints* pHints) {
1373 if (m_bLinearedDataOK) 1350 if (m_bLinearedDataOK)
1374 return DataAvailable; 1351 return DataAvailable;
1352 if (!m_pLinearized || !m_pLinearized->GetLastXRefOffset())
Lei Zhang 2016/11/09 08:51:11 Should be safe to remove the !m_pLinearized check.
snake 2016/11/09 12:41:49 Done.
1353 return DataError;
1375 1354
1376 if (!m_bMainXRefLoadTried) { 1355 if (!m_bMainXRefLoadTried) {
1377 FX_SAFE_UINT32 data_size = m_dwFileLen; 1356 FX_SAFE_UINT32 data_size = m_dwFileLen;
1378 data_size -= m_dwLastXRefOffset; 1357 data_size -= m_pLinearized->GetLastXRefOffset();
1379 if (!data_size.IsValid()) 1358 if (!data_size.IsValid())
1380 return DataError; 1359 return DataError;
1381 1360
1382 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, 1361 if (!m_pFileAvail->IsDataAvail(m_pLinearized->GetLastXRefOffset(),
1383 data_size.ValueOrDie())) { 1362 data_size.ValueOrDie())) {
1384 pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie()); 1363 pHints->AddSegment(m_pLinearized->GetLastXRefOffset(),
1364 data_size.ValueOrDie());
1385 return DataNotAvailable; 1365 return DataNotAvailable;
1386 } 1366 }
1387 1367
1388 CPDF_Parser::Error eRet = 1368 CPDF_Parser::Error eRet =
1389 m_pDocument->GetParser()->LoadLinearizedMainXRefTable(); 1369 m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
1390 m_bMainXRefLoadTried = true; 1370 m_bMainXRefLoadTried = true;
1391 if (eRet != CPDF_Parser::SUCCESS) 1371 if (eRet != CPDF_Parser::SUCCESS)
1392 return DataError; 1372 return DataError;
1393 1373
1394 if (!PreparePageItem()) 1374 if (!PreparePageItem())
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
1433 } 1413 }
1434 1414
1435 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage( 1415 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage(
1436 uint32_t dwPage, 1416 uint32_t dwPage,
1437 DownloadHints* pHints) { 1417 DownloadHints* pHints) {
1438 if (!m_bAnnotsLoad) { 1418 if (!m_bAnnotsLoad) {
1439 if (!CheckPageAnnots(dwPage, pHints)) 1419 if (!CheckPageAnnots(dwPage, pHints))
1440 return DataNotAvailable; 1420 return DataNotAvailable;
1441 m_bAnnotsLoad = true; 1421 m_bAnnotsLoad = true;
1442 } 1422 }
1443 1423 ASSERT(ValidatePage(dwPage));
1444 DocAvailStatus nRet = CheckLinearizedData(pHints); 1424 return DataAvailable;
1445 if (nRet == DataAvailable)
1446 m_bPageLoadedOK = false;
1447 return nRet;
1448 } 1425 }
1449 1426
1450 bool CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) { 1427 bool CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) {
1451 CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth); 1428 CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth);
1452 if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth) 1429 if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth)
1453 return false; 1430 return false;
1454 1431
1455 CPDF_Object* pParent = pDict->GetObjectFor("Parent"); 1432 CPDF_Object* pParent = pDict->GetObjectFor("Parent");
1456 if (!pParent) 1433 if (!pParent)
1457 return false; 1434 return false;
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
1536 } 1513 }
1537 1514
1538 if (!m_bPageLoadedOK) { 1515 if (!m_bPageLoadedOK) {
1539 if (m_objs_array.empty()) { 1516 if (m_objs_array.empty()) {
1540 m_ObjectSet.clear(); 1517 m_ObjectSet.clear();
1541 1518
1542 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage); 1519 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1543 m_pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); 1520 m_pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1544 if (!m_pPageDict) { 1521 if (!m_pPageDict) {
1545 ResetFirstCheck(dwPage); 1522 ResetFirstCheck(dwPage);
1523 // This is XFA page.
1546 return DataAvailable; 1524 return DataAvailable;
snake 2016/11/08 23:37:09 I restored this line, because it is related to XFA pag...
1547 } 1525 }
1548 1526
1549 std::vector<CPDF_Object*> obj_array; 1527 std::vector<CPDF_Object*> obj_array;
1550 obj_array.push_back(m_pPageDict); 1528 obj_array.push_back(m_pPageDict);
1551 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array)) 1529 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array))
1552 return DataNotAvailable; 1530 return DataNotAvailable;
1553 1531
1554 m_objs_array.clear(); 1532 m_objs_array.clear();
1555 } else { 1533 } else {
1556 std::vector<CPDF_Object*> new_objs_array; 1534 std::vector<CPDF_Object*> new_objs_array;
(...skipping 23 matching lines...) Expand all
1580 return DataNotAvailable; 1558 return DataNotAvailable;
1581 m_bNeedDownLoadResource = false; 1559 m_bNeedDownLoadResource = false;
1582 } 1560 }
1583 1561
1584 m_bPageLoadedOK = false; 1562 m_bPageLoadedOK = false;
1585 m_bAnnotsLoad = false; 1563 m_bAnnotsLoad = false;
1586 m_bCurPageDictLoadOK = false; 1564 m_bCurPageDictLoadOK = false;
1587 1565
1588 ResetFirstCheck(dwPage); 1566 ResetFirstCheck(dwPage);
1589 m_pagesLoadState.insert(dwPage); 1567 m_pagesLoadState.insert(dwPage);
1568 ASSERT(ValidatePage(dwPage));
1590 return DataAvailable; 1569 return DataAvailable;
1591 } 1570 }
1592 1571
1593 bool CPDF_DataAvail::CheckResources(DownloadHints* pHints) { 1572 bool CPDF_DataAvail::CheckResources(DownloadHints* pHints) {
1594 if (m_objs_array.empty()) { 1573 if (m_objs_array.empty()) {
1595 std::vector<CPDF_Object*> obj_array; 1574 std::vector<CPDF_Object*> obj_array;
1596 obj_array.push_back(m_pPageResource); 1575 obj_array.push_back(m_pPageResource);
1597 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array)) 1576 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array))
1598 return false; 1577 return false;
1599 1578
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
1643 return nullptr; 1622 return nullptr;
1644 // We should say to the document, which object is the page. 1623 // We should say to the document, which object is the page.
1645 m_pDocument->SetPageObjNum(index, dwObjNum); 1624 m_pDocument->SetPageObjNum(index, dwObjNum);
1646 // Page object already can be parsed in document. 1625 // Page object already can be parsed in document.
1647 if (!m_pDocument->GetIndirectObject(dwObjNum)) { 1626 if (!m_pDocument->GetIndirectObject(dwObjNum)) {
1648 m_syntaxParser.InitParser( 1627 m_syntaxParser.InitParser(
1649 m_pFileRead, pdfium::base::checked_cast<uint32_t>(szPageStartPos)); 1628 m_pFileRead, pdfium::base::checked_cast<uint32_t>(szPageStartPos));
1650 m_pDocument->ReplaceIndirectObjectIfHigherGeneration( 1629 m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
1651 dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument)); 1630 dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument));
1652 } 1631 }
1632 ASSERT(ValidatePage(index));
1653 return m_pDocument->GetPage(index); 1633 return m_pDocument->GetPage(index);
1654 } 1634 }
1655 1635
1656 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( 1636 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
1657 DownloadHints* pHints) { 1637 DownloadHints* pHints) {
1658 if (!m_pDocument) 1638 if (!m_pDocument)
1659 return FormAvailable; 1639 return FormAvailable;
1640 if (m_pLinearized) {
1641 DocAvailStatus nDocStatus = CheckLinearizedData(pHints);
1642 if (nDocStatus == DataError)
1643 return FormError;
1644 if (nDocStatus == DataNotAvailable)
1645 return FormNotAvailable;
1646 }
1660 1647
1661 if (!m_bLinearizedFormParamLoad) { 1648 if (!m_bLinearizedFormParamLoad) {
1662 CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); 1649 CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
1663 if (!pRoot) 1650 if (!pRoot)
1664 return FormAvailable; 1651 return FormAvailable;
1665 1652
1666 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm"); 1653 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
1667 if (!pAcroForm) 1654 if (!pAcroForm)
1668 return FormNotExist; 1655 return FormNotExist;
1669 1656
1670 DocAvailStatus nDocStatus = CheckLinearizedData(pHints); 1657 m_objs_array.push_back(pAcroForm->GetDict());
1671 if (nDocStatus == DataError)
1672 return FormError;
1673 if (nDocStatus == DataNotAvailable)
1674 return FormNotAvailable;
1675
1676 if (m_objs_array.empty())
1677 m_objs_array.push_back(pAcroForm->GetDict());
1678 m_bLinearizedFormParamLoad = true; 1658 m_bLinearizedFormParamLoad = true;
1679 } 1659 }
1680 1660
1681 std::vector<CPDF_Object*> new_objs_array; 1661 std::vector<CPDF_Object*> new_objs_array;
1682 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) { 1662 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
1683 m_objs_array = new_objs_array; 1663 m_objs_array = new_objs_array;
1684 return FormNotAvailable; 1664 return FormNotAvailable;
1685 } 1665 }
1686 1666
1687 m_objs_array.clear(); 1667 m_objs_array.clear();
1668 ASSERT(ValidateForm());
1688 return FormAvailable; 1669 return FormAvailable;
1689 } 1670 }
1690 1671
1672 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) {
1673 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1674 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1675 if (!pPageDict)
1676 return false;
1677 std::vector<CPDF_Object*> obj_array;
1678 obj_array.push_back(pPageDict);
1679 std::vector<CPDF_Object*> dummy;
1680 return AreObjectsAvailable(obj_array, true, nullptr, dummy);
1681 }
1682
1683 bool CPDF_DataAvail::ValidateForm() {
1684 CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
1685 if (!pRoot)
1686 return true;
1687 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
1688 if (!pAcroForm)
1689 return false;
1690 std::vector<CPDF_Object*> obj_array;
1691 obj_array.push_back(pAcroForm);
1692 std::vector<CPDF_Object*> dummy;
1693 return AreObjectsAvailable(obj_array, true, nullptr, dummy);
1694 }
1695
1691 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {} 1696 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
1692 1697
1693 CPDF_DataAvail::PageNode::~PageNode() { 1698 CPDF_DataAvail::PageNode::~PageNode() {
1694 for (int32_t i = 0; i < m_childNode.GetSize(); ++i) 1699 for (int32_t i = 0; i < m_childNode.GetSize(); ++i)
1695 delete m_childNode[i]; 1700 delete m_childNode[i];
1696 m_childNode.RemoveAll(); 1701 m_childNode.RemoveAll();
1697 } 1702 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698