Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(345)

Side by Side Diff: core/fpdfapi/parser/cpdf_data_avail.cpp

Issue 2483633002: Do not load main cross refs for first page in linearized pdf. (Closed)
Patch Set: fix compilation. Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « core/fpdfapi/parser/cpdf_data_avail.h ('k') | core/fpdfapi/parser/cpdf_parser.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 PDFium Authors. All rights reserved. 1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/fpdfapi/parser/cpdf_data_avail.h" 7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <memory> 10 #include <memory>
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after
244 m_arrayAcroforms.push_back(m_pAcroForm); 244 m_arrayAcroforms.push_back(m_pAcroForm);
245 m_docStatus = PDF_DATAAVAIL_PAGETREE; 245 m_docStatus = PDF_DATAAVAIL_PAGETREE;
246 return true; 246 return true;
247 } 247 }
248 248
249 bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) { 249 bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) {
250 switch (m_docStatus) { 250 switch (m_docStatus) {
251 case PDF_DATAAVAIL_HEADER: 251 case PDF_DATAAVAIL_HEADER:
252 return CheckHeader(pHints); 252 return CheckHeader(pHints);
253 case PDF_DATAAVAIL_FIRSTPAGE: 253 case PDF_DATAAVAIL_FIRSTPAGE:
254 case PDF_DATAAVAIL_FIRSTPAGE_PREPARE:
255 return CheckFirstPage(pHints); 254 return CheckFirstPage(pHints);
256 case PDF_DATAAVAIL_HINTTABLE: 255 case PDF_DATAAVAIL_HINTTABLE:
257 return CheckHintTables(pHints); 256 return CheckHintTables(pHints);
258 case PDF_DATAAVAIL_END: 257 case PDF_DATAAVAIL_END:
259 return CheckEnd(pHints); 258 return CheckEnd(pHints);
260 case PDF_DATAAVAIL_CROSSREF: 259 case PDF_DATAAVAIL_CROSSREF:
261 return CheckCrossRef(pHints); 260 return CheckCrossRef(pHints);
262 case PDF_DATAAVAIL_CROSSREF_ITEM: 261 case PDF_DATAAVAIL_CROSSREF_ITEM:
263 return CheckCrossRefItem(pHints); 262 return CheckCrossRefItem(pHints);
264 case PDF_DATAAVAIL_CROSSREF_STREAM: 263 case PDF_DATAAVAIL_CROSSREF_STREAM:
(...skipping 322 matching lines...) Expand 10 before | Expand all | Expand 10 after
587 return false; 586 return false;
588 } 587 }
589 588
590 bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) { 589 bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) {
591 if (!m_pLinearized->GetFirstPageEndOffset() || 590 if (!m_pLinearized->GetFirstPageEndOffset() ||
592 !m_pLinearized->GetFileSize() || !m_pLinearized->GetLastXRefOffset()) { 591 !m_pLinearized->GetFileSize() || !m_pLinearized->GetLastXRefOffset()) {
593 m_docStatus = PDF_DATAAVAIL_ERROR; 592 m_docStatus = PDF_DATAAVAIL_ERROR;
594 return false; 593 return false;
595 } 594 }
596 595
597 bool bNeedDownLoad = false;
598 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset(); 596 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
599 dwEnd += 512; 597 dwEnd += 512;
600 if ((FX_FILESIZE)dwEnd > m_dwFileLen) 598 if ((FX_FILESIZE)dwEnd > m_dwFileLen)
601 dwEnd = (uint32_t)m_dwFileLen; 599 dwEnd = (uint32_t)m_dwFileLen;
602 600
603 int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen); 601 int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
604 int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; 602 int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
605 if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { 603 if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
606 pHints->AddSegment(iStartPos, iSize); 604 pHints->AddSegment(iStartPos, iSize);
607 bNeedDownLoad = true;
608 }
609
610 m_dwLastXRefOffset = m_pLinearized->GetLastXRefOffset();
611 FX_FILESIZE dwFileLen = m_pLinearized->GetFileSize();
612 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset,
613 (uint32_t)(dwFileLen - m_dwLastXRefOffset))) {
614 if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) {
615 uint32_t dwSize = (uint32_t)(dwFileLen - m_dwLastXRefOffset);
616 FX_FILESIZE offset = m_dwLastXRefOffset;
617 if (dwSize < 512 && dwFileLen > 512) {
618 dwSize = 512;
619 offset = dwFileLen - 512;
620 }
621 pHints->AddSegment(offset, dwSize);
622 }
623 } else {
624 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
625 }
626
627 if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) {
628 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
629 return false; 605 return false;
630 } 606 }
631 607
632 m_docStatus = 608 m_docStatus =
633 m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE; 609 m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
634 return true; 610 return true;
635 } 611 }
636 612
637 bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, 613 bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset,
638 uint32_t size, 614 uint32_t size,
639 DownloadHints* pHints) { 615 DownloadHints* pHints) {
640 if (offset < 0 || offset > m_dwFileLen) 616 if (offset < 0 || offset > m_dwFileLen)
641 return true; 617 return true;
642 618
643 FX_SAFE_FILESIZE safeSize = offset; 619 FX_SAFE_FILESIZE safeSize = offset;
644 safeSize += size; 620 safeSize += size;
645 safeSize += 512; 621 safeSize += 512;
646 if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen) 622 if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen)
647 size = m_dwFileLen - offset; 623 size = m_dwFileLen - offset;
648 else 624 else
649 size += 512; 625 size += 512;
650 626
651 if (!m_pFileAvail->IsDataAvail(offset, size)) { 627 if (!m_pFileAvail->IsDataAvail(offset, size)) {
652 pHints->AddSegment(offset, size); 628 if (pHints)
629 pHints->AddSegment(offset, size);
653 return false; 630 return false;
654 } 631 }
655 return true; 632 return true;
656 } 633 }
657 634
658 bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) { 635 bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) {
659 if (m_pLinearized->GetPageCount() <= 1) { 636 if (m_pLinearized->GetPageCount() <= 1) {
660 m_docStatus = PDF_DATAAVAIL_DONE; 637 m_docStatus = PDF_DATAAVAIL_DONE;
661 return true; 638 return true;
662 } 639 }
(...skipping 702 matching lines...) Expand 10 before | Expand all | Expand 10 after
1365 return true; 1342 return true;
1366 1343
1367 m_pDocument->LoadPages(); 1344 m_pDocument->LoadPages();
1368 return false; 1345 return false;
1369 } 1346 }
1370 1347
1371 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData( 1348 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData(
1372 DownloadHints* pHints) { 1349 DownloadHints* pHints) {
1373 if (m_bLinearedDataOK) 1350 if (m_bLinearedDataOK)
1374 return DataAvailable; 1351 return DataAvailable;
1352 ASSERT(m_pLinearized);
1353 if (!m_pLinearized->GetLastXRefOffset())
1354 return DataError;
1375 1355
1376 if (!m_bMainXRefLoadTried) { 1356 if (!m_bMainXRefLoadTried) {
1377 FX_SAFE_UINT32 data_size = m_dwFileLen; 1357 FX_SAFE_UINT32 data_size = m_dwFileLen;
1378 data_size -= m_dwLastXRefOffset; 1358 data_size -= m_pLinearized->GetLastXRefOffset();
1379 if (!data_size.IsValid()) 1359 if (!data_size.IsValid())
1380 return DataError; 1360 return DataError;
1381 1361
1382 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, 1362 if (!m_pFileAvail->IsDataAvail(m_pLinearized->GetLastXRefOffset(),
1383 data_size.ValueOrDie())) { 1363 data_size.ValueOrDie())) {
1384 pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie()); 1364 pHints->AddSegment(m_pLinearized->GetLastXRefOffset(),
1365 data_size.ValueOrDie());
1385 return DataNotAvailable; 1366 return DataNotAvailable;
1386 } 1367 }
1387 1368
1388 CPDF_Parser::Error eRet = 1369 CPDF_Parser::Error eRet =
1389 m_pDocument->GetParser()->LoadLinearizedMainXRefTable(); 1370 m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
1390 m_bMainXRefLoadTried = true; 1371 m_bMainXRefLoadTried = true;
1391 if (eRet != CPDF_Parser::SUCCESS) 1372 if (eRet != CPDF_Parser::SUCCESS)
1392 return DataError; 1373 return DataError;
1393 1374
1394 if (!PreparePageItem()) 1375 if (!PreparePageItem())
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
1433 } 1414 }
1434 1415
1435 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage( 1416 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage(
1436 uint32_t dwPage, 1417 uint32_t dwPage,
1437 DownloadHints* pHints) { 1418 DownloadHints* pHints) {
1438 if (!m_bAnnotsLoad) { 1419 if (!m_bAnnotsLoad) {
1439 if (!CheckPageAnnots(dwPage, pHints)) 1420 if (!CheckPageAnnots(dwPage, pHints))
1440 return DataNotAvailable; 1421 return DataNotAvailable;
1441 m_bAnnotsLoad = true; 1422 m_bAnnotsLoad = true;
1442 } 1423 }
1443 1424 const bool is_page_valid = ValidatePage(dwPage);
1444 DocAvailStatus nRet = CheckLinearizedData(pHints); 1425 (void)is_page_valid;
1445 if (nRet == DataAvailable) 1426 ASSERT(is_page_valid);
1446 m_bPageLoadedOK = false; 1427 return DataAvailable;
1447 return nRet;
1448 } 1428 }
1449 1429
1450 bool CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) { 1430 bool CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) {
1451 CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth); 1431 CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth);
1452 if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth) 1432 if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth)
1453 return false; 1433 return false;
1454 1434
1455 CPDF_Object* pParent = pDict->GetObjectFor("Parent"); 1435 CPDF_Object* pParent = pDict->GetObjectFor("Parent");
1456 if (!pParent) 1436 if (!pParent)
1457 return false; 1437 return false;
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
1536 } 1516 }
1537 1517
1538 if (!m_bPageLoadedOK) { 1518 if (!m_bPageLoadedOK) {
1539 if (m_objs_array.empty()) { 1519 if (m_objs_array.empty()) {
1540 m_ObjectSet.clear(); 1520 m_ObjectSet.clear();
1541 1521
1542 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage); 1522 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1543 m_pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); 1523 m_pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1544 if (!m_pPageDict) { 1524 if (!m_pPageDict) {
1545 ResetFirstCheck(dwPage); 1525 ResetFirstCheck(dwPage);
1526 // This is XFA page.
1546 return DataAvailable; 1527 return DataAvailable;
1547 } 1528 }
1548 1529
1549 std::vector<CPDF_Object*> obj_array; 1530 std::vector<CPDF_Object*> obj_array;
1550 obj_array.push_back(m_pPageDict); 1531 obj_array.push_back(m_pPageDict);
1551 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array)) 1532 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array))
1552 return DataNotAvailable; 1533 return DataNotAvailable;
1553 1534
1554 m_objs_array.clear(); 1535 m_objs_array.clear();
1555 } else { 1536 } else {
(...skipping 24 matching lines...) Expand all
1580 return DataNotAvailable; 1561 return DataNotAvailable;
1581 m_bNeedDownLoadResource = false; 1562 m_bNeedDownLoadResource = false;
1582 } 1563 }
1583 1564
1584 m_bPageLoadedOK = false; 1565 m_bPageLoadedOK = false;
1585 m_bAnnotsLoad = false; 1566 m_bAnnotsLoad = false;
1586 m_bCurPageDictLoadOK = false; 1567 m_bCurPageDictLoadOK = false;
1587 1568
1588 ResetFirstCheck(dwPage); 1569 ResetFirstCheck(dwPage);
1589 m_pagesLoadState.insert(dwPage); 1570 m_pagesLoadState.insert(dwPage);
1571 const bool is_page_valid = ValidatePage(dwPage);
1572 (void)is_page_valid;
1573 ASSERT(is_page_valid);
1590 return DataAvailable; 1574 return DataAvailable;
1591 } 1575 }
1592 1576
1593 bool CPDF_DataAvail::CheckResources(DownloadHints* pHints) { 1577 bool CPDF_DataAvail::CheckResources(DownloadHints* pHints) {
1594 if (m_objs_array.empty()) { 1578 if (m_objs_array.empty()) {
1595 std::vector<CPDF_Object*> obj_array; 1579 std::vector<CPDF_Object*> obj_array;
1596 obj_array.push_back(m_pPageResource); 1580 obj_array.push_back(m_pPageResource);
1597 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array)) 1581 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array))
1598 return false; 1582 return false;
1599 1583
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
1643 return nullptr; 1627 return nullptr;
1644 // We should say to the document, which object is the page. 1628 // We should say to the document, which object is the page.
1645 m_pDocument->SetPageObjNum(index, dwObjNum); 1629 m_pDocument->SetPageObjNum(index, dwObjNum);
1646 // Page object already can be parsed in document. 1630 // Page object already can be parsed in document.
1647 if (!m_pDocument->GetIndirectObject(dwObjNum)) { 1631 if (!m_pDocument->GetIndirectObject(dwObjNum)) {
1648 m_syntaxParser.InitParser( 1632 m_syntaxParser.InitParser(
1649 m_pFileRead, pdfium::base::checked_cast<uint32_t>(szPageStartPos)); 1633 m_pFileRead, pdfium::base::checked_cast<uint32_t>(szPageStartPos));
1650 m_pDocument->ReplaceIndirectObjectIfHigherGeneration( 1634 m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
1651 dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument)); 1635 dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument));
1652 } 1636 }
1637 const bool is_page_valid = ValidatePage(index);
1638 (void)is_page_valid;
1639 ASSERT(is_page_valid);
1653 return m_pDocument->GetPage(index); 1640 return m_pDocument->GetPage(index);
1654 } 1641 }
1655 1642
1656 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( 1643 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
1657 DownloadHints* pHints) { 1644 DownloadHints* pHints) {
1658 if (!m_pDocument) 1645 if (!m_pDocument)
1659 return FormAvailable; 1646 return FormAvailable;
1647 if (m_pLinearized) {
1648 DocAvailStatus nDocStatus = CheckLinearizedData(pHints);
1649 if (nDocStatus == DataError)
1650 return FormError;
1651 if (nDocStatus == DataNotAvailable)
1652 return FormNotAvailable;
1653 }
1660 1654
1661 if (!m_bLinearizedFormParamLoad) { 1655 if (!m_bLinearizedFormParamLoad) {
1662 CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); 1656 CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
1663 if (!pRoot) 1657 if (!pRoot)
1664 return FormAvailable; 1658 return FormAvailable;
1665 1659
1666 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm"); 1660 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
1667 if (!pAcroForm) 1661 if (!pAcroForm)
1668 return FormNotExist; 1662 return FormNotExist;
1669 1663
1670 DocAvailStatus nDocStatus = CheckLinearizedData(pHints); 1664 m_objs_array.push_back(pAcroForm->GetDict());
1671 if (nDocStatus == DataError)
1672 return FormError;
1673 if (nDocStatus == DataNotAvailable)
1674 return FormNotAvailable;
1675
1676 if (m_objs_array.empty())
1677 m_objs_array.push_back(pAcroForm->GetDict());
1678 m_bLinearizedFormParamLoad = true; 1665 m_bLinearizedFormParamLoad = true;
1679 } 1666 }
1680 1667
1681 std::vector<CPDF_Object*> new_objs_array; 1668 std::vector<CPDF_Object*> new_objs_array;
1682 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) { 1669 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
1683 m_objs_array = new_objs_array; 1670 m_objs_array = new_objs_array;
1684 return FormNotAvailable; 1671 return FormNotAvailable;
1685 } 1672 }
1686 1673
1687 m_objs_array.clear(); 1674 m_objs_array.clear();
1675 const bool is_form_valid = ValidateForm();
1676 (void)is_form_valid;
1677 ASSERT(is_form_valid);
1688 return FormAvailable; 1678 return FormAvailable;
1689 } 1679 }
1690 1680
1681 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) {
1682 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1683 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1684 if (!pPageDict)
1685 return false;
1686 std::vector<CPDF_Object*> obj_array;
1687 obj_array.push_back(pPageDict);
1688 std::vector<CPDF_Object*> dummy;
1689 return AreObjectsAvailable(obj_array, true, nullptr, dummy);
1690 }
1691
1692 bool CPDF_DataAvail::ValidateForm() {
1693 CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
1694 if (!pRoot)
1695 return true;
1696 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
1697 if (!pAcroForm)
1698 return false;
1699 std::vector<CPDF_Object*> obj_array;
1700 obj_array.push_back(pAcroForm);
1701 std::vector<CPDF_Object*> dummy;
1702 return AreObjectsAvailable(obj_array, true, nullptr, dummy);
1703 }
1704
1691 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {} 1705 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
1692 1706
1693 CPDF_DataAvail::PageNode::~PageNode() { 1707 CPDF_DataAvail::PageNode::~PageNode() {
1694 for (int32_t i = 0; i < m_childNode.GetSize(); ++i) 1708 for (int32_t i = 0; i < m_childNode.GetSize(); ++i)
1695 delete m_childNode[i]; 1709 delete m_childNode[i];
1696 m_childNode.RemoveAll(); 1710 m_childNode.RemoveAll();
1697 } 1711 }
OLDNEW
« no previous file with comments | « core/fpdfapi/parser/cpdf_data_avail.h ('k') | core/fpdfapi/parser/cpdf_parser.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698