OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <cctype> | 8 #include <cctype> |
9 #include <cwctype> | 9 #include <cwctype> |
10 #include <memory> | 10 #include <memory> |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
136 } | 136 } |
137 if (pDst) { | 137 if (pDst) { |
138 FX_WCHAR n = wFind; | 138 FX_WCHAR n = wFind; |
139 while (n--) { | 139 while (n--) { |
140 *pDst++ = *pMap++; | 140 *pDst++ = *pMap++; |
141 } | 141 } |
142 } | 142 } |
143 return (FX_STRSIZE)wFind; | 143 return (FX_STRSIZE)wFind; |
144 } | 144 } |
145 | 145 |
// Returns the fraction of entries of |mask| that are set within the half-open
// index range [start, end).
//
// The range is first clamped to the valid indices of |mask| (a negative
// |start| is treated as 0, an |end| past the last entry as |mask.size()|), so
// no out-of-range iterator is ever formed. The fraction is computed over the
// clamped range. Returns 0 when the clamped range is empty, which also covers
// an empty |mask| and |start| >= |end|.
float MaskPercentFilled(const std::vector<bool>& mask,
                        int32_t start,
                        int32_t end) {
  using Offset = std::vector<bool>::difference_type;

  const size_t first = static_cast<size_t>(std::max(start, 0));
  const size_t last =
      std::min(static_cast<size_t>(std::max(end, 0)), mask.size());
  if (first >= last)
    return 0.0f;

  const auto filled = std::count(mask.begin() + static_cast<Offset>(first),
                                 mask.begin() + static_cast<Offset>(last), true);
  return static_cast<float>(filled) / static_cast<float>(last - first);
}
| 155 |
146 } // namespace | 156 } // namespace |
147 | 157 |
148 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) | 158 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) |
149 : m_pPage(pPage), | 159 : m_pPage(pPage), |
150 m_parserflag(flags), | 160 m_parserflag(flags), |
151 m_pPreTextObj(nullptr), | 161 m_pPreTextObj(nullptr), |
152 m_bIsParsed(false), | 162 m_bIsParsed(false), |
153 m_TextlineDir(TextOrientation::Unknown) { | 163 m_TextlineDir(TextOrientation::Unknown) { |
154 m_TextBuf.EstimateSize(0, 10240); | 164 m_TextBuf.EstimateSize(0, 10240); |
155 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 165 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
(...skipping 515 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
671 segmentStatus = 2; | 681 segmentStatus = 2; |
672 m_Segments.Add(segment); | 682 m_Segments.Add(segment); |
673 segment.m_Start = 0; | 683 segment.m_Start = 0; |
674 segment.m_nCount = 0; | 684 segment.m_nCount = 0; |
675 } | 685 } |
676 return m_Segments.GetSize(); | 686 return m_Segments.GetSize(); |
677 } | 687 } |
678 | 688 |
679 CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation() | 689 CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation() |
680 const { | 690 const { |
681 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); | |
682 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); | |
683 std::vector<uint8_t> nHorizontalMask(nPageWidth); | |
684 std::vector<uint8_t> nVerticalMask(nPageHeight); | |
685 uint8_t* pDataH = nHorizontalMask.data(); | |
686 uint8_t* pDataV = nVerticalMask.data(); | |
687 int32_t index = 0; | |
688 FX_FLOAT fLineHeight = 0.0f; | |
689 if (m_pPage->GetPageObjectList()->empty()) | 691 if (m_pPage->GetPageObjectList()->empty()) |
690 return TextOrientation::Unknown; | 692 return TextOrientation::Unknown; |
691 | 693 |
692 for (auto& pPageObj : *m_pPage->GetPageObjectList()) { | 694 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); |
693 if (!pPageObj || !pPageObj->IsText()) | 695 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); |
| 696 std::vector<bool> nHorizontalMask(nPageWidth); |
| 697 std::vector<bool> nVerticalMask(nPageHeight); |
| 698 FX_FLOAT fLineHeight = 0.0f; |
| 699 int32_t nStartH = nPageWidth; |
| 700 int32_t nEndH = 0; |
| 701 int32_t nStartV = nPageHeight; |
| 702 int32_t nEndV = 0; |
| 703 for (const auto& pPageObj : *m_pPage->GetPageObjectList()) { |
| 704 if (!pPageObj->IsText()) |
694 continue; | 705 continue; |
695 | 706 |
696 int32_t minH = | 707 int32_t minH = std::max(static_cast<int32_t>(pPageObj->m_Left), 0); |
697 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; | 708 int32_t maxH = |
698 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth | 709 std::min(static_cast<int32_t>(pPageObj->m_Right), nPageWidth); |
699 ? nPageWidth | 710 int32_t minV = std::max(static_cast<int32_t>(pPageObj->m_Bottom), 0); |
700 : (int32_t)pPageObj->m_Right; | 711 int32_t maxV = std::min(static_cast<int32_t>(pPageObj->m_Top), nPageHeight); |
701 int32_t minV = | |
702 (int32_t)pPageObj->m_Bottom < 0 ? 0 : (int32_t)pPageObj->m_Bottom; | |
703 int32_t maxV = (int32_t)pPageObj->m_Top > nPageHeight | |
704 ? nPageHeight | |
705 : (int32_t)pPageObj->m_Top; | |
706 if (minH >= maxH || minV >= maxV) | 712 if (minH >= maxH || minV >= maxV) |
707 continue; | 713 continue; |
708 | 714 |
709 FXSYS_memset(pDataH + minH, 1, maxH - minH); | 715 for (int32_t i = minH; i < maxH; ++i) |
710 FXSYS_memset(pDataV + minV, 1, maxV - minV); | 716 nHorizontalMask[i] = true; |
| 717 for (int32_t i = minV; i < maxV; ++i) |
| 718 nVerticalMask[i] = true; |
| 719 |
| 720 nStartH = std::min(nStartH, minH); |
| 721 nEndH = std::max(nEndH, maxH); |
| 722 nStartV = std::min(nStartV, minV); |
| 723 nEndV = std::max(nEndV, maxV); |
| 724 |
711 if (fLineHeight <= 0.0f) | 725 if (fLineHeight <= 0.0f) |
712 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom; | 726 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom; |
713 } | 727 } |
714 int32_t nStartH = 0; | 728 const int32_t nDoubleLineHeight = 2 * fLineHeight; |
715 int32_t nEndH = 0; | 729 if ((nEndV - nStartV) < nDoubleLineHeight) |
716 FX_FLOAT nSumH = 0.0f; | |
717 for (index = 0; index < nPageWidth; index++) { | |
718 if (1 == nHorizontalMask[index]) | |
719 break; | |
720 } | |
721 nStartH = index; | |
722 for (index = nPageWidth; index > 0; index--) { | |
723 if (1 == nHorizontalMask[index - 1]) | |
724 break; | |
725 } | |
726 nEndH = index; | |
727 for (index = nStartH; index < nEndH; index++) { | |
728 nSumH += nHorizontalMask[index]; | |
729 } | |
730 nSumH /= nEndH - nStartH; | |
731 int32_t nStartV = 0; | |
732 int32_t nEndV = 0; | |
733 FX_FLOAT nSumV = 0.0f; | |
734 for (index = 0; index < nPageHeight; index++) { | |
735 if (1 == nVerticalMask[index]) | |
736 break; | |
737 } | |
738 nStartV = index; | |
739 for (index = nPageHeight; index > 0; index--) { | |
740 if (1 == nVerticalMask[index - 1]) | |
741 break; | |
742 } | |
743 nEndV = index; | |
744 for (index = nStartV; index < nEndV; index++) | |
745 nSumV += nVerticalMask[index]; | |
746 nSumV /= nEndV - nStartV; | |
747 | |
748 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) | |
749 return TextOrientation::Horizontal; | 730 return TextOrientation::Horizontal; |
750 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) | 731 if ((nEndH - nStartH) < nDoubleLineHeight) |
751 return TextOrientation::Vertical; | 732 return TextOrientation::Vertical; |
752 | 733 |
| 734 const FX_FLOAT nSumH = MaskPercentFilled(nHorizontalMask, nStartH, nEndH); |
753 if (nSumH > 0.8f) | 735 if (nSumH > 0.8f) |
754 return TextOrientation::Horizontal; | 736 return TextOrientation::Horizontal; |
755 | 737 |
| 738 const FX_FLOAT nSumV = MaskPercentFilled(nVerticalMask, nStartV, nEndV); |
756 if (nSumH > nSumV) | 739 if (nSumH > nSumV) |
757 return TextOrientation::Horizontal; | 740 return TextOrientation::Horizontal; |
758 if (nSumH < nSumV) | 741 if (nSumH < nSumV) |
759 return TextOrientation::Vertical; | 742 return TextOrientation::Vertical; |
760 return TextOrientation::Unknown; | 743 return TextOrientation::Unknown; |
761 } | 744 } |
762 | 745 |
763 void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode, | 746 void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode, |
764 const CFX_Matrix& formMatrix) { | 747 const CFX_Matrix& formMatrix) { |
765 PAGECHAR_INFO generateChar; | 748 PAGECHAR_INFO generateChar; |
(...skipping 1570 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2336 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; | 2319 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; |
2337 } | 2320 } |
2338 | 2321 |
2339 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { | 2322 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { |
2340 if (index >= m_LinkArray.size()) | 2323 if (index >= m_LinkArray.size()) |
2341 return std::vector<CFX_FloatRect>(); | 2324 return std::vector<CFX_FloatRect>(); |
2342 | 2325 |
2343 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, | 2326 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, |
2344 m_LinkArray[index].m_Count); | 2327 m_LinkArray[index].m_Count); |
2345 } | 2328 } |
OLD | NEW |