OLD | NEW |
---|---|
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <cctype> | 8 #include <cctype> |
9 #include <cwctype> | 9 #include <cwctype> |
10 #include <memory> | 10 #include <memory> |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
136 } | 136 } |
137 if (pDst) { | 137 if (pDst) { |
138 FX_WCHAR n = wFind; | 138 FX_WCHAR n = wFind; |
139 while (n--) { | 139 while (n--) { |
140 *pDst++ = *pMap++; | 140 *pDst++ = *pMap++; |
141 } | 141 } |
142 } | 142 } |
143 return (FX_STRSIZE)wFind; | 143 return (FX_STRSIZE)wFind; |
144 } | 144 } |
145 | 145 |
146 float MaskPercentFilled(const std::vector<bool>& mask, | |
147 int32_t start, | |
148 int32_t end) { | |
149 if (start >= end) | |
150 return 0; | |
151 float count = std::count_if(mask.begin() + start, mask.begin() + end, | |
152 [](bool r) { return r; }); | |
153 return count / (end - start); | |
154 } | |
155 | |
146 } // namespace | 156 } // namespace |
147 | 157 |
148 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) | 158 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) |
149 : m_pPage(pPage), | 159 : m_pPage(pPage), |
150 m_parserflag(flags), | 160 m_parserflag(flags), |
151 m_pPreTextObj(nullptr), | 161 m_pPreTextObj(nullptr), |
152 m_bIsParsed(false), | 162 m_bIsParsed(false), |
153 m_TextlineDir(TextOrientation::Unknown), | 163 m_TextlineDir(TextOrientation::Unknown), |
154 m_CurlineRect(0, 0, 0, 0) { | 164 m_CurlineRect(0, 0, 0, 0) { |
155 m_TextBuf.EstimateSize(0, 10240); | 165 m_TextBuf.EstimateSize(0, 10240); |
(...skipping 514 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
670 segmentStatus = 2; | 680 segmentStatus = 2; |
671 m_Segments.Add(segment); | 681 m_Segments.Add(segment); |
672 segment.m_Start = 0; | 682 segment.m_Start = 0; |
673 segment.m_nCount = 0; | 683 segment.m_nCount = 0; |
674 } | 684 } |
675 return m_Segments.GetSize(); | 685 return m_Segments.GetSize(); |
676 } | 686 } |
677 | 687 |
678 CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation() | 688 CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation() |
679 const { | 689 const { |
680 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); | |
681 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); | |
682 std::vector<uint8_t> nHorizontalMask(nPageWidth); | |
683 std::vector<uint8_t> nVerticalMask(nPageHeight); | |
684 uint8_t* pDataH = nHorizontalMask.data(); | |
685 uint8_t* pDataV = nVerticalMask.data(); | |
686 int32_t index = 0; | |
687 FX_FLOAT fLineHeight = 0.0f; | |
688 if (m_pPage->GetPageObjectList()->empty()) | 690 if (m_pPage->GetPageObjectList()->empty()) |
689 return TextOrientation::Unknown; | 691 return TextOrientation::Unknown; |
690 | 692 |
691 for (auto& pPageObj : *m_pPage->GetPageObjectList()) { | 693 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); |
692 if (!pPageObj || !pPageObj->IsText()) | 694 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); |
695 std::vector<bool> nHorizontalMask(nPageWidth); | |
696 std::vector<bool> nVerticalMask(nPageHeight); | |
697 FX_FLOAT fLineHeight = 0.0f; | |
698 for (const auto& pPageObj : *m_pPage->GetPageObjectList()) { | |
699 if (!pPageObj->IsText()) | |
693 continue; | 700 continue; |
694 | 701 |
695 int32_t minH = | 702 int32_t minH = std::max(static_cast<int32_t>(pPageObj->m_Left), 0); |
696 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; | 703 int32_t maxH = |
697 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth | 704 std::min(static_cast<int32_t>(pPageObj->m_Right), nPageWidth); |
698 ? nPageWidth | 705 int32_t minV = std::max(static_cast<int32_t>(pPageObj->m_Bottom), 0); |
699 : (int32_t)pPageObj->m_Right; | 706 int32_t maxV = std::min(static_cast<int32_t>(pPageObj->m_Top), nPageHeight); |
700 int32_t minV = | |
701 (int32_t)pPageObj->m_Bottom < 0 ? 0 : (int32_t)pPageObj->m_Bottom; | |
702 int32_t maxV = (int32_t)pPageObj->m_Top > nPageHeight | |
703 ? nPageHeight | |
704 : (int32_t)pPageObj->m_Top; | |
705 if (minH >= maxH || minV >= maxV) | 707 if (minH >= maxH || minV >= maxV) |
706 continue; | 708 continue; |
707 | 709 |
708 FXSYS_memset(pDataH + minH, 1, maxH - minH); | 710 for (int32_t i = minH; i < maxH; ++i) |
709 FXSYS_memset(pDataV + minV, 1, maxV - minV); | 711 nHorizontalMask[i] = true; |
712 for (int32_t i = minV; i < maxV; ++i) | |
713 nVerticalMask[i] = true; | |
710 if (fLineHeight <= 0.0f) | 714 if (fLineHeight <= 0.0f) |
711 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom; | 715 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom; |
dsinclair
2016/06/15 13:37:50
What happens if we have different line heights on
Lei Zhang
2016/06/15 23:46:33
Everything here is heuristics based, so this could
dsinclair
2016/06/16 15:06:10
Acknowledged.
| |
712 } | 716 } |
713 int32_t nStartH = 0; | 717 int32_t index = 0; |
714 int32_t nEndH = 0; | |
715 FX_FLOAT nSumH = 0.0f; | |
716 for (index = 0; index < nPageWidth; index++) { | 718 for (index = 0; index < nPageWidth; index++) { |
717 if (1 == nHorizontalMask[index]) | 719 if (nHorizontalMask[index]) |
718 break; | 720 break; |
dsinclair
2016/06/15 13:37:50
Am I misreading this or, isn't index after this fo
Lei Zhang
2016/06/15 23:46:33
Which |minH|? It's the smallest |minH| value that
dsinclair
2016/06/16 15:06:10
Crap, there is another level of for() loop on 698
Lei Zhang
2016/06/16 22:47:03
Done.
| |
719 } | 721 } |
720 nStartH = index; | 722 const int32_t nStartH = index; |
721 for (index = nPageWidth; index > 0; index--) { | 723 for (index = nPageWidth; index > 0; index--) { |
722 if (1 == nHorizontalMask[index - 1]) | 724 if (nHorizontalMask[index - 1]) |
723 break; | 725 break; |
724 } | 726 } |
725 nEndH = index; | 727 const int32_t nEndH = index; |
dsinclair
2016/06/15 13:37:50
Shouldn't this be index - 1? Or, should the mask ve
Lei Zhang
2016/06/15 23:46:33
Hmm, this probably helps prevent a divide by 0? It
| |
726 for (index = nStartH; index < nEndH; index++) { | |
727 nSumH += nHorizontalMask[index]; | |
728 } | |
729 nSumH /= nEndH - nStartH; | |
730 int32_t nStartV = 0; | |
731 int32_t nEndV = 0; | |
732 FX_FLOAT nSumV = 0.0f; | |
733 for (index = 0; index < nPageHeight; index++) { | 728 for (index = 0; index < nPageHeight; index++) { |
734 if (1 == nVerticalMask[index]) | 729 if (nVerticalMask[index]) |
735 break; | 730 break; |
736 } | 731 } |
737 nStartV = index; | 732 const int32_t nStartV = index; |
738 for (index = nPageHeight; index > 0; index--) { | 733 for (index = nPageHeight; index > 0; index--) { |
739 if (1 == nVerticalMask[index - 1]) | 734 if (nVerticalMask[index - 1]) |
740 break; | 735 break; |
741 } | 736 } |
742 nEndV = index; | 737 const int32_t nEndV = index; |
743 for (index = nStartV; index < nEndV; index++) | 738 const int32_t nDoubleLineHeight = 2 * fLineHeight; |
744 nSumV += nVerticalMask[index]; | 739 if ((nEndV - nStartV) < nDoubleLineHeight) |
745 nSumV /= nEndV - nStartV; | |
746 | |
747 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) | |
748 return TextOrientation::Horizontal; | 740 return TextOrientation::Horizontal; |
749 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) | 741 if ((nEndH - nStartH) < nDoubleLineHeight) |
750 return TextOrientation::Vertical; | 742 return TextOrientation::Vertical; |
751 | 743 |
744 const FX_FLOAT nSumH = MaskPercentFilled(nHorizontalMask, nStartH, nEndH); | |
dsinclair
2016/06/15 13:37:50
Doesn't this always come out to 1? Didn't we set e
Lei Zhang
2016/06/15 23:46:33
No, we didn't. There are gaps in between because the
dsinclair
2016/06/16 15:06:10
Right, missed the outer for() above, though we alw
Lei Zhang
2016/06/16 22:47:03
Not sure what you mean exactly. Are you still tryi
dsinclair
2016/06/16 23:39:05
Sorry, was trying to say my comment was wrong, and
| |
752 if (nSumH > 0.8f) | 745 if (nSumH > 0.8f) |
753 return TextOrientation::Horizontal; | 746 return TextOrientation::Horizontal; |
754 | 747 |
748 const FX_FLOAT nSumV = MaskPercentFilled(nVerticalMask, nStartV, nEndV); | |
755 if (nSumH > nSumV) | 749 if (nSumH > nSumV) |
756 return TextOrientation::Horizontal; | 750 return TextOrientation::Horizontal; |
757 if (nSumH < nSumV) | 751 if (nSumH < nSumV) |
758 return TextOrientation::Vertical; | 752 return TextOrientation::Vertical; |
759 return TextOrientation::Unknown; | 753 return TextOrientation::Unknown; |
760 } | 754 } |
761 | 755 |
762 void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode, | 756 void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode, |
763 const CFX_Matrix& formMatrix) { | 757 const CFX_Matrix& formMatrix) { |
764 PAGECHAR_INFO generateChar; | 758 PAGECHAR_INFO generateChar; |
(...skipping 1568 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2333 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; | 2327 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; |
2334 } | 2328 } |
2335 | 2329 |
2336 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { | 2330 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { |
2337 if (index >= m_LinkArray.size()) | 2331 if (index >= m_LinkArray.size()) |
2338 return std::vector<CFX_FloatRect>(); | 2332 return std::vector<CFX_FloatRect>(); |
2339 | 2333 |
2340 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, | 2334 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, |
2341 m_LinkArray[index].m_Count); | 2335 m_LinkArray[index].m_Count); |
2342 } | 2336 } |
OLD | NEW |