Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <cctype> | 8 #include <cctype> |
| 9 #include <cwctype> | 9 #include <cwctype> |
| 10 #include <memory> | 10 #include <memory> |
| (...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 136 } | 136 } |
| 137 if (pDst) { | 137 if (pDst) { |
| 138 FX_WCHAR n = wFind; | 138 FX_WCHAR n = wFind; |
| 139 while (n--) { | 139 while (n--) { |
| 140 *pDst++ = *pMap++; | 140 *pDst++ = *pMap++; |
| 141 } | 141 } |
| 142 } | 142 } |
| 143 return (FX_STRSIZE)wFind; | 143 return (FX_STRSIZE)wFind; |
| 144 } | 144 } |
| 145 | 145 |
| 146 float MaskPercentFilled(const std::vector<bool>& mask, | |
| 147 int32_t start, | |
| 148 int32_t end) { | |
| 149 if (start >= end) | |
| 150 return 0; | |
| 151 float count = std::count_if(mask.begin() + start, mask.begin() + end, | |
| 152 [](bool r) { return r; }); | |
| 153 return count / (end - start); | |
| 154 } | |
| 155 | |
| 146 } // namespace | 156 } // namespace |
| 147 | 157 |
| 148 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) | 158 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) |
| 149 : m_pPage(pPage), | 159 : m_pPage(pPage), |
| 150 m_parserflag(flags), | 160 m_parserflag(flags), |
| 151 m_pPreTextObj(nullptr), | 161 m_pPreTextObj(nullptr), |
| 152 m_bIsParsed(false), | 162 m_bIsParsed(false), |
| 153 m_TextlineDir(TextOrientation::Unknown), | 163 m_TextlineDir(TextOrientation::Unknown), |
| 154 m_CurlineRect(0, 0, 0, 0) { | 164 m_CurlineRect(0, 0, 0, 0) { |
| 155 m_TextBuf.EstimateSize(0, 10240); | 165 m_TextBuf.EstimateSize(0, 10240); |
| (...skipping 514 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 670 segmentStatus = 2; | 680 segmentStatus = 2; |
| 671 m_Segments.Add(segment); | 681 m_Segments.Add(segment); |
| 672 segment.m_Start = 0; | 682 segment.m_Start = 0; |
| 673 segment.m_nCount = 0; | 683 segment.m_nCount = 0; |
| 674 } | 684 } |
| 675 return m_Segments.GetSize(); | 685 return m_Segments.GetSize(); |
| 676 } | 686 } |
| 677 | 687 |
| 678 CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation() | 688 CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation() |
| 679 const { | 689 const { |
| 680 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); | |
| 681 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); | |
| 682 std::vector<uint8_t> nHorizontalMask(nPageWidth); | |
| 683 std::vector<uint8_t> nVerticalMask(nPageHeight); | |
| 684 uint8_t* pDataH = nHorizontalMask.data(); | |
| 685 uint8_t* pDataV = nVerticalMask.data(); | |
| 686 int32_t index = 0; | |
| 687 FX_FLOAT fLineHeight = 0.0f; | |
| 688 if (m_pPage->GetPageObjectList()->empty()) | 690 if (m_pPage->GetPageObjectList()->empty()) |
| 689 return TextOrientation::Unknown; | 691 return TextOrientation::Unknown; |
| 690 | 692 |
| 691 for (auto& pPageObj : *m_pPage->GetPageObjectList()) { | 693 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); |
| 692 if (!pPageObj || !pPageObj->IsText()) | 694 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); |
| 695 std::vector<bool> nHorizontalMask(nPageWidth); | |
| 696 std::vector<bool> nVerticalMask(nPageHeight); | |
| 697 FX_FLOAT fLineHeight = 0.0f; | |
| 698 for (const auto& pPageObj : *m_pPage->GetPageObjectList()) { | |
| 699 if (!pPageObj->IsText()) | |
| 693 continue; | 700 continue; |
| 694 | 701 |
| 695 int32_t minH = | 702 int32_t minH = std::max(static_cast<int32_t>(pPageObj->m_Left), 0); |
| 696 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; | 703 int32_t maxH = |
| 697 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth | 704 std::min(static_cast<int32_t>(pPageObj->m_Right), nPageWidth); |
| 698 ? nPageWidth | 705 int32_t minV = std::max(static_cast<int32_t>(pPageObj->m_Bottom), 0); |
| 699 : (int32_t)pPageObj->m_Right; | 706 int32_t maxV = std::min(static_cast<int32_t>(pPageObj->m_Top), nPageHeight); |
| 700 int32_t minV = | |
| 701 (int32_t)pPageObj->m_Bottom < 0 ? 0 : (int32_t)pPageObj->m_Bottom; | |
| 702 int32_t maxV = (int32_t)pPageObj->m_Top > nPageHeight | |
| 703 ? nPageHeight | |
| 704 : (int32_t)pPageObj->m_Top; | |
| 705 if (minH >= maxH || minV >= maxV) | 707 if (minH >= maxH || minV >= maxV) |
| 706 continue; | 708 continue; |
| 707 | 709 |
| 708 FXSYS_memset(pDataH + minH, 1, maxH - minH); | 710 for (int32_t i = minH; i < maxH; ++i) |
| 709 FXSYS_memset(pDataV + minV, 1, maxV - minV); | 711 nHorizontalMask[i] = true; |
| 712 for (int32_t i = minV; i < maxV; ++i) | |
| 713 nVerticalMask[i] = true; | |
| 710 if (fLineHeight <= 0.0f) | 714 if (fLineHeight <= 0.0f) |
| 711 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom; | 715 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom; |
|
dsinclair
2016/06/15 13:37:50
What happens if we have different line heights on
Lei Zhang
2016/06/15 23:46:33
Everything here is heuristics based, so this could
dsinclair
2016/06/16 15:06:10
Acknowledged.
| |
| 712 } | 716 } |
| 713 int32_t nStartH = 0; | 717 int32_t index = 0; |
| 714 int32_t nEndH = 0; | |
| 715 FX_FLOAT nSumH = 0.0f; | |
| 716 for (index = 0; index < nPageWidth; index++) { | 718 for (index = 0; index < nPageWidth; index++) { |
| 717 if (1 == nHorizontalMask[index]) | 719 if (nHorizontalMask[index]) |
| 718 break; | 720 break; |
|
dsinclair
2016/06/15 13:37:50
Am I misreading this, or isn't index after this fo
Lei Zhang
2016/06/15 23:46:33
Which |minH|? It's the smallest |minH| value that
dsinclair
2016/06/16 15:06:10
Crap, there is another level of for() loop on 698
Lei Zhang
2016/06/16 22:47:03
Done.
| |
| 719 } | 721 } |
| 720 nStartH = index; | 722 const int32_t nStartH = index; |
| 721 for (index = nPageWidth; index > 0; index--) { | 723 for (index = nPageWidth; index > 0; index--) { |
| 722 if (1 == nHorizontalMask[index - 1]) | 724 if (nHorizontalMask[index - 1]) |
| 723 break; | 725 break; |
| 724 } | 726 } |
| 725 nEndH = index; | 727 const int32_t nEndH = index; |
|
dsinclair
2016/06/15 13:37:50
Shouldn't this be index - 1? Or, should the mask ve
Lei Zhang
2016/06/15 23:46:33
Hmm, this probably helps prevent a divide by 0? It
| |
| 726 for (index = nStartH; index < nEndH; index++) { | |
| 727 nSumH += nHorizontalMask[index]; | |
| 728 } | |
| 729 nSumH /= nEndH - nStartH; | |
| 730 int32_t nStartV = 0; | |
| 731 int32_t nEndV = 0; | |
| 732 FX_FLOAT nSumV = 0.0f; | |
| 733 for (index = 0; index < nPageHeight; index++) { | 728 for (index = 0; index < nPageHeight; index++) { |
| 734 if (1 == nVerticalMask[index]) | 729 if (nVerticalMask[index]) |
| 735 break; | 730 break; |
| 736 } | 731 } |
| 737 nStartV = index; | 732 const int32_t nStartV = index; |
| 738 for (index = nPageHeight; index > 0; index--) { | 733 for (index = nPageHeight; index > 0; index--) { |
| 739 if (1 == nVerticalMask[index - 1]) | 734 if (nVerticalMask[index - 1]) |
| 740 break; | 735 break; |
| 741 } | 736 } |
| 742 nEndV = index; | 737 const int32_t nEndV = index; |
| 743 for (index = nStartV; index < nEndV; index++) | 738 const int32_t nDoubleLineHeight = 2 * fLineHeight; |
| 744 nSumV += nVerticalMask[index]; | 739 if ((nEndV - nStartV) < nDoubleLineHeight) |
| 745 nSumV /= nEndV - nStartV; | |
| 746 | |
| 747 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) | |
| 748 return TextOrientation::Horizontal; | 740 return TextOrientation::Horizontal; |
| 749 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) | 741 if ((nEndH - nStartH) < nDoubleLineHeight) |
| 750 return TextOrientation::Vertical; | 742 return TextOrientation::Vertical; |
| 751 | 743 |
| 744 const FX_FLOAT nSumH = MaskPercentFilled(nHorizontalMask, nStartH, nEndH); | |
|
dsinclair
2016/06/15 13:37:50
Doesn't this always come out to 1? Didn't we set e
Lei Zhang
2016/06/15 23:46:33
No, we didn't. There are gaps in between because the
dsinclair
2016/06/16 15:06:10
Right, missed the outer for() above, though we alw
Lei Zhang
2016/06/16 22:47:03
Not sure what you mean exactly. Are you still tryi
dsinclair
2016/06/16 23:39:05
Sorry, was trying to say my comment was wrong, and
| |
| 752 if (nSumH > 0.8f) | 745 if (nSumH > 0.8f) |
| 753 return TextOrientation::Horizontal; | 746 return TextOrientation::Horizontal; |
| 754 | 747 |
| 748 const FX_FLOAT nSumV = MaskPercentFilled(nVerticalMask, nStartV, nEndV); | |
| 755 if (nSumH > nSumV) | 749 if (nSumH > nSumV) |
| 756 return TextOrientation::Horizontal; | 750 return TextOrientation::Horizontal; |
| 757 if (nSumH < nSumV) | 751 if (nSumH < nSumV) |
| 758 return TextOrientation::Vertical; | 752 return TextOrientation::Vertical; |
| 759 return TextOrientation::Unknown; | 753 return TextOrientation::Unknown; |
| 760 } | 754 } |
| 761 | 755 |
| 762 void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode, | 756 void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode, |
| 763 const CFX_Matrix& formMatrix) { | 757 const CFX_Matrix& formMatrix) { |
| 764 PAGECHAR_INFO generateChar; | 758 PAGECHAR_INFO generateChar; |
| (...skipping 1568 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2333 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; | 2327 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; |
| 2334 } | 2328 } |
| 2335 | 2329 |
| 2336 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { | 2330 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { |
| 2337 if (index >= m_LinkArray.size()) | 2331 if (index >= m_LinkArray.size()) |
| 2338 return std::vector<CFX_FloatRect>(); | 2332 return std::vector<CFX_FloatRect>(); |
| 2339 | 2333 |
| 2340 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, | 2334 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, |
| 2341 m_LinkArray[index].m_Count); | 2335 m_LinkArray[index].m_Count); |
| 2342 } | 2336 } |
| OLD | NEW |