Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(146)

Side by Side Diff: core/fpdftext/fpdf_text_int.cpp

Issue 2066043002: Simplify CPDF_TextPage::FindTextlineFlowOrientation(). (Closed) Base URL: https://pdfium.googlesource.com/pdfium@master
Patch Set: Better variable name Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | fpdfsdk/fpdf_flatten.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <cctype> 8 #include <cctype>
9 #include <cwctype> 9 #include <cwctype>
10 #include <memory> 10 #include <memory>
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
136 } 136 }
137 if (pDst) { 137 if (pDst) {
138 FX_WCHAR n = wFind; 138 FX_WCHAR n = wFind;
139 while (n--) { 139 while (n--) {
140 *pDst++ = *pMap++; 140 *pDst++ = *pMap++;
141 } 141 }
142 } 142 }
143 return (FX_STRSIZE)wFind; 143 return (FX_STRSIZE)wFind;
144 } 144 }
145 145
146 float MaskPercentFilled(const std::vector<bool>& mask,
147 int32_t start,
148 int32_t end) {
149 if (start >= end)
150 return 0;
151 float count = std::count_if(mask.begin() + start, mask.begin() + end,
152 [](bool r) { return r; });
153 return count / (end - start);
154 }
155
146 } // namespace 156 } // namespace
147 157
148 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) 158 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags)
149 : m_pPage(pPage), 159 : m_pPage(pPage),
150 m_parserflag(flags), 160 m_parserflag(flags),
151 m_pPreTextObj(nullptr), 161 m_pPreTextObj(nullptr),
152 m_bIsParsed(false), 162 m_bIsParsed(false),
153 m_TextlineDir(TextOrientation::Unknown), 163 m_TextlineDir(TextOrientation::Unknown),
154 m_CurlineRect(0, 0, 0, 0) { 164 m_CurlineRect(0, 0, 0, 0) {
155 m_TextBuf.EstimateSize(0, 10240); 165 m_TextBuf.EstimateSize(0, 10240);
(...skipping 514 matching lines...) Expand 10 before | Expand all | Expand 10 after
670 segmentStatus = 2; 680 segmentStatus = 2;
671 m_Segments.Add(segment); 681 m_Segments.Add(segment);
672 segment.m_Start = 0; 682 segment.m_Start = 0;
673 segment.m_nCount = 0; 683 segment.m_nCount = 0;
674 } 684 }
675 return m_Segments.GetSize(); 685 return m_Segments.GetSize();
676 } 686 }
677 687
678 CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation() 688 CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation()
679 const { 689 const {
680 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth());
681 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight());
682 std::vector<uint8_t> nHorizontalMask(nPageWidth);
683 std::vector<uint8_t> nVerticalMask(nPageHeight);
684 uint8_t* pDataH = nHorizontalMask.data();
685 uint8_t* pDataV = nVerticalMask.data();
686 int32_t index = 0;
687 FX_FLOAT fLineHeight = 0.0f;
688 if (m_pPage->GetPageObjectList()->empty()) 690 if (m_pPage->GetPageObjectList()->empty())
689 return TextOrientation::Unknown; 691 return TextOrientation::Unknown;
690 692
691 for (auto& pPageObj : *m_pPage->GetPageObjectList()) { 693 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth());
692 if (!pPageObj || !pPageObj->IsText()) 694 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight());
695 std::vector<bool> nHorizontalMask(nPageWidth);
696 std::vector<bool> nVerticalMask(nPageHeight);
697 FX_FLOAT fLineHeight = 0.0f;
698 for (const auto& pPageObj : *m_pPage->GetPageObjectList()) {
699 if (!pPageObj->IsText())
693 continue; 700 continue;
694 701
695 int32_t minH = 702 int32_t minH = std::max(static_cast<int32_t>(pPageObj->m_Left), 0);
696 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; 703 int32_t maxH =
697 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth 704 std::min(static_cast<int32_t>(pPageObj->m_Right), nPageWidth);
698 ? nPageWidth 705 int32_t minV = std::max(static_cast<int32_t>(pPageObj->m_Bottom), 0);
699 : (int32_t)pPageObj->m_Right; 706 int32_t maxV = std::min(static_cast<int32_t>(pPageObj->m_Top), nPageHeight);
700 int32_t minV =
701 (int32_t)pPageObj->m_Bottom < 0 ? 0 : (int32_t)pPageObj->m_Bottom;
702 int32_t maxV = (int32_t)pPageObj->m_Top > nPageHeight
703 ? nPageHeight
704 : (int32_t)pPageObj->m_Top;
705 if (minH >= maxH || minV >= maxV) 707 if (minH >= maxH || minV >= maxV)
706 continue; 708 continue;
707 709
708 FXSYS_memset(pDataH + minH, 1, maxH - minH); 710 for (int32_t i = minH; i < maxH; ++i)
709 FXSYS_memset(pDataV + minV, 1, maxV - minV); 711 nHorizontalMask[i] = true;
712 for (int32_t i = minV; i < maxV; ++i)
713 nVerticalMask[i] = true;
710 if (fLineHeight <= 0.0f) 714 if (fLineHeight <= 0.0f)
711 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom; 715 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom;
dsinclair 2016/06/15 13:37:50 What happens if we have different line heights on
Lei Zhang 2016/06/15 23:46:33 Everything here is heuristics based, so this could
dsinclair 2016/06/16 15:06:10 Acknowledged.
712 } 716 }
713 int32_t nStartH = 0; 717 int32_t index = 0;
714 int32_t nEndH = 0;
715 FX_FLOAT nSumH = 0.0f;
716 for (index = 0; index < nPageWidth; index++) { 718 for (index = 0; index < nPageWidth; index++) {
717 if (1 == nHorizontalMask[index]) 719 if (nHorizontalMask[index])
718 break; 720 break;
dsinclair 2016/06/15 13:37:50 Am I misreading this, or isn't index after this fo
Lei Zhang 2016/06/15 23:46:33 Which |minH|? It's the smallest |minH| value that
dsinclair 2016/06/16 15:06:10 Crap, there is another level of for() loop on 698
Lei Zhang 2016/06/16 22:47:03 Done.
719 } 721 }
720 nStartH = index; 722 const int32_t nStartH = index;
721 for (index = nPageWidth; index > 0; index--) { 723 for (index = nPageWidth; index > 0; index--) {
722 if (1 == nHorizontalMask[index - 1]) 724 if (nHorizontalMask[index - 1])
723 break; 725 break;
724 } 726 }
725 nEndH = index; 727 const int32_t nEndH = index;
dsinclair 2016/06/15 13:37:50 Shouldn't this be index - 1? Or, should the mask ve
Lei Zhang 2016/06/15 23:46:33 Hmm, this probably helps prevent a divide by 0? It
726 for (index = nStartH; index < nEndH; index++) {
727 nSumH += nHorizontalMask[index];
728 }
729 nSumH /= nEndH - nStartH;
730 int32_t nStartV = 0;
731 int32_t nEndV = 0;
732 FX_FLOAT nSumV = 0.0f;
733 for (index = 0; index < nPageHeight; index++) { 728 for (index = 0; index < nPageHeight; index++) {
734 if (1 == nVerticalMask[index]) 729 if (nVerticalMask[index])
735 break; 730 break;
736 } 731 }
737 nStartV = index; 732 const int32_t nStartV = index;
738 for (index = nPageHeight; index > 0; index--) { 733 for (index = nPageHeight; index > 0; index--) {
739 if (1 == nVerticalMask[index - 1]) 734 if (nVerticalMask[index - 1])
740 break; 735 break;
741 } 736 }
742 nEndV = index; 737 const int32_t nEndV = index;
743 for (index = nStartV; index < nEndV; index++) 738 const int32_t nDoubleLineHeight = 2 * fLineHeight;
744 nSumV += nVerticalMask[index]; 739 if ((nEndV - nStartV) < nDoubleLineHeight)
745 nSumV /= nEndV - nStartV;
746
747 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight))
748 return TextOrientation::Horizontal; 740 return TextOrientation::Horizontal;
749 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) 741 if ((nEndH - nStartH) < nDoubleLineHeight)
750 return TextOrientation::Vertical; 742 return TextOrientation::Vertical;
751 743
744 const FX_FLOAT nSumH = MaskPercentFilled(nHorizontalMask, nStartH, nEndH);
dsinclair 2016/06/15 13:37:50 Doesn't this always come out to 1? Didn't we set e
Lei Zhang 2016/06/15 23:46:33 No, we didn't. There are gaps in between because the
dsinclair 2016/06/16 15:06:10 Right, missed the outer for() above, though we alw
Lei Zhang 2016/06/16 22:47:03 Not sure what you mean exactly. Are you still tryi
dsinclair 2016/06/16 23:39:05 Sorry, was trying to say my comment was wrong, and
752 if (nSumH > 0.8f) 745 if (nSumH > 0.8f)
753 return TextOrientation::Horizontal; 746 return TextOrientation::Horizontal;
754 747
748 const FX_FLOAT nSumV = MaskPercentFilled(nVerticalMask, nStartV, nEndV);
755 if (nSumH > nSumV) 749 if (nSumH > nSumV)
756 return TextOrientation::Horizontal; 750 return TextOrientation::Horizontal;
757 if (nSumH < nSumV) 751 if (nSumH < nSumV)
758 return TextOrientation::Vertical; 752 return TextOrientation::Vertical;
759 return TextOrientation::Unknown; 753 return TextOrientation::Unknown;
760 } 754 }
761 755
762 void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode, 756 void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode,
763 const CFX_Matrix& formMatrix) { 757 const CFX_Matrix& formMatrix) {
764 PAGECHAR_INFO generateChar; 758 PAGECHAR_INFO generateChar;
(...skipping 1568 matching lines...) Expand 10 before | Expand all | Expand 10 after
2333 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; 2327 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L"";
2334 } 2328 }
2335 2329
2336 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { 2330 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const {
2337 if (index >= m_LinkArray.size()) 2331 if (index >= m_LinkArray.size())
2338 return std::vector<CFX_FloatRect>(); 2332 return std::vector<CFX_FloatRect>();
2339 2333
2340 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, 2334 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start,
2341 m_LinkArray[index].m_Count); 2335 m_LinkArray[index].m_Count);
2342 } 2336 }
OLDNEW
« no previous file with comments | « no previous file | fpdfsdk/fpdf_flatten.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698