Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(852)

Side by Side Diff: core/src/fpdftext/fpdf_text_int.cpp

Issue 1701073002: Split CPDF_PageObjectHolder off from CPDF_PageObjectList (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Few other minor renames. Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/src/fpdftext/text_int.h" 7 #include "core/src/fpdftext/text_int.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <cctype> 10 #include <cctype>
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
101 #define TEXT_BLANK_CHAR L' ' 101 #define TEXT_BLANK_CHAR L' '
102 #define TEXT_LINEFEED_CHAR L'\n' 102 #define TEXT_LINEFEED_CHAR L'\n'
103 #define TEXT_RETURN_CHAR L'\r' 103 #define TEXT_RETURN_CHAR L'\r'
104 #define TEXT_EMPTY L"" 104 #define TEXT_EMPTY L""
105 #define TEXT_BLANK L" " 105 #define TEXT_BLANK L" "
106 #define TEXT_RETURN_LINEFEED L"\r\n" 106 #define TEXT_RETURN_LINEFEED L"\r\n"
107 #define TEXT_LINEFEED L"\n" 107 #define TEXT_LINEFEED L"\n"
108 #define TEXT_CHARRATIO_GAPDELTA 0.070 108 #define TEXT_CHARRATIO_GAPDELTA 0.070
109 109
110 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) 110 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags)
111 : m_pPage(pPage), 111 : m_pPageObjectHolder(pPage),
112 m_parserflag(flags), 112 m_parserflag(flags),
113 m_pPreTextObj(nullptr), 113 m_pPreTextObj(nullptr),
114 m_bIsParsed(false), 114 m_bIsParsed(false),
115 m_TextlineDir(-1), 115 m_TextlineDir(-1),
116 m_CurlineRect(0, 0, 0, 0) { 116 m_CurlineRect(0, 0, 0, 0) {
117 m_TextBuf.EstimateSize(0, 10240); 117 m_TextBuf.EstimateSize(0, 10240);
118 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), 118 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
119 (int)pPage->GetPageHeight(), 0); 119 (int)pPage->GetPageHeight(), 0);
120 } 120 }
121 121
122 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { 122 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) {
123 switch (charInfo.m_Unicode) { 123 switch (charInfo.m_Unicode) {
124 case 0x2: 124 case 0x2:
125 case 0x3: 125 case 0x3:
126 case 0x93: 126 case 0x93:
127 case 0x94: 127 case 0x94:
128 case 0x96: 128 case 0x96:
129 case 0x97: 129 case 0x97:
130 case 0x98: 130 case 0x98:
131 case 0xfffe: 131 case 0xfffe:
132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; 132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN;
133 default: 133 default:
134 return false; 134 return false;
135 } 135 }
136 } 136 }
137 137
138 FX_BOOL CPDF_TextPage::ParseTextPage() { 138 FX_BOOL CPDF_TextPage::ParseTextPage() {
139 m_bIsParsed = false; 139 m_bIsParsed = false;
140 if (!m_pPage) 140 if (!m_pPageObjectHolder)
141 return FALSE; 141 return FALSE;
142 142
143 m_TextBuf.Clear(); 143 m_TextBuf.Clear();
144 m_CharList.clear(); 144 m_CharList.clear();
145 m_pPreTextObj = NULL; 145 m_pPreTextObj = NULL;
146 ProcessObject(); 146 ProcessObject();
147 m_bIsParsed = true; 147 m_bIsParsed = true;
148 m_CharIndex.clear(); 148 m_CharIndex.clear();
149 int nCount = pdfium::CollectionSize<int>(m_CharList); 149 int nCount = pdfium::CollectionSize<int>(m_CharList);
150 if (nCount) { 150 if (nCount) {
(...skipping 598 matching lines...) Expand 10 before | Expand all | Expand 10 after
749 } else if (direction == FPDFTEXT_RIGHT) { 749 } else if (direction == FPDFTEXT_RIGHT) {
750 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { 750 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) {
751 if (!IsLetter(m_CharList[breakPos].m_Unicode)) 751 if (!IsLetter(m_CharList[breakPos].m_Unicode))
752 break; 752 break;
753 } 753 }
754 } 754 }
755 return breakPos; 755 return breakPos;
756 } 756 }
757 757
758 int32_t CPDF_TextPage::FindTextlineFlowDirection() { 758 int32_t CPDF_TextPage::FindTextlineFlowDirection() {
759 if (!m_pPage) { 759 if (!m_pPageObjectHolder) {
760 return -1; 760 return -1;
761 } 761 }
762 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); 762 const int32_t nPageWidth = static_cast<int32_t>(
763 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); 763 static_cast<const CPDF_Page*>(m_pPageObjectHolder)->GetPageWidth());
764 const int32_t nPageHeight = static_cast<int32_t>(
765 static_cast<const CPDF_Page*>(m_pPageObjectHolder)->GetPageHeight());
764 std::vector<uint8_t> nHorizontalMask(nPageWidth); 766 std::vector<uint8_t> nHorizontalMask(nPageWidth);
765 std::vector<uint8_t> nVerticalMask(nPageHeight); 767 std::vector<uint8_t> nVerticalMask(nPageHeight);
766 uint8_t* pDataH = nHorizontalMask.data(); 768 uint8_t* pDataH = nHorizontalMask.data();
767 uint8_t* pDataV = nVerticalMask.data(); 769 uint8_t* pDataV = nVerticalMask.data();
768 int32_t index = 0; 770 int32_t index = 0;
769 FX_FLOAT fLineHeight = 0.0f; 771 FX_FLOAT fLineHeight = 0.0f;
770 CPDF_PageObject* pPageObj = NULL; 772 CPDF_PageObject* pPageObj = NULL;
771 FX_POSITION pos = NULL; 773 FX_POSITION pos = NULL;
772 pos = m_pPage->GetFirstObjectPosition(); 774 pos = m_pPageObjectHolder->GetPageObjectList()->GetHeadPosition();
773 if (!pos) { 775 if (!pos) {
774 return -1; 776 return -1;
775 } 777 }
776 while (pos) { 778 while (pos) {
777 pPageObj = m_pPage->GetNextObject(pos); 779 pPageObj = m_pPageObjectHolder->GetPageObjectList()->GetNextObject(pos);
778 if (NULL == pPageObj) { 780 if (NULL == pPageObj) {
779 continue; 781 continue;
780 } 782 }
781 if (CPDF_PageObject::TEXT != pPageObj->m_Type) { 783 if (CPDF_PageObject::TEXT != pPageObj->m_Type) {
782 continue; 784 continue;
783 } 785 }
784 int32_t minH = 786 int32_t minH =
785 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; 787 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left;
786 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth 788 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth
787 ? nPageWidth 789 ? nPageWidth
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
848 return 0; 850 return 0;
849 } 851 }
850 if (nSumV - nSumH > 0.0f) { 852 if (nSumV - nSumH > 0.0f) {
851 return 1; 853 return 1;
852 } 854 }
853 return -1; 855 return -1;
854 } 856 }
855 857
856 void CPDF_TextPage::ProcessObject() { 858 void CPDF_TextPage::ProcessObject() {
857 CPDF_PageObject* pPageObj = NULL; 859 CPDF_PageObject* pPageObj = NULL;
858 if (!m_pPage) { 860 if (!m_pPageObjectHolder) {
859 return; 861 return;
860 } 862 }
861 FX_POSITION pos; 863 FX_POSITION pos;
862 pos = m_pPage->GetFirstObjectPosition(); 864 pos = m_pPageObjectHolder->GetPageObjectList()->GetHeadPosition();
863 if (!pos) { 865 if (!pos) {
864 return; 866 return;
865 } 867 }
866 m_TextlineDir = FindTextlineFlowDirection(); 868 m_TextlineDir = FindTextlineFlowDirection();
867 int nCount = 0; 869 int nCount = 0;
868 while (pos) { 870 while (pos) {
869 pPageObj = m_pPage->GetNextObject(pos); 871 pPageObj = m_pPageObjectHolder->GetPageObjectList()->GetNextObject(pos);
870 if (pPageObj) { 872 if (pPageObj) {
871 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { 873 if (pPageObj->m_Type == CPDF_PageObject::TEXT) {
872 CFX_Matrix matrix; 874 CFX_Matrix matrix;
873 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos); 875 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos);
874 nCount++; 876 nCount++;
875 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { 877 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) {
876 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); 878 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0);
877 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix); 879 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix);
878 } 880 }
879 } 881 }
880 pPageObj = NULL; 882 pPageObj = NULL;
881 } 883 }
882 int count = m_LineObj.GetSize(); 884 int count = m_LineObj.GetSize();
883 for (int i = 0; i < count; i++) { 885 for (int i = 0; i < count; i++) {
884 ProcessTextObject(m_LineObj.GetAt(i)); 886 ProcessTextObject(m_LineObj.GetAt(i));
885 } 887 }
886 m_LineObj.RemoveAll(); 888 m_LineObj.RemoveAll();
887 CloseTempLine(); 889 CloseTempLine();
888 } 890 }
889 891
890 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, 892 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj,
891 const CFX_Matrix& formMatrix) { 893 const CFX_Matrix& formMatrix) {
892 CPDF_PageObject* pPageObj = NULL; 894 CPDF_PageObject* pPageObj = NULL;
893 FX_POSITION pos; 895 FX_POSITION pos;
894 if (!pFormObj) { 896 if (!pFormObj) {
895 return; 897 return;
896 } 898 }
897 pos = pFormObj->m_pForm->GetFirstObjectPosition(); 899 pos = pFormObj->m_pForm->GetPageObjectList()->GetHeadPosition();
898 if (!pos) { 900 if (!pos) {
899 return; 901 return;
900 } 902 }
901 CFX_Matrix curFormMatrix; 903 CFX_Matrix curFormMatrix;
902 curFormMatrix.Copy(pFormObj->m_FormMatrix); 904 curFormMatrix.Copy(pFormObj->m_FormMatrix);
903 curFormMatrix.Concat(formMatrix); 905 curFormMatrix.Concat(formMatrix);
904 while (pos) { 906 while (pos) {
905 pPageObj = pFormObj->m_pForm->GetNextObject(pos); 907 pPageObj = pFormObj->m_pForm->GetPageObjectList()->GetNextObject(pos);
906 if (pPageObj) { 908 if (pPageObj) {
907 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { 909 if (pPageObj->m_Type == CPDF_PageObject::TEXT) {
908 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); 910 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos);
909 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { 911 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) {
910 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); 912 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix);
911 } 913 }
912 } 914 }
913 pPageObj = NULL; 915 pPageObj = NULL;
914 } 916 }
915 } 917 }
(...skipping 928 matching lines...) Expand 10 before | Expand all | Expand 10 after
1844 return TRUE; 1846 return TRUE;
1845 } 1847 }
1846 1848
1847 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, 1849 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
1848 FX_POSITION ObjPos) { 1850 FX_POSITION ObjPos) {
1849 if (!pTextObj) { 1851 if (!pTextObj) {
1850 return FALSE; 1852 return FALSE;
1851 } 1853 }
1852 int i = 0; 1854 int i = 0;
1853 if (!ObjPos) { 1855 if (!ObjPos) {
1854 ObjPos = m_pPage->GetLastObjectPosition(); 1856 ObjPos = m_pPageObjectHolder->GetPageObjectList()->GetTailPosition();
1855 } 1857 }
1856 CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos); 1858 CPDF_PageObject* pObj =
1859 m_pPageObjectHolder->GetPageObjectList()->GetPrevObject(ObjPos);
1857 while (i < 5 && ObjPos) { 1860 while (i < 5 && ObjPos) {
1858 pObj = m_pPage->GetPrevObject(ObjPos); 1861 pObj = m_pPageObjectHolder->GetPageObjectList()->GetPrevObject(ObjPos);
1859 if (pObj == pTextObj) { 1862 if (pObj == pTextObj) {
1860 continue; 1863 continue;
1861 } 1864 }
1862 if (pObj->m_Type != CPDF_PageObject::TEXT) { 1865 if (pObj->m_Type != CPDF_PageObject::TEXT) {
1863 continue; 1866 continue;
1864 } 1867 }
1865 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { 1868 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) {
1866 return TRUE; 1869 return TRUE;
1867 } 1870 }
1868 i++; 1871 i++;
(...skipping 666 matching lines...) Expand 10 before | Expand all | Expand 10 after
2535 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { 2538 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2536 return; 2539 return;
2537 } 2540 }
2538 CPDF_LinkExt* link = NULL; 2541 CPDF_LinkExt* link = NULL;
2539 link = m_LinkList.GetAt(index); 2542 link = m_LinkList.GetAt(index);
2540 if (!link) { 2543 if (!link) {
2541 return; 2544 return;
2542 } 2545 }
2543 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2546 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2544 } 2547 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698