Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(426)

Side by Side Diff: core/src/fpdftext/fpdf_text_int.cpp

Issue 1701073002: Split CPDF_PageObjectHolder off from CPDF_PageObjectList (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: m_pPage can never be null, remove checks. Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « core/src/fpdfapi/fpdf_render/render_int.h ('k') | core/src/fpdftext/text_int.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/src/fpdftext/text_int.h" 7 #include "core/src/fpdftext/text_int.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <cctype> 10 #include <cctype>
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
128 case 0x96: 128 case 0x96:
129 case 0x97: 129 case 0x97:
130 case 0x98: 130 case 0x98:
131 case 0xfffe: 131 case 0xfffe:
132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; 132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN;
133 default: 133 default:
134 return false; 134 return false;
135 } 135 }
136 } 136 }
137 137
138 FX_BOOL CPDF_TextPage::ParseTextPage() { 138 void CPDF_TextPage::ParseTextPage() {
139 m_bIsParsed = false; 139 m_bIsParsed = false;
140 if (!m_pPage)
141 return FALSE;
142
143 m_TextBuf.Clear(); 140 m_TextBuf.Clear();
144 m_CharList.clear(); 141 m_CharList.clear();
145 m_pPreTextObj = NULL; 142 m_pPreTextObj = NULL;
146 ProcessObject(); 143 ProcessObject();
144
147 m_bIsParsed = true; 145 m_bIsParsed = true;
148 m_CharIndex.clear(); 146 m_CharIndex.clear();
149 int nCount = pdfium::CollectionSize<int>(m_CharList); 147 int nCount = pdfium::CollectionSize<int>(m_CharList);
150 if (nCount) { 148 if (nCount) {
151 m_CharIndex.push_back(0); 149 m_CharIndex.push_back(0);
152 } 150 }
153 for (int i = 0; i < nCount; i++) { 151 for (int i = 0; i < nCount; i++) {
154 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); 152 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
155 FX_BOOL bNormal = FALSE; 153 FX_BOOL bNormal = FALSE;
156 const PAGECHAR_INFO& charinfo = m_CharList[i]; 154 const PAGECHAR_INFO& charinfo = m_CharList[i];
(...skipping 21 matching lines...) Expand all
178 m_CharIndex[indexSize - 1] = i + 1; 176 m_CharIndex[indexSize - 1] = i + 1;
179 } else { 177 } else {
180 m_CharIndex.push_back(i + 1); 178 m_CharIndex.push_back(i + 1);
181 } 179 }
182 } 180 }
183 } 181 }
184 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); 182 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
185 if (indexSize % 2) { 183 if (indexSize % 2) {
186 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); 184 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1);
187 } 185 }
188 return TRUE;
189 } 186 }
190 187
191 int CPDF_TextPage::CountChars() const { 188 int CPDF_TextPage::CountChars() const {
192 return pdfium::CollectionSize<int>(m_CharList); 189 return pdfium::CollectionSize<int>(m_CharList);
193 } 190 }
194 191
195 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { 192 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const {
196 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); 193 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
197 int count = 0; 194 int count = 0;
198 for (int i = 0; i < indexSize; i += 2) { 195 for (int i = 0; i < indexSize; i += 2) {
(...skipping 550 matching lines...) Expand 10 before | Expand all | Expand 10 after
749 } else if (direction == FPDFTEXT_RIGHT) { 746 } else if (direction == FPDFTEXT_RIGHT) {
750 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { 747 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) {
751 if (!IsLetter(m_CharList[breakPos].m_Unicode)) 748 if (!IsLetter(m_CharList[breakPos].m_Unicode))
752 break; 749 break;
753 } 750 }
754 } 751 }
755 return breakPos; 752 return breakPos;
756 } 753 }
757 754
758 int32_t CPDF_TextPage::FindTextlineFlowDirection() { 755 int32_t CPDF_TextPage::FindTextlineFlowDirection() {
759 if (!m_pPage) { 756 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth());
760 return -1; 757 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight());
761 }
762 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth();
763 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight();
764 std::vector<uint8_t> nHorizontalMask(nPageWidth); 758 std::vector<uint8_t> nHorizontalMask(nPageWidth);
765 std::vector<uint8_t> nVerticalMask(nPageHeight); 759 std::vector<uint8_t> nVerticalMask(nPageHeight);
766 uint8_t* pDataH = nHorizontalMask.data(); 760 uint8_t* pDataH = nHorizontalMask.data();
767 uint8_t* pDataV = nVerticalMask.data(); 761 uint8_t* pDataV = nVerticalMask.data();
768 int32_t index = 0; 762 int32_t index = 0;
769 FX_FLOAT fLineHeight = 0.0f; 763 FX_FLOAT fLineHeight = 0.0f;
770 CPDF_PageObject* pPageObj = NULL; 764 CPDF_PageObject* pPageObj = NULL;
771 FX_POSITION pos = NULL; 765 FX_POSITION pos = NULL;
772 pos = m_pPage->GetFirstObjectPosition(); 766 pos = m_pPage->GetPageObjectList()->GetHeadPosition();
773 if (!pos) { 767 if (!pos) {
774 return -1; 768 return -1;
775 } 769 }
776 while (pos) { 770 while (pos) {
777 pPageObj = m_pPage->GetNextObject(pos); 771 pPageObj = m_pPage->GetPageObjectList()->GetNextObject(pos);
778 if (NULL == pPageObj) { 772 if (!pPageObj) {
779 continue; 773 continue;
780 } 774 }
781 if (CPDF_PageObject::TEXT != pPageObj->m_Type) { 775 if (CPDF_PageObject::TEXT != pPageObj->m_Type) {
782 continue; 776 continue;
783 } 777 }
784 int32_t minH = 778 int32_t minH =
785 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; 779 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left;
786 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth 780 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth
787 ? nPageWidth 781 ? nPageWidth
788 : (int32_t)pPageObj->m_Right; 782 : (int32_t)pPageObj->m_Right;
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
847 if (nSumH - nSumV > 0.0f) { 841 if (nSumH - nSumV > 0.0f) {
848 return 0; 842 return 0;
849 } 843 }
850 if (nSumV - nSumH > 0.0f) { 844 if (nSumV - nSumH > 0.0f) {
851 return 1; 845 return 1;
852 } 846 }
853 return -1; 847 return -1;
854 } 848 }
855 849
856 void CPDF_TextPage::ProcessObject() { 850 void CPDF_TextPage::ProcessObject() {
857 CPDF_PageObject* pPageObj = NULL; 851 FX_POSITION pos = m_pPage->GetPageObjectList()->GetHeadPosition();
858 if (!m_pPage) {
859 return;
860 }
861 FX_POSITION pos;
862 pos = m_pPage->GetFirstObjectPosition();
863 if (!pos) { 852 if (!pos) {
864 return; 853 return;
865 } 854 }
866 m_TextlineDir = FindTextlineFlowDirection(); 855 m_TextlineDir = FindTextlineFlowDirection();
867 int nCount = 0; 856 int nCount = 0;
868 while (pos) { 857 while (pos) {
869 pPageObj = m_pPage->GetNextObject(pos); 858 CPDF_PageObject* pPageObj =
859 m_pPage->GetPageObjectList()->GetNextObject(pos);
870 if (pPageObj) { 860 if (pPageObj) {
871 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { 861 if (pPageObj->m_Type == CPDF_PageObject::TEXT) {
872 CFX_Matrix matrix; 862 CFX_Matrix matrix;
873 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos); 863 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos);
874 nCount++; 864 nCount++;
875 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { 865 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) {
876 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); 866 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0);
877 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix); 867 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix);
878 } 868 }
879 } 869 }
880 pPageObj = NULL;
881 } 870 }
882 int count = m_LineObj.GetSize(); 871 int count = m_LineObj.GetSize();
883 for (int i = 0; i < count; i++) { 872 for (int i = 0; i < count; i++) {
884 ProcessTextObject(m_LineObj.GetAt(i)); 873 ProcessTextObject(m_LineObj.GetAt(i));
885 } 874 }
886 m_LineObj.RemoveAll(); 875 m_LineObj.RemoveAll();
887 CloseTempLine(); 876 CloseTempLine();
888 } 877 }
889 878
890 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, 879 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj,
891 const CFX_Matrix& formMatrix) { 880 const CFX_Matrix& formMatrix) {
892 CPDF_PageObject* pPageObj = NULL; 881 CPDF_PageObject* pPageObj = NULL;
893 FX_POSITION pos; 882 FX_POSITION pos;
894 if (!pFormObj) { 883 if (!pFormObj) {
895 return; 884 return;
896 } 885 }
897 pos = pFormObj->m_pForm->GetFirstObjectPosition(); 886 pos = pFormObj->m_pForm->GetPageObjectList()->GetHeadPosition();
898 if (!pos) { 887 if (!pos) {
899 return; 888 return;
900 } 889 }
901 CFX_Matrix curFormMatrix; 890 CFX_Matrix curFormMatrix;
902 curFormMatrix.Copy(pFormObj->m_FormMatrix); 891 curFormMatrix.Copy(pFormObj->m_FormMatrix);
903 curFormMatrix.Concat(formMatrix); 892 curFormMatrix.Concat(formMatrix);
904 while (pos) { 893 while (pos) {
905 pPageObj = pFormObj->m_pForm->GetNextObject(pos); 894 pPageObj = pFormObj->m_pForm->GetPageObjectList()->GetNextObject(pos);
906 if (pPageObj) { 895 if (pPageObj) {
907 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { 896 if (pPageObj->m_Type == CPDF_PageObject::TEXT) {
908 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); 897 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos);
909 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { 898 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) {
910 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); 899 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix);
911 } 900 }
912 } 901 }
913 pPageObj = NULL; 902 pPageObj = NULL;
914 } 903 }
915 } 904 }
(...skipping 928 matching lines...) Expand 10 before | Expand all | Expand 10 after
1844 return TRUE; 1833 return TRUE;
1845 } 1834 }
1846 1835
1847 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, 1836 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
1848 FX_POSITION ObjPos) { 1837 FX_POSITION ObjPos) {
1849 if (!pTextObj) { 1838 if (!pTextObj) {
1850 return FALSE; 1839 return FALSE;
1851 } 1840 }
1852 int i = 0; 1841 int i = 0;
1853 if (!ObjPos) { 1842 if (!ObjPos) {
1854 ObjPos = m_pPage->GetLastObjectPosition(); 1843 ObjPos = m_pPage->GetPageObjectList()->GetTailPosition();
1855 } 1844 }
1856 CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos); 1845 CPDF_PageObject* pObj = m_pPage->GetPageObjectList()->GetPrevObject(ObjPos);
1857 while (i < 5 && ObjPos) { 1846 while (i < 5 && ObjPos) {
1858 pObj = m_pPage->GetPrevObject(ObjPos); 1847 pObj = m_pPage->GetPageObjectList()->GetPrevObject(ObjPos);
1859 if (pObj == pTextObj) { 1848 if (pObj == pTextObj) {
1860 continue; 1849 continue;
1861 } 1850 }
1862 if (pObj->m_Type != CPDF_PageObject::TEXT) { 1851 if (pObj->m_Type != CPDF_PageObject::TEXT) {
1863 continue; 1852 continue;
1864 } 1853 }
1865 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { 1854 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) {
1866 return TRUE; 1855 return TRUE;
1867 } 1856 }
1868 i++; 1857 i++;
(...skipping 666 matching lines...) Expand 10 before | Expand all | Expand 10 after
2535 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { 2524 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2536 return; 2525 return;
2537 } 2526 }
2538 CPDF_LinkExt* link = NULL; 2527 CPDF_LinkExt* link = NULL;
2539 link = m_LinkList.GetAt(index); 2528 link = m_LinkList.GetAt(index);
2540 if (!link) { 2529 if (!link) {
2541 return; 2530 return;
2542 } 2531 }
2543 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2532 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2544 } 2533 }
OLD | NEW
« no previous file with comments | « core/src/fpdfapi/fpdf_render/render_int.h ('k') | core/src/fpdftext/text_int.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698