OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "core/src/fpdftext/text_int.h" | 7 #include "core/src/fpdftext/text_int.h" |
8 | 8 |
9 #include <algorithm> | 9 #include <algorithm> |
10 #include <cctype> | 10 #include <cctype> |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
128 case 0x96: | 128 case 0x96: |
129 case 0x97: | 129 case 0x97: |
130 case 0x98: | 130 case 0x98: |
131 case 0xfffe: | 131 case 0xfffe: |
132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; | 132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; |
133 default: | 133 default: |
134 return false; | 134 return false; |
135 } | 135 } |
136 } | 136 } |
137 | 137 |
138 FX_BOOL CPDF_TextPage::ParseTextPage() { | 138 void CPDF_TextPage::ParseTextPage() { |
139 m_bIsParsed = false; | 139 m_bIsParsed = false; |
140 if (!m_pPage) | |
141 return FALSE; | |
142 | |
143 m_TextBuf.Clear(); | 140 m_TextBuf.Clear(); |
144 m_CharList.clear(); | 141 m_CharList.clear(); |
145 m_pPreTextObj = NULL; | 142 m_pPreTextObj = NULL; |
146 ProcessObject(); | 143 ProcessObject(); |
| 144 |
147 m_bIsParsed = true; | 145 m_bIsParsed = true; |
148 m_CharIndex.clear(); | 146 m_CharIndex.clear(); |
149 int nCount = pdfium::CollectionSize<int>(m_CharList); | 147 int nCount = pdfium::CollectionSize<int>(m_CharList); |
150 if (nCount) { | 148 if (nCount) { |
151 m_CharIndex.push_back(0); | 149 m_CharIndex.push_back(0); |
152 } | 150 } |
153 for (int i = 0; i < nCount; i++) { | 151 for (int i = 0; i < nCount; i++) { |
154 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 152 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
155 FX_BOOL bNormal = FALSE; | 153 FX_BOOL bNormal = FALSE; |
156 const PAGECHAR_INFO& charinfo = m_CharList[i]; | 154 const PAGECHAR_INFO& charinfo = m_CharList[i]; |
(...skipping 21 matching lines...) Expand all Loading... |
178 m_CharIndex[indexSize - 1] = i + 1; | 176 m_CharIndex[indexSize - 1] = i + 1; |
179 } else { | 177 } else { |
180 m_CharIndex.push_back(i + 1); | 178 m_CharIndex.push_back(i + 1); |
181 } | 179 } |
182 } | 180 } |
183 } | 181 } |
184 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 182 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
185 if (indexSize % 2) { | 183 if (indexSize % 2) { |
186 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); | 184 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); |
187 } | 185 } |
188 return TRUE; | |
189 } | 186 } |
190 | 187 |
191 int CPDF_TextPage::CountChars() const { | 188 int CPDF_TextPage::CountChars() const { |
192 return pdfium::CollectionSize<int>(m_CharList); | 189 return pdfium::CollectionSize<int>(m_CharList); |
193 } | 190 } |
194 | 191 |
195 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { | 192 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { |
196 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 193 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
197 int count = 0; | 194 int count = 0; |
198 for (int i = 0; i < indexSize; i += 2) { | 195 for (int i = 0; i < indexSize; i += 2) { |
(...skipping 550 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
749 } else if (direction == FPDFTEXT_RIGHT) { | 746 } else if (direction == FPDFTEXT_RIGHT) { |
750 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { | 747 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { |
751 if (!IsLetter(m_CharList[breakPos].m_Unicode)) | 748 if (!IsLetter(m_CharList[breakPos].m_Unicode)) |
752 break; | 749 break; |
753 } | 750 } |
754 } | 751 } |
755 return breakPos; | 752 return breakPos; |
756 } | 753 } |
757 | 754 |
758 int32_t CPDF_TextPage::FindTextlineFlowDirection() { | 755 int32_t CPDF_TextPage::FindTextlineFlowDirection() { |
759 if (!m_pPage) { | 756 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); |
760 return -1; | 757 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); |
761 } | |
762 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); | |
763 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); | |
764 std::vector<uint8_t> nHorizontalMask(nPageWidth); | 758 std::vector<uint8_t> nHorizontalMask(nPageWidth); |
765 std::vector<uint8_t> nVerticalMask(nPageHeight); | 759 std::vector<uint8_t> nVerticalMask(nPageHeight); |
766 uint8_t* pDataH = nHorizontalMask.data(); | 760 uint8_t* pDataH = nHorizontalMask.data(); |
767 uint8_t* pDataV = nVerticalMask.data(); | 761 uint8_t* pDataV = nVerticalMask.data(); |
768 int32_t index = 0; | 762 int32_t index = 0; |
769 FX_FLOAT fLineHeight = 0.0f; | 763 FX_FLOAT fLineHeight = 0.0f; |
770 CPDF_PageObject* pPageObj = NULL; | 764 CPDF_PageObject* pPageObj = NULL; |
771 FX_POSITION pos = NULL; | 765 FX_POSITION pos = NULL; |
772 pos = m_pPage->GetFirstObjectPosition(); | 766 pos = m_pPage->GetPageObjectList()->GetHeadPosition(); |
773 if (!pos) { | 767 if (!pos) { |
774 return -1; | 768 return -1; |
775 } | 769 } |
776 while (pos) { | 770 while (pos) { |
777 pPageObj = m_pPage->GetNextObject(pos); | 771 pPageObj = m_pPage->GetPageObjectList()->GetNextObject(pos); |
778 if (NULL == pPageObj) { | 772 if (!pPageObj) { |
779 continue; | 773 continue; |
780 } | 774 } |
781 if (CPDF_PageObject::TEXT != pPageObj->m_Type) { | 775 if (CPDF_PageObject::TEXT != pPageObj->m_Type) { |
782 continue; | 776 continue; |
783 } | 777 } |
784 int32_t minH = | 778 int32_t minH = |
785 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; | 779 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; |
786 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth | 780 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth |
787 ? nPageWidth | 781 ? nPageWidth |
788 : (int32_t)pPageObj->m_Right; | 782 : (int32_t)pPageObj->m_Right; |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
847 if (nSumH - nSumV > 0.0f) { | 841 if (nSumH - nSumV > 0.0f) { |
848 return 0; | 842 return 0; |
849 } | 843 } |
850 if (nSumV - nSumH > 0.0f) { | 844 if (nSumV - nSumH > 0.0f) { |
851 return 1; | 845 return 1; |
852 } | 846 } |
853 return -1; | 847 return -1; |
854 } | 848 } |
855 | 849 |
856 void CPDF_TextPage::ProcessObject() { | 850 void CPDF_TextPage::ProcessObject() { |
857 CPDF_PageObject* pPageObj = NULL; | 851 FX_POSITION pos = m_pPage->GetPageObjectList()->GetHeadPosition(); |
858 if (!m_pPage) { | |
859 return; | |
860 } | |
861 FX_POSITION pos; | |
862 pos = m_pPage->GetFirstObjectPosition(); | |
863 if (!pos) { | 852 if (!pos) { |
864 return; | 853 return; |
865 } | 854 } |
866 m_TextlineDir = FindTextlineFlowDirection(); | 855 m_TextlineDir = FindTextlineFlowDirection(); |
867 int nCount = 0; | 856 int nCount = 0; |
868 while (pos) { | 857 while (pos) { |
869 pPageObj = m_pPage->GetNextObject(pos); | 858 CPDF_PageObject* pPageObj = |
| 859 m_pPage->GetPageObjectList()->GetNextObject(pos); |
870 if (pPageObj) { | 860 if (pPageObj) { |
871 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { | 861 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { |
872 CFX_Matrix matrix; | 862 CFX_Matrix matrix; |
873 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos); | 863 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos); |
874 nCount++; | 864 nCount++; |
875 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { | 865 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { |
876 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); | 866 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); |
877 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix); | 867 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix); |
878 } | 868 } |
879 } | 869 } |
880 pPageObj = NULL; | |
881 } | 870 } |
882 int count = m_LineObj.GetSize(); | 871 int count = m_LineObj.GetSize(); |
883 for (int i = 0; i < count; i++) { | 872 for (int i = 0; i < count; i++) { |
884 ProcessTextObject(m_LineObj.GetAt(i)); | 873 ProcessTextObject(m_LineObj.GetAt(i)); |
885 } | 874 } |
886 m_LineObj.RemoveAll(); | 875 m_LineObj.RemoveAll(); |
887 CloseTempLine(); | 876 CloseTempLine(); |
888 } | 877 } |
889 | 878 |
890 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, | 879 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, |
891 const CFX_Matrix& formMatrix) { | 880 const CFX_Matrix& formMatrix) { |
892 CPDF_PageObject* pPageObj = NULL; | 881 CPDF_PageObject* pPageObj = NULL; |
893 FX_POSITION pos; | 882 FX_POSITION pos; |
894 if (!pFormObj) { | 883 if (!pFormObj) { |
895 return; | 884 return; |
896 } | 885 } |
897 pos = pFormObj->m_pForm->GetFirstObjectPosition(); | 886 pos = pFormObj->m_pForm->GetPageObjectList()->GetHeadPosition(); |
898 if (!pos) { | 887 if (!pos) { |
899 return; | 888 return; |
900 } | 889 } |
901 CFX_Matrix curFormMatrix; | 890 CFX_Matrix curFormMatrix; |
902 curFormMatrix.Copy(pFormObj->m_FormMatrix); | 891 curFormMatrix.Copy(pFormObj->m_FormMatrix); |
903 curFormMatrix.Concat(formMatrix); | 892 curFormMatrix.Concat(formMatrix); |
904 while (pos) { | 893 while (pos) { |
905 pPageObj = pFormObj->m_pForm->GetNextObject(pos); | 894 pPageObj = pFormObj->m_pForm->GetPageObjectList()->GetNextObject(pos); |
906 if (pPageObj) { | 895 if (pPageObj) { |
907 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { | 896 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { |
908 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); | 897 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); |
909 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { | 898 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { |
910 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); | 899 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); |
911 } | 900 } |
912 } | 901 } |
913 pPageObj = NULL; | 902 pPageObj = NULL; |
914 } | 903 } |
915 } | 904 } |
(...skipping 928 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1844 return TRUE; | 1833 return TRUE; |
1845 } | 1834 } |
1846 | 1835 |
1847 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, | 1836 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, |
1848 FX_POSITION ObjPos) { | 1837 FX_POSITION ObjPos) { |
1849 if (!pTextObj) { | 1838 if (!pTextObj) { |
1850 return FALSE; | 1839 return FALSE; |
1851 } | 1840 } |
1852 int i = 0; | 1841 int i = 0; |
1853 if (!ObjPos) { | 1842 if (!ObjPos) { |
1854 ObjPos = m_pPage->GetLastObjectPosition(); | 1843 ObjPos = m_pPage->GetPageObjectList()->GetTailPosition(); |
1855 } | 1844 } |
1856 CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos); | 1845 CPDF_PageObject* pObj = m_pPage->GetPageObjectList()->GetPrevObject(ObjPos); |
1857 while (i < 5 && ObjPos) { | 1846 while (i < 5 && ObjPos) { |
1858 pObj = m_pPage->GetPrevObject(ObjPos); | 1847 pObj = m_pPage->GetPageObjectList()->GetPrevObject(ObjPos); |
1859 if (pObj == pTextObj) { | 1848 if (pObj == pTextObj) { |
1860 continue; | 1849 continue; |
1861 } | 1850 } |
1862 if (pObj->m_Type != CPDF_PageObject::TEXT) { | 1851 if (pObj->m_Type != CPDF_PageObject::TEXT) { |
1863 continue; | 1852 continue; |
1864 } | 1853 } |
1865 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { | 1854 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { |
1866 return TRUE; | 1855 return TRUE; |
1867 } | 1856 } |
1868 i++; | 1857 i++; |
(...skipping 666 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2535 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2524 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2536 return; | 2525 return; |
2537 } | 2526 } |
2538 CPDF_LinkExt* link = NULL; | 2527 CPDF_LinkExt* link = NULL; |
2539 link = m_LinkList.GetAt(index); | 2528 link = m_LinkList.GetAt(index); |
2540 if (!link) { | 2529 if (!link) { |
2541 return; | 2530 return; |
2542 } | 2531 } |
2543 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2532 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2544 } | 2533 } |
OLD | NEW |