OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "core/src/fpdftext/text_int.h" | 7 #include "core/src/fpdftext/text_int.h" |
8 | 8 |
9 #include <algorithm> | 9 #include <algorithm> |
10 #include <cctype> | 10 #include <cctype> |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
101 #define TEXT_BLANK_CHAR L' ' | 101 #define TEXT_BLANK_CHAR L' ' |
102 #define TEXT_LINEFEED_CHAR L'\n' | 102 #define TEXT_LINEFEED_CHAR L'\n' |
103 #define TEXT_RETURN_CHAR L'\r' | 103 #define TEXT_RETURN_CHAR L'\r' |
104 #define TEXT_EMPTY L"" | 104 #define TEXT_EMPTY L"" |
105 #define TEXT_BLANK L" " | 105 #define TEXT_BLANK L" " |
106 #define TEXT_RETURN_LINEFEED L"\r\n" | 106 #define TEXT_RETURN_LINEFEED L"\r\n" |
107 #define TEXT_LINEFEED L"\n" | 107 #define TEXT_LINEFEED L"\n" |
108 #define TEXT_CHARRATIO_GAPDELTA 0.070 | 108 #define TEXT_CHARRATIO_GAPDELTA 0.070 |
109 | 109 |
// Constructs a text page for |pPage| with parser options |flags|.
// (Each row below shows the OLD and NEW revision of the same source line; the
// only difference is the member rename m_pPage -> m_pPageObjectHolder.)
//
// Stores the page pointer and flags, resets the parse state (no previous text
// object, not parsed, text-line direction -1, empty current-line rect), calls
// m_TextBuf.EstimateSize(0, 10240), and fills m_DisplayMatrix from the page
// using the page width/height truncated to int.
//
// NOTE(review): |pPage| is dereferenced unconditionally in the body, while
// ParseTextPage() and ProcessObject() null-check the stored pointer — confirm
// that callers never pass a null page.
110 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) | 110 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) |
111 : m_pPage(pPage), | 111 : m_pPageObjectHolder(pPage), |
112 m_parserflag(flags), | 112 m_parserflag(flags), |
113 m_pPreTextObj(nullptr), | 113 m_pPreTextObj(nullptr), |
114 m_bIsParsed(false), | 114 m_bIsParsed(false), |
115 m_TextlineDir(-1), | 115 m_TextlineDir(-1), |
116 m_CurlineRect(0, 0, 0, 0) { | 116 m_CurlineRect(0, 0, 0, 0) { |
// presumably pre-sizes the text buffer; the meaning of the two arguments is
// not visible here — TODO confirm against the buffer class.
117 m_TextBuf.EstimateSize(0, 10240); | 117 m_TextBuf.EstimateSize(0, 10240); |
// Origin (0, 0) and the integer page size; the trailing 0 is presumably the
// rotation argument — TODO confirm against GetDisplayMatrix().
118 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 118 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
119 (int)pPage->GetPageHeight(), 0); | 119 (int)pPage->GetPageHeight(), 0); |
120 } | 120 } |
121 | 121 |
// Reports whether |charInfo| is treated as a control character.
// (Each row below shows the OLD and NEW revision of the same source line; this
// function is unchanged between the two.)
//
// Returns true only when charInfo.m_Unicode is one of the code points listed
// in the switch (0x2, 0x3, 0x93, 0x94, 0x96, 0x97, 0x98, 0xfffe) AND
// charInfo.m_Flag is not FPDFTEXT_CHAR_HYPHEN. Every other code point yields
// false.
122 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { | 122 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { |
123 switch (charInfo.m_Unicode) { | 123 switch (charInfo.m_Unicode) { |
124 case 0x2: | 124 case 0x2: |
125 case 0x3: | 125 case 0x3: |
126 case 0x93: | 126 case 0x93: |
127 case 0x94: | 127 case 0x94: |
128 case 0x96: | 128 case 0x96: |
129 case 0x97: | 129 case 0x97: |
130 case 0x98: | 130 case 0x98: |
131 case 0xfffe: | 131 case 0xfffe: |
// A listed code point that is flagged as a hyphen is not reported as a
// control character.
132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; | 132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; |
133 default: | 133 default: |
134 return false; | 134 return false; |
135 } | 135 } |
136 } | 136 } |
137 | 137 |
138 FX_BOOL CPDF_TextPage::ParseTextPage() { | 138 FX_BOOL CPDF_TextPage::ParseTextPage() { |
139 m_bIsParsed = false; | 139 m_bIsParsed = false; |
140 if (!m_pPage) | 140 if (!m_pPageObjectHolder) |
141 return FALSE; | 141 return FALSE; |
142 | 142 |
143 m_TextBuf.Clear(); | 143 m_TextBuf.Clear(); |
144 m_CharList.clear(); | 144 m_CharList.clear(); |
145 m_pPreTextObj = NULL; | 145 m_pPreTextObj = NULL; |
146 ProcessObject(); | 146 ProcessObject(); |
147 m_bIsParsed = true; | 147 m_bIsParsed = true; |
148 m_CharIndex.clear(); | 148 m_CharIndex.clear(); |
149 int nCount = pdfium::CollectionSize<int>(m_CharList); | 149 int nCount = pdfium::CollectionSize<int>(m_CharList); |
150 if (nCount) { | 150 if (nCount) { |
(...skipping 598 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
749 } else if (direction == FPDFTEXT_RIGHT) { | 749 } else if (direction == FPDFTEXT_RIGHT) { |
750 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { | 750 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { |
751 if (!IsLetter(m_CharList[breakPos].m_Unicode)) | 751 if (!IsLetter(m_CharList[breakPos].m_Unicode)) |
752 break; | 752 break; |
753 } | 753 } |
754 } | 754 } |
755 return breakPos; | 755 return breakPos; |
756 } | 756 } |
757 | 757 |
758 int32_t CPDF_TextPage::FindTextlineFlowDirection() { | 758 int32_t CPDF_TextPage::FindTextlineFlowDirection() { |
759 if (!m_pPage) { | 759 if (!m_pPageObjectHolder) { |
760 return -1; | 760 return -1; |
761 } | 761 } |
762 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); | 762 const int32_t nPageWidth = static_cast<int32_t>( |
763 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); | 763 static_cast<const CPDF_Page*>(m_pPageObjectHolder)->GetPageWidth()); |
| 764 const int32_t nPageHeight = static_cast<int32_t>( |
| 765 static_cast<const CPDF_Page*>(m_pPageObjectHolder)->GetPageHeight()); |
764 std::vector<uint8_t> nHorizontalMask(nPageWidth); | 766 std::vector<uint8_t> nHorizontalMask(nPageWidth); |
765 std::vector<uint8_t> nVerticalMask(nPageHeight); | 767 std::vector<uint8_t> nVerticalMask(nPageHeight); |
766 uint8_t* pDataH = nHorizontalMask.data(); | 768 uint8_t* pDataH = nHorizontalMask.data(); |
767 uint8_t* pDataV = nVerticalMask.data(); | 769 uint8_t* pDataV = nVerticalMask.data(); |
768 int32_t index = 0; | 770 int32_t index = 0; |
769 FX_FLOAT fLineHeight = 0.0f; | 771 FX_FLOAT fLineHeight = 0.0f; |
770 CPDF_PageObject* pPageObj = NULL; | 772 CPDF_PageObject* pPageObj = NULL; |
771 FX_POSITION pos = NULL; | 773 FX_POSITION pos = NULL; |
772 pos = m_pPage->GetFirstObjectPosition(); | 774 pos = m_pPageObjectHolder->GetPageObjectList()->GetHeadPosition(); |
773 if (!pos) { | 775 if (!pos) { |
774 return -1; | 776 return -1; |
775 } | 777 } |
776 while (pos) { | 778 while (pos) { |
777 pPageObj = m_pPage->GetNextObject(pos); | 779 pPageObj = m_pPageObjectHolder->GetPageObjectList()->GetNextObject(pos); |
778 if (NULL == pPageObj) { | 780 if (NULL == pPageObj) { |
779 continue; | 781 continue; |
780 } | 782 } |
781 if (CPDF_PageObject::TEXT != pPageObj->m_Type) { | 783 if (CPDF_PageObject::TEXT != pPageObj->m_Type) { |
782 continue; | 784 continue; |
783 } | 785 } |
784 int32_t minH = | 786 int32_t minH = |
785 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; | 787 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; |
786 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth | 788 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth |
787 ? nPageWidth | 789 ? nPageWidth |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
848 return 0; | 850 return 0; |
849 } | 851 } |
850 if (nSumV - nSumH > 0.0f) { | 852 if (nSumV - nSumH > 0.0f) { |
851 return 1; | 853 return 1; |
852 } | 854 } |
853 return -1; | 855 return -1; |
854 } | 856 } |
855 | 857 |
// Walks the top-level page objects and feeds text content into the extractor.
// (Each row below shows the OLD and NEW revision of the same source line. OLD
// iterates through m_pPage directly; NEW goes through
// m_pPageObjectHolder->GetPageObjectList().)
//
// Returns early when there is no page/holder or the object list is empty.
// Otherwise it sets m_TextlineDir from FindTextlineFlowDirection(), then for
// each non-null object:
//   - TEXT objects go to ProcessTextObject() with a default-constructed matrix
//     and the current iteration position;
//   - FORM objects go to ProcessFormObject() with the matrix (1, 0, 0, 1, 0, 0).
// After the loop, every entry still held in m_LineObj is passed to
// ProcessTextObject(), m_LineObj is emptied, and CloseTempLine() is called.
856 void CPDF_TextPage::ProcessObject() { | 858 void CPDF_TextPage::ProcessObject() { |
857 CPDF_PageObject* pPageObj = NULL; | 859 CPDF_PageObject* pPageObj = NULL; |
858 if (!m_pPage) { | 860 if (!m_pPageObjectHolder) { |
859 return; | 861 return; |
860 } | 862 } |
861 FX_POSITION pos; | 863 FX_POSITION pos; |
862 pos = m_pPage->GetFirstObjectPosition(); | 864 pos = m_pPageObjectHolder->GetPageObjectList()->GetHeadPosition(); |
863 if (!pos) { | 865 if (!pos) { |
864 return; | 866 return; |
865 } | 867 } |
866 m_TextlineDir = FindTextlineFlowDirection(); | 868 m_TextlineDir = FindTextlineFlowDirection(); |
// nCount counts the TEXT objects seen; it is not read again in this function.
867 int nCount = 0; | 869 int nCount = 0; |
868 while (pos) { | 870 while (pos) { |
// presumably GetNextObject() advances |pos| by reference, so the |pos| handed
// to ProcessTextObject() below already refers to the following object — TODO
// confirm against the list class.
869 pPageObj = m_pPage->GetNextObject(pos); | 871 pPageObj = m_pPageObjectHolder->GetPageObjectList()->GetNextObject(pos); |
870 if (pPageObj) { | 872 if (pPageObj) { |
871 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { | 873 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { |
872 CFX_Matrix matrix; | 874 CFX_Matrix matrix; |
873 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos); | 875 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos); |
874 nCount++; | 876 nCount++; |
875 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { | 877 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { |
876 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); | 878 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); |
877 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix); | 879 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix); |
878 } | 880 } |
879 } | 881 } |
880 pPageObj = NULL; | 882 pPageObj = NULL; |
881 } | 883 } |
// Flush whatever is still queued in m_LineObj before closing the line.
882 int count = m_LineObj.GetSize(); | 884 int count = m_LineObj.GetSize(); |
883 for (int i = 0; i < count; i++) { | 885 for (int i = 0; i < count; i++) { |
884 ProcessTextObject(m_LineObj.GetAt(i)); | 886 ProcessTextObject(m_LineObj.GetAt(i)); |
885 } | 887 } |
886 m_LineObj.RemoveAll(); | 888 m_LineObj.RemoveAll(); |
887 CloseTempLine(); | 889 CloseTempLine(); |
888 } | 890 } |
889 | 891 |
// Processes the objects contained in the form object |pFormObj|, descending
// into nested forms by calling itself.
// (Each row below shows the OLD and NEW revision of the same source line. OLD
// iterates through m_pForm directly; NEW goes through
// m_pForm->GetPageObjectList().)
//
// |formMatrix| is the matrix supplied by the caller. It is combined with
// pFormObj->m_FormMatrix (Copy, then Concat) and the result is passed along
// with every TEXT child to ProcessTextObject() and with every FORM child to
// the recursive call. Returns immediately when |pFormObj| is null or its
// object list is empty.
//
// NOTE(review): pFormObj->m_pForm is dereferenced without a null check, and
// the recursion has no depth limit in this function — confirm that both are
// guaranteed elsewhere.
890 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, | 892 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, |
891 const CFX_Matrix& formMatrix) { | 893 const CFX_Matrix& formMatrix) { |
892 CPDF_PageObject* pPageObj = NULL; | 894 CPDF_PageObject* pPageObj = NULL; |
893 FX_POSITION pos; | 895 FX_POSITION pos; |
894 if (!pFormObj) { | 896 if (!pFormObj) { |
895 return; | 897 return; |
896 } | 898 } |
897 pos = pFormObj->m_pForm->GetFirstObjectPosition(); | 899 pos = pFormObj->m_pForm->GetPageObjectList()->GetHeadPosition(); |
898 if (!pos) { | 900 if (!pos) { |
899 return; | 901 return; |
900 } | 902 } |
// Start from this form's own matrix, then concatenate the caller's matrix.
901 CFX_Matrix curFormMatrix; | 903 CFX_Matrix curFormMatrix; |
902 curFormMatrix.Copy(pFormObj->m_FormMatrix); | 904 curFormMatrix.Copy(pFormObj->m_FormMatrix); |
903 curFormMatrix.Concat(formMatrix); | 905 curFormMatrix.Concat(formMatrix); |
904 while (pos) { | 906 while (pos) { |
905 pPageObj = pFormObj->m_pForm->GetNextObject(pos); | 907 pPageObj = pFormObj->m_pForm->GetPageObjectList()->GetNextObject(pos); |
906 if (pPageObj) { | 908 if (pPageObj) { |
907 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { | 909 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { |
908 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); | 910 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); |
909 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { | 911 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { |
910 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); | 912 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); |
911 } | 913 } |
912 } | 914 } |
913 pPageObj = NULL; | 915 pPageObj = NULL; |
914 } | 916 } |
915 } | 917 } |
(...skipping 928 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1844 return TRUE; | 1846 return TRUE; |
1845 } | 1847 } |
1846 | 1848 |
1847 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, | 1849 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, |
1848 FX_POSITION ObjPos) { | 1850 FX_POSITION ObjPos) { |
1849 if (!pTextObj) { | 1851 if (!pTextObj) { |
1850 return FALSE; | 1852 return FALSE; |
1851 } | 1853 } |
1852 int i = 0; | 1854 int i = 0; |
1853 if (!ObjPos) { | 1855 if (!ObjPos) { |
1854 ObjPos = m_pPage->GetLastObjectPosition(); | 1856 ObjPos = m_pPageObjectHolder->GetPageObjectList()->GetTailPosition(); |
1855 } | 1857 } |
1856 CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos); | 1858 CPDF_PageObject* pObj = |
| 1859 m_pPageObjectHolder->GetPageObjectList()->GetPrevObject(ObjPos); |
1857 while (i < 5 && ObjPos) { | 1860 while (i < 5 && ObjPos) { |
1858 pObj = m_pPage->GetPrevObject(ObjPos); | 1861 pObj = m_pPageObjectHolder->GetPageObjectList()->GetPrevObject(ObjPos); |
1859 if (pObj == pTextObj) { | 1862 if (pObj == pTextObj) { |
1860 continue; | 1863 continue; |
1861 } | 1864 } |
1862 if (pObj->m_Type != CPDF_PageObject::TEXT) { | 1865 if (pObj->m_Type != CPDF_PageObject::TEXT) { |
1863 continue; | 1866 continue; |
1864 } | 1867 } |
1865 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { | 1868 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { |
1866 return TRUE; | 1869 return TRUE; |
1867 } | 1870 } |
1868 i++; | 1871 i++; |
(...skipping 666 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2535 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2538 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2536 return; | 2539 return; |
2537 } | 2540 } |
2538 CPDF_LinkExt* link = NULL; | 2541 CPDF_LinkExt* link = NULL; |
2539 link = m_LinkList.GetAt(index); | 2542 link = m_LinkList.GetAt(index); |
2540 if (!link) { | 2543 if (!link) { |
2541 return; | 2544 return; |
2542 } | 2545 } |
2543 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2546 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2544 } | 2547 } |
OLD | NEW |