| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "core/src/fpdftext/text_int.h" | 7 #include "core/src/fpdftext/text_int.h" |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <cctype> | 10 #include <cctype> |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 128 case 0x96: | 128 case 0x96: |
| 129 case 0x97: | 129 case 0x97: |
| 130 case 0x98: | 130 case 0x98: |
| 131 case 0xfffe: | 131 case 0xfffe: |
| 132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; | 132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; |
| 133 default: | 133 default: |
| 134 return false; | 134 return false; |
| 135 } | 135 } |
| 136 } | 136 } |
| 137 | 137 |
| 138 FX_BOOL CPDF_TextPage::ParseTextPage() { | 138 void CPDF_TextPage::ParseTextPage() { |
| 139 m_bIsParsed = false; | 139 m_bIsParsed = false; |
| 140 if (!m_pPage) | |
| 141 return FALSE; | |
| 142 | |
| 143 m_TextBuf.Clear(); | 140 m_TextBuf.Clear(); |
| 144 m_CharList.clear(); | 141 m_CharList.clear(); |
| 145 m_pPreTextObj = NULL; | 142 m_pPreTextObj = NULL; |
| 146 ProcessObject(); | 143 ProcessObject(); |
| 144 |
| 147 m_bIsParsed = true; | 145 m_bIsParsed = true; |
| 148 m_CharIndex.clear(); | 146 m_CharIndex.clear(); |
| 149 int nCount = pdfium::CollectionSize<int>(m_CharList); | 147 int nCount = pdfium::CollectionSize<int>(m_CharList); |
| 150 if (nCount) { | 148 if (nCount) { |
| 151 m_CharIndex.push_back(0); | 149 m_CharIndex.push_back(0); |
| 152 } | 150 } |
| 153 for (int i = 0; i < nCount; i++) { | 151 for (int i = 0; i < nCount; i++) { |
| 154 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 152 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
| 155 FX_BOOL bNormal = FALSE; | 153 FX_BOOL bNormal = FALSE; |
| 156 const PAGECHAR_INFO& charinfo = m_CharList[i]; | 154 const PAGECHAR_INFO& charinfo = m_CharList[i]; |
| (...skipping 21 matching lines...) Expand all Loading... |
| 178 m_CharIndex[indexSize - 1] = i + 1; | 176 m_CharIndex[indexSize - 1] = i + 1; |
| 179 } else { | 177 } else { |
| 180 m_CharIndex.push_back(i + 1); | 178 m_CharIndex.push_back(i + 1); |
| 181 } | 179 } |
| 182 } | 180 } |
| 183 } | 181 } |
| 184 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 182 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
| 185 if (indexSize % 2) { | 183 if (indexSize % 2) { |
| 186 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); | 184 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); |
| 187 } | 185 } |
| 188 return TRUE; | |
| 189 } | 186 } |
| 190 | 187 |
| 191 int CPDF_TextPage::CountChars() const { | 188 int CPDF_TextPage::CountChars() const { |
| 192 return pdfium::CollectionSize<int>(m_CharList); | 189 return pdfium::CollectionSize<int>(m_CharList); |
| 193 } | 190 } |
| 194 | 191 |
| 195 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { | 192 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { |
| 196 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 193 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
| 197 int count = 0; | 194 int count = 0; |
| 198 for (int i = 0; i < indexSize; i += 2) { | 195 for (int i = 0; i < indexSize; i += 2) { |
| (...skipping 550 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 749 } else if (direction == FPDFTEXT_RIGHT) { | 746 } else if (direction == FPDFTEXT_RIGHT) { |
| 750 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { | 747 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { |
| 751 if (!IsLetter(m_CharList[breakPos].m_Unicode)) | 748 if (!IsLetter(m_CharList[breakPos].m_Unicode)) |
| 752 break; | 749 break; |
| 753 } | 750 } |
| 754 } | 751 } |
| 755 return breakPos; | 752 return breakPos; |
| 756 } | 753 } |
| 757 | 754 |
| 758 int32_t CPDF_TextPage::FindTextlineFlowDirection() { | 755 int32_t CPDF_TextPage::FindTextlineFlowDirection() { |
| 759 if (!m_pPage) { | 756 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); |
| 760 return -1; | 757 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); |
| 761 } | |
| 762 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); | |
| 763 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); | |
| 764 std::vector<uint8_t> nHorizontalMask(nPageWidth); | 758 std::vector<uint8_t> nHorizontalMask(nPageWidth); |
| 765 std::vector<uint8_t> nVerticalMask(nPageHeight); | 759 std::vector<uint8_t> nVerticalMask(nPageHeight); |
| 766 uint8_t* pDataH = nHorizontalMask.data(); | 760 uint8_t* pDataH = nHorizontalMask.data(); |
| 767 uint8_t* pDataV = nVerticalMask.data(); | 761 uint8_t* pDataV = nVerticalMask.data(); |
| 768 int32_t index = 0; | 762 int32_t index = 0; |
| 769 FX_FLOAT fLineHeight = 0.0f; | 763 FX_FLOAT fLineHeight = 0.0f; |
| 770 CPDF_PageObject* pPageObj = NULL; | 764 CPDF_PageObject* pPageObj = NULL; |
| 771 FX_POSITION pos = NULL; | 765 FX_POSITION pos = NULL; |
| 772 pos = m_pPage->GetFirstObjectPosition(); | 766 pos = m_pPage->GetPageObjectList()->GetHeadPosition(); |
| 773 if (!pos) { | 767 if (!pos) { |
| 774 return -1; | 768 return -1; |
| 775 } | 769 } |
| 776 while (pos) { | 770 while (pos) { |
| 777 pPageObj = m_pPage->GetNextObject(pos); | 771 pPageObj = m_pPage->GetPageObjectList()->GetNextObject(pos); |
| 778 if (NULL == pPageObj) { | 772 if (!pPageObj) { |
| 779 continue; | 773 continue; |
| 780 } | 774 } |
| 781 if (CPDF_PageObject::TEXT != pPageObj->m_Type) { | 775 if (CPDF_PageObject::TEXT != pPageObj->m_Type) { |
| 782 continue; | 776 continue; |
| 783 } | 777 } |
| 784 int32_t minH = | 778 int32_t minH = |
| 785 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; | 779 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; |
| 786 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth | 780 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth |
| 787 ? nPageWidth | 781 ? nPageWidth |
| 788 : (int32_t)pPageObj->m_Right; | 782 : (int32_t)pPageObj->m_Right; |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 847 if (nSumH - nSumV > 0.0f) { | 841 if (nSumH - nSumV > 0.0f) { |
| 848 return 0; | 842 return 0; |
| 849 } | 843 } |
| 850 if (nSumV - nSumH > 0.0f) { | 844 if (nSumV - nSumH > 0.0f) { |
| 851 return 1; | 845 return 1; |
| 852 } | 846 } |
| 853 return -1; | 847 return -1; |
| 854 } | 848 } |
| 855 | 849 |
| 856 void CPDF_TextPage::ProcessObject() { | 850 void CPDF_TextPage::ProcessObject() { |
| 857 CPDF_PageObject* pPageObj = NULL; | 851 FX_POSITION pos = m_pPage->GetPageObjectList()->GetHeadPosition(); |
| 858 if (!m_pPage) { | |
| 859 return; | |
| 860 } | |
| 861 FX_POSITION pos; | |
| 862 pos = m_pPage->GetFirstObjectPosition(); | |
| 863 if (!pos) { | 852 if (!pos) { |
| 864 return; | 853 return; |
| 865 } | 854 } |
| 866 m_TextlineDir = FindTextlineFlowDirection(); | 855 m_TextlineDir = FindTextlineFlowDirection(); |
| 867 int nCount = 0; | 856 int nCount = 0; |
| 868 while (pos) { | 857 while (pos) { |
| 869 pPageObj = m_pPage->GetNextObject(pos); | 858 CPDF_PageObject* pPageObj = |
| 859 m_pPage->GetPageObjectList()->GetNextObject(pos); |
| 870 if (pPageObj) { | 860 if (pPageObj) { |
| 871 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { | 861 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { |
| 872 CFX_Matrix matrix; | 862 CFX_Matrix matrix; |
| 873 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos); | 863 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos); |
| 874 nCount++; | 864 nCount++; |
| 875 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { | 865 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { |
| 876 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); | 866 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); |
| 877 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix); | 867 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix); |
| 878 } | 868 } |
| 879 } | 869 } |
| 880 pPageObj = NULL; | |
| 881 } | 870 } |
| 882 int count = m_LineObj.GetSize(); | 871 int count = m_LineObj.GetSize(); |
| 883 for (int i = 0; i < count; i++) { | 872 for (int i = 0; i < count; i++) { |
| 884 ProcessTextObject(m_LineObj.GetAt(i)); | 873 ProcessTextObject(m_LineObj.GetAt(i)); |
| 885 } | 874 } |
| 886 m_LineObj.RemoveAll(); | 875 m_LineObj.RemoveAll(); |
| 887 CloseTempLine(); | 876 CloseTempLine(); |
| 888 } | 877 } |
| 889 | 878 |
| 890 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, | 879 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, |
| 891 const CFX_Matrix& formMatrix) { | 880 const CFX_Matrix& formMatrix) { |
| 892 CPDF_PageObject* pPageObj = NULL; | 881 CPDF_PageObject* pPageObj = NULL; |
| 893 FX_POSITION pos; | 882 FX_POSITION pos; |
| 894 if (!pFormObj) { | 883 if (!pFormObj) { |
| 895 return; | 884 return; |
| 896 } | 885 } |
| 897 pos = pFormObj->m_pForm->GetFirstObjectPosition(); | 886 pos = pFormObj->m_pForm->GetPageObjectList()->GetHeadPosition(); |
| 898 if (!pos) { | 887 if (!pos) { |
| 899 return; | 888 return; |
| 900 } | 889 } |
| 901 CFX_Matrix curFormMatrix; | 890 CFX_Matrix curFormMatrix; |
| 902 curFormMatrix.Copy(pFormObj->m_FormMatrix); | 891 curFormMatrix.Copy(pFormObj->m_FormMatrix); |
| 903 curFormMatrix.Concat(formMatrix); | 892 curFormMatrix.Concat(formMatrix); |
| 904 while (pos) { | 893 while (pos) { |
| 905 pPageObj = pFormObj->m_pForm->GetNextObject(pos); | 894 pPageObj = pFormObj->m_pForm->GetPageObjectList()->GetNextObject(pos); |
| 906 if (pPageObj) { | 895 if (pPageObj) { |
| 907 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { | 896 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { |
| 908 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); | 897 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); |
| 909 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { | 898 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { |
| 910 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); | 899 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); |
| 911 } | 900 } |
| 912 } | 901 } |
| 913 pPageObj = NULL; | 902 pPageObj = NULL; |
| 914 } | 903 } |
| 915 } | 904 } |
| (...skipping 928 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1844 return TRUE; | 1833 return TRUE; |
| 1845 } | 1834 } |
| 1846 | 1835 |
| 1847 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, | 1836 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, |
| 1848 FX_POSITION ObjPos) { | 1837 FX_POSITION ObjPos) { |
| 1849 if (!pTextObj) { | 1838 if (!pTextObj) { |
| 1850 return FALSE; | 1839 return FALSE; |
| 1851 } | 1840 } |
| 1852 int i = 0; | 1841 int i = 0; |
| 1853 if (!ObjPos) { | 1842 if (!ObjPos) { |
| 1854 ObjPos = m_pPage->GetLastObjectPosition(); | 1843 ObjPos = m_pPage->GetPageObjectList()->GetTailPosition(); |
| 1855 } | 1844 } |
| 1856 CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos); | 1845 CPDF_PageObject* pObj = m_pPage->GetPageObjectList()->GetPrevObject(ObjPos); |
| 1857 while (i < 5 && ObjPos) { | 1846 while (i < 5 && ObjPos) { |
| 1858 pObj = m_pPage->GetPrevObject(ObjPos); | 1847 pObj = m_pPage->GetPageObjectList()->GetPrevObject(ObjPos); |
| 1859 if (pObj == pTextObj) { | 1848 if (pObj == pTextObj) { |
| 1860 continue; | 1849 continue; |
| 1861 } | 1850 } |
| 1862 if (pObj->m_Type != CPDF_PageObject::TEXT) { | 1851 if (pObj->m_Type != CPDF_PageObject::TEXT) { |
| 1863 continue; | 1852 continue; |
| 1864 } | 1853 } |
| 1865 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { | 1854 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { |
| 1866 return TRUE; | 1855 return TRUE; |
| 1867 } | 1856 } |
| 1868 i++; | 1857 i++; |
| (...skipping 666 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2535 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2524 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2536 return; | 2525 return; |
| 2537 } | 2526 } |
| 2538 CPDF_LinkExt* link = NULL; | 2527 CPDF_LinkExt* link = NULL; |
| 2539 link = m_LinkList.GetAt(index); | 2528 link = m_LinkList.GetAt(index); |
| 2540 if (!link) { | 2529 if (!link) { |
| 2541 return; | 2530 return; |
| 2542 } | 2531 } |
| 2543 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2532 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
| 2544 } | 2533 } |
| OLD | NEW |