OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "core/src/fpdftext/text_int.h" | 7 #include "core/src/fpdftext/text_int.h" |
8 | 8 |
9 #include <algorithm> | 9 #include <algorithm> |
10 #include <cctype> | 10 #include <cctype> |
(...skipping 743 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
754 | 754 |
// Review-diff rows: each line below is "OLD-lineno code | NEW-lineno code |".
// CPDF_TextPage::FindTextlineFlowDirection(): marks the horizontal and vertical
// extent of every TEXT page object in two byte masks sized by the integer page
// width and height, then compares the covered spans and coverage ratios to pick
// a return code. Returns -1 when the page object list is empty or when neither
// coverage ratio exceeds the other; otherwise returns 0 or 1.
// (Presumably 0 = horizontal text lines, 1 = vertical -- TODO confirm against
// the code that consumes m_TextlineDir.)
// NEW side: replaces the FX_POSITION/GetNextObject walk with a range-based for
// over the page object list and drops braces around single-statement bodies.
// NOTE(review): the masks are sized directly from the signed nPageWidth /
// nPageHeight; a negative page dimension is not guarded here -- confirm it
// cannot occur.
755 int32_t CPDF_TextPage::FindTextlineFlowDirection() { | 755 int32_t CPDF_TextPage::FindTextlineFlowDirection() { |
756 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); | 756 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); |
757 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); | 757 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); |
758 std::vector<uint8_t> nHorizontalMask(nPageWidth); | 758 std::vector<uint8_t> nHorizontalMask(nPageWidth); |
759 std::vector<uint8_t> nVerticalMask(nPageHeight); | 759 std::vector<uint8_t> nVerticalMask(nPageHeight); |
760 uint8_t* pDataH = nHorizontalMask.data(); | 760 uint8_t* pDataH = nHorizontalMask.data(); |
761 uint8_t* pDataV = nVerticalMask.data(); | 761 uint8_t* pDataV = nVerticalMask.data(); |
762 int32_t index = 0; | 762 int32_t index = 0; |
763 FX_FLOAT fLineHeight = 0.0f; | 763 FX_FLOAT fLineHeight = 0.0f; |
764 CPDF_PageObject* pPageObj = NULL; | 764 if (m_pPage->GetPageObjectList()->empty()) |
765 FX_POSITION pos = NULL; | |
766 pos = m_pPage->GetPageObjectList()->GetHeadPosition(); | |
767 if (!pos) { | |
768 return -1; | 765 return -1; |
769 } | 766 |
770 while (pos) { | 767 for (auto& pPageObj : *m_pPage->GetPageObjectList()) { |
771 pPageObj = m_pPage->GetPageObjectList()->GetNextObject(pos); | 768 if (!pPageObj || pPageObj->m_Type != CPDF_PageObject::TEXT) |
772 if (!pPageObj) { | |
773 continue; | 769 continue; |
774 } | 770 |
775 if (CPDF_PageObject::TEXT != pPageObj->m_Type) { | |
776 continue; | |
777 } | |
// Clamp the object's box to [0, page width] x [0, page height] so the memset
// calls below stay inside the two masks.
778 int32_t minH = | 771 int32_t minH = |
779 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; | 772 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; |
780 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth | 773 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth |
781 ? nPageWidth | 774 ? nPageWidth |
782 : (int32_t)pPageObj->m_Right; | 775 : (int32_t)pPageObj->m_Right; |
783 int32_t minV = | 776 int32_t minV = |
784 (int32_t)pPageObj->m_Bottom < 0 ? 0 : (int32_t)pPageObj->m_Bottom; | 777 (int32_t)pPageObj->m_Bottom < 0 ? 0 : (int32_t)pPageObj->m_Bottom; |
785 int32_t maxV = (int32_t)pPageObj->m_Top > nPageHeight | 778 int32_t maxV = (int32_t)pPageObj->m_Top > nPageHeight |
786 ? nPageHeight | 779 ? nPageHeight |
787 : (int32_t)pPageObj->m_Top; | 780 : (int32_t)pPageObj->m_Top; |
// Skip objects whose clamped box is empty in either direction.
788 if (minH >= maxH || minV >= maxV) { | 781 if (minH >= maxH || minV >= maxV) |
789 continue; | 782 continue; |
790 } | 783 |
791 FXSYS_memset(pDataH + minH, 1, maxH - minH); | 784 FXSYS_memset(pDataH + minH, 1, maxH - minH); |
792 FXSYS_memset(pDataV + minV, 1, maxV - minV); | 785 FXSYS_memset(pDataV + minV, 1, maxV - minV); |
// Line height comes from the first text object that reaches this point; later
// objects only overwrite it while it is still <= 0.
793 if (fLineHeight <= 0.0f) { | 786 if (fLineHeight <= 0.0f) |
794 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom; | 787 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom; |
795 } | |
796 pPageObj = NULL; | |
797 } | 788 } |
798 int32_t nStartH = 0; | 789 int32_t nStartH = 0; |
799 int32_t nEndH = 0; | 790 int32_t nEndH = 0; |
800 FX_FLOAT nSumH = 0.0f; | 791 FX_FLOAT nSumH = 0.0f; |
// nStartH = first marked column (nPageWidth if none); nEndH = one past the last
// marked column (0 if none).
801 for (index = 0; index < nPageWidth; index++) | 792 for (index = 0; index < nPageWidth; index++) { |
802 if (1 == nHorizontalMask[index]) { | 793 if (1 == nHorizontalMask[index]) |
803 break; | 794 break; |
804 } | 795 } |
805 nStartH = index; | 796 nStartH = index; |
806 for (index = nPageWidth; index > 0; index--) | 797 for (index = nPageWidth; index > 0; index--) { |
807 if (1 == nHorizontalMask[index - 1]) { | 798 if (1 == nHorizontalMask[index - 1]) |
808 break; | 799 break; |
809 } | 800 } |
810 nEndH = index; | 801 nEndH = index; |
811 for (index = nStartH; index < nEndH; index++) { | 802 for (index = nStartH; index < nEndH; index++) { |
812 nSumH += nHorizontalMask[index]; | 803 nSumH += nHorizontalMask[index]; |
813 } | 804 } |
// nSumH becomes the fraction of [nStartH, nEndH) that is covered.
// NOTE(review): if no column was marked the divisor is (0 - nPageWidth), and it
// is zero for a zero-width page; the same applies to nSumV below.
814 nSumH /= nEndH - nStartH; | 805 nSumH /= nEndH - nStartH; |
815 int32_t nStartV = 0; | 806 int32_t nStartV = 0; |
816 int32_t nEndV = 0; | 807 int32_t nEndV = 0; |
817 FX_FLOAT nSumV = 0.0f; | 808 FX_FLOAT nSumV = 0.0f; |
// Same scan as above, applied to the vertical mask.
818 for (index = 0; index < nPageHeight; index++) | 809 for (index = 0; index < nPageHeight; index++) { |
819 if (1 == nVerticalMask[index]) { | 810 if (1 == nVerticalMask[index]) |
820 break; | 811 break; |
821 } | 812 } |
822 nStartV = index; | 813 nStartV = index; |
823 for (index = nPageHeight; index > 0; index--) | 814 for (index = nPageHeight; index > 0; index--) { |
824 if (1 == nVerticalMask[index - 1]) { | 815 if (1 == nVerticalMask[index - 1]) |
825 break; | 816 break; |
826 } | 817 } |
827 nEndV = index; | 818 nEndV = index; |
828 for (index = nStartV; index < nEndV; index++) { | 819 for (index = nStartV; index < nEndV; index++) { |
829 nSumV += nVerticalMask[index]; | 820 nSumV += nVerticalMask[index]; |
830 } | 821 } |
831 nSumV /= nEndV - nStartV; | 822 nSumV /= nEndV - nStartV; |
// Decision order: vertical span under two line heights -> 0; horizontal span
// under two line heights -> 1; horizontal coverage above 0.8 -> 0; otherwise
// whichever coverage ratio is larger decides, and -1 if neither is larger.
832 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) { | 823 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) { |
833 return 0; | 824 return 0; |
834 } | 825 } |
835 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) { | 826 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) { |
836 return 1; | 827 return 1; |
837 } | 828 } |
838 if (nSumH > 0.8f) { | 829 if (nSumH > 0.8f) { |
839 return 0; | 830 return 0; |
840 } | 831 } |
841 if (nSumH - nSumV > 0.0f) { | 832 if (nSumH - nSumV > 0.0f) { |
842 return 0; | 833 return 0; |
843 } | 834 } |
844 if (nSumV - nSumH > 0.0f) { | 835 if (nSumV - nSumH > 0.0f) { |
845 return 1; | 836 return 1; |
846 } | 837 } |
847 return -1; | 838 return -1; |
848 } | 839 } |
849 | 840 |
// Review-diff rows: each line below is "OLD-lineno code | NEW-lineno code |".
// CPDF_TextPage::ProcessObject(): returns immediately when the page object list
// is empty. Otherwise it stores FindTextlineFlowDirection() in m_TextlineDir,
// walks every page object (TEXT objects go to ProcessTextObject, FORM objects
// to ProcessFormObject with an identity matrix), then runs
// ProcessTextObject(PDFTEXT_Obj) over everything collected in m_LineObj,
// clears m_LineObj and calls CloseTempLine().
// NEW side: iterates with begin()/end() iterators and passes the list plus the
// current iterator to ProcessTextObject in place of the OLD FX_POSITION; the
// OLD nCount counter (incremented but never read in this function) is removed.
850 void CPDF_TextPage::ProcessObject() { | 841 void CPDF_TextPage::ProcessObject() { |
851 FX_POSITION pos = m_pPage->GetPageObjectList()->GetHeadPosition(); | 842 if (m_pPage->GetPageObjectList()->empty()) |
852 if (!pos) { | |
853 return; | 843 return; |
854 } | 844 |
855 m_TextlineDir = FindTextlineFlowDirection(); | 845 m_TextlineDir = FindTextlineFlowDirection(); |
856 int nCount = 0; | 846 const CPDF_PageObjectList* pObjList = m_pPage->GetPageObjectList(); |
857 while (pos) { | 847 for (auto it = pObjList->begin(); it != pObjList->end(); ++it) { |
858 CPDF_PageObject* pPageObj = | 848 if (CPDF_PageObject* pObj = it->get()) { |
859 m_pPage->GetPageObjectList()->GetNextObject(pos); | 849 if (pObj->m_Type == CPDF_PageObject::TEXT) { |
860 if (pPageObj) { | |
861 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { | |
// Top-level text objects are processed with a default-constructed matrix.
862 CFX_Matrix matrix; | 850 CFX_Matrix matrix; |
863 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos); | 851 ProcessTextObject(static_cast<CPDF_TextObject*>(pObj), matrix, pObjList, |
864 nCount++; | 852 it); |
865 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { | 853 } else if (pObj->m_Type == CPDF_PageObject::FORM) { |
866 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); | 854 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); |
867 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix); | 855 ProcessFormObject(static_cast<CPDF_FormObject*>(pObj), formMatrix); |
868 } | 856 } |
869 } | 857 } |
870 } | 858 } |
// NOTE(review): OLD caches m_LineObj.GetSize() before the loop, NEW re-reads it
// on every iteration. The two differ only if ProcessTextObject(PDFTEXT_Obj)
// changes m_LineObj -- confirm it does not.
871 int count = m_LineObj.GetSize(); | 859 for (int i = 0; i < m_LineObj.GetSize(); i++) |
872 for (int i = 0; i < count; i++) { | |
873 ProcessTextObject(m_LineObj.GetAt(i)); | 860 ProcessTextObject(m_LineObj.GetAt(i)); |
874 } | 861 |
875 m_LineObj.RemoveAll(); | 862 m_LineObj.RemoveAll(); |
876 CloseTempLine(); | 863 CloseTempLine(); |
877 } | 864 } |
878 | 865 |
// Review-diff rows: each line below is "OLD-lineno code | NEW-lineno code |".
// CPDF_TextPage::ProcessFormObject(): walks the page objects of
// pFormObj->m_pForm. The matrix used for children is pFormObj->m_FormMatrix
// concatenated with the incoming formMatrix. TEXT children are passed to
// ProcessTextObject with that matrix; nested FORM children recurse with it.
// Returns without doing anything when the form's object list is empty.
// NEW side: fetches the object list once, iterates with begin()/end(), and
// passes list + iterator to ProcessTextObject in place of the OLD FX_POSITION.
// NOTE(review): OLD returns early on a null pFormObj; NEW dereferences
// pFormObj->m_pForm with no guard -- confirm all callers pass non-null.
879 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, | 866 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, |
880 const CFX_Matrix& formMatrix) { | 867 const CFX_Matrix& formMatrix) { |
881 CPDF_PageObject* pPageObj = NULL; | 868 CPDF_PageObjectList* pObjectList = pFormObj->m_pForm->GetPageObjectList(); |
882 FX_POSITION pos; | 869 if (pObjectList->empty()) |
883 if (!pFormObj) { | |
884 return; | 870 return; |
885 } | 871 |
886 pos = pFormObj->m_pForm->GetPageObjectList()->GetHeadPosition(); | |
887 if (!pos) { | |
888 return; | |
889 } | |
// Build the matrix handed to children: this form's matrix, then the caller's.
890 CFX_Matrix curFormMatrix; | 872 CFX_Matrix curFormMatrix; |
891 curFormMatrix.Copy(pFormObj->m_FormMatrix); | 873 curFormMatrix.Copy(pFormObj->m_FormMatrix); |
892 curFormMatrix.Concat(formMatrix); | 874 curFormMatrix.Concat(formMatrix); |
893 while (pos) { | 875 |
894 pPageObj = pFormObj->m_pForm->GetPageObjectList()->GetNextObject(pos); | 876 for (auto it = pObjectList->begin(); it != pObjectList->end(); ++it) { |
895 if (pPageObj) { | 877 if (CPDF_PageObject* pPageObj = it->get()) { |
896 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { | 878 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { |
897 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); | 879 ProcessTextObject(static_cast<CPDF_TextObject*>(pPageObj), |
| 880 curFormMatrix, pObjectList, it); |
898 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { | 881 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { |
899 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); | 882 ProcessFormObject(static_cast<CPDF_FormObject*>(pPageObj), |
| 883 curFormMatrix); |
900 } | 884 } |
901 } | 885 } |
902 pPageObj = NULL; | |
903 } | 886 } |
904 } | 887 } |
905 | 888 |
906 int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const { | 889 int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const { |
907 if (charCode == -1) { | 890 if (charCode == -1) { |
908 return 0; | 891 return 0; |
909 } | 892 } |
910 int w = pFont->GetCharWidthF(charCode); | 893 int w = pFont->GetCharWidthF(charCode); |
911 if (w == 0) { | 894 if (w == 0) { |
912 CFX_ByteString str; | 895 CFX_ByteString str; |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1025 } else { | 1008 } else { |
1026 eCurrentDirection = CFX_BidiChar::LEFT; | 1009 eCurrentDirection = CFX_BidiChar::LEFT; |
1027 for (int m = segment.start; m < segment.start + segment.count; m++) | 1010 for (int m = segment.start; m < segment.start + segment.count; m++) |
1028 AddCharInfoByLRDirection(bidi.CharAt(m), m_TempCharList[m]); | 1011 AddCharInfoByLRDirection(bidi.CharAt(m), m_TempCharList[m]); |
1029 } | 1012 } |
1030 } | 1013 } |
1031 m_TempCharList.clear(); | 1014 m_TempCharList.clear(); |
1032 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength()); | 1015 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength()); |
1033 } | 1016 } |
1034 | 1017 |
1035 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj, | 1018 void CPDF_TextPage::ProcessTextObject( |
1036 const CFX_Matrix& formMatrix, | 1019 CPDF_TextObject* pTextObj, |
1037 FX_POSITION ObjPos) { | 1020 const CFX_Matrix& formMatrix, |
| 1021 const CPDF_PageObjectList* pObjList, |
| 1022 CPDF_PageObjectList::const_iterator ObjPos) { |
1038 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, | 1023 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, |
1039 pTextObj->m_Top); | 1024 pTextObj->m_Top); |
1040 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { | 1025 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { |
1041 return; | 1026 return; |
1042 } | 1027 } |
1043 int count = m_LineObj.GetSize(); | 1028 int count = m_LineObj.GetSize(); |
1044 PDFTEXT_Obj Obj; | 1029 PDFTEXT_Obj Obj; |
1045 Obj.m_pTextObj = pTextObj; | 1030 Obj.m_pTextObj = pTextObj; |
1046 Obj.m_formMatrix = formMatrix; | 1031 Obj.m_formMatrix = formMatrix; |
1047 if (count == 0) { | 1032 if (count == 0) { |
1048 m_LineObj.Add(Obj); | 1033 m_LineObj.Add(Obj); |
1049 return; | 1034 return; |
1050 } | 1035 } |
1051 if (IsSameAsPreTextObject(pTextObj, ObjPos)) { | 1036 if (IsSameAsPreTextObject(pTextObj, pObjList, ObjPos)) { |
1052 return; | 1037 return; |
1053 } | 1038 } |
1054 PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(count - 1); | 1039 PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(count - 1); |
1055 CPDF_TextObjectItem item; | 1040 CPDF_TextObjectItem item; |
1056 int nItem = prev_Obj.m_pTextObj->CountItems(); | 1041 int nItem = prev_Obj.m_pTextObj->CountItems(); |
1057 prev_Obj.m_pTextObj->GetItemInfo(nItem - 1, &item); | 1042 prev_Obj.m_pTextObj->GetItemInfo(nItem - 1, &item); |
1058 FX_FLOAT prev_width = | 1043 FX_FLOAT prev_width = |
1059 GetCharWidth(item.m_CharCode, prev_Obj.m_pTextObj->GetFont()) * | 1044 GetCharWidth(item.m_CharCode, prev_Obj.m_pTextObj->GetFont()) * |
1060 prev_Obj.m_pTextObj->GetFontSize() / 1000; | 1045 prev_Obj.m_pTextObj->GetFontSize() / 1000; |
1061 CFX_Matrix prev_matrix; | 1046 CFX_Matrix prev_matrix; |
(...skipping 763 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1825 GetCharWidth(itemPer.m_CharCode, pTextObj2->GetFont()) * | 1810 GetCharWidth(itemPer.m_CharCode, pTextObj2->GetFont()) * |
1826 pTextObj2->GetFontSize() / 1000 * 0.9 || | 1811 pTextObj2->GetFontSize() / 1000 * 0.9 || |
1827 FXSYS_fabs(pTextObj1->GetPosY() - pTextObj2->GetPosY()) > | 1812 FXSYS_fabs(pTextObj1->GetPosY() - pTextObj2->GetPosY()) > |
1828 std::max(std::max(rcPreObj.Height(), rcPreObj.Width()), | 1813 std::max(std::max(rcPreObj.Height(), rcPreObj.Width()), |
1829 pTextObj2->GetFontSize()) / | 1814 pTextObj2->GetFontSize()) / |
1830 8) { | 1815 8) { |
1831 return FALSE; | 1816 return FALSE; |
1832 } | 1817 } |
1833 return TRUE; | 1818 return TRUE; |
1834 } | 1819 } |
// Review-diff rows: each line below is "OLD-lineno code | NEW-lineno code |".
// CPDF_TextPage::IsSameAsPreTextObject(): scans backwards through the objects
// that precede the given position and returns TRUE as soon as IsSameTextObject
// reports a match between an earlier TEXT object and pTextObj. Entries that are
// pTextObj itself or are not TEXT are skipped without counting; the scan stops
// with FALSE after 5 non-matching TEXT objects or on reaching the list start.
// OLD side: always searches m_pPage's object list through FX_POSITION, starting
// from the tail when ObjPos is null.
// NEW side: searches the list passed in as pObjList, stepping the iterator back
// until it reaches pObjList->begin().
// NOTE(review): NEW drops the OLD "!pTextObj" guard and dereferences pOtherObj
// without a null check, although other loops in this file test list entries for
// null before use -- confirm null entries cannot reach this point.
1835 | 1820 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject( |
1836 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, | 1821 CPDF_TextObject* pTextObj, |
1837 FX_POSITION ObjPos) { | 1822 const CPDF_PageObjectList* pObjList, |
1838 if (!pTextObj) { | 1823 CPDF_PageObjectList::const_iterator iter) { |
1839 return FALSE; | |
1840 } | |
1841 int i = 0; | 1824 int i = 0; |
1842 if (!ObjPos) { | 1825 while (i < 5 && iter != pObjList->begin()) { |
1843 ObjPos = m_pPage->GetPageObjectList()->GetTailPosition(); | 1826 --iter; |
1844 } | 1827 CPDF_PageObject* pOtherObj = iter->get(); |
1845 CPDF_PageObject* pObj = m_pPage->GetPageObjectList()->GetPrevObject(ObjPos); | 1828 if (pOtherObj == pTextObj || pOtherObj->m_Type != CPDF_PageObject::TEXT) |
1846 while (i < 5 && ObjPos) { | |
1847 pObj = m_pPage->GetPageObjectList()->GetPrevObject(ObjPos); | |
1848 if (pObj == pTextObj) { | |
1849 continue; | 1829 continue; |
1850 } | 1830 if (IsSameTextObject(static_cast<CPDF_TextObject*>(pOtherObj), pTextObj)) |
1851 if (pObj->m_Type != CPDF_PageObject::TEXT) { | |
1852 continue; | |
1853 } | |
1854 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { | |
1855 return TRUE; | 1831 return TRUE; |
1856 } | 1832 ++i; |
1857 i++; | |
1858 } | 1833 } |
1859 return FALSE; | 1834 return FALSE; |
1860 } | 1835 } |
1861 | 1836 |
1862 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) { | 1837 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) { |
1863 const PAGECHAR_INFO* preChar; | 1838 const PAGECHAR_INFO* preChar; |
1864 if (!m_TempCharList.empty()) { | 1839 if (!m_TempCharList.empty()) { |
1865 preChar = &m_TempCharList.back(); | 1840 preChar = &m_TempCharList.back(); |
1866 } else if (!m_CharList.empty()) { | 1841 } else if (!m_CharList.empty()) { |
1867 preChar = &m_CharList.back(); | 1842 preChar = &m_CharList.back(); |
(...skipping 654 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2522 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2497 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2523 return; | 2498 return; |
2524 } | 2499 } |
2525 CPDF_LinkExt* link = NULL; | 2500 CPDF_LinkExt* link = NULL; |
2526 link = m_LinkList.GetAt(index); | 2501 link = m_LinkList.GetAt(index); |
2527 if (!link) { | 2502 if (!link) { |
2528 return; | 2503 return; |
2529 } | 2504 } |
2530 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2505 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2531 } | 2506 } |
OLD | NEW |