OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <cctype> | 8 #include <cctype> |
9 #include <cwctype> | 9 #include <cwctype> |
10 #include <memory> | 10 #include <memory> |
(...skipping 21 matching lines...) Expand all Loading... |
32 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 | 32 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 |
33 #define FPDFTEXT_CONSECUTIVE 0x00000004 | 33 #define FPDFTEXT_CONSECUTIVE 0x00000004 |
34 | 34 |
35 #define FPDFTEXT_CHAR_ERROR -1 | 35 #define FPDFTEXT_CHAR_ERROR -1 |
36 #define FPDFTEXT_CHAR_NORMAL 0 | 36 #define FPDFTEXT_CHAR_NORMAL 0 |
37 #define FPDFTEXT_CHAR_GENERATED 1 | 37 #define FPDFTEXT_CHAR_GENERATED 1 |
38 #define FPDFTEXT_CHAR_UNUNICODE 2 | 38 #define FPDFTEXT_CHAR_UNUNICODE 2 |
39 #define FPDFTEXT_CHAR_HYPHEN 3 | 39 #define FPDFTEXT_CHAR_HYPHEN 3 |
40 #define FPDFTEXT_CHAR_PIECE 4 | 40 #define FPDFTEXT_CHAR_PIECE 4 |
41 | 41 |
42 #define TEXT_BLANK_CHAR L' ' | 42 #define TEXT_SPACE_CHAR L' ' |
43 #define TEXT_LINEFEED_CHAR L'\n' | 43 #define TEXT_LINEFEED_CHAR L'\n' |
44 #define TEXT_RETURN_CHAR L'\r' | 44 #define TEXT_RETURN_CHAR L'\r' |
45 #define TEXT_EMPTY L"" | 45 #define TEXT_EMPTY L"" |
46 #define TEXT_BLANK L" " | 46 #define TEXT_SPACE L" " |
47 #define TEXT_RETURN_LINEFEED L"\r\n" | 47 #define TEXT_RETURN_LINEFEED L"\r\n" |
48 #define TEXT_LINEFEED L"\n" | 48 #define TEXT_LINEFEED L"\n" |
49 #define TEXT_CHARRATIO_GAPDELTA 0.070 | 49 #define TEXT_CHARRATIO_GAPDELTA 0.070 |
50 | 50 |
51 namespace { | 51 namespace { |
52 | 52 |
53 const FX_FLOAT kDefaultFontSize = 1.0f; | 53 const FX_FLOAT kDefaultFontSize = 1.0f; |
54 const uint16_t* const g_UnicodeData_Normalization_Maps[5] = { | 54 const uint16_t* const g_UnicodeData_Normalization_Maps[5] = { |
55 nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2, | 55 nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2, |
56 g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4}; | 56 g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4}; |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
143 return (FX_STRSIZE)wFind; | 143 return (FX_STRSIZE)wFind; |
144 } | 144 } |
145 | 145 |
146 } // namespace | 146 } // namespace |
147 | 147 |
148 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) | 148 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) |
149 : m_pPage(pPage), | 149 : m_pPage(pPage), |
150 m_parserflag(flags), | 150 m_parserflag(flags), |
151 m_pPreTextObj(nullptr), | 151 m_pPreTextObj(nullptr), |
152 m_bIsParsed(false), | 152 m_bIsParsed(false), |
153 m_TextlineDir(-1), | 153 m_TextlineDir(TextOrientation::Unknown), |
154 m_CurlineRect(0, 0, 0, 0) { | 154 m_CurlineRect(0, 0, 0, 0) { |
155 m_TextBuf.EstimateSize(0, 10240); | 155 m_TextBuf.EstimateSize(0, 10240); |
156 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 156 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
157 (int)pPage->GetPageHeight(), 0); | 157 (int)pPage->GetPageHeight(), 0); |
158 } | 158 } |
159 | 159 |
160 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { | 160 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { |
161 switch (charInfo.m_Unicode) { | 161 switch (charInfo.m_Unicode) { |
162 case 0x2: | 162 case 0x2: |
163 case 0x3: | 163 case 0x3: |
(...skipping 12 matching lines...) Expand all Loading... |
176 void CPDF_TextPage::ParseTextPage() { | 176 void CPDF_TextPage::ParseTextPage() { |
177 m_bIsParsed = false; | 177 m_bIsParsed = false; |
178 m_TextBuf.Clear(); | 178 m_TextBuf.Clear(); |
179 m_CharList.clear(); | 179 m_CharList.clear(); |
180 m_pPreTextObj = nullptr; | 180 m_pPreTextObj = nullptr; |
181 ProcessObject(); | 181 ProcessObject(); |
182 | 182 |
183 m_bIsParsed = true; | 183 m_bIsParsed = true; |
184 m_CharIndex.clear(); | 184 m_CharIndex.clear(); |
185 int nCount = pdfium::CollectionSize<int>(m_CharList); | 185 int nCount = pdfium::CollectionSize<int>(m_CharList); |
186 if (nCount) { | 186 if (nCount) |
187 m_CharIndex.push_back(0); | 187 m_CharIndex.push_back(0); |
188 } | 188 |
189 for (int i = 0; i < nCount; i++) { | 189 for (int i = 0; i < nCount; i++) { |
190 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 190 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
191 FX_BOOL bNormal = FALSE; | |
192 const PAGECHAR_INFO& charinfo = m_CharList[i]; | 191 const PAGECHAR_INFO& charinfo = m_CharList[i]; |
193 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 192 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED || |
194 bNormal = TRUE; | 193 (charinfo.m_Unicode != 0 && !IsControlChar(charinfo))) { |
195 } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) { | |
196 bNormal = FALSE; | |
197 } else { | |
198 bNormal = TRUE; | |
199 } | |
200 if (bNormal) { | |
201 if (indexSize % 2) { | 194 if (indexSize % 2) { |
202 m_CharIndex.push_back(1); | 195 m_CharIndex.push_back(1); |
203 } else { | 196 } else { |
204 if (indexSize <= 0) { | 197 if (indexSize <= 0) |
205 continue; | 198 continue; |
206 } | |
207 m_CharIndex[indexSize - 1] += 1; | 199 m_CharIndex[indexSize - 1] += 1; |
208 } | 200 } |
209 } else { | 201 } else { |
210 if (indexSize % 2) { | 202 if (indexSize % 2) { |
211 if (indexSize <= 0) { | 203 if (indexSize <= 0) |
212 continue; | 204 continue; |
213 } | |
214 m_CharIndex[indexSize - 1] = i + 1; | 205 m_CharIndex[indexSize - 1] = i + 1; |
215 } else { | 206 } else { |
216 m_CharIndex.push_back(i + 1); | 207 m_CharIndex.push_back(i + 1); |
217 } | 208 } |
218 } | 209 } |
219 } | 210 } |
220 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 211 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
221 if (indexSize % 2) { | 212 if (indexSize % 2) |
222 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); | 213 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); |
223 } | |
224 } | 214 } |
225 | 215 |
226 int CPDF_TextPage::CountChars() const { | 216 int CPDF_TextPage::CountChars() const { |
227 return pdfium::CollectionSize<int>(m_CharList); | 217 return pdfium::CollectionSize<int>(m_CharList); |
228 } | 218 } |
229 | 219 |
230 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { | 220 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { |
231 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 221 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
232 int count = 0; | 222 int count = 0; |
233 for (int i = 0; i < indexSize; i += 2) { | 223 for (int i = 0; i < indexSize; i += 2) { |
(...skipping 444 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
678 } | 668 } |
679 if (segmentStatus == 1) { | 669 if (segmentStatus == 1) { |
680 segmentStatus = 2; | 670 segmentStatus = 2; |
681 m_Segments.Add(segment); | 671 m_Segments.Add(segment); |
682 segment.m_Start = 0; | 672 segment.m_Start = 0; |
683 segment.m_nCount = 0; | 673 segment.m_nCount = 0; |
684 } | 674 } |
685 return m_Segments.GetSize(); | 675 return m_Segments.GetSize(); |
686 } | 676 } |
687 | 677 |
688 int32_t CPDF_TextPage::FindTextlineFlowDirection() { | 678 CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation() |
| 679 const { |
689 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); | 680 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); |
690 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); | 681 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); |
691 std::vector<uint8_t> nHorizontalMask(nPageWidth); | 682 std::vector<uint8_t> nHorizontalMask(nPageWidth); |
692 std::vector<uint8_t> nVerticalMask(nPageHeight); | 683 std::vector<uint8_t> nVerticalMask(nPageHeight); |
693 uint8_t* pDataH = nHorizontalMask.data(); | 684 uint8_t* pDataH = nHorizontalMask.data(); |
694 uint8_t* pDataV = nVerticalMask.data(); | 685 uint8_t* pDataV = nVerticalMask.data(); |
695 int32_t index = 0; | 686 int32_t index = 0; |
696 FX_FLOAT fLineHeight = 0.0f; | 687 FX_FLOAT fLineHeight = 0.0f; |
697 if (m_pPage->GetPageObjectList()->empty()) | 688 if (m_pPage->GetPageObjectList()->empty()) |
698 return -1; | 689 return TextOrientation::Unknown; |
699 | 690 |
700 for (auto& pPageObj : *m_pPage->GetPageObjectList()) { | 691 for (auto& pPageObj : *m_pPage->GetPageObjectList()) { |
701 if (!pPageObj || !pPageObj->IsText()) | 692 if (!pPageObj || !pPageObj->IsText()) |
702 continue; | 693 continue; |
703 | 694 |
704 int32_t minH = | 695 int32_t minH = |
705 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; | 696 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; |
706 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth | 697 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth |
707 ? nPageWidth | 698 ? nPageWidth |
708 : (int32_t)pPageObj->m_Right; | 699 : (int32_t)pPageObj->m_Right; |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
742 for (index = 0; index < nPageHeight; index++) { | 733 for (index = 0; index < nPageHeight; index++) { |
743 if (1 == nVerticalMask[index]) | 734 if (1 == nVerticalMask[index]) |
744 break; | 735 break; |
745 } | 736 } |
746 nStartV = index; | 737 nStartV = index; |
747 for (index = nPageHeight; index > 0; index--) { | 738 for (index = nPageHeight; index > 0; index--) { |
748 if (1 == nVerticalMask[index - 1]) | 739 if (1 == nVerticalMask[index - 1]) |
749 break; | 740 break; |
750 } | 741 } |
751 nEndV = index; | 742 nEndV = index; |
752 for (index = nStartV; index < nEndV; index++) { | 743 for (index = nStartV; index < nEndV; index++) |
753 nSumV += nVerticalMask[index]; | 744 nSumV += nVerticalMask[index]; |
754 } | |
755 nSumV /= nEndV - nStartV; | 745 nSumV /= nEndV - nStartV; |
756 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) { | 746 |
757 return 0; | 747 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) |
758 } | 748 return TextOrientation::Horizontal; |
759 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) { | 749 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) |
760 return 1; | 750 return TextOrientation::Vertical; |
761 } | 751 |
762 if (nSumH > 0.8f) { | 752 if (nSumH > 0.8f) |
763 return 0; | 753 return TextOrientation::Horizontal; |
764 } | 754 |
765 if (nSumH - nSumV > 0.0f) { | 755 if (nSumH > nSumV) |
766 return 0; | 756 return TextOrientation::Horizontal; |
767 } | 757 if (nSumH < nSumV) |
768 if (nSumV - nSumH > 0.0f) { | 758 return TextOrientation::Vertical; |
769 return 1; | 759 return TextOrientation::Unknown; |
770 } | 760 } |
771 return -1; | 761 |
| 762 void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode, |
| 763 const CFX_Matrix& formMatrix) { |
| 764 PAGECHAR_INFO generateChar; |
| 765 if (!GenerateCharInfo(unicode, generateChar)) |
| 766 return; |
| 767 |
| 768 m_TextBuf.AppendChar(unicode); |
| 769 if (!formMatrix.IsIdentity()) |
| 770 generateChar.m_Matrix.Copy(formMatrix); |
| 771 m_CharList.push_back(generateChar); |
772 } | 772 } |
773 | 773 |
774 void CPDF_TextPage::ProcessObject() { | 774 void CPDF_TextPage::ProcessObject() { |
775 if (m_pPage->GetPageObjectList()->empty()) | 775 if (m_pPage->GetPageObjectList()->empty()) |
776 return; | 776 return; |
777 | 777 |
778 m_TextlineDir = FindTextlineFlowDirection(); | 778 m_TextlineDir = FindTextlineFlowOrientation(); |
779 const CPDF_PageObjectList* pObjList = m_pPage->GetPageObjectList(); | 779 const CPDF_PageObjectList* pObjList = m_pPage->GetPageObjectList(); |
780 for (auto it = pObjList->begin(); it != pObjList->end(); ++it) { | 780 for (auto it = pObjList->begin(); it != pObjList->end(); ++it) { |
781 if (CPDF_PageObject* pObj = it->get()) { | 781 if (CPDF_PageObject* pObj = it->get()) { |
782 if (pObj->IsText()) { | 782 if (pObj->IsText()) { |
783 CFX_Matrix matrix; | 783 CFX_Matrix matrix; |
784 ProcessTextObject(pObj->AsText(), matrix, pObjList, it); | 784 ProcessTextObject(pObj->AsText(), matrix, pObjList, it); |
785 } else if (pObj->IsForm()) { | 785 } else if (pObj->IsForm()) { |
786 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); | 786 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); |
787 ProcessFormObject(pObj->AsForm(), formMatrix); | 787 ProcessFormObject(pObj->AsForm(), formMatrix); |
788 } | 788 } |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
826 CFX_ByteString str; | 826 CFX_ByteString str; |
827 pFont->AppendChar(str, charCode); | 827 pFont->AppendChar(str, charCode); |
828 if (int w = pFont->GetStringWidth(str.c_str(), 1)) | 828 if (int w = pFont->GetStringWidth(str.c_str(), 1)) |
829 return w; | 829 return w; |
830 | 830 |
831 return pFont->GetCharBBox(charCode).Width(); | 831 return pFont->GetCharBBox(charCode).Width(); |
832 } | 832 } |
833 | 833 |
834 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, | 834 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, |
835 PAGECHAR_INFO info) { | 835 PAGECHAR_INFO info) { |
836 if (!IsControlChar(info)) { | 836 if (IsControlChar(info)) { |
837 info.m_Index = m_TextBuf.GetLength(); | |
838 if (wChar >= 0xFB00 && wChar <= 0xFB06) { | |
839 FX_WCHAR* pDst = nullptr; | |
840 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); | |
841 if (nCount >= 1) { | |
842 pDst = FX_Alloc(FX_WCHAR, nCount); | |
843 Unicode_GetNormalization(wChar, pDst); | |
844 for (int nIndex = 0; nIndex < nCount; nIndex++) { | |
845 PAGECHAR_INFO info2 = info; | |
846 info2.m_Unicode = pDst[nIndex]; | |
847 info2.m_Flag = FPDFTEXT_CHAR_PIECE; | |
848 m_TextBuf.AppendChar(info2.m_Unicode); | |
849 m_CharList.push_back(info2); | |
850 } | |
851 FX_Free(pDst); | |
852 return; | |
853 } | |
854 } | |
855 m_TextBuf.AppendChar(wChar); | |
856 } else { | |
857 info.m_Index = -1; | 837 info.m_Index = -1; |
| 838 m_CharList.push_back(info); |
| 839 return; |
858 } | 840 } |
859 m_CharList.push_back(info); | |
860 } | |
861 | 841 |
862 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, | 842 info.m_Index = m_TextBuf.GetLength(); |
863 PAGECHAR_INFO info) { | 843 if (wChar >= 0xFB00 && wChar <= 0xFB06) { |
864 if (!IsControlChar(info)) { | |
865 info.m_Index = m_TextBuf.GetLength(); | |
866 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); | |
867 FX_WCHAR* pDst = nullptr; | 844 FX_WCHAR* pDst = nullptr; |
868 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); | 845 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); |
869 if (nCount >= 1) { | 846 if (nCount >= 1) { |
870 pDst = FX_Alloc(FX_WCHAR, nCount); | 847 pDst = FX_Alloc(FX_WCHAR, nCount); |
871 Unicode_GetNormalization(wChar, pDst); | 848 Unicode_GetNormalization(wChar, pDst); |
872 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 849 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
873 PAGECHAR_INFO info2 = info; | 850 PAGECHAR_INFO info2 = info; |
874 info2.m_Unicode = pDst[nIndex]; | 851 info2.m_Unicode = pDst[nIndex]; |
875 info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 852 info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
876 m_TextBuf.AppendChar(info2.m_Unicode); | 853 m_TextBuf.AppendChar(info2.m_Unicode); |
877 m_CharList.push_back(info2); | 854 m_CharList.push_back(info2); |
878 } | 855 } |
879 FX_Free(pDst); | 856 FX_Free(pDst); |
880 return; | 857 return; |
881 } | 858 } |
882 info.m_Unicode = wChar; | |
883 m_TextBuf.AppendChar(info.m_Unicode); | |
884 } else { | |
885 info.m_Index = -1; | |
886 } | 859 } |
| 860 m_TextBuf.AppendChar(wChar); |
887 m_CharList.push_back(info); | 861 m_CharList.push_back(info); |
888 } | 862 } |
889 | 863 |
| 864 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, |
| 865 PAGECHAR_INFO info) { |
| 866 if (IsControlChar(info)) { |
| 867 info.m_Index = -1; |
| 868 m_CharList.push_back(info); |
| 869 return; |
| 870 } |
| 871 |
| 872 info.m_Index = m_TextBuf.GetLength(); |
| 873 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); |
| 874 FX_WCHAR* pDst = nullptr; |
| 875 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); |
| 876 if (nCount >= 1) { |
| 877 pDst = FX_Alloc(FX_WCHAR, nCount); |
| 878 Unicode_GetNormalization(wChar, pDst); |
| 879 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| 880 PAGECHAR_INFO info2 = info; |
| 881 info2.m_Unicode = pDst[nIndex]; |
| 882 info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
| 883 m_TextBuf.AppendChar(info2.m_Unicode); |
| 884 m_CharList.push_back(info2); |
| 885 } |
| 886 FX_Free(pDst); |
| 887 return; |
| 888 } |
| 889 info.m_Unicode = wChar; |
| 890 m_TextBuf.AppendChar(info.m_Unicode); |
| 891 m_CharList.push_back(info); |
| 892 } |
| 893 |
890 void CPDF_TextPage::CloseTempLine() { | 894 void CPDF_TextPage::CloseTempLine() { |
891 if (m_TempCharList.empty()) | 895 if (m_TempCharList.empty()) |
892 return; | 896 return; |
893 | 897 |
894 CFX_WideString str = m_TempTextBuf.MakeString(); | 898 CFX_WideString str = m_TempTextBuf.MakeString(); |
895 FX_BOOL bPrevSpace = FALSE; | 899 FX_BOOL bPrevSpace = FALSE; |
896 for (int i = 0; i < str.GetLength(); i++) { | 900 for (int i = 0; i < str.GetLength(); i++) { |
897 if (str.GetAt(i) != ' ') { | 901 if (str.GetAt(i) != ' ') { |
898 bPrevSpace = FALSE; | 902 bPrevSpace = FALSE; |
899 continue; | 903 continue; |
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1138 void CPDF_TextPage::FindPreviousTextObject() { | 1142 void CPDF_TextPage::FindPreviousTextObject() { |
1139 if (m_TempCharList.empty() && m_CharList.empty()) | 1143 if (m_TempCharList.empty() && m_CharList.empty()) |
1140 return; | 1144 return; |
1141 | 1145 |
1142 PAGECHAR_INFO preChar = | 1146 PAGECHAR_INFO preChar = |
1143 m_TempCharList.empty() ? m_CharList.back() : m_TempCharList.back(); | 1147 m_TempCharList.empty() ? m_CharList.back() : m_TempCharList.back(); |
1144 | 1148 |
1145 if (preChar.m_pTextObj) | 1149 if (preChar.m_pTextObj) |
1146 m_pPreTextObj = preChar.m_pTextObj; | 1150 m_pPreTextObj = preChar.m_pTextObj; |
1147 } | 1151 } |
| 1152 |
1148 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, | 1153 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, |
1149 int32_t iBufStartAppend) { | 1154 int32_t iBufStartAppend) { |
1150 int32_t i = iCharListStartAppend; | 1155 int32_t i = iCharListStartAppend; |
1151 int32_t j = pdfium::CollectionSize<int32_t>(m_TempCharList) - 1; | 1156 int32_t j = pdfium::CollectionSize<int32_t>(m_TempCharList) - 1; |
1152 for (; i < j; i++, j--) { | 1157 for (; i < j; i++, j--) { |
1153 std::swap(m_TempCharList[i], m_TempCharList[j]); | 1158 std::swap(m_TempCharList[i], m_TempCharList[j]); |
1154 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); | 1159 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); |
1155 } | 1160 } |
1156 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer(); | 1161 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer(); |
1157 i = iBufStartAppend; | 1162 i = iBufStartAppend; |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1191 CPDF_Font* pFont = pTextObj->GetFont(); | 1196 CPDF_Font* pFont = pTextObj->GetFont(); |
1192 CFX_Matrix matrix; | 1197 CFX_Matrix matrix; |
1193 pTextObj->GetTextMatrix(&matrix); | 1198 pTextObj->GetTextMatrix(&matrix); |
1194 matrix.Concat(formMatrix); | 1199 matrix.Concat(formMatrix); |
1195 FPDFText_MarkedContent ePreMKC = PreMarkedContent(Obj); | 1200 FPDFText_MarkedContent ePreMKC = PreMarkedContent(Obj); |
1196 if (ePreMKC == FPDFText_MarkedContent::Done) { | 1201 if (ePreMKC == FPDFText_MarkedContent::Done) { |
1197 m_pPreTextObj = pTextObj; | 1202 m_pPreTextObj = pTextObj; |
1198 m_perMatrix.Copy(formMatrix); | 1203 m_perMatrix.Copy(formMatrix); |
1199 return; | 1204 return; |
1200 } | 1205 } |
1201 int result = 0; | 1206 GenerateCharacter result = GenerateCharacter::None; |
1202 if (m_pPreTextObj) { | 1207 if (m_pPreTextObj) { |
1203 result = ProcessInsertObject(pTextObj, formMatrix); | 1208 result = ProcessInsertObject(pTextObj, formMatrix); |
1204 if (2 == result) { | 1209 if (result == GenerateCharacter::LineBreak) { |
1205 m_CurlineRect = | 1210 m_CurlineRect = |
1206 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, | 1211 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, |
1207 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); | 1212 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); |
1208 } else { | 1213 } else { |
1209 m_CurlineRect.Union( | 1214 m_CurlineRect.Union( |
1210 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, | 1215 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, |
1211 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top)); | 1216 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top)); |
1212 } | 1217 } |
1213 PAGECHAR_INFO generateChar; | 1218 switch (result) { |
1214 if (result == 1) { | 1219 case GenerateCharacter::None: |
1215 if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) { | 1220 break; |
1216 if (!formMatrix.IsIdentity()) { | 1221 case GenerateCharacter::Space: { |
1217 generateChar.m_Matrix.Copy(formMatrix); | 1222 PAGECHAR_INFO generateChar; |
| 1223 if (GenerateCharInfo(TEXT_SPACE_CHAR, generateChar)) { |
| 1224 if (!formMatrix.IsIdentity()) |
| 1225 generateChar.m_Matrix.Copy(formMatrix); |
| 1226 m_TempTextBuf.AppendChar(TEXT_SPACE_CHAR); |
| 1227 m_TempCharList.push_back(generateChar); |
1218 } | 1228 } |
1219 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); | 1229 break; |
1220 m_TempCharList.push_back(generateChar); | |
1221 } | 1230 } |
1222 } else if (result == 2) { | 1231 case GenerateCharacter::LineBreak: |
1223 CloseTempLine(); | 1232 CloseTempLine(); |
1224 if (m_TextBuf.GetSize()) { | 1233 if (m_TextBuf.GetSize()) { |
1225 if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { | 1234 AppendGeneratedCharacter(TEXT_RETURN_CHAR, formMatrix); |
1226 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); | 1235 AppendGeneratedCharacter(TEXT_LINEFEED_CHAR, formMatrix); |
1227 if (!formMatrix.IsIdentity()) { | 1236 } |
1228 generateChar.m_Matrix.Copy(formMatrix); | 1237 break; |
| 1238 case GenerateCharacter::Hyphen: |
| 1239 if (pTextObj->CountChars() == 1) { |
| 1240 CPDF_TextObjectItem item; |
| 1241 pTextObj->GetCharInfo(0, &item); |
| 1242 CFX_WideString wstrItem = |
| 1243 pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); |
| 1244 if (wstrItem.IsEmpty()) { |
| 1245 wstrItem += (FX_WCHAR)item.m_CharCode; |
1229 } | 1246 } |
1230 m_CharList.push_back(generateChar); | 1247 FX_WCHAR curChar = wstrItem.GetAt(0); |
| 1248 if (curChar == 0x2D || curChar == 0xAD) |
| 1249 return; |
1231 } | 1250 } |
1232 if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { | 1251 while (m_TempTextBuf.GetSize() > 0 && |
1233 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); | 1252 m_TempTextBuf.AsStringC().GetAt(m_TempTextBuf.GetLength() - 1) == |
1234 if (!formMatrix.IsIdentity()) { | 1253 0x20) { |
1235 generateChar.m_Matrix.Copy(formMatrix); | 1254 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
1236 } | 1255 m_TempCharList.pop_back(); |
1237 m_CharList.push_back(generateChar); | |
1238 } | 1256 } |
1239 } | 1257 PAGECHAR_INFO* charinfo = &m_TempCharList.back(); |
1240 } else if (result == 3) { | |
1241 int32_t nChars = pTextObj->CountChars(); | |
1242 if (nChars == 1) { | |
1243 CPDF_TextObjectItem item; | |
1244 pTextObj->GetCharInfo(0, &item); | |
1245 CFX_WideString wstrItem = | |
1246 pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); | |
1247 if (wstrItem.IsEmpty()) { | |
1248 wstrItem += (FX_WCHAR)item.m_CharCode; | |
1249 } | |
1250 FX_WCHAR curChar = wstrItem.GetAt(0); | |
1251 if (0x2D == curChar || 0xAD == curChar) { | |
1252 return; | |
1253 } | |
1254 } | |
1255 while (m_TempTextBuf.GetSize() > 0 && | |
1256 m_TempTextBuf.AsStringC().GetAt(m_TempTextBuf.GetLength() - 1) == | |
1257 0x20) { | |
1258 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1258 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
1259 m_TempCharList.pop_back(); | 1259 charinfo->m_Unicode = 0x2; |
1260 } | 1260 charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN; |
1261 PAGECHAR_INFO* charinfo = &m_TempCharList.back(); | 1261 m_TempTextBuf.AppendChar(0xfffe); |
1262 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1262 break; |
1263 charinfo->m_Unicode = 0x2; | |
1264 charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN; | |
1265 m_TempTextBuf.AppendChar(0xfffe); | |
1266 } | 1263 } |
1267 } else { | 1264 } else { |
1268 m_CurlineRect = | 1265 m_CurlineRect = |
1269 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, | 1266 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, |
1270 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); | 1267 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); |
1271 } | 1268 } |
1272 if (ePreMKC == FPDFText_MarkedContent::Delay) { | 1269 if (ePreMKC == FPDFText_MarkedContent::Delay) { |
1273 ProcessMarkedContent(Obj); | 1270 ProcessMarkedContent(Obj); |
1274 m_pPreTextObj = pTextObj; | 1271 m_pPreTextObj = pTextObj; |
1275 m_perMatrix.Copy(formMatrix); | 1272 m_perMatrix.Copy(formMatrix); |
(...skipping 16 matching lines...) Expand all Loading... |
1292 CPDF_TextObjectItem item; | 1289 CPDF_TextObjectItem item; |
1293 PAGECHAR_INFO charinfo; | 1290 PAGECHAR_INFO charinfo; |
1294 charinfo.m_OriginX = 0; | 1291 charinfo.m_OriginX = 0; |
1295 charinfo.m_OriginY = 0; | 1292 charinfo.m_OriginY = 0; |
1296 pTextObj->GetItemInfo(i, &item); | 1293 pTextObj->GetItemInfo(i, &item); |
1297 if (item.m_CharCode == (uint32_t)-1) { | 1294 if (item.m_CharCode == (uint32_t)-1) { |
1298 CFX_WideString str = m_TempTextBuf.MakeString(); | 1295 CFX_WideString str = m_TempTextBuf.MakeString(); |
1299 if (str.IsEmpty()) { | 1296 if (str.IsEmpty()) { |
1300 str = m_TextBuf.AsStringC(); | 1297 str = m_TextBuf.AsStringC(); |
1301 } | 1298 } |
1302 if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { | 1299 if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_SPACE_CHAR) |
1303 continue; | 1300 continue; |
1304 } | 1301 |
1305 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); | 1302 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); |
1306 spacing = -fontsize_h * item.m_OriginX / 1000; | 1303 spacing = -fontsize_h * item.m_OriginX / 1000; |
1307 continue; | 1304 continue; |
1308 } | 1305 } |
1309 FX_FLOAT charSpace = pTextObj->m_TextState.GetObject()->m_CharSpace; | 1306 FX_FLOAT charSpace = pTextObj->m_TextState.GetObject()->m_CharSpace; |
1310 if (charSpace > 0.001) { | 1307 if (charSpace > 0.001) { |
1311 spacing += matrix.TransformDistance(charSpace); | 1308 spacing += matrix.TransformDistance(charSpace); |
1312 } else if (charSpace < -0.001) { | 1309 } else if (charSpace < -0.001) { |
1313 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace)); | 1310 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace)); |
1314 } | 1311 } |
(...skipping 13 matching lines...) Expand all Loading... |
1328 } | 1325 } |
1329 if (threshold == 0) { | 1326 if (threshold == 0) { |
1330 threshold = fontsize_h; | 1327 threshold = fontsize_h; |
1331 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); | 1328 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); |
1332 threshold = this_width > last_width ? (FX_FLOAT)this_width | 1329 threshold = this_width > last_width ? (FX_FLOAT)this_width |
1333 : (FX_FLOAT)last_width; | 1330 : (FX_FLOAT)last_width; |
1334 threshold = NormalizeThreshold(threshold); | 1331 threshold = NormalizeThreshold(threshold); |
1335 threshold = fontsize_h * threshold / 1000; | 1332 threshold = fontsize_h * threshold / 1000; |
1336 } | 1333 } |
1337 if (threshold && (spacing && spacing >= threshold)) { | 1334 if (threshold && (spacing && spacing >= threshold)) { |
1338 charinfo.m_Unicode = TEXT_BLANK_CHAR; | 1335 charinfo.m_Unicode = TEXT_SPACE_CHAR; |
1339 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; | 1336 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; |
1340 charinfo.m_pTextObj = pTextObj; | 1337 charinfo.m_pTextObj = pTextObj; |
1341 charinfo.m_Index = m_TextBuf.GetLength(); | 1338 charinfo.m_Index = m_TextBuf.GetLength(); |
1342 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); | 1339 m_TempTextBuf.AppendChar(TEXT_SPACE_CHAR); |
1343 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode; | 1340 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode; |
1344 charinfo.m_Matrix.Copy(formMatrix); | 1341 charinfo.m_Matrix.Copy(formMatrix); |
1345 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, | 1342 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, |
1346 charinfo.m_OriginY); | 1343 charinfo.m_OriginY); |
1347 charinfo.m_CharBox = | 1344 charinfo.m_CharBox = |
1348 CFX_FloatRect(charinfo.m_OriginX, charinfo.m_OriginY, | 1345 CFX_FloatRect(charinfo.m_OriginX, charinfo.m_OriginY, |
1349 charinfo.m_OriginX, charinfo.m_OriginY); | 1346 charinfo.m_OriginX, charinfo.m_OriginY); |
1350 m_TempCharList.push_back(charinfo); | 1347 m_TempCharList.push_back(charinfo); |
1351 } | 1348 } |
1352 if (item.m_CharCode == CPDF_Font::kInvalidCharCode) { | 1349 if (item.m_CharCode == CPDF_Font::kInvalidCharCode) { |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1426 charinfo.m_Index = m_TextBuf.GetLength(); | 1423 charinfo.m_Index = m_TextBuf.GetLength(); |
1427 m_TempTextBuf.AppendChar(charinfo.m_Unicode); | 1424 m_TempTextBuf.AppendChar(charinfo.m_Unicode); |
1428 } else { | 1425 } else { |
1429 m_TempTextBuf.AppendChar(0xfffe); | 1426 m_TempTextBuf.AppendChar(0xfffe); |
1430 } | 1427 } |
1431 m_TempCharList.push_back(charinfo); | 1428 m_TempCharList.push_back(charinfo); |
1432 } | 1429 } |
1433 } else if (i == 0) { | 1430 } else if (i == 0) { |
1434 CFX_WideString str = m_TempTextBuf.MakeString(); | 1431 CFX_WideString str = m_TempTextBuf.MakeString(); |
1435 if (!str.IsEmpty() && | 1432 if (!str.IsEmpty() && |
1436 str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { | 1433 str.GetAt(str.GetLength() - 1) == TEXT_SPACE_CHAR) { |
1437 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1434 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
1438 m_TempCharList.pop_back(); | 1435 m_TempCharList.pop_back(); |
1439 } | 1436 } |
1440 } | 1437 } |
1441 } | 1438 } |
1442 } | 1439 } |
1443 if (bIsBidiAndMirrorInverse) { | 1440 if (bIsBidiAndMirrorInverse) { |
1444 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); | 1441 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); |
1445 } | 1442 } |
1446 } | 1443 } |
1447 | 1444 |
1448 int32_t CPDF_TextPage::GetTextObjectWritingMode( | 1445 CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode( |
1449 const CPDF_TextObject* pTextObj) { | 1446 const CPDF_TextObject* pTextObj) const { |
1450 int32_t nChars = pTextObj->CountChars(); | 1447 int32_t nChars = pTextObj->CountChars(); |
1451 if (nChars == 1) { | 1448 if (nChars == 1) |
1452 return m_TextlineDir; | 1449 return m_TextlineDir; |
1453 } | 1450 |
1454 CPDF_TextObjectItem first, last; | 1451 CPDF_TextObjectItem first, last; |
1455 pTextObj->GetCharInfo(0, &first); | 1452 pTextObj->GetCharInfo(0, &first); |
1456 pTextObj->GetCharInfo(nChars - 1, &last); | 1453 pTextObj->GetCharInfo(nChars - 1, &last); |
1457 CFX_Matrix textMatrix; | 1454 CFX_Matrix textMatrix; |
1458 pTextObj->GetTextMatrix(&textMatrix); | 1455 pTextObj->GetTextMatrix(&textMatrix); |
1459 textMatrix.TransformPoint(first.m_OriginX, first.m_OriginY); | 1456 textMatrix.TransformPoint(first.m_OriginX, first.m_OriginY); |
1460 textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY); | 1457 textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY); |
1461 FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX); | 1458 FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX); |
1462 FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY); | 1459 FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY); |
1463 if (dX <= 0.0001f && dY <= 0.0001f) { | 1460 if (dX <= 0.0001f && dY <= 0.0001f) |
1464 return -1; | 1461 return TextOrientation::Unknown; |
1465 } | 1462 |
1466 CFX_VectorF v(dX, dY); | 1463 CFX_VectorF v(dX, dY); |
1467 v.Normalize(); | 1464 v.Normalize(); |
1468 if (v.y <= 0.0872f) { | 1465 if (v.y <= 0.0872f) |
1469 return v.x <= 0.0872f ? m_TextlineDir : 0; | 1466 return v.x <= 0.0872f ? m_TextlineDir : TextOrientation::Horizontal; |
1470 } | 1467 |
1471 if (v.x <= 0.0872f) { | 1468 if (v.x <= 0.0872f) |
1472 return 1; | 1469 return TextOrientation::Vertical; |
1473 } | 1470 |
1474 return m_TextlineDir; | 1471 return m_TextlineDir; |
1475 } | 1472 } |
| 1473 |
1476 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) { | 1474 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) { |
1477 CFX_WideString strCurText = m_TempTextBuf.MakeString(); | 1475 CFX_WideString strCurText = m_TempTextBuf.MakeString(); |
1478 if (strCurText.GetLength() == 0) { | 1476 if (strCurText.GetLength() == 0) { |
1479 strCurText = m_TextBuf.AsStringC(); | 1477 strCurText = m_TextBuf.AsStringC(); |
1480 } | 1478 } |
1481 FX_STRSIZE nCount = strCurText.GetLength(); | 1479 FX_STRSIZE nCount = strCurText.GetLength(); |
1482 int nIndex = nCount - 1; | 1480 int nIndex = nCount - 1; |
1483 FX_WCHAR wcTmp = strCurText.GetAt(nIndex); | 1481 FX_WCHAR wcTmp = strCurText.GetAt(nIndex); |
1484 while (wcTmp == 0x20 && nIndex <= nCount - 1 && nIndex >= 0) { | 1482 while (wcTmp == 0x20 && nIndex <= nCount - 1 && nIndex >= 0) { |
1485 wcTmp = strCurText.GetAt(--nIndex); | 1483 wcTmp = strCurText.GetAt(--nIndex); |
(...skipping 17 matching lines...) Expand all Loading... |
1503 return FALSE; | 1501 return FALSE; |
1504 } | 1502 } |
1505 if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag && | 1503 if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag && |
1506 (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) { | 1504 (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) { |
1507 return TRUE; | 1505 return TRUE; |
1508 } | 1506 } |
1509 } | 1507 } |
1510 return FALSE; | 1508 return FALSE; |
1511 } | 1509 } |
1512 | 1510 |
1513 int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, | 1511 CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject( |
1514 const CFX_Matrix& formMatrix) { | 1512 const CPDF_TextObject* pObj, |
| 1513 const CFX_Matrix& formMatrix) { |
1515 FindPreviousTextObject(); | 1514 FindPreviousTextObject(); |
1516 FX_BOOL bNewline = FALSE; | 1515 TextOrientation WritingMode = GetTextObjectWritingMode(pObj); |
1517 int WritingMode = GetTextObjectWritingMode(pObj); | 1516 if (WritingMode == TextOrientation::Unknown) |
1518 if (WritingMode == -1) { | |
1519 WritingMode = GetTextObjectWritingMode(m_pPreTextObj); | 1517 WritingMode = GetTextObjectWritingMode(m_pPreTextObj); |
1520 } | 1518 |
1521 CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, | 1519 CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, |
1522 pObj->m_Top); | 1520 pObj->m_Top); |
1523 CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, | 1521 CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, |
1524 m_pPreTextObj->m_Right, m_pPreTextObj->m_Top); | 1522 m_pPreTextObj->m_Right, m_pPreTextObj->m_Top); |
1525 CPDF_TextObjectItem PrevItem, item; | 1523 CPDF_TextObjectItem PrevItem, item; |
1526 int nItem = m_pPreTextObj->CountItems(); | 1524 int nItem = m_pPreTextObj->CountItems(); |
1527 m_pPreTextObj->GetItemInfo(nItem - 1, &PrevItem); | 1525 m_pPreTextObj->GetItemInfo(nItem - 1, &PrevItem); |
1528 pObj->GetItemInfo(0, &item); | 1526 pObj->GetItemInfo(0, &item); |
1529 CFX_WideString wstrItem = | 1527 CFX_WideString wstrItem = |
1530 pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); | 1528 pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); |
1531 if (wstrItem.IsEmpty()) { | 1529 if (wstrItem.IsEmpty()) { |
1532 wstrItem += (FX_WCHAR)item.m_CharCode; | 1530 wstrItem += (FX_WCHAR)item.m_CharCode; |
1533 } | 1531 } |
1534 FX_WCHAR curChar = wstrItem.GetAt(0); | 1532 FX_WCHAR curChar = wstrItem.GetAt(0); |
1535 if (WritingMode == 0) { | 1533 if (WritingMode == TextOrientation::Horizontal) { |
1536 if (this_rect.Height() > 4.5 && prev_rect.Height() > 4.5) { | 1534 if (this_rect.Height() > 4.5 && prev_rect.Height() > 4.5) { |
1537 FX_FLOAT top = | 1535 FX_FLOAT top = |
1538 this_rect.top < prev_rect.top ? this_rect.top : prev_rect.top; | 1536 this_rect.top < prev_rect.top ? this_rect.top : prev_rect.top; |
1539 FX_FLOAT bottom = this_rect.bottom > prev_rect.bottom ? this_rect.bottom | 1537 FX_FLOAT bottom = this_rect.bottom > prev_rect.bottom ? this_rect.bottom |
1540 : prev_rect.bottom; | 1538 : prev_rect.bottom; |
1541 if (bottom >= top) { | 1539 if (bottom >= top) { |
1542 if (IsHyphen(curChar)) { | 1540 return IsHyphen(curChar) ? GenerateCharacter::Hyphen |
1543 return 3; | 1541 : GenerateCharacter::LineBreak; |
1544 } | |
1545 return 2; | |
1546 } | 1542 } |
1547 } | 1543 } |
1548 } else if (WritingMode == 1) { | 1544 } else if (WritingMode == TextOrientation::Vertical) { |
1549 if (this_rect.Width() > pObj->GetFontSize() * 0.1f && | 1545 if (this_rect.Width() > pObj->GetFontSize() * 0.1f && |
1550 prev_rect.Width() > m_pPreTextObj->GetFontSize() * 0.1f) { | 1546 prev_rect.Width() > m_pPreTextObj->GetFontSize() * 0.1f) { |
1551 FX_FLOAT left = this_rect.left > m_CurlineRect.left ? this_rect.left | 1547 FX_FLOAT left = this_rect.left > m_CurlineRect.left ? this_rect.left |
1552 : m_CurlineRect.left; | 1548 : m_CurlineRect.left; |
1553 FX_FLOAT right = this_rect.right < m_CurlineRect.right | 1549 FX_FLOAT right = this_rect.right < m_CurlineRect.right |
1554 ? this_rect.right | 1550 ? this_rect.right |
1555 : m_CurlineRect.right; | 1551 : m_CurlineRect.right; |
1556 if (right <= left) { | 1552 if (right <= left) { |
1557 if (IsHyphen(curChar)) { | 1553 return IsHyphen(curChar) ? GenerateCharacter::Hyphen |
1558 return 3; | 1554 : GenerateCharacter::LineBreak; |
1559 } | |
1560 return 2; | |
1561 } | 1555 } |
1562 } | 1556 } |
1563 } | 1557 } |
1564 FX_FLOAT last_pos = PrevItem.m_OriginX; | 1558 FX_FLOAT last_pos = PrevItem.m_OriginX; |
1565 int nLastWidth = GetCharWidth(PrevItem.m_CharCode, m_pPreTextObj->GetFont()); | 1559 int nLastWidth = GetCharWidth(PrevItem.m_CharCode, m_pPreTextObj->GetFont()); |
1566 FX_FLOAT last_width = nLastWidth * m_pPreTextObj->GetFontSize() / 1000; | 1560 FX_FLOAT last_width = nLastWidth * m_pPreTextObj->GetFontSize() / 1000; |
1567 last_width = FXSYS_fabs(last_width); | 1561 last_width = FXSYS_fabs(last_width); |
1568 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont()); | 1562 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont()); |
1569 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000; | 1563 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000; |
1570 this_width = FXSYS_fabs(this_width); | 1564 this_width = FXSYS_fabs(this_width); |
1571 FX_FLOAT threshold = | 1565 FX_FLOAT threshold = |
1572 last_width > this_width ? last_width / 4 : this_width / 4; | 1566 last_width > this_width ? last_width / 4 : this_width / 4; |
1573 CFX_Matrix prev_matrix, prev_reverse; | 1567 CFX_Matrix prev_matrix, prev_reverse; |
1574 m_pPreTextObj->GetTextMatrix(&prev_matrix); | 1568 m_pPreTextObj->GetTextMatrix(&prev_matrix); |
1575 prev_matrix.Concat(m_perMatrix); | 1569 prev_matrix.Concat(m_perMatrix); |
1576 prev_reverse.SetReverse(prev_matrix); | 1570 prev_reverse.SetReverse(prev_matrix); |
1577 FX_FLOAT x = pObj->GetPosX(); | 1571 FX_FLOAT x = pObj->GetPosX(); |
1578 FX_FLOAT y = pObj->GetPosY(); | 1572 FX_FLOAT y = pObj->GetPosY(); |
1579 formMatrix.Transform(x, y); | 1573 formMatrix.Transform(x, y); |
1580 prev_reverse.Transform(x, y); | 1574 prev_reverse.Transform(x, y); |
1581 if (last_width < this_width) { | 1575 if (last_width < this_width) { |
1582 threshold = prev_reverse.TransformDistance(threshold); | 1576 threshold = prev_reverse.TransformDistance(threshold); |
1583 } | 1577 } |
1584 CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom, | 1578 bool bNewline = false; |
1585 m_pPreTextObj->m_Right, pObj->m_Top); | 1579 if (WritingMode == TextOrientation::Horizontal) { |
1586 CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, | 1580 CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom, |
1587 m_pPreTextObj->m_Right, m_pPreTextObj->m_Top); | 1581 m_pPreTextObj->m_Right, pObj->m_Top); |
1588 CFX_FloatRect rect3 = rect1; | 1582 CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, |
1589 rect1.Intersect(rect2); | 1583 m_pPreTextObj->m_Right, m_pPreTextObj->m_Top); |
1590 if (WritingMode == 0) { | 1584 CFX_FloatRect rect3 = rect1; |
| 1585 rect1.Intersect(rect2); |
1591 if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5) || | 1586 if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5) || |
1592 ((y > threshold * 2 || y < threshold * -3) && | 1587 ((y > threshold * 2 || y < threshold * -3) && |
1593 (FXSYS_fabs(y) < 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : TRUE))) { | 1588 (FXSYS_fabs(y) < 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : TRUE))) { |
1594 bNewline = TRUE; | 1589 bNewline = true; |
1595 if (nItem > 1) { | 1590 if (nItem > 1) { |
1596 CPDF_TextObjectItem tempItem; | 1591 CPDF_TextObjectItem tempItem; |
1597 m_pPreTextObj->GetItemInfo(0, &tempItem); | 1592 m_pPreTextObj->GetItemInfo(0, &tempItem); |
1598 CFX_Matrix m; | 1593 CFX_Matrix m; |
1599 m_pPreTextObj->GetTextMatrix(&m); | 1594 m_pPreTextObj->GetTextMatrix(&m); |
1600 if (PrevItem.m_OriginX > tempItem.m_OriginX && | 1595 if (PrevItem.m_OriginX > tempItem.m_OriginX && |
1601 m_DisplayMatrix.a > 0.9 && m_DisplayMatrix.b < 0.1 && | 1596 m_DisplayMatrix.a > 0.9 && m_DisplayMatrix.b < 0.1 && |
1602 m_DisplayMatrix.c < 0.1 && m_DisplayMatrix.d < -0.9 && m.b < 0.1 && | 1597 m_DisplayMatrix.c < 0.1 && m_DisplayMatrix.d < -0.9 && m.b < 0.1 && |
1603 m.c < 0.1) { | 1598 m.c < 0.1) { |
1604 CFX_FloatRect re(0, m_pPreTextObj->m_Bottom, 1000, | 1599 CFX_FloatRect re(0, m_pPreTextObj->m_Bottom, 1000, |
1605 m_pPreTextObj->m_Top); | 1600 m_pPreTextObj->m_Top); |
1606 if (re.Contains(pObj->GetPosX(), pObj->GetPosY())) { | 1601 if (re.Contains(pObj->GetPosX(), pObj->GetPosY())) { |
1607 bNewline = FALSE; | 1602 bNewline = false; |
1608 } else { | 1603 } else { |
1609 CFX_FloatRect rect(0, pObj->m_Bottom, 1000, pObj->m_Top); | 1604 CFX_FloatRect rect(0, pObj->m_Bottom, 1000, pObj->m_Top); |
1610 if (rect.Contains(m_pPreTextObj->GetPosX(), | 1605 if (rect.Contains(m_pPreTextObj->GetPosX(), |
1611 m_pPreTextObj->GetPosY())) { | 1606 m_pPreTextObj->GetPosY())) { |
1612 bNewline = FALSE; | 1607 bNewline = false; |
1613 } | 1608 } |
1614 } | 1609 } |
1615 } | 1610 } |
1616 } | 1611 } |
1617 } | 1612 } |
1618 } | 1613 } |
1619 if (bNewline) | 1614 if (bNewline) { |
1620 return IsHyphen(curChar) ? 3 : 2; | 1615 return IsHyphen(curChar) ? GenerateCharacter::Hyphen |
| 1616 : GenerateCharacter::LineBreak; |
| 1617 } |
1621 | 1618 |
1622 int32_t nChars = pObj->CountChars(); | 1619 int32_t nChars = pObj->CountChars(); |
1623 if (nChars == 1 && (0x2D == curChar || 0xAD == curChar) && | 1620 if (nChars == 1 && (0x2D == curChar || 0xAD == curChar) && |
1624 IsHyphen(curChar)) { | 1621 IsHyphen(curChar)) { |
1625 return 3; | 1622 return GenerateCharacter::Hyphen; |
1626 } | 1623 } |
1627 CFX_WideString PrevStr = | 1624 CFX_WideString PrevStr = |
1628 m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode); | 1625 m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode); |
1629 FX_WCHAR preChar = PrevStr.GetAt(PrevStr.GetLength() - 1); | 1626 FX_WCHAR preChar = PrevStr.GetAt(PrevStr.GetLength() - 1); |
1630 CFX_Matrix matrix; | 1627 CFX_Matrix matrix; |
1631 pObj->GetTextMatrix(&matrix); | 1628 pObj->GetTextMatrix(&matrix); |
1632 matrix.Concat(formMatrix); | 1629 matrix.Concat(formMatrix); |
1633 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth); | 1630 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth); |
1634 threshold = threshold > 400 | 1631 threshold = threshold > 400 |
1635 ? (threshold < 700 | 1632 ? (threshold < 700 |
(...skipping 10 matching lines...) Expand all Loading... |
1646 threshold /= 1000; | 1643 threshold /= 1000; |
1647 if ((threshold < 1.4881 && threshold > 1.4879) || | 1644 if ((threshold < 1.4881 && threshold > 1.4879) || |
1648 (threshold < 1.39001 && threshold > 1.38999)) { | 1645 (threshold < 1.39001 && threshold > 1.38999)) { |
1649 threshold *= 1.5; | 1646 threshold *= 1.5; |
1650 } | 1647 } |
1651 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && | 1648 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && |
1652 preChar != L' ') { | 1649 preChar != L' ') { |
1653 if (curChar != L' ' && preChar != L' ') { | 1650 if (curChar != L' ' && preChar != L' ') { |
1654 if ((x - last_pos - last_width) > threshold || | 1651 if ((x - last_pos - last_width) > threshold || |
1655 (last_pos - x - last_width) > threshold) { | 1652 (last_pos - x - last_width) > threshold) { |
1656 return 1; | 1653 return GenerateCharacter::Space; |
1657 } | 1654 } |
1658 if (x < 0 && (last_pos - x - last_width) > threshold) { | 1655 if (x < 0 && (last_pos - x - last_width) > threshold) { |
1659 return 1; | 1656 return GenerateCharacter::Space; |
1660 } | 1657 } |
1661 if ((x - last_pos - last_width) > this_width || | 1658 if ((x - last_pos - last_width) > this_width || |
1662 (x - last_pos - this_width) > last_width) { | 1659 (x - last_pos - this_width) > last_width) { |
1663 return 1; | 1660 return GenerateCharacter::Space; |
1664 } | 1661 } |
1665 } | 1662 } |
1666 } | 1663 } |
1667 return 0; | 1664 return GenerateCharacter::None; |
1668 } | 1665 } |
1669 | 1666 |
1670 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, | 1667 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, |
1671 CPDF_TextObject* pTextObj2) { | 1668 CPDF_TextObject* pTextObj2) { |
1672 if (!pTextObj1 || !pTextObj2) { | 1669 if (!pTextObj1 || !pTextObj2) { |
1673 return FALSE; | 1670 return FALSE; |
1674 } | 1671 } |
1675 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, | 1672 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, |
1676 pTextObj2->m_Right, pTextObj2->m_Top); | 1673 pTextObj2->m_Right, pTextObj2->m_Top); |
1677 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, | 1674 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, |
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1907 int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray); | 1904 int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray); |
1908 int nResultPos = 0; | 1905 int nResultPos = 0; |
1909 int nStartPos = 0; | 1906 int nStartPos = 0; |
1910 nStartPos = m_findNextStart; | 1907 nStartPos = m_findNextStart; |
1911 FX_BOOL bSpaceStart = FALSE; | 1908 FX_BOOL bSpaceStart = FALSE; |
1912 for (int iWord = 0; iWord < nCount; iWord++) { | 1909 for (int iWord = 0; iWord < nCount; iWord++) { |
1913 CFX_WideString csWord = m_csFindWhatArray[iWord]; | 1910 CFX_WideString csWord = m_csFindWhatArray[iWord]; |
1914 if (csWord.IsEmpty()) { | 1911 if (csWord.IsEmpty()) { |
1915 if (iWord == nCount - 1) { | 1912 if (iWord == nCount - 1) { |
1916 FX_WCHAR strInsert = m_strText.GetAt(nStartPos); | 1913 FX_WCHAR strInsert = m_strText.GetAt(nStartPos); |
1917 if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_BLANK_CHAR || | 1914 if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_SPACE_CHAR || |
1918 strInsert == TEXT_RETURN_CHAR || strInsert == 160) { | 1915 strInsert == TEXT_RETURN_CHAR || strInsert == 160) { |
1919 nResultPos = nStartPos + 1; | 1916 nResultPos = nStartPos + 1; |
1920 break; | 1917 break; |
1921 } | 1918 } |
1922 iWord = -1; | 1919 iWord = -1; |
1923 } else if (iWord == 0) { | 1920 } else if (iWord == 0) { |
1924 bSpaceStart = TRUE; | 1921 bSpaceStart = TRUE; |
1925 } | 1922 } |
1926 continue; | 1923 continue; |
1927 } | 1924 } |
(...skipping 13 matching lines...) Expand all Loading... |
1941 int curChar = csWord.GetAt(0); | 1938 int curChar = csWord.GetAt(0); |
1942 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; | 1939 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; |
1943 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); | 1940 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); |
1944 if (nStartPos == nResultPos && | 1941 if (nStartPos == nResultPos && |
1945 !(IsIgnoreSpaceCharacter(lastChar) || | 1942 !(IsIgnoreSpaceCharacter(lastChar) || |
1946 IsIgnoreSpaceCharacter(curChar))) { | 1943 IsIgnoreSpaceCharacter(curChar))) { |
1947 bMatch = FALSE; | 1944 bMatch = FALSE; |
1948 } | 1945 } |
1949 for (int d = PreResEndPos; d < nResultPos; d++) { | 1946 for (int d = PreResEndPos; d < nResultPos; d++) { |
1950 FX_WCHAR strInsert = m_strText.GetAt(d); | 1947 FX_WCHAR strInsert = m_strText.GetAt(d); |
1951 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && | 1948 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR && |
1952 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { | 1949 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { |
1953 bMatch = FALSE; | 1950 bMatch = FALSE; |
1954 break; | 1951 break; |
1955 } | 1952 } |
1956 } | 1953 } |
1957 } else if (bSpaceStart) { | 1954 } else if (bSpaceStart) { |
1958 if (nResultPos > 0) { | 1955 if (nResultPos > 0) { |
1959 FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1); | 1956 FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1); |
1960 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && | 1957 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR && |
1961 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { | 1958 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { |
1962 bMatch = FALSE; | 1959 bMatch = FALSE; |
1963 m_resStart = nResultPos; | 1960 m_resStart = nResultPos; |
1964 } else { | 1961 } else { |
1965 m_resStart = nResultPos - 1; | 1962 m_resStart = nResultPos - 1; |
1966 } | 1963 } |
1967 } | 1964 } |
1968 } | 1965 } |
1969 if (m_bMatchWholeWord && bMatch) { | 1966 if (m_bMatchWholeWord && bMatch) { |
1970 bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex); | 1967 bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex); |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2041 } | 2038 } |
2042 | 2039 |
2043 void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { | 2040 void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { |
2044 if (findwhat.IsEmpty()) { | 2041 if (findwhat.IsEmpty()) { |
2045 return; | 2042 return; |
2046 } | 2043 } |
2047 int index = 0; | 2044 int index = 0; |
2048 while (1) { | 2045 while (1) { |
2049 CFX_WideString csWord = TEXT_EMPTY; | 2046 CFX_WideString csWord = TEXT_EMPTY; |
2050 int ret = | 2047 int ret = |
2051 ExtractSubString(csWord, findwhat.c_str(), index, TEXT_BLANK_CHAR); | 2048 ExtractSubString(csWord, findwhat.c_str(), index, TEXT_SPACE_CHAR); |
2052 if (csWord.IsEmpty()) { | 2049 if (csWord.IsEmpty()) { |
2053 if (ret) { | 2050 if (ret) { |
2054 m_csFindWhatArray.push_back(L""); | 2051 m_csFindWhatArray.push_back(L""); |
2055 index++; | 2052 index++; |
2056 continue; | 2053 continue; |
2057 } else { | 2054 } else { |
2058 break; | 2055 break; |
2059 } | 2056 } |
2060 } | 2057 } |
2061 int pos = 0; | 2058 int pos = 0; |
(...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2336 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; | 2333 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; |
2337 } | 2334 } |
2338 | 2335 |
2339 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { | 2336 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { |
2340 if (index >= m_LinkArray.size()) | 2337 if (index >= m_LinkArray.size()) |
2341 return std::vector<CFX_FloatRect>(); | 2338 return std::vector<CFX_FloatRect>(); |
2342 | 2339 |
2343 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, | 2340 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, |
2344 m_LinkArray[index].m_Count); | 2341 m_LinkArray[index].m_Count); |
2345 } | 2342 } |
OLD | NEW |