Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(526)

Side by Side Diff: core/fpdftext/fpdf_text_int.cpp

Issue 2064223002: Clean up CPDF_TextPage. (Closed) Base URL: https://pdfium.googlesource.com/pdfium@master
Patch Set: Fix breakage, do a bit more refactoring Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | core/fpdftext/include/cpdf_textpage.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <cctype> 8 #include <cctype>
9 #include <cwctype> 9 #include <cwctype>
10 #include <memory> 10 #include <memory>
(...skipping 21 matching lines...) Expand all
32 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 32 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002
33 #define FPDFTEXT_CONSECUTIVE 0x00000004 33 #define FPDFTEXT_CONSECUTIVE 0x00000004
34 34
35 #define FPDFTEXT_CHAR_ERROR -1 35 #define FPDFTEXT_CHAR_ERROR -1
36 #define FPDFTEXT_CHAR_NORMAL 0 36 #define FPDFTEXT_CHAR_NORMAL 0
37 #define FPDFTEXT_CHAR_GENERATED 1 37 #define FPDFTEXT_CHAR_GENERATED 1
38 #define FPDFTEXT_CHAR_UNUNICODE 2 38 #define FPDFTEXT_CHAR_UNUNICODE 2
39 #define FPDFTEXT_CHAR_HYPHEN 3 39 #define FPDFTEXT_CHAR_HYPHEN 3
40 #define FPDFTEXT_CHAR_PIECE 4 40 #define FPDFTEXT_CHAR_PIECE 4
41 41
42 #define TEXT_BLANK_CHAR L' ' 42 #define TEXT_SPACE_CHAR L' '
43 #define TEXT_LINEFEED_CHAR L'\n' 43 #define TEXT_LINEFEED_CHAR L'\n'
44 #define TEXT_RETURN_CHAR L'\r' 44 #define TEXT_RETURN_CHAR L'\r'
45 #define TEXT_EMPTY L"" 45 #define TEXT_EMPTY L""
46 #define TEXT_BLANK L" " 46 #define TEXT_SPACE L" "
47 #define TEXT_RETURN_LINEFEED L"\r\n" 47 #define TEXT_RETURN_LINEFEED L"\r\n"
48 #define TEXT_LINEFEED L"\n" 48 #define TEXT_LINEFEED L"\n"
49 #define TEXT_CHARRATIO_GAPDELTA 0.070 49 #define TEXT_CHARRATIO_GAPDELTA 0.070
50 50
51 namespace { 51 namespace {
52 52
53 const FX_FLOAT kDefaultFontSize = 1.0f; 53 const FX_FLOAT kDefaultFontSize = 1.0f;
54 const uint16_t* const g_UnicodeData_Normalization_Maps[5] = { 54 const uint16_t* const g_UnicodeData_Normalization_Maps[5] = {
55 nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2, 55 nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2,
56 g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4}; 56 g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4};
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
143 return (FX_STRSIZE)wFind; 143 return (FX_STRSIZE)wFind;
144 } 144 }
145 145
146 } // namespace 146 } // namespace
147 147
148 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) 148 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags)
149 : m_pPage(pPage), 149 : m_pPage(pPage),
150 m_parserflag(flags), 150 m_parserflag(flags),
151 m_pPreTextObj(nullptr), 151 m_pPreTextObj(nullptr),
152 m_bIsParsed(false), 152 m_bIsParsed(false),
153 m_TextlineDir(-1), 153 m_TextlineDir(TextOrientation::Unknown),
154 m_CurlineRect(0, 0, 0, 0) { 154 m_CurlineRect(0, 0, 0, 0) {
155 m_TextBuf.EstimateSize(0, 10240); 155 m_TextBuf.EstimateSize(0, 10240);
156 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), 156 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
157 (int)pPage->GetPageHeight(), 0); 157 (int)pPage->GetPageHeight(), 0);
158 } 158 }
159 159
160 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { 160 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) {
161 switch (charInfo.m_Unicode) { 161 switch (charInfo.m_Unicode) {
162 case 0x2: 162 case 0x2:
163 case 0x3: 163 case 0x3:
(...skipping 12 matching lines...) Expand all
176 void CPDF_TextPage::ParseTextPage() { 176 void CPDF_TextPage::ParseTextPage() {
177 m_bIsParsed = false; 177 m_bIsParsed = false;
178 m_TextBuf.Clear(); 178 m_TextBuf.Clear();
179 m_CharList.clear(); 179 m_CharList.clear();
180 m_pPreTextObj = nullptr; 180 m_pPreTextObj = nullptr;
181 ProcessObject(); 181 ProcessObject();
182 182
183 m_bIsParsed = true; 183 m_bIsParsed = true;
184 m_CharIndex.clear(); 184 m_CharIndex.clear();
185 int nCount = pdfium::CollectionSize<int>(m_CharList); 185 int nCount = pdfium::CollectionSize<int>(m_CharList);
186 if (nCount) { 186 if (nCount)
187 m_CharIndex.push_back(0); 187 m_CharIndex.push_back(0);
188 } 188
189 for (int i = 0; i < nCount; i++) { 189 for (int i = 0; i < nCount; i++) {
190 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); 190 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
191 FX_BOOL bNormal = FALSE;
192 const PAGECHAR_INFO& charinfo = m_CharList[i]; 191 const PAGECHAR_INFO& charinfo = m_CharList[i];
193 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { 192 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED ||
194 bNormal = TRUE; 193 (charinfo.m_Unicode != 0 && !IsControlChar(charinfo))) {
195 } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) {
196 bNormal = FALSE;
197 } else {
198 bNormal = TRUE;
199 }
200 if (bNormal) {
201 if (indexSize % 2) { 194 if (indexSize % 2) {
202 m_CharIndex.push_back(1); 195 m_CharIndex.push_back(1);
203 } else { 196 } else {
204 if (indexSize <= 0) { 197 if (indexSize <= 0)
205 continue; 198 continue;
206 }
207 m_CharIndex[indexSize - 1] += 1; 199 m_CharIndex[indexSize - 1] += 1;
208 } 200 }
209 } else { 201 } else {
210 if (indexSize % 2) { 202 if (indexSize % 2) {
211 if (indexSize <= 0) { 203 if (indexSize <= 0)
212 continue; 204 continue;
213 }
214 m_CharIndex[indexSize - 1] = i + 1; 205 m_CharIndex[indexSize - 1] = i + 1;
215 } else { 206 } else {
216 m_CharIndex.push_back(i + 1); 207 m_CharIndex.push_back(i + 1);
217 } 208 }
218 } 209 }
219 } 210 }
220 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); 211 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
221 if (indexSize % 2) { 212 if (indexSize % 2)
222 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); 213 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1);
223 }
224 } 214 }
225 215
226 int CPDF_TextPage::CountChars() const { 216 int CPDF_TextPage::CountChars() const {
227 return pdfium::CollectionSize<int>(m_CharList); 217 return pdfium::CollectionSize<int>(m_CharList);
228 } 218 }
229 219
230 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { 220 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const {
231 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); 221 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
232 int count = 0; 222 int count = 0;
233 for (int i = 0; i < indexSize; i += 2) { 223 for (int i = 0; i < indexSize; i += 2) {
(...skipping 444 matching lines...) Expand 10 before | Expand all | Expand 10 after
678 } 668 }
679 if (segmentStatus == 1) { 669 if (segmentStatus == 1) {
680 segmentStatus = 2; 670 segmentStatus = 2;
681 m_Segments.Add(segment); 671 m_Segments.Add(segment);
682 segment.m_Start = 0; 672 segment.m_Start = 0;
683 segment.m_nCount = 0; 673 segment.m_nCount = 0;
684 } 674 }
685 return m_Segments.GetSize(); 675 return m_Segments.GetSize();
686 } 676 }
687 677
688 int32_t CPDF_TextPage::FindTextlineFlowDirection() { 678 CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation()
679 const {
689 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); 680 const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth());
690 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); 681 const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight());
691 std::vector<uint8_t> nHorizontalMask(nPageWidth); 682 std::vector<uint8_t> nHorizontalMask(nPageWidth);
692 std::vector<uint8_t> nVerticalMask(nPageHeight); 683 std::vector<uint8_t> nVerticalMask(nPageHeight);
693 uint8_t* pDataH = nHorizontalMask.data(); 684 uint8_t* pDataH = nHorizontalMask.data();
694 uint8_t* pDataV = nVerticalMask.data(); 685 uint8_t* pDataV = nVerticalMask.data();
695 int32_t index = 0; 686 int32_t index = 0;
696 FX_FLOAT fLineHeight = 0.0f; 687 FX_FLOAT fLineHeight = 0.0f;
697 if (m_pPage->GetPageObjectList()->empty()) 688 if (m_pPage->GetPageObjectList()->empty())
698 return -1; 689 return TextOrientation::Unknown;
699 690
700 for (auto& pPageObj : *m_pPage->GetPageObjectList()) { 691 for (auto& pPageObj : *m_pPage->GetPageObjectList()) {
701 if (!pPageObj || !pPageObj->IsText()) 692 if (!pPageObj || !pPageObj->IsText())
702 continue; 693 continue;
703 694
704 int32_t minH = 695 int32_t minH =
705 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; 696 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left;
706 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth 697 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth
707 ? nPageWidth 698 ? nPageWidth
708 : (int32_t)pPageObj->m_Right; 699 : (int32_t)pPageObj->m_Right;
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
742 for (index = 0; index < nPageHeight; index++) { 733 for (index = 0; index < nPageHeight; index++) {
743 if (1 == nVerticalMask[index]) 734 if (1 == nVerticalMask[index])
744 break; 735 break;
745 } 736 }
746 nStartV = index; 737 nStartV = index;
747 for (index = nPageHeight; index > 0; index--) { 738 for (index = nPageHeight; index > 0; index--) {
748 if (1 == nVerticalMask[index - 1]) 739 if (1 == nVerticalMask[index - 1])
749 break; 740 break;
750 } 741 }
751 nEndV = index; 742 nEndV = index;
752 for (index = nStartV; index < nEndV; index++) { 743 for (index = nStartV; index < nEndV; index++)
753 nSumV += nVerticalMask[index]; 744 nSumV += nVerticalMask[index];
754 }
755 nSumV /= nEndV - nStartV; 745 nSumV /= nEndV - nStartV;
756 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) { 746
757 return 0; 747 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight))
758 } 748 return TextOrientation::Horizontal;
759 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) { 749 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight))
760 return 1; 750 return TextOrientation::Vertical;
761 } 751
762 if (nSumH > 0.8f) { 752 if (nSumH > 0.8f)
763 return 0; 753 return TextOrientation::Horizontal;
764 } 754
765 if (nSumH - nSumV > 0.0f) { 755 if (nSumH > nSumV)
766 return 0; 756 return TextOrientation::Horizontal;
767 } 757 if (nSumH < nSumV)
768 if (nSumV - nSumH > 0.0f) { 758 return TextOrientation::Vertical;
769 return 1; 759 return TextOrientation::Unknown;
770 } 760 }
771 return -1; 761
762 void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode,
763 const CFX_Matrix& formMatrix) {
764 PAGECHAR_INFO generateChar;
765 if (!GenerateCharInfo(unicode, generateChar))
766 return;
767
768 m_TextBuf.AppendChar(unicode);
769 if (!formMatrix.IsIdentity())
770 generateChar.m_Matrix.Copy(formMatrix);
771 m_CharList.push_back(generateChar);
772 } 772 }
773 773
774 void CPDF_TextPage::ProcessObject() { 774 void CPDF_TextPage::ProcessObject() {
775 if (m_pPage->GetPageObjectList()->empty()) 775 if (m_pPage->GetPageObjectList()->empty())
776 return; 776 return;
777 777
778 m_TextlineDir = FindTextlineFlowDirection(); 778 m_TextlineDir = FindTextlineFlowOrientation();
779 const CPDF_PageObjectList* pObjList = m_pPage->GetPageObjectList(); 779 const CPDF_PageObjectList* pObjList = m_pPage->GetPageObjectList();
780 for (auto it = pObjList->begin(); it != pObjList->end(); ++it) { 780 for (auto it = pObjList->begin(); it != pObjList->end(); ++it) {
781 if (CPDF_PageObject* pObj = it->get()) { 781 if (CPDF_PageObject* pObj = it->get()) {
782 if (pObj->IsText()) { 782 if (pObj->IsText()) {
783 CFX_Matrix matrix; 783 CFX_Matrix matrix;
784 ProcessTextObject(pObj->AsText(), matrix, pObjList, it); 784 ProcessTextObject(pObj->AsText(), matrix, pObjList, it);
785 } else if (pObj->IsForm()) { 785 } else if (pObj->IsForm()) {
786 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0); 786 CFX_Matrix formMatrix(1, 0, 0, 1, 0, 0);
787 ProcessFormObject(pObj->AsForm(), formMatrix); 787 ProcessFormObject(pObj->AsForm(), formMatrix);
788 } 788 }
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
826 CFX_ByteString str; 826 CFX_ByteString str;
827 pFont->AppendChar(str, charCode); 827 pFont->AppendChar(str, charCode);
828 if (int w = pFont->GetStringWidth(str.c_str(), 1)) 828 if (int w = pFont->GetStringWidth(str.c_str(), 1))
829 return w; 829 return w;
830 830
831 return pFont->GetCharBBox(charCode).Width(); 831 return pFont->GetCharBBox(charCode).Width();
832 } 832 }
833 833
834 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, 834 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
835 PAGECHAR_INFO info) { 835 PAGECHAR_INFO info) {
836 if (!IsControlChar(info)) { 836 if (IsControlChar(info)) {
837 info.m_Index = m_TextBuf.GetLength();
838 if (wChar >= 0xFB00 && wChar <= 0xFB06) {
839 FX_WCHAR* pDst = nullptr;
840 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
841 if (nCount >= 1) {
842 pDst = FX_Alloc(FX_WCHAR, nCount);
843 Unicode_GetNormalization(wChar, pDst);
844 for (int nIndex = 0; nIndex < nCount; nIndex++) {
845 PAGECHAR_INFO info2 = info;
846 info2.m_Unicode = pDst[nIndex];
847 info2.m_Flag = FPDFTEXT_CHAR_PIECE;
848 m_TextBuf.AppendChar(info2.m_Unicode);
849 m_CharList.push_back(info2);
850 }
851 FX_Free(pDst);
852 return;
853 }
854 }
855 m_TextBuf.AppendChar(wChar);
856 } else {
857 info.m_Index = -1; 837 info.m_Index = -1;
838 m_CharList.push_back(info);
839 return;
858 } 840 }
859 m_CharList.push_back(info);
860 }
861 841
862 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, 842 info.m_Index = m_TextBuf.GetLength();
863 PAGECHAR_INFO info) { 843 if (wChar >= 0xFB00 && wChar <= 0xFB06) {
864 if (!IsControlChar(info)) {
865 info.m_Index = m_TextBuf.GetLength();
866 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
867 FX_WCHAR* pDst = nullptr; 844 FX_WCHAR* pDst = nullptr;
868 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); 845 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
869 if (nCount >= 1) { 846 if (nCount >= 1) {
870 pDst = FX_Alloc(FX_WCHAR, nCount); 847 pDst = FX_Alloc(FX_WCHAR, nCount);
871 Unicode_GetNormalization(wChar, pDst); 848 Unicode_GetNormalization(wChar, pDst);
872 for (int nIndex = 0; nIndex < nCount; nIndex++) { 849 for (int nIndex = 0; nIndex < nCount; nIndex++) {
873 PAGECHAR_INFO info2 = info; 850 PAGECHAR_INFO info2 = info;
874 info2.m_Unicode = pDst[nIndex]; 851 info2.m_Unicode = pDst[nIndex];
875 info2.m_Flag = FPDFTEXT_CHAR_PIECE; 852 info2.m_Flag = FPDFTEXT_CHAR_PIECE;
876 m_TextBuf.AppendChar(info2.m_Unicode); 853 m_TextBuf.AppendChar(info2.m_Unicode);
877 m_CharList.push_back(info2); 854 m_CharList.push_back(info2);
878 } 855 }
879 FX_Free(pDst); 856 FX_Free(pDst);
880 return; 857 return;
881 } 858 }
882 info.m_Unicode = wChar;
883 m_TextBuf.AppendChar(info.m_Unicode);
884 } else {
885 info.m_Index = -1;
886 } 859 }
860 m_TextBuf.AppendChar(wChar);
887 m_CharList.push_back(info); 861 m_CharList.push_back(info);
888 } 862 }
889 863
864 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
865 PAGECHAR_INFO info) {
866 if (IsControlChar(info)) {
867 info.m_Index = -1;
868 m_CharList.push_back(info);
869 return;
870 }
871
872 info.m_Index = m_TextBuf.GetLength();
873 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
874 FX_WCHAR* pDst = nullptr;
875 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
876 if (nCount >= 1) {
877 pDst = FX_Alloc(FX_WCHAR, nCount);
878 Unicode_GetNormalization(wChar, pDst);
879 for (int nIndex = 0; nIndex < nCount; nIndex++) {
880 PAGECHAR_INFO info2 = info;
881 info2.m_Unicode = pDst[nIndex];
882 info2.m_Flag = FPDFTEXT_CHAR_PIECE;
883 m_TextBuf.AppendChar(info2.m_Unicode);
884 m_CharList.push_back(info2);
885 }
886 FX_Free(pDst);
887 return;
888 }
889 info.m_Unicode = wChar;
890 m_TextBuf.AppendChar(info.m_Unicode);
891 m_CharList.push_back(info);
892 }
893
890 void CPDF_TextPage::CloseTempLine() { 894 void CPDF_TextPage::CloseTempLine() {
891 if (m_TempCharList.empty()) 895 if (m_TempCharList.empty())
892 return; 896 return;
893 897
894 CFX_WideString str = m_TempTextBuf.MakeString(); 898 CFX_WideString str = m_TempTextBuf.MakeString();
895 FX_BOOL bPrevSpace = FALSE; 899 FX_BOOL bPrevSpace = FALSE;
896 for (int i = 0; i < str.GetLength(); i++) { 900 for (int i = 0; i < str.GetLength(); i++) {
897 if (str.GetAt(i) != ' ') { 901 if (str.GetAt(i) != ' ') {
898 bPrevSpace = FALSE; 902 bPrevSpace = FALSE;
899 continue; 903 continue;
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after
1138 void CPDF_TextPage::FindPreviousTextObject() { 1142 void CPDF_TextPage::FindPreviousTextObject() {
1139 if (m_TempCharList.empty() && m_CharList.empty()) 1143 if (m_TempCharList.empty() && m_CharList.empty())
1140 return; 1144 return;
1141 1145
1142 PAGECHAR_INFO preChar = 1146 PAGECHAR_INFO preChar =
1143 m_TempCharList.empty() ? m_CharList.back() : m_TempCharList.back(); 1147 m_TempCharList.empty() ? m_CharList.back() : m_TempCharList.back();
1144 1148
1145 if (preChar.m_pTextObj) 1149 if (preChar.m_pTextObj)
1146 m_pPreTextObj = preChar.m_pTextObj; 1150 m_pPreTextObj = preChar.m_pTextObj;
1147 } 1151 }
1152
1148 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, 1153 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend,
1149 int32_t iBufStartAppend) { 1154 int32_t iBufStartAppend) {
1150 int32_t i = iCharListStartAppend; 1155 int32_t i = iCharListStartAppend;
1151 int32_t j = pdfium::CollectionSize<int32_t>(m_TempCharList) - 1; 1156 int32_t j = pdfium::CollectionSize<int32_t>(m_TempCharList) - 1;
1152 for (; i < j; i++, j--) { 1157 for (; i < j; i++, j--) {
1153 std::swap(m_TempCharList[i], m_TempCharList[j]); 1158 std::swap(m_TempCharList[i], m_TempCharList[j]);
1154 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); 1159 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index);
1155 } 1160 }
1156 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer(); 1161 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer();
1157 i = iBufStartAppend; 1162 i = iBufStartAppend;
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
1191 CPDF_Font* pFont = pTextObj->GetFont(); 1196 CPDF_Font* pFont = pTextObj->GetFont();
1192 CFX_Matrix matrix; 1197 CFX_Matrix matrix;
1193 pTextObj->GetTextMatrix(&matrix); 1198 pTextObj->GetTextMatrix(&matrix);
1194 matrix.Concat(formMatrix); 1199 matrix.Concat(formMatrix);
1195 FPDFText_MarkedContent ePreMKC = PreMarkedContent(Obj); 1200 FPDFText_MarkedContent ePreMKC = PreMarkedContent(Obj);
1196 if (ePreMKC == FPDFText_MarkedContent::Done) { 1201 if (ePreMKC == FPDFText_MarkedContent::Done) {
1197 m_pPreTextObj = pTextObj; 1202 m_pPreTextObj = pTextObj;
1198 m_perMatrix.Copy(formMatrix); 1203 m_perMatrix.Copy(formMatrix);
1199 return; 1204 return;
1200 } 1205 }
1201 int result = 0; 1206 GenerateCharacter result = GenerateCharacter::None;
1202 if (m_pPreTextObj) { 1207 if (m_pPreTextObj) {
1203 result = ProcessInsertObject(pTextObj, formMatrix); 1208 result = ProcessInsertObject(pTextObj, formMatrix);
1204 if (2 == result) { 1209 if (result == GenerateCharacter::LineBreak) {
1205 m_CurlineRect = 1210 m_CurlineRect =
1206 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, 1211 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom,
1207 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); 1212 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top);
1208 } else { 1213 } else {
1209 m_CurlineRect.Union( 1214 m_CurlineRect.Union(
1210 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, 1215 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom,
1211 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top)); 1216 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top));
1212 } 1217 }
1213 PAGECHAR_INFO generateChar; 1218 switch (result) {
1214 if (result == 1) { 1219 case GenerateCharacter::None:
1215 if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) { 1220 break;
1216 if (!formMatrix.IsIdentity()) { 1221 case GenerateCharacter::Space: {
1217 generateChar.m_Matrix.Copy(formMatrix); 1222 PAGECHAR_INFO generateChar;
1223 if (GenerateCharInfo(TEXT_SPACE_CHAR, generateChar)) {
1224 if (!formMatrix.IsIdentity())
1225 generateChar.m_Matrix.Copy(formMatrix);
1226 m_TempTextBuf.AppendChar(TEXT_SPACE_CHAR);
1227 m_TempCharList.push_back(generateChar);
1218 } 1228 }
1219 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); 1229 break;
1220 m_TempCharList.push_back(generateChar);
1221 } 1230 }
1222 } else if (result == 2) { 1231 case GenerateCharacter::LineBreak:
1223 CloseTempLine(); 1232 CloseTempLine();
1224 if (m_TextBuf.GetSize()) { 1233 if (m_TextBuf.GetSize()) {
1225 if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { 1234 AppendGeneratedCharacter(TEXT_RETURN_CHAR, formMatrix);
1226 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); 1235 AppendGeneratedCharacter(TEXT_LINEFEED_CHAR, formMatrix);
1227 if (!formMatrix.IsIdentity()) { 1236 }
1228 generateChar.m_Matrix.Copy(formMatrix); 1237 break;
1238 case GenerateCharacter::Hyphen:
1239 if (pTextObj->CountChars() == 1) {
1240 CPDF_TextObjectItem item;
1241 pTextObj->GetCharInfo(0, &item);
1242 CFX_WideString wstrItem =
1243 pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
1244 if (wstrItem.IsEmpty()) {
1245 wstrItem += (FX_WCHAR)item.m_CharCode;
1229 } 1246 }
1230 m_CharList.push_back(generateChar); 1247 FX_WCHAR curChar = wstrItem.GetAt(0);
1248 if (curChar == 0x2D || curChar == 0xAD)
1249 return;
1231 } 1250 }
1232 if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { 1251 while (m_TempTextBuf.GetSize() > 0 &&
1233 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); 1252 m_TempTextBuf.AsStringC().GetAt(m_TempTextBuf.GetLength() - 1) ==
1234 if (!formMatrix.IsIdentity()) { 1253 0x20) {
1235 generateChar.m_Matrix.Copy(formMatrix); 1254 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
1236 } 1255 m_TempCharList.pop_back();
1237 m_CharList.push_back(generateChar);
1238 } 1256 }
1239 } 1257 PAGECHAR_INFO* charinfo = &m_TempCharList.back();
1240 } else if (result == 3) {
1241 int32_t nChars = pTextObj->CountChars();
1242 if (nChars == 1) {
1243 CPDF_TextObjectItem item;
1244 pTextObj->GetCharInfo(0, &item);
1245 CFX_WideString wstrItem =
1246 pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
1247 if (wstrItem.IsEmpty()) {
1248 wstrItem += (FX_WCHAR)item.m_CharCode;
1249 }
1250 FX_WCHAR curChar = wstrItem.GetAt(0);
1251 if (0x2D == curChar || 0xAD == curChar) {
1252 return;
1253 }
1254 }
1255 while (m_TempTextBuf.GetSize() > 0 &&
1256 m_TempTextBuf.AsStringC().GetAt(m_TempTextBuf.GetLength() - 1) ==
1257 0x20) {
1258 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); 1258 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
1259 m_TempCharList.pop_back(); 1259 charinfo->m_Unicode = 0x2;
1260 } 1260 charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN;
1261 PAGECHAR_INFO* charinfo = &m_TempCharList.back(); 1261 m_TempTextBuf.AppendChar(0xfffe);
1262 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); 1262 break;
1263 charinfo->m_Unicode = 0x2;
1264 charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN;
1265 m_TempTextBuf.AppendChar(0xfffe);
1266 } 1263 }
1267 } else { 1264 } else {
1268 m_CurlineRect = 1265 m_CurlineRect =
1269 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, 1266 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom,
1270 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); 1267 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top);
1271 } 1268 }
1272 if (ePreMKC == FPDFText_MarkedContent::Delay) { 1269 if (ePreMKC == FPDFText_MarkedContent::Delay) {
1273 ProcessMarkedContent(Obj); 1270 ProcessMarkedContent(Obj);
1274 m_pPreTextObj = pTextObj; 1271 m_pPreTextObj = pTextObj;
1275 m_perMatrix.Copy(formMatrix); 1272 m_perMatrix.Copy(formMatrix);
(...skipping 16 matching lines...) Expand all
1292 CPDF_TextObjectItem item; 1289 CPDF_TextObjectItem item;
1293 PAGECHAR_INFO charinfo; 1290 PAGECHAR_INFO charinfo;
1294 charinfo.m_OriginX = 0; 1291 charinfo.m_OriginX = 0;
1295 charinfo.m_OriginY = 0; 1292 charinfo.m_OriginY = 0;
1296 pTextObj->GetItemInfo(i, &item); 1293 pTextObj->GetItemInfo(i, &item);
1297 if (item.m_CharCode == (uint32_t)-1) { 1294 if (item.m_CharCode == (uint32_t)-1) {
1298 CFX_WideString str = m_TempTextBuf.MakeString(); 1295 CFX_WideString str = m_TempTextBuf.MakeString();
1299 if (str.IsEmpty()) { 1296 if (str.IsEmpty()) {
1300 str = m_TextBuf.AsStringC(); 1297 str = m_TextBuf.AsStringC();
1301 } 1298 }
1302 if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { 1299 if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_SPACE_CHAR)
1303 continue; 1300 continue;
1304 } 1301
1305 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); 1302 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
1306 spacing = -fontsize_h * item.m_OriginX / 1000; 1303 spacing = -fontsize_h * item.m_OriginX / 1000;
1307 continue; 1304 continue;
1308 } 1305 }
1309 FX_FLOAT charSpace = pTextObj->m_TextState.GetObject()->m_CharSpace; 1306 FX_FLOAT charSpace = pTextObj->m_TextState.GetObject()->m_CharSpace;
1310 if (charSpace > 0.001) { 1307 if (charSpace > 0.001) {
1311 spacing += matrix.TransformDistance(charSpace); 1308 spacing += matrix.TransformDistance(charSpace);
1312 } else if (charSpace < -0.001) { 1309 } else if (charSpace < -0.001) {
1313 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace)); 1310 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace));
1314 } 1311 }
(...skipping 13 matching lines...) Expand all
1328 } 1325 }
1329 if (threshold == 0) { 1326 if (threshold == 0) {
1330 threshold = fontsize_h; 1327 threshold = fontsize_h;
1331 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); 1328 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont));
1332 threshold = this_width > last_width ? (FX_FLOAT)this_width 1329 threshold = this_width > last_width ? (FX_FLOAT)this_width
1333 : (FX_FLOAT)last_width; 1330 : (FX_FLOAT)last_width;
1334 threshold = NormalizeThreshold(threshold); 1331 threshold = NormalizeThreshold(threshold);
1335 threshold = fontsize_h * threshold / 1000; 1332 threshold = fontsize_h * threshold / 1000;
1336 } 1333 }
1337 if (threshold && (spacing && spacing >= threshold)) { 1334 if (threshold && (spacing && spacing >= threshold)) {
1338 charinfo.m_Unicode = TEXT_BLANK_CHAR; 1335 charinfo.m_Unicode = TEXT_SPACE_CHAR;
1339 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; 1336 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED;
1340 charinfo.m_pTextObj = pTextObj; 1337 charinfo.m_pTextObj = pTextObj;
1341 charinfo.m_Index = m_TextBuf.GetLength(); 1338 charinfo.m_Index = m_TextBuf.GetLength();
1342 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); 1339 m_TempTextBuf.AppendChar(TEXT_SPACE_CHAR);
1343 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode; 1340 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode;
1344 charinfo.m_Matrix.Copy(formMatrix); 1341 charinfo.m_Matrix.Copy(formMatrix);
1345 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, 1342 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX,
1346 charinfo.m_OriginY); 1343 charinfo.m_OriginY);
1347 charinfo.m_CharBox = 1344 charinfo.m_CharBox =
1348 CFX_FloatRect(charinfo.m_OriginX, charinfo.m_OriginY, 1345 CFX_FloatRect(charinfo.m_OriginX, charinfo.m_OriginY,
1349 charinfo.m_OriginX, charinfo.m_OriginY); 1346 charinfo.m_OriginX, charinfo.m_OriginY);
1350 m_TempCharList.push_back(charinfo); 1347 m_TempCharList.push_back(charinfo);
1351 } 1348 }
1352 if (item.m_CharCode == CPDF_Font::kInvalidCharCode) { 1349 if (item.m_CharCode == CPDF_Font::kInvalidCharCode) {
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
1426 charinfo.m_Index = m_TextBuf.GetLength(); 1423 charinfo.m_Index = m_TextBuf.GetLength();
1427 m_TempTextBuf.AppendChar(charinfo.m_Unicode); 1424 m_TempTextBuf.AppendChar(charinfo.m_Unicode);
1428 } else { 1425 } else {
1429 m_TempTextBuf.AppendChar(0xfffe); 1426 m_TempTextBuf.AppendChar(0xfffe);
1430 } 1427 }
1431 m_TempCharList.push_back(charinfo); 1428 m_TempCharList.push_back(charinfo);
1432 } 1429 }
1433 } else if (i == 0) { 1430 } else if (i == 0) {
1434 CFX_WideString str = m_TempTextBuf.MakeString(); 1431 CFX_WideString str = m_TempTextBuf.MakeString();
1435 if (!str.IsEmpty() && 1432 if (!str.IsEmpty() &&
1436 str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { 1433 str.GetAt(str.GetLength() - 1) == TEXT_SPACE_CHAR) {
1437 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); 1434 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
1438 m_TempCharList.pop_back(); 1435 m_TempCharList.pop_back();
1439 } 1436 }
1440 } 1437 }
1441 } 1438 }
1442 } 1439 }
1443 if (bIsBidiAndMirrorInverse) { 1440 if (bIsBidiAndMirrorInverse) {
1444 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); 1441 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend);
1445 } 1442 }
1446 } 1443 }
1447 1444
1448 int32_t CPDF_TextPage::GetTextObjectWritingMode( 1445 CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode(
1449 const CPDF_TextObject* pTextObj) { 1446 const CPDF_TextObject* pTextObj) const {
1450 int32_t nChars = pTextObj->CountChars(); 1447 int32_t nChars = pTextObj->CountChars();
1451 if (nChars == 1) { 1448 if (nChars == 1)
1452 return m_TextlineDir; 1449 return m_TextlineDir;
1453 } 1450
1454 CPDF_TextObjectItem first, last; 1451 CPDF_TextObjectItem first, last;
1455 pTextObj->GetCharInfo(0, &first); 1452 pTextObj->GetCharInfo(0, &first);
1456 pTextObj->GetCharInfo(nChars - 1, &last); 1453 pTextObj->GetCharInfo(nChars - 1, &last);
1457 CFX_Matrix textMatrix; 1454 CFX_Matrix textMatrix;
1458 pTextObj->GetTextMatrix(&textMatrix); 1455 pTextObj->GetTextMatrix(&textMatrix);
1459 textMatrix.TransformPoint(first.m_OriginX, first.m_OriginY); 1456 textMatrix.TransformPoint(first.m_OriginX, first.m_OriginY);
1460 textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY); 1457 textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY);
1461 FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX); 1458 FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX);
1462 FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY); 1459 FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY);
1463 if (dX <= 0.0001f && dY <= 0.0001f) { 1460 if (dX <= 0.0001f && dY <= 0.0001f)
1464 return -1; 1461 return TextOrientation::Unknown;
1465 } 1462
1466 CFX_VectorF v(dX, dY); 1463 CFX_VectorF v(dX, dY);
1467 v.Normalize(); 1464 v.Normalize();
1468 if (v.y <= 0.0872f) { 1465 if (v.y <= 0.0872f)
1469 return v.x <= 0.0872f ? m_TextlineDir : 0; 1466 return v.x <= 0.0872f ? m_TextlineDir : TextOrientation::Horizontal;
1470 } 1467
1471 if (v.x <= 0.0872f) { 1468 if (v.x <= 0.0872f)
1472 return 1; 1469 return TextOrientation::Vertical;
1473 } 1470
1474 return m_TextlineDir; 1471 return m_TextlineDir;
1475 } 1472 }
1473
1476 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) { 1474 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) {
1477 CFX_WideString strCurText = m_TempTextBuf.MakeString(); 1475 CFX_WideString strCurText = m_TempTextBuf.MakeString();
1478 if (strCurText.GetLength() == 0) { 1476 if (strCurText.GetLength() == 0) {
1479 strCurText = m_TextBuf.AsStringC(); 1477 strCurText = m_TextBuf.AsStringC();
1480 } 1478 }
1481 FX_STRSIZE nCount = strCurText.GetLength(); 1479 FX_STRSIZE nCount = strCurText.GetLength();
1482 int nIndex = nCount - 1; 1480 int nIndex = nCount - 1;
1483 FX_WCHAR wcTmp = strCurText.GetAt(nIndex); 1481 FX_WCHAR wcTmp = strCurText.GetAt(nIndex);
1484 while (wcTmp == 0x20 && nIndex <= nCount - 1 && nIndex >= 0) { 1482 while (wcTmp == 0x20 && nIndex <= nCount - 1 && nIndex >= 0) {
1485 wcTmp = strCurText.GetAt(--nIndex); 1483 wcTmp = strCurText.GetAt(--nIndex);
(...skipping 17 matching lines...) Expand all
1503 return FALSE; 1501 return FALSE;
1504 } 1502 }
1505 if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag && 1503 if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag &&
1506 (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) { 1504 (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) {
1507 return TRUE; 1505 return TRUE;
1508 } 1506 }
1509 } 1507 }
1510 return FALSE; 1508 return FALSE;
1511 } 1509 }
1512 1510
1513 int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, 1511 CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
1514 const CFX_Matrix& formMatrix) { 1512 const CPDF_TextObject* pObj,
1513 const CFX_Matrix& formMatrix) {
1515 FindPreviousTextObject(); 1514 FindPreviousTextObject();
1516 FX_BOOL bNewline = FALSE; 1515 TextOrientation WritingMode = GetTextObjectWritingMode(pObj);
1517 int WritingMode = GetTextObjectWritingMode(pObj); 1516 if (WritingMode == TextOrientation::Unknown)
1518 if (WritingMode == -1) {
1519 WritingMode = GetTextObjectWritingMode(m_pPreTextObj); 1517 WritingMode = GetTextObjectWritingMode(m_pPreTextObj);
1520 } 1518
1521 CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, 1519 CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right,
1522 pObj->m_Top); 1520 pObj->m_Top);
1523 CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, 1521 CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom,
1524 m_pPreTextObj->m_Right, m_pPreTextObj->m_Top); 1522 m_pPreTextObj->m_Right, m_pPreTextObj->m_Top);
1525 CPDF_TextObjectItem PrevItem, item; 1523 CPDF_TextObjectItem PrevItem, item;
1526 int nItem = m_pPreTextObj->CountItems(); 1524 int nItem = m_pPreTextObj->CountItems();
1527 m_pPreTextObj->GetItemInfo(nItem - 1, &PrevItem); 1525 m_pPreTextObj->GetItemInfo(nItem - 1, &PrevItem);
1528 pObj->GetItemInfo(0, &item); 1526 pObj->GetItemInfo(0, &item);
1529 CFX_WideString wstrItem = 1527 CFX_WideString wstrItem =
1530 pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); 1528 pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
1531 if (wstrItem.IsEmpty()) { 1529 if (wstrItem.IsEmpty()) {
1532 wstrItem += (FX_WCHAR)item.m_CharCode; 1530 wstrItem += (FX_WCHAR)item.m_CharCode;
1533 } 1531 }
1534 FX_WCHAR curChar = wstrItem.GetAt(0); 1532 FX_WCHAR curChar = wstrItem.GetAt(0);
1535 if (WritingMode == 0) { 1533 if (WritingMode == TextOrientation::Horizontal) {
1536 if (this_rect.Height() > 4.5 && prev_rect.Height() > 4.5) { 1534 if (this_rect.Height() > 4.5 && prev_rect.Height() > 4.5) {
1537 FX_FLOAT top = 1535 FX_FLOAT top =
1538 this_rect.top < prev_rect.top ? this_rect.top : prev_rect.top; 1536 this_rect.top < prev_rect.top ? this_rect.top : prev_rect.top;
1539 FX_FLOAT bottom = this_rect.bottom > prev_rect.bottom ? this_rect.bottom 1537 FX_FLOAT bottom = this_rect.bottom > prev_rect.bottom ? this_rect.bottom
1540 : prev_rect.bottom; 1538 : prev_rect.bottom;
1541 if (bottom >= top) { 1539 if (bottom >= top) {
1542 if (IsHyphen(curChar)) { 1540 return IsHyphen(curChar) ? GenerateCharacter::Hyphen
1543 return 3; 1541 : GenerateCharacter::LineBreak;
1544 }
1545 return 2;
1546 } 1542 }
1547 } 1543 }
1548 } else if (WritingMode == 1) { 1544 } else if (WritingMode == TextOrientation::Vertical) {
1549 if (this_rect.Width() > pObj->GetFontSize() * 0.1f && 1545 if (this_rect.Width() > pObj->GetFontSize() * 0.1f &&
1550 prev_rect.Width() > m_pPreTextObj->GetFontSize() * 0.1f) { 1546 prev_rect.Width() > m_pPreTextObj->GetFontSize() * 0.1f) {
1551 FX_FLOAT left = this_rect.left > m_CurlineRect.left ? this_rect.left 1547 FX_FLOAT left = this_rect.left > m_CurlineRect.left ? this_rect.left
1552 : m_CurlineRect.left; 1548 : m_CurlineRect.left;
1553 FX_FLOAT right = this_rect.right < m_CurlineRect.right 1549 FX_FLOAT right = this_rect.right < m_CurlineRect.right
1554 ? this_rect.right 1550 ? this_rect.right
1555 : m_CurlineRect.right; 1551 : m_CurlineRect.right;
1556 if (right <= left) { 1552 if (right <= left) {
1557 if (IsHyphen(curChar)) { 1553 return IsHyphen(curChar) ? GenerateCharacter::Hyphen
1558 return 3; 1554 : GenerateCharacter::LineBreak;
1559 }
1560 return 2;
1561 } 1555 }
1562 } 1556 }
1563 } 1557 }
1564 FX_FLOAT last_pos = PrevItem.m_OriginX; 1558 FX_FLOAT last_pos = PrevItem.m_OriginX;
1565 int nLastWidth = GetCharWidth(PrevItem.m_CharCode, m_pPreTextObj->GetFont()); 1559 int nLastWidth = GetCharWidth(PrevItem.m_CharCode, m_pPreTextObj->GetFont());
1566 FX_FLOAT last_width = nLastWidth * m_pPreTextObj->GetFontSize() / 1000; 1560 FX_FLOAT last_width = nLastWidth * m_pPreTextObj->GetFontSize() / 1000;
1567 last_width = FXSYS_fabs(last_width); 1561 last_width = FXSYS_fabs(last_width);
1568 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont()); 1562 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont());
1569 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000; 1563 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000;
1570 this_width = FXSYS_fabs(this_width); 1564 this_width = FXSYS_fabs(this_width);
1571 FX_FLOAT threshold = 1565 FX_FLOAT threshold =
1572 last_width > this_width ? last_width / 4 : this_width / 4; 1566 last_width > this_width ? last_width / 4 : this_width / 4;
1573 CFX_Matrix prev_matrix, prev_reverse; 1567 CFX_Matrix prev_matrix, prev_reverse;
1574 m_pPreTextObj->GetTextMatrix(&prev_matrix); 1568 m_pPreTextObj->GetTextMatrix(&prev_matrix);
1575 prev_matrix.Concat(m_perMatrix); 1569 prev_matrix.Concat(m_perMatrix);
1576 prev_reverse.SetReverse(prev_matrix); 1570 prev_reverse.SetReverse(prev_matrix);
1577 FX_FLOAT x = pObj->GetPosX(); 1571 FX_FLOAT x = pObj->GetPosX();
1578 FX_FLOAT y = pObj->GetPosY(); 1572 FX_FLOAT y = pObj->GetPosY();
1579 formMatrix.Transform(x, y); 1573 formMatrix.Transform(x, y);
1580 prev_reverse.Transform(x, y); 1574 prev_reverse.Transform(x, y);
1581 if (last_width < this_width) { 1575 if (last_width < this_width) {
1582 threshold = prev_reverse.TransformDistance(threshold); 1576 threshold = prev_reverse.TransformDistance(threshold);
1583 } 1577 }
1584 CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom, 1578 bool bNewline = false;
1585 m_pPreTextObj->m_Right, pObj->m_Top); 1579 if (WritingMode == TextOrientation::Horizontal) {
1586 CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, 1580 CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom,
1587 m_pPreTextObj->m_Right, m_pPreTextObj->m_Top); 1581 m_pPreTextObj->m_Right, pObj->m_Top);
1588 CFX_FloatRect rect3 = rect1; 1582 CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom,
1589 rect1.Intersect(rect2); 1583 m_pPreTextObj->m_Right, m_pPreTextObj->m_Top);
1590 if (WritingMode == 0) { 1584 CFX_FloatRect rect3 = rect1;
1585 rect1.Intersect(rect2);
1591 if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5) || 1586 if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5) ||
1592 ((y > threshold * 2 || y < threshold * -3) && 1587 ((y > threshold * 2 || y < threshold * -3) &&
1593 (FXSYS_fabs(y) < 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : TRUE))) { 1588 (FXSYS_fabs(y) < 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : TRUE))) {
1594 bNewline = TRUE; 1589 bNewline = true;
1595 if (nItem > 1) { 1590 if (nItem > 1) {
1596 CPDF_TextObjectItem tempItem; 1591 CPDF_TextObjectItem tempItem;
1597 m_pPreTextObj->GetItemInfo(0, &tempItem); 1592 m_pPreTextObj->GetItemInfo(0, &tempItem);
1598 CFX_Matrix m; 1593 CFX_Matrix m;
1599 m_pPreTextObj->GetTextMatrix(&m); 1594 m_pPreTextObj->GetTextMatrix(&m);
1600 if (PrevItem.m_OriginX > tempItem.m_OriginX && 1595 if (PrevItem.m_OriginX > tempItem.m_OriginX &&
1601 m_DisplayMatrix.a > 0.9 && m_DisplayMatrix.b < 0.1 && 1596 m_DisplayMatrix.a > 0.9 && m_DisplayMatrix.b < 0.1 &&
1602 m_DisplayMatrix.c < 0.1 && m_DisplayMatrix.d < -0.9 && m.b < 0.1 && 1597 m_DisplayMatrix.c < 0.1 && m_DisplayMatrix.d < -0.9 && m.b < 0.1 &&
1603 m.c < 0.1) { 1598 m.c < 0.1) {
1604 CFX_FloatRect re(0, m_pPreTextObj->m_Bottom, 1000, 1599 CFX_FloatRect re(0, m_pPreTextObj->m_Bottom, 1000,
1605 m_pPreTextObj->m_Top); 1600 m_pPreTextObj->m_Top);
1606 if (re.Contains(pObj->GetPosX(), pObj->GetPosY())) { 1601 if (re.Contains(pObj->GetPosX(), pObj->GetPosY())) {
1607 bNewline = FALSE; 1602 bNewline = false;
1608 } else { 1603 } else {
1609 CFX_FloatRect rect(0, pObj->m_Bottom, 1000, pObj->m_Top); 1604 CFX_FloatRect rect(0, pObj->m_Bottom, 1000, pObj->m_Top);
1610 if (rect.Contains(m_pPreTextObj->GetPosX(), 1605 if (rect.Contains(m_pPreTextObj->GetPosX(),
1611 m_pPreTextObj->GetPosY())) { 1606 m_pPreTextObj->GetPosY())) {
1612 bNewline = FALSE; 1607 bNewline = false;
1613 } 1608 }
1614 } 1609 }
1615 } 1610 }
1616 } 1611 }
1617 } 1612 }
1618 } 1613 }
1619 if (bNewline) 1614 if (bNewline) {
1620 return IsHyphen(curChar) ? 3 : 2; 1615 return IsHyphen(curChar) ? GenerateCharacter::Hyphen
1616 : GenerateCharacter::LineBreak;
1617 }
1621 1618
1622 int32_t nChars = pObj->CountChars(); 1619 int32_t nChars = pObj->CountChars();
1623 if (nChars == 1 && (0x2D == curChar || 0xAD == curChar) && 1620 if (nChars == 1 && (0x2D == curChar || 0xAD == curChar) &&
1624 IsHyphen(curChar)) { 1621 IsHyphen(curChar)) {
1625 return 3; 1622 return GenerateCharacter::Hyphen;
1626 } 1623 }
1627 CFX_WideString PrevStr = 1624 CFX_WideString PrevStr =
1628 m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode); 1625 m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode);
1629 FX_WCHAR preChar = PrevStr.GetAt(PrevStr.GetLength() - 1); 1626 FX_WCHAR preChar = PrevStr.GetAt(PrevStr.GetLength() - 1);
1630 CFX_Matrix matrix; 1627 CFX_Matrix matrix;
1631 pObj->GetTextMatrix(&matrix); 1628 pObj->GetTextMatrix(&matrix);
1632 matrix.Concat(formMatrix); 1629 matrix.Concat(formMatrix);
1633 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth); 1630 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth);
1634 threshold = threshold > 400 1631 threshold = threshold > 400
1635 ? (threshold < 700 1632 ? (threshold < 700
(...skipping 10 matching lines...) Expand all
1646 threshold /= 1000; 1643 threshold /= 1000;
1647 if ((threshold < 1.4881 && threshold > 1.4879) || 1644 if ((threshold < 1.4881 && threshold > 1.4879) ||
1648 (threshold < 1.39001 && threshold > 1.38999)) { 1645 (threshold < 1.39001 && threshold > 1.38999)) {
1649 threshold *= 1.5; 1646 threshold *= 1.5;
1650 } 1647 }
1651 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && 1648 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' &&
1652 preChar != L' ') { 1649 preChar != L' ') {
1653 if (curChar != L' ' && preChar != L' ') { 1650 if (curChar != L' ' && preChar != L' ') {
1654 if ((x - last_pos - last_width) > threshold || 1651 if ((x - last_pos - last_width) > threshold ||
1655 (last_pos - x - last_width) > threshold) { 1652 (last_pos - x - last_width) > threshold) {
1656 return 1; 1653 return GenerateCharacter::Space;
1657 } 1654 }
1658 if (x < 0 && (last_pos - x - last_width) > threshold) { 1655 if (x < 0 && (last_pos - x - last_width) > threshold) {
1659 return 1; 1656 return GenerateCharacter::Space;
1660 } 1657 }
1661 if ((x - last_pos - last_width) > this_width || 1658 if ((x - last_pos - last_width) > this_width ||
1662 (x - last_pos - this_width) > last_width) { 1659 (x - last_pos - this_width) > last_width) {
1663 return 1; 1660 return GenerateCharacter::Space;
1664 } 1661 }
1665 } 1662 }
1666 } 1663 }
1667 return 0; 1664 return GenerateCharacter::None;
1668 } 1665 }
1669 1666
1670 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, 1667 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
1671 CPDF_TextObject* pTextObj2) { 1668 CPDF_TextObject* pTextObj2) {
1672 if (!pTextObj1 || !pTextObj2) { 1669 if (!pTextObj1 || !pTextObj2) {
1673 return FALSE; 1670 return FALSE;
1674 } 1671 }
1675 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, 1672 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom,
1676 pTextObj2->m_Right, pTextObj2->m_Top); 1673 pTextObj2->m_Right, pTextObj2->m_Top);
1677 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, 1674 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom,
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after
1907 int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray); 1904 int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray);
1908 int nResultPos = 0; 1905 int nResultPos = 0;
1909 int nStartPos = 0; 1906 int nStartPos = 0;
1910 nStartPos = m_findNextStart; 1907 nStartPos = m_findNextStart;
1911 FX_BOOL bSpaceStart = FALSE; 1908 FX_BOOL bSpaceStart = FALSE;
1912 for (int iWord = 0; iWord < nCount; iWord++) { 1909 for (int iWord = 0; iWord < nCount; iWord++) {
1913 CFX_WideString csWord = m_csFindWhatArray[iWord]; 1910 CFX_WideString csWord = m_csFindWhatArray[iWord];
1914 if (csWord.IsEmpty()) { 1911 if (csWord.IsEmpty()) {
1915 if (iWord == nCount - 1) { 1912 if (iWord == nCount - 1) {
1916 FX_WCHAR strInsert = m_strText.GetAt(nStartPos); 1913 FX_WCHAR strInsert = m_strText.GetAt(nStartPos);
1917 if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_BLANK_CHAR || 1914 if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_SPACE_CHAR ||
1918 strInsert == TEXT_RETURN_CHAR || strInsert == 160) { 1915 strInsert == TEXT_RETURN_CHAR || strInsert == 160) {
1919 nResultPos = nStartPos + 1; 1916 nResultPos = nStartPos + 1;
1920 break; 1917 break;
1921 } 1918 }
1922 iWord = -1; 1919 iWord = -1;
1923 } else if (iWord == 0) { 1920 } else if (iWord == 0) {
1924 bSpaceStart = TRUE; 1921 bSpaceStart = TRUE;
1925 } 1922 }
1926 continue; 1923 continue;
1927 } 1924 }
(...skipping 13 matching lines...) Expand all
1941 int curChar = csWord.GetAt(0); 1938 int curChar = csWord.GetAt(0);
1942 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; 1939 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1];
1943 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); 1940 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1);
1944 if (nStartPos == nResultPos && 1941 if (nStartPos == nResultPos &&
1945 !(IsIgnoreSpaceCharacter(lastChar) || 1942 !(IsIgnoreSpaceCharacter(lastChar) ||
1946 IsIgnoreSpaceCharacter(curChar))) { 1943 IsIgnoreSpaceCharacter(curChar))) {
1947 bMatch = FALSE; 1944 bMatch = FALSE;
1948 } 1945 }
1949 for (int d = PreResEndPos; d < nResultPos; d++) { 1946 for (int d = PreResEndPos; d < nResultPos; d++) {
1950 FX_WCHAR strInsert = m_strText.GetAt(d); 1947 FX_WCHAR strInsert = m_strText.GetAt(d);
1951 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && 1948 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
1952 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { 1949 strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
1953 bMatch = FALSE; 1950 bMatch = FALSE;
1954 break; 1951 break;
1955 } 1952 }
1956 } 1953 }
1957 } else if (bSpaceStart) { 1954 } else if (bSpaceStart) {
1958 if (nResultPos > 0) { 1955 if (nResultPos > 0) {
1959 FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1); 1956 FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1);
1960 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && 1957 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
1961 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { 1958 strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
1962 bMatch = FALSE; 1959 bMatch = FALSE;
1963 m_resStart = nResultPos; 1960 m_resStart = nResultPos;
1964 } else { 1961 } else {
1965 m_resStart = nResultPos - 1; 1962 m_resStart = nResultPos - 1;
1966 } 1963 }
1967 } 1964 }
1968 } 1965 }
1969 if (m_bMatchWholeWord && bMatch) { 1966 if (m_bMatchWholeWord && bMatch) {
1970 bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex); 1967 bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex);
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
2041 } 2038 }
2042 2039
2043 void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { 2040 void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
2044 if (findwhat.IsEmpty()) { 2041 if (findwhat.IsEmpty()) {
2045 return; 2042 return;
2046 } 2043 }
2047 int index = 0; 2044 int index = 0;
2048 while (1) { 2045 while (1) {
2049 CFX_WideString csWord = TEXT_EMPTY; 2046 CFX_WideString csWord = TEXT_EMPTY;
2050 int ret = 2047 int ret =
2051 ExtractSubString(csWord, findwhat.c_str(), index, TEXT_BLANK_CHAR); 2048 ExtractSubString(csWord, findwhat.c_str(), index, TEXT_SPACE_CHAR);
2052 if (csWord.IsEmpty()) { 2049 if (csWord.IsEmpty()) {
2053 if (ret) { 2050 if (ret) {
2054 m_csFindWhatArray.push_back(L""); 2051 m_csFindWhatArray.push_back(L"");
2055 index++; 2052 index++;
2056 continue; 2053 continue;
2057 } else { 2054 } else {
2058 break; 2055 break;
2059 } 2056 }
2060 } 2057 }
2061 int pos = 0; 2058 int pos = 0;
(...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after
2336 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; 2333 return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L"";
2337 } 2334 }
2338 2335
2339 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const { 2336 std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const {
2340 if (index >= m_LinkArray.size()) 2337 if (index >= m_LinkArray.size())
2341 return std::vector<CFX_FloatRect>(); 2338 return std::vector<CFX_FloatRect>();
2342 2339
2343 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, 2340 return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start,
2344 m_LinkArray[index].m_Count); 2341 m_LinkArray[index].m_Count);
2345 } 2342 }
OLDNEW
« no previous file with comments | « no previous file | core/fpdftext/include/cpdf_textpage.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698