| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "core/src/fpdftext/text_int.h" | 7 #include "core/src/fpdftext/text_int.h" |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <cctype> | 10 #include <cctype> |
| (...skipping 896 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 907 w = pFont->GetStringWidth(str, 1); | 907 w = pFont->GetStringWidth(str, 1); |
| 908 if (w == 0) { | 908 if (w == 0) { |
| 909 FX_RECT BBox; | 909 FX_RECT BBox; |
| 910 pFont->GetCharBBox(charCode, BBox); | 910 pFont->GetCharBBox(charCode, BBox); |
| 911 w = BBox.right - BBox.left; | 911 w = BBox.right - BBox.left; |
| 912 } | 912 } |
| 913 } | 913 } |
| 914 return w; | 914 return w; |
| 915 } | 915 } |
| 916 void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { | 916 void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { |
| 917 int32_t start, count; | 917 CFX_BidiChar::Segment seg = pBidi->GetSegmentInfo(); |
| 918 CFX_BidiChar::Direction ret = pBidi->GetBidiInfo(&start, &count); | 918 if (seg.direction == CFX_BidiChar::RIGHT) { |
| 919 if (ret == CFX_BidiChar::RIGHT) { | 919 for (int i = seg.start + seg.count; i > seg.start; i--) { |
| 920 for (int i = start + count - 1; i >= start; i--) { | 920 m_TextBuf.AppendChar(str.GetAt(i - 1)); |
| 921 m_TextBuf.AppendChar(str.GetAt(i)); | 921 m_CharList.push_back(m_TempCharList[i - 1]); |
| 922 m_CharList.push_back(m_TempCharList[i]); | |
| 923 } | 922 } |
| 924 } else { | 923 } else { |
| 925 int end = start + count; | 924 for (int i = seg.start; i < seg.start + seg.count; i++) { |
| 926 for (int i = start; i < end; i++) { | |
| 927 m_TextBuf.AppendChar(str.GetAt(i)); | 925 m_TextBuf.AppendChar(str.GetAt(i)); |
| 928 m_CharList.push_back(m_TempCharList[i]); | 926 m_CharList.push_back(m_TempCharList[i]); |
| 929 } | 927 } |
| 930 } | 928 } |
| 931 } | 929 } |
| 932 void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) { | 930 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, |
| 933 PAGECHAR_INFO info = m_TempCharList[i]; | 931 PAGECHAR_INFO info) { |
| 934 FX_WCHAR wChar = str.GetAt(i); | |
| 935 if (!IsControlChar(info)) { | 932 if (!IsControlChar(info)) { |
| 936 info.m_Index = m_TextBuf.GetLength(); | 933 info.m_Index = m_TextBuf.GetLength(); |
| 937 if (wChar >= 0xFB00 && wChar <= 0xFB06) { | 934 if (wChar >= 0xFB00 && wChar <= 0xFB06) { |
| 938 FX_WCHAR* pDst = NULL; | 935 FX_WCHAR* pDst = NULL; |
| 939 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 936 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
| 940 if (nCount >= 1) { | 937 if (nCount >= 1) { |
| 941 pDst = FX_Alloc(FX_WCHAR, nCount); | 938 pDst = FX_Alloc(FX_WCHAR, nCount); |
| 942 FX_Unicode_GetNormalization(wChar, pDst); | 939 FX_Unicode_GetNormalization(wChar, pDst); |
| 943 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 940 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| 944 PAGECHAR_INFO info2 = info; | 941 PAGECHAR_INFO info2 = info; |
| 945 info2.m_Unicode = pDst[nIndex]; | 942 info2.m_Unicode = pDst[nIndex]; |
| 946 info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 943 info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
| 947 m_TextBuf.AppendChar(info2.m_Unicode); | 944 m_TextBuf.AppendChar(info2.m_Unicode); |
| 948 m_CharList.push_back(info2); | 945 m_CharList.push_back(info2); |
| 949 } | 946 } |
| 950 FX_Free(pDst); | 947 FX_Free(pDst); |
| 951 return; | 948 return; |
| 952 } | 949 } |
| 953 } | 950 } |
| 954 m_TextBuf.AppendChar(wChar); | 951 m_TextBuf.AppendChar(wChar); |
| 955 } else { | 952 } else { |
| 956 info.m_Index = -1; | 953 info.m_Index = -1; |
| 957 } | 954 } |
| 958 m_CharList.push_back(info); | 955 m_CharList.push_back(info); |
| 959 } | 956 } |
| 960 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) { | 957 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, |
| 961 PAGECHAR_INFO info = m_TempCharList[i]; | 958 PAGECHAR_INFO info) { |
| 962 if (!IsControlChar(info)) { | 959 if (!IsControlChar(info)) { |
| 963 info.m_Index = m_TextBuf.GetLength(); | 960 info.m_Index = m_TextBuf.GetLength(); |
| 964 FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE); | 961 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); |
| 965 FX_WCHAR* pDst = NULL; | 962 FX_WCHAR* pDst = NULL; |
| 966 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 963 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
| 967 if (nCount >= 1) { | 964 if (nCount >= 1) { |
| 968 pDst = FX_Alloc(FX_WCHAR, nCount); | 965 pDst = FX_Alloc(FX_WCHAR, nCount); |
| 969 FX_Unicode_GetNormalization(wChar, pDst); | 966 FX_Unicode_GetNormalization(wChar, pDst); |
| 970 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 967 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| 971 PAGECHAR_INFO info2 = info; | 968 PAGECHAR_INFO info2 = info; |
| 972 info2.m_Unicode = pDst[nIndex]; | 969 info2.m_Unicode = pDst[nIndex]; |
| 973 info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 970 info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
| 974 m_TextBuf.AppendChar(info2.m_Unicode); | 971 m_TextBuf.AppendChar(info2.m_Unicode); |
| 975 m_CharList.push_back(info2); | 972 m_CharList.push_back(info2); |
| 976 } | 973 } |
| 977 FX_Free(pDst); | 974 FX_Free(pDst); |
| 978 return; | 975 return; |
| 979 } | 976 } |
| 980 info.m_Unicode = wChar; | 977 info.m_Unicode = wChar; |
| 981 m_TextBuf.AppendChar(info.m_Unicode); | 978 m_TextBuf.AppendChar(info.m_Unicode); |
| 982 } else { | 979 } else { |
| 983 info.m_Index = -1; | 980 info.m_Index = -1; |
| 984 } | 981 } |
| 985 m_CharList.push_back(info); | 982 m_CharList.push_back(info); |
| 986 } | 983 } |
| 984 |
| 987 void CPDF_TextPage::CloseTempLine() { | 985 void CPDF_TextPage::CloseTempLine() { |
| 988 if (m_TempCharList.empty()) { | 986 if (m_TempCharList.empty()) |
| 989 return; | 987 return; |
| 990 } | 988 |
| 991 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); | |
| 992 CFX_WideString str = m_TempTextBuf.GetWideString(); | 989 CFX_WideString str = m_TempTextBuf.GetWideString(); |
| 993 std::vector<FX_WORD> order; | |
| 994 FX_BOOL bR2L = FALSE; | |
| 995 int32_t start = 0, count = 0; | |
| 996 int nR2L = 0, nL2R = 0; | |
| 997 FX_BOOL bPrevSpace = FALSE; | 990 FX_BOOL bPrevSpace = FALSE; |
| 998 for (int i = 0; i < str.GetLength(); i++) { | 991 for (int i = 0; i < str.GetLength(); i++) { |
| 999 if (str.GetAt(i) == 32) { | 992 if (str.GetAt(i) != ' ') { |
| 1000 if (bPrevSpace) { | 993 bPrevSpace = FALSE; |
| 1001 m_TempTextBuf.Delete(i, 1); | 994 continue; |
| 1002 m_TempCharList.erase(m_TempCharList.begin() + i); | 995 } |
| 1003 str.Delete(i); | 996 if (bPrevSpace) { |
| 1004 i--; | 997 m_TempTextBuf.Delete(i, 1); |
| 1005 continue; | 998 m_TempCharList.erase(m_TempCharList.begin() + i); |
| 1006 } | 999 str.Delete(i); |
| 1007 bPrevSpace = TRUE; | 1000 i--; |
| 1001 } |
| 1002 bPrevSpace = TRUE; |
| 1003 } |
| 1004 CFX_BidiString bidi(str); |
| 1005 if (m_parserflag == FPDFTEXT_RLTB) |
| 1006 bidi.SetOverallDirection(CFX_BidiChar::RIGHT); |
| 1007 CFX_BidiChar::Direction eCurrentDirection = bidi.OverallDirection(); |
| 1008 for (const auto& segment : bidi) { |
| 1009 if (segment.direction == CFX_BidiChar::RIGHT || |
| 1010 (segment.direction == CFX_BidiChar::NEUTRAL && |
| 1011 eCurrentDirection == CFX_BidiChar::RIGHT)) { |
| 1012 eCurrentDirection = CFX_BidiChar::RIGHT; |
| 1013 for (int m = segment.start + segment.count; m > segment.start; --m) |
| 1014 AddCharInfoByRLDirection(bidi.CharAt(m - 1), m_TempCharList[m - 1]); |
| 1008 } else { | 1015 } else { |
| 1009 bPrevSpace = FALSE; | 1016 eCurrentDirection = CFX_BidiChar::LEFT; |
| 1010 } | 1017 for (int m = segment.start; m < segment.start + segment.count; m++) |
| 1011 if (pBidiChar->AppendChar(str.GetAt(i))) { | 1018 AddCharInfoByLRDirection(bidi.CharAt(m), m_TempCharList[m]); |
| 1012 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | |
| 1013 order.push_back(start); | |
| 1014 order.push_back(count); | |
| 1015 order.push_back(ret); | |
| 1016 if (!bR2L) { | |
| 1017 if (ret == CFX_BidiChar::RIGHT) { | |
| 1018 nR2L++; | |
| 1019 } else if (ret == CFX_BidiChar::LEFT) { | |
| 1020 nL2R++; | |
| 1021 } | |
| 1022 } | |
| 1023 } | |
| 1024 } | |
| 1025 if (pBidiChar->EndChar()) { | |
| 1026 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | |
| 1027 order.push_back(start); | |
| 1028 order.push_back(count); | |
| 1029 order.push_back(ret); | |
| 1030 if (!bR2L) { | |
| 1031 if (ret == CFX_BidiChar::RIGHT) { | |
| 1032 nR2L++; | |
| 1033 } else if (ret == CFX_BidiChar::LEFT) { | |
| 1034 nL2R++; | |
| 1035 } | |
| 1036 } | |
| 1037 } | |
| 1038 if (nR2L > 0 && nR2L >= nL2R) { | |
| 1039 bR2L = TRUE; | |
| 1040 } | |
| 1041 if (m_parserflag == FPDFTEXT_RLTB || bR2L) { | |
| 1042 int count = pdfium::CollectionSize<int>(order); | |
| 1043 for (int i = count - 1; i > 0; i -= 3) { | |
| 1044 int ret = order[i]; | |
| 1045 int count1 = order[i - 1]; | |
| 1046 int start = order[i - 2]; | |
| 1047 if (ret == 2 || ret == 0) { | |
| 1048 for (int j = start + count1 - 1; j >= start; j--) { | |
| 1049 AddCharInfoByRLDirection(str, j); | |
| 1050 } | |
| 1051 } else { | |
| 1052 int j = i; | |
| 1053 FX_BOOL bSymbol = FALSE; | |
| 1054 while (j > 0 && order[j] != 2) { | |
| 1055 bSymbol = !order[j]; | |
| 1056 j -= 3; | |
| 1057 } | |
| 1058 int end = start + count1; | |
| 1059 int n = 0; | |
| 1060 if (bSymbol) { | |
| 1061 n = j + 6; | |
| 1062 } else { | |
| 1063 n = j + 3; | |
| 1064 } | |
| 1065 if (n >= i) { | |
| 1066 for (int m = start; m < end; m++) { | |
| 1067 AddCharInfoByLRDirection(str, m); | |
| 1068 } | |
| 1069 } else { | |
| 1070 j = i; | |
| 1071 i = n; | |
| 1072 for (; n <= j; n += 3) { | |
| 1073 int start = order[n - 2]; | |
| 1074 int count1 = order[n - 1]; | |
| 1075 int end = start + count1; | |
| 1076 for (int m = start; m < end; m++) { | |
| 1077 AddCharInfoByLRDirection(str, m); | |
| 1078 } | |
| 1079 } | |
| 1080 } | |
| 1081 } | |
| 1082 } | |
| 1083 } else { | |
| 1084 int count = pdfium::CollectionSize<int>(order); | |
| 1085 FX_BOOL bL2R = FALSE; | |
| 1086 for (int i = 0; i < count; i += 3) { | |
| 1087 int start = order[i]; | |
| 1088 int count1 = order[i + 1]; | |
| 1089 int ret = order[i + 2]; | |
| 1090 if (ret == 2 || (i == 0 && ret == 0 && !bL2R)) { | |
| 1091 int j = i + 3; | |
| 1092 while (bR2L && j < count) { | |
| 1093 if (order[j + 2] == 1) | |
| 1094 break; | |
| 1095 j += 3; | |
| 1096 } | |
| 1097 if (j == 3) { | |
| 1098 i = -3; | |
| 1099 bL2R = TRUE; | |
| 1100 continue; | |
| 1101 } | |
| 1102 int end = pdfium::CollectionSize<int>(m_TempCharList) - 1; | |
| 1103 if (j < count) { | |
| 1104 end = order[j] - 1; | |
| 1105 } | |
| 1106 i = j - 3; | |
| 1107 for (int n = end; n >= start; n--) { | |
| 1108 AddCharInfoByRLDirection(str, n); | |
| 1109 } | |
| 1110 } else { | |
| 1111 int end = start + count1; | |
| 1112 for (int n = start; n < end; n++) { | |
| 1113 AddCharInfoByLRDirection(str, n); | |
| 1114 } | |
| 1115 } | |
| 1116 } | 1019 } |
| 1117 } | 1020 } |
| 1118 m_TempCharList.clear(); | 1021 m_TempCharList.clear(); |
| 1119 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength()); | 1022 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength()); |
| 1120 } | 1023 } |
| 1024 |
| 1121 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj, | 1025 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj, |
| 1122 const CFX_Matrix& formMatrix, | 1026 const CFX_Matrix& formMatrix, |
| 1123 FX_POSITION ObjPos) { | 1027 FX_POSITION ObjPos) { |
| 1124 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, | 1028 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, |
| 1125 pTextObj->m_Top); | 1029 pTextObj->m_Top); |
| 1126 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { | 1030 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { |
| 1127 return; | 1031 return; |
| 1128 } | 1032 } |
| 1129 int count = m_LineObj.GetSize(); | 1033 int count = m_LineObj.GetSize(); |
| 1130 PDFTEXT_Obj Obj; | 1034 PDFTEXT_Obj Obj; |
| (...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1353 std::swap(m_TempCharList[i], m_TempCharList[j]); | 1257 std::swap(m_TempCharList[i], m_TempCharList[j]); |
| 1354 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); | 1258 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); |
| 1355 } | 1259 } |
| 1356 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer(); | 1260 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer(); |
| 1357 i = iBufStartAppend; | 1261 i = iBufStartAppend; |
| 1358 j = m_TempTextBuf.GetLength() - 1; | 1262 j = m_TempTextBuf.GetLength() - 1; |
| 1359 for (; i < j; i++, j--) { | 1263 for (; i < j; i++, j--) { |
| 1360 std::swap(pTempBuffer[i], pTempBuffer[j]); | 1264 std::swap(pTempBuffer[i], pTempBuffer[j]); |
| 1361 } | 1265 } |
| 1362 } | 1266 } |
| 1267 |
| 1363 FX_BOOL CPDF_TextPage::IsRightToLeft(const CPDF_TextObject* pTextObj, | 1268 FX_BOOL CPDF_TextPage::IsRightToLeft(const CPDF_TextObject* pTextObj, |
| 1364 const CPDF_Font* pFont, | 1269 const CPDF_Font* pFont, |
| 1365 int nItems) const { | 1270 int nItems) const { |
| 1366 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); | 1271 CFX_WideString str; |
| 1367 int32_t nR2L = 0; | |
| 1368 int32_t nL2R = 0; | |
| 1369 int32_t start = 0, count = 0; | |
| 1370 CPDF_TextObjectItem item; | |
| 1371 for (int32_t i = 0; i < nItems; i++) { | 1272 for (int32_t i = 0; i < nItems; i++) { |
| 1273 CPDF_TextObjectItem item; |
| 1372 pTextObj->GetItemInfo(i, &item); | 1274 pTextObj->GetItemInfo(i, &item); |
| 1373 if (item.m_CharCode == (FX_DWORD)-1) { | 1275 if (item.m_CharCode == (FX_DWORD)-1) { |
| 1374 continue; | 1276 continue; |
| 1375 } | 1277 } |
| 1376 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); | 1278 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); |
| 1377 FX_WCHAR wChar = wstrItem.GetAt(0); | 1279 FX_WCHAR wChar = wstrItem.GetAt(0); |
| 1378 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { | 1280 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { |
| 1379 wChar = (FX_WCHAR)item.m_CharCode; | 1281 wChar = (FX_WCHAR)item.m_CharCode; |
| 1380 } | 1282 } |
| 1381 if (!wChar) { | 1283 if (wChar) |
| 1382 continue; | 1284 str += wChar; |
| 1383 } | |
| 1384 if (pBidiChar->AppendChar(wChar)) { | |
| 1385 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | |
| 1386 if (ret == CFX_BidiChar::RIGHT) { | |
| 1387 nR2L++; | |
| 1388 } else if (ret == CFX_BidiChar::LEFT) { | |
| 1389 nL2R++; | |
| 1390 } | |
| 1391 } | |
| 1392 } | 1285 } |
| 1393 if (pBidiChar->EndChar()) { | 1286 return CFX_BidiString(str).OverallDirection() == CFX_BidiChar::RIGHT; |
| 1394 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | |
| 1395 if (ret == CFX_BidiChar::RIGHT) { | |
| 1396 nR2L++; | |
| 1397 } else if (ret == CFX_BidiChar::LEFT) { | |
| 1398 nL2R++; | |
| 1399 } | |
| 1400 } | |
| 1401 return (nR2L > 0 && nR2L >= nL2R); | |
| 1402 } | 1287 } |
| 1288 |
| 1403 void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { | 1289 void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { |
| 1404 CPDF_TextObject* pTextObj = Obj.m_pTextObj; | 1290 CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
| 1405 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { | 1291 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { |
| 1406 return; | 1292 return; |
| 1407 } | 1293 } |
| 1408 CFX_Matrix formMatrix = Obj.m_formMatrix; | 1294 CFX_Matrix formMatrix = Obj.m_formMatrix; |
| 1409 CPDF_Font* pFont = pTextObj->GetFont(); | 1295 CPDF_Font* pFont = pTextObj->GetFont(); |
| 1410 CFX_Matrix matrix; | 1296 CFX_Matrix matrix; |
| 1411 pTextObj->GetTextMatrix(&matrix); | 1297 pTextObj->GetTextMatrix(&matrix); |
| 1412 matrix.Concat(formMatrix); | 1298 matrix.Concat(formMatrix); |
| (...skipping 1201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2614 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2500 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2615 return; | 2501 return; |
| 2616 } | 2502 } |
| 2617 CPDF_LinkExt* link = NULL; | 2503 CPDF_LinkExt* link = NULL; |
| 2618 link = m_LinkList.GetAt(index); | 2504 link = m_LinkList.GetAt(index); |
| 2619 if (!link) { | 2505 if (!link) { |
| 2620 return; | 2506 return; |
| 2621 } | 2507 } |
| 2622 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2508 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
| 2623 } | 2509 } |
| OLD | NEW |