Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(273)

Side by Side Diff: core/src/fpdftext/fpdf_text_int.cpp

Issue 1682983002: Make fx_bidi sane. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Address issues in c#5. Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « core/include/fxcrt/fx_bidi.h ('k') | core/src/fpdftext/text_int.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/src/fpdftext/text_int.h" 7 #include "core/src/fpdftext/text_int.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <cctype> 10 #include <cctype>
(...skipping 896 matching lines...) Expand 10 before | Expand all | Expand 10 after
907 w = pFont->GetStringWidth(str, 1); 907 w = pFont->GetStringWidth(str, 1);
908 if (w == 0) { 908 if (w == 0) {
909 FX_RECT BBox; 909 FX_RECT BBox;
910 pFont->GetCharBBox(charCode, BBox); 910 pFont->GetCharBBox(charCode, BBox);
911 w = BBox.right - BBox.left; 911 w = BBox.right - BBox.left;
912 } 912 }
913 } 913 }
914 return w; 914 return w;
915 } 915 }
916 void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { 916 void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) {
917 int32_t start, count; 917 CFX_BidiChar::Segment seg = pBidi->GetSegmentInfo();
918 CFX_BidiChar::Direction ret = pBidi->GetBidiInfo(&start, &count); 918 if (seg.direction == CFX_BidiChar::RIGHT) {
919 if (ret == CFX_BidiChar::RIGHT) { 919 for (int i = seg.start + seg.count; i > seg.start; i--) {
920 for (int i = start + count - 1; i >= start; i--) { 920 m_TextBuf.AppendChar(str.GetAt(i - i));
921 m_TextBuf.AppendChar(str.GetAt(i)); 921 m_CharList.push_back(m_TempCharList[i - 1]);
922 m_CharList.push_back(m_TempCharList[i]);
923 } 922 }
924 } else { 923 } else {
925 int end = start + count; 924 for (int i = seg.start; i < seg.start + seg.count; i++) {
926 for (int i = start; i < end; i++) {
927 m_TextBuf.AppendChar(str.GetAt(i)); 925 m_TextBuf.AppendChar(str.GetAt(i));
928 m_CharList.push_back(m_TempCharList[i]); 926 m_CharList.push_back(m_TempCharList[i]);
929 } 927 }
930 } 928 }
931 } 929 }
932 void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) { 930 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
933 PAGECHAR_INFO info = m_TempCharList[i]; 931 PAGECHAR_INFO info) {
934 FX_WCHAR wChar = str.GetAt(i);
935 if (!IsControlChar(info)) { 932 if (!IsControlChar(info)) {
936 info.m_Index = m_TextBuf.GetLength(); 933 info.m_Index = m_TextBuf.GetLength();
937 if (wChar >= 0xFB00 && wChar <= 0xFB06) { 934 if (wChar >= 0xFB00 && wChar <= 0xFB06) {
938 FX_WCHAR* pDst = NULL; 935 FX_WCHAR* pDst = NULL;
939 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); 936 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
940 if (nCount >= 1) { 937 if (nCount >= 1) {
941 pDst = FX_Alloc(FX_WCHAR, nCount); 938 pDst = FX_Alloc(FX_WCHAR, nCount);
942 FX_Unicode_GetNormalization(wChar, pDst); 939 FX_Unicode_GetNormalization(wChar, pDst);
943 for (int nIndex = 0; nIndex < nCount; nIndex++) { 940 for (int nIndex = 0; nIndex < nCount; nIndex++) {
944 PAGECHAR_INFO info2 = info; 941 PAGECHAR_INFO info2 = info;
945 info2.m_Unicode = pDst[nIndex]; 942 info2.m_Unicode = pDst[nIndex];
946 info2.m_Flag = FPDFTEXT_CHAR_PIECE; 943 info2.m_Flag = FPDFTEXT_CHAR_PIECE;
947 m_TextBuf.AppendChar(info2.m_Unicode); 944 m_TextBuf.AppendChar(info2.m_Unicode);
948 m_CharList.push_back(info2); 945 m_CharList.push_back(info2);
949 } 946 }
950 FX_Free(pDst); 947 FX_Free(pDst);
951 return; 948 return;
952 } 949 }
953 } 950 }
954 m_TextBuf.AppendChar(wChar); 951 m_TextBuf.AppendChar(wChar);
955 } else { 952 } else {
956 info.m_Index = -1; 953 info.m_Index = -1;
957 } 954 }
958 m_CharList.push_back(info); 955 m_CharList.push_back(info);
959 } 956 }
960 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) { 957 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
961 PAGECHAR_INFO info = m_TempCharList[i]; 958 PAGECHAR_INFO info) {
962 if (!IsControlChar(info)) { 959 if (!IsControlChar(info)) {
963 info.m_Index = m_TextBuf.GetLength(); 960 info.m_Index = m_TextBuf.GetLength();
964 FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE); 961 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
965 FX_WCHAR* pDst = NULL; 962 FX_WCHAR* pDst = NULL;
966 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); 963 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
967 if (nCount >= 1) { 964 if (nCount >= 1) {
968 pDst = FX_Alloc(FX_WCHAR, nCount); 965 pDst = FX_Alloc(FX_WCHAR, nCount);
969 FX_Unicode_GetNormalization(wChar, pDst); 966 FX_Unicode_GetNormalization(wChar, pDst);
970 for (int nIndex = 0; nIndex < nCount; nIndex++) { 967 for (int nIndex = 0; nIndex < nCount; nIndex++) {
971 PAGECHAR_INFO info2 = info; 968 PAGECHAR_INFO info2 = info;
972 info2.m_Unicode = pDst[nIndex]; 969 info2.m_Unicode = pDst[nIndex];
973 info2.m_Flag = FPDFTEXT_CHAR_PIECE; 970 info2.m_Flag = FPDFTEXT_CHAR_PIECE;
974 m_TextBuf.AppendChar(info2.m_Unicode); 971 m_TextBuf.AppendChar(info2.m_Unicode);
975 m_CharList.push_back(info2); 972 m_CharList.push_back(info2);
976 } 973 }
977 FX_Free(pDst); 974 FX_Free(pDst);
978 return; 975 return;
979 } 976 }
980 info.m_Unicode = wChar; 977 info.m_Unicode = wChar;
981 m_TextBuf.AppendChar(info.m_Unicode); 978 m_TextBuf.AppendChar(info.m_Unicode);
982 } else { 979 } else {
983 info.m_Index = -1; 980 info.m_Index = -1;
984 } 981 }
985 m_CharList.push_back(info); 982 m_CharList.push_back(info);
986 } 983 }
984
987 void CPDF_TextPage::CloseTempLine() { 985 void CPDF_TextPage::CloseTempLine() {
988 if (m_TempCharList.empty()) { 986 if (m_TempCharList.empty())
989 return; 987 return;
990 } 988
991 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar);
992 CFX_WideString str = m_TempTextBuf.GetWideString(); 989 CFX_WideString str = m_TempTextBuf.GetWideString();
993 std::vector<FX_WORD> order;
994 FX_BOOL bR2L = FALSE;
995 int32_t start = 0, count = 0;
996 int nR2L = 0, nL2R = 0;
997 FX_BOOL bPrevSpace = FALSE; 990 FX_BOOL bPrevSpace = FALSE;
998 for (int i = 0; i < str.GetLength(); i++) { 991 for (int i = 0; i < str.GetLength(); i++) {
999 if (str.GetAt(i) == 32) { 992 if (str.GetAt(i) != ' ') {
1000 if (bPrevSpace) { 993 bPrevSpace = FALSE;
1001 m_TempTextBuf.Delete(i, 1); 994 continue;
1002 m_TempCharList.erase(m_TempCharList.begin() + i); 995 }
1003 str.Delete(i); 996 if (bPrevSpace) {
1004 i--; 997 m_TempTextBuf.Delete(i, 1);
1005 continue; 998 m_TempCharList.erase(m_TempCharList.begin() + i);
1006 } 999 str.Delete(i);
1007 bPrevSpace = TRUE; 1000 i--;
1001 }
1002 bPrevSpace = TRUE;
1003 }
1004 CFX_BidiString bidi(str);
1005 if (m_parserflag == FPDFTEXT_RLTB)
1006 bidi.SetOverallDirectionRight();
1007 CFX_BidiChar::Direction eCurrentDirection = bidi.OverallDirection();
1008 for (const auto& segment : bidi) {
1009 if (segment.direction == CFX_BidiChar::RIGHT ||
1010 (segment.direction == CFX_BidiChar::NEUTRAL &&
1011 eCurrentDirection == CFX_BidiChar::RIGHT)) {
1012 eCurrentDirection = CFX_BidiChar::RIGHT;
1013 for (int m = segment.start + segment.count; m > segment.start; --m)
1014 AddCharInfoByRLDirection(bidi.CharAt(m - 1), m_TempCharList[m - 1]);
1008 } else { 1015 } else {
1009 bPrevSpace = FALSE; 1016 eCurrentDirection = CFX_BidiChar::LEFT;
1010 } 1017 for (int m = segment.start; m < segment.start + segment.count; m++)
1011 if (pBidiChar->AppendChar(str.GetAt(i))) { 1018 AddCharInfoByLRDirection(bidi.CharAt(m), m_TempCharList[m]);
1012 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count);
1013 order.push_back(start);
1014 order.push_back(count);
1015 order.push_back(ret);
1016 if (!bR2L) {
1017 if (ret == CFX_BidiChar::RIGHT) {
1018 nR2L++;
1019 } else if (ret == CFX_BidiChar::LEFT) {
1020 nL2R++;
1021 }
1022 }
1023 }
1024 }
1025 if (pBidiChar->EndChar()) {
1026 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count);
1027 order.push_back(start);
1028 order.push_back(count);
1029 order.push_back(ret);
1030 if (!bR2L) {
1031 if (ret == CFX_BidiChar::RIGHT) {
1032 nR2L++;
1033 } else if (ret == CFX_BidiChar::LEFT) {
1034 nL2R++;
1035 }
1036 }
1037 }
1038 if (nR2L > 0 && nR2L >= nL2R) {
1039 bR2L = TRUE;
1040 }
1041 if (m_parserflag == FPDFTEXT_RLTB || bR2L) {
1042 int count = pdfium::CollectionSize<int>(order);
1043 for (int i = count - 1; i > 0; i -= 3) {
1044 int ret = order[i];
1045 int count1 = order[i - 1];
1046 int start = order[i - 2];
1047 if (ret == 2 || ret == 0) {
1048 for (int j = start + count1 - 1; j >= start; j--) {
1049 AddCharInfoByRLDirection(str, j);
1050 }
1051 } else {
1052 int j = i;
1053 FX_BOOL bSymbol = FALSE;
1054 while (j > 0 && order[j] != 2) {
1055 bSymbol = !order[j];
1056 j -= 3;
1057 }
1058 int end = start + count1;
1059 int n = 0;
1060 if (bSymbol) {
1061 n = j + 6;
1062 } else {
1063 n = j + 3;
1064 }
1065 if (n >= i) {
1066 for (int m = start; m < end; m++) {
1067 AddCharInfoByLRDirection(str, m);
1068 }
1069 } else {
1070 j = i;
1071 i = n;
1072 for (; n <= j; n += 3) {
1073 int start = order[n - 2];
1074 int count1 = order[n - 1];
1075 int end = start + count1;
1076 for (int m = start; m < end; m++) {
1077 AddCharInfoByLRDirection(str, m);
1078 }
1079 }
1080 }
1081 }
1082 }
1083 } else {
1084 int count = pdfium::CollectionSize<int>(order);
1085 FX_BOOL bL2R = FALSE;
1086 for (int i = 0; i < count; i += 3) {
1087 int start = order[i];
1088 int count1 = order[i + 1];
1089 int ret = order[i + 2];
1090 if (ret == 2 || (i == 0 && ret == 0 && !bL2R)) {
1091 int j = i + 3;
1092 while (bR2L && j < count) {
1093 if (order[j + 2] == 1)
1094 break;
1095 j += 3;
1096 }
1097 if (j == 3) {
1098 i = -3;
1099 bL2R = TRUE;
1100 continue;
1101 }
1102 int end = pdfium::CollectionSize<int>(m_TempCharList) - 1;
1103 if (j < count) {
1104 end = order[j] - 1;
1105 }
1106 i = j - 3;
1107 for (int n = end; n >= start; n--) {
1108 AddCharInfoByRLDirection(str, n);
1109 }
1110 } else {
1111 int end = start + count1;
1112 for (int n = start; n < end; n++) {
1113 AddCharInfoByLRDirection(str, n);
1114 }
1115 }
1116 } 1019 }
1117 } 1020 }
1118 m_TempCharList.clear(); 1021 m_TempCharList.clear();
1119 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength()); 1022 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength());
1120 } 1023 }
1024
1121 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj, 1025 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj,
1122 const CFX_Matrix& formMatrix, 1026 const CFX_Matrix& formMatrix,
1123 FX_POSITION ObjPos) { 1027 FX_POSITION ObjPos) {
1124 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, 1028 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right,
1125 pTextObj->m_Top); 1029 pTextObj->m_Top);
1126 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { 1030 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) {
1127 return; 1031 return;
1128 } 1032 }
1129 int count = m_LineObj.GetSize(); 1033 int count = m_LineObj.GetSize();
1130 PDFTEXT_Obj Obj; 1034 PDFTEXT_Obj Obj;
(...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after
1353 std::swap(m_TempCharList[i], m_TempCharList[j]); 1257 std::swap(m_TempCharList[i], m_TempCharList[j]);
1354 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); 1258 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index);
1355 } 1259 }
1356 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer(); 1260 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer();
1357 i = iBufStartAppend; 1261 i = iBufStartAppend;
1358 j = m_TempTextBuf.GetLength() - 1; 1262 j = m_TempTextBuf.GetLength() - 1;
1359 for (; i < j; i++, j--) { 1263 for (; i < j; i++, j--) {
1360 std::swap(pTempBuffer[i], pTempBuffer[j]); 1264 std::swap(pTempBuffer[i], pTempBuffer[j]);
1361 } 1265 }
1362 } 1266 }
1267
1363 FX_BOOL CPDF_TextPage::IsRightToLeft(const CPDF_TextObject* pTextObj, 1268 FX_BOOL CPDF_TextPage::IsRightToLeft(const CPDF_TextObject* pTextObj,
1364 const CPDF_Font* pFont, 1269 const CPDF_Font* pFont,
1365 int nItems) const { 1270 int nItems) const {
1366 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); 1271 CFX_WideString str;
1367 int32_t nR2L = 0;
1368 int32_t nL2R = 0;
1369 int32_t start = 0, count = 0;
1370 CPDF_TextObjectItem item;
1371 for (int32_t i = 0; i < nItems; i++) { 1272 for (int32_t i = 0; i < nItems; i++) {
1273 CPDF_TextObjectItem item;
1372 pTextObj->GetItemInfo(i, &item); 1274 pTextObj->GetItemInfo(i, &item);
1373 if (item.m_CharCode == (FX_DWORD)-1) { 1275 if (item.m_CharCode == (FX_DWORD)-1) {
1374 continue; 1276 continue;
1375 } 1277 }
1376 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); 1278 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
1377 FX_WCHAR wChar = wstrItem.GetAt(0); 1279 FX_WCHAR wChar = wstrItem.GetAt(0);
1378 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { 1280 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) {
1379 wChar = (FX_WCHAR)item.m_CharCode; 1281 wChar = (FX_WCHAR)item.m_CharCode;
1380 } 1282 }
1381 if (!wChar) { 1283 if (wChar)
1382 continue; 1284 str += wChar;
1383 }
1384 if (pBidiChar->AppendChar(wChar)) {
1385 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count);
1386 if (ret == CFX_BidiChar::RIGHT) {
1387 nR2L++;
1388 } else if (ret == CFX_BidiChar::LEFT) {
1389 nL2R++;
1390 }
1391 }
1392 } 1285 }
1393 if (pBidiChar->EndChar()) { 1286 return CFX_BidiString(str).OverallDirection() == CFX_BidiChar::RIGHT;
1394 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count);
1395 if (ret == CFX_BidiChar::RIGHT) {
1396 nR2L++;
1397 } else if (ret == CFX_BidiChar::LEFT) {
1398 nL2R++;
1399 }
1400 }
1401 return (nR2L > 0 && nR2L >= nL2R);
1402 } 1287 }
1288
1403 void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { 1289 void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
1404 CPDF_TextObject* pTextObj = Obj.m_pTextObj; 1290 CPDF_TextObject* pTextObj = Obj.m_pTextObj;
1405 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { 1291 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) {
1406 return; 1292 return;
1407 } 1293 }
1408 CFX_Matrix formMatrix = Obj.m_formMatrix; 1294 CFX_Matrix formMatrix = Obj.m_formMatrix;
1409 CPDF_Font* pFont = pTextObj->GetFont(); 1295 CPDF_Font* pFont = pTextObj->GetFont();
1410 CFX_Matrix matrix; 1296 CFX_Matrix matrix;
1411 pTextObj->GetTextMatrix(&matrix); 1297 pTextObj->GetTextMatrix(&matrix);
1412 matrix.Concat(formMatrix); 1298 matrix.Concat(formMatrix);
(...skipping 1201 matching lines...) Expand 10 before | Expand all | Expand 10 after
2614 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { 2500 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2615 return; 2501 return;
2616 } 2502 }
2617 CPDF_LinkExt* link = NULL; 2503 CPDF_LinkExt* link = NULL;
2618 link = m_LinkList.GetAt(index); 2504 link = m_LinkList.GetAt(index);
2619 if (!link) { 2505 if (!link) {
2620 return; 2506 return;
2621 } 2507 }
2622 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2508 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2623 } 2509 }
OLD | NEW
« no previous file with comments | « core/include/fxcrt/fx_bidi.h ('k') | core/src/fpdftext/text_int.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698