| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "core/fpdftext/fpdf_text_int.h" | |
| 8 | |
| 9 #include <algorithm> | 7 #include <algorithm> |
| 10 #include <cctype> | 8 #include <cctype> |
| 11 #include <cwctype> | 9 #include <cwctype> |
| 12 #include <memory> | 10 #include <memory> |
| 13 #include <utility> | 11 #include <utility> |
| 14 #include <vector> | 12 #include <vector> |
| 15 | 13 |
| 16 #include "core/fpdfapi/fpdf_font/include/cpdf_font.h" | 14 #include "core/fpdfapi/fpdf_font/include/cpdf_font.h" |
| 15 #include "core/fpdfapi/fpdf_page/include/cpdf_form.h" |
| 17 #include "core/fpdfapi/fpdf_page/include/cpdf_formobject.h" | 16 #include "core/fpdfapi/fpdf_page/include/cpdf_formobject.h" |
| 17 #include "core/fpdfapi/fpdf_page/include/cpdf_page.h" |
| 18 #include "core/fpdfapi/fpdf_page/include/cpdf_pageobject.h" | 18 #include "core/fpdfapi/fpdf_page/include/cpdf_pageobject.h" |
| 19 #include "core/fpdfapi/fpdf_page/include/cpdf_textobject.h" | 19 #include "core/fpdfapi/fpdf_page/include/cpdf_textobject.h" |
| 20 #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h" | 20 #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h" |
| 21 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h" | 21 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h" |
| 22 #include "core/fpdftext/include/ipdf_linkextract.h" | 22 #include "core/fpdftext/include/cpdf_linkextract.h" |
| 23 #include "core/fpdftext/include/ipdf_textpage.h" | 23 #include "core/fpdftext/include/cpdf_textpage.h" |
| 24 #include "core/fpdftext/include/ipdf_textpagefind.h" | 24 #include "core/fpdftext/include/cpdf_textpagefind.h" |
| 25 #include "core/fpdftext/unicodenormalization.h" | 25 #include "core/fpdftext/unicodenormalizationdata.h" |
| 26 #include "core/fxcrt/fx_bidi.h" | 26 #include "core/fxcrt/fx_bidi.h" |
| 27 #include "core/fxcrt/include/fx_ext.h" | 27 #include "core/fxcrt/include/fx_ext.h" |
| 28 #include "core/fxcrt/include/fx_ucd.h" | 28 #include "core/fxcrt/include/fx_ucd.h" |
| 29 #include "third_party/base/stl_util.h" | 29 #include "third_party/base/stl_util.h" |
| 30 | 30 |
| 31 #define FPDFTEXT_RLTB 1 | 31 #define FPDFTEXT_RLTB 1 |
| 32 #define FPDFTEXT_LEFT -1 | 32 #define FPDFTEXT_LEFT -1 |
| 33 #define FPDFTEXT_RIGHT 1 | 33 #define FPDFTEXT_RIGHT 1 |
| 34 | 34 |
| 35 #define FPDFTEXT_MATCHCASE 0x00000001 | 35 #define FPDFTEXT_MATCHCASE 0x00000001 |
| 36 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 | 36 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 |
| 37 #define FPDFTEXT_CONSECUTIVE 0x00000004 | 37 #define FPDFTEXT_CONSECUTIVE 0x00000004 |
| 38 | 38 |
| 39 #define FPDFTEXT_CHAR_ERROR -1 |
| 40 #define FPDFTEXT_CHAR_NORMAL 0 |
| 41 #define FPDFTEXT_CHAR_GENERATED 1 |
| 42 #define FPDFTEXT_CHAR_UNUNICODE 2 |
| 43 #define FPDFTEXT_CHAR_HYPHEN 3 |
| 44 #define FPDFTEXT_CHAR_PIECE 4 |
| 45 #define FPDFTEXT_MC_PASS 0 |
| 46 #define FPDFTEXT_MC_DONE 1 |
| 47 #define FPDFTEXT_MC_DELAY 2 |
| 48 |
| 39 namespace { | 49 namespace { |
| 40 | 50 |
| 41 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) { | 51 const FX_FLOAT kDefaultFontSize = 1.0f; |
| 52 const uint16_t* const g_UnicodeData_Normalization_Maps[5] = { |
| 53 nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2, |
| 54 g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4}; |
| 55 |
| 56 FX_BOOL IsIgnoreSpaceCharacter(FX_WCHAR curChar) { |
| 42 if (curChar < 255) { | 57 if (curChar < 255) { |
| 43 return FALSE; | 58 return FALSE; |
| 44 } | 59 } |
| 45 if ((curChar >= 0x0600 && curChar <= 0x06FF) || | 60 if ((curChar >= 0x0600 && curChar <= 0x06FF) || |
| 46 (curChar >= 0xFE70 && curChar <= 0xFEFF) || | 61 (curChar >= 0xFE70 && curChar <= 0xFEFF) || |
| 47 (curChar >= 0xFB50 && curChar <= 0xFDFF) || | 62 (curChar >= 0xFB50 && curChar <= 0xFDFF) || |
| 48 (curChar >= 0x0400 && curChar <= 0x04FF) || | 63 (curChar >= 0x0400 && curChar <= 0x04FF) || |
| 49 (curChar >= 0x0500 && curChar <= 0x052F) || | 64 (curChar >= 0x0500 && curChar <= 0x052F) || |
| 50 (curChar >= 0xA640 && curChar <= 0xA69F) || | 65 (curChar >= 0xA640 && curChar <= 0xA69F) || |
| 51 (curChar >= 0x2DE0 && curChar <= 0x2DFF) || curChar == 8467 || | 66 (curChar >= 0x2DE0 && curChar <= 0x2DFF) || curChar == 8467 || |
| 52 (curChar >= 0x2000 && curChar <= 0x206F)) { | 67 (curChar >= 0x2000 && curChar <= 0x206F)) { |
| 53 return FALSE; | 68 return FALSE; |
| 54 } | 69 } |
| 55 return TRUE; | 70 return TRUE; |
| 56 } | 71 } |
| 57 | 72 |
| 58 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) { | 73 FX_FLOAT NormalizeThreshold(FX_FLOAT threshold) { |
| 59 if (threshold < 300) { | 74 if (threshold < 300) { |
| 60 return threshold / 2.0f; | 75 return threshold / 2.0f; |
| 61 } | 76 } |
| 62 if (threshold < 500) { | 77 if (threshold < 500) { |
| 63 return threshold / 4.0f; | 78 return threshold / 4.0f; |
| 64 } | 79 } |
| 65 if (threshold < 700) { | 80 if (threshold < 700) { |
| 66 return threshold / 5.0f; | 81 return threshold / 5.0f; |
| 67 } | 82 } |
| 68 return threshold / 6.0f; | 83 return threshold / 6.0f; |
| 69 } | 84 } |
| 70 | 85 |
| 71 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, | 86 FX_FLOAT CalculateBaseSpace(const CPDF_TextObject* pTextObj, |
| 72 const CFX_Matrix& matrix) { | 87 const CFX_Matrix& matrix) { |
| 73 FX_FLOAT baseSpace = 0.0; | 88 FX_FLOAT baseSpace = 0.0; |
| 74 const int nItems = pTextObj->CountItems(); | 89 const int nItems = pTextObj->CountItems(); |
| 75 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { | 90 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { |
| 76 FX_BOOL bAllChar = TRUE; | 91 FX_BOOL bAllChar = TRUE; |
| 77 FX_FLOAT spacing = matrix.TransformDistance( | 92 FX_FLOAT spacing = matrix.TransformDistance( |
| 78 pTextObj->m_TextState.GetObject()->m_CharSpace); | 93 pTextObj->m_TextState.GetObject()->m_CharSpace); |
| 79 baseSpace = spacing; | 94 baseSpace = spacing; |
| 80 for (int i = 0; i < nItems; i++) { | 95 for (int i = 0; i < nItems; i++) { |
| 81 CPDF_TextObjectItem item; | 96 CPDF_TextObjectItem item; |
| 82 pTextObj->GetItemInfo(i, &item); | 97 pTextObj->GetItemInfo(i, &item); |
| 83 if (item.m_CharCode == (uint32_t)-1) { | 98 if (item.m_CharCode == (uint32_t)-1) { |
| 84 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); | 99 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); |
| 85 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; | 100 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; |
| 86 baseSpace = std::min(baseSpace, kerning + spacing); | 101 baseSpace = std::min(baseSpace, kerning + spacing); |
| 87 bAllChar = FALSE; | 102 bAllChar = FALSE; |
| 88 } | 103 } |
| 89 } | 104 } |
| 90 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { | 105 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { |
| 91 baseSpace = 0.0; | 106 baseSpace = 0.0; |
| 92 } | 107 } |
| 93 } | 108 } |
| 94 return baseSpace; | 109 return baseSpace; |
| 95 } | 110 } |
| 96 | 111 |
| 97 const FX_FLOAT kDefaultFontSize = 1.0f; | 112 FX_STRSIZE Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst) { |
| 113 wch = wch & 0xFFFF; |
| 114 FX_WCHAR wFind = g_UnicodeData_Normalization[wch]; |
| 115 if (!wFind) { |
| 116 if (pDst) { |
| 117 *pDst = wch; |
| 118 } |
| 119 return 1; |
| 120 } |
| 121 if (wFind >= 0x8000) { |
| 122 wch = wFind - 0x8000; |
| 123 wFind = 1; |
| 124 } else { |
| 125 wch = wFind & 0x0FFF; |
| 126 wFind >>= 12; |
| 127 } |
| 128 const uint16_t* pMap = g_UnicodeData_Normalization_Maps[wFind]; |
| 129 if (pMap == g_UnicodeData_Normalization_Map4) { |
| 130 pMap = g_UnicodeData_Normalization_Map4 + wch; |
| 131 wFind = (FX_WCHAR)(*pMap++); |
| 132 } else { |
| 133 pMap += wch; |
| 134 } |
| 135 if (pDst) { |
| 136 FX_WCHAR n = wFind; |
| 137 while (n--) { |
| 138 *pDst++ = *pMap++; |
| 139 } |
| 140 } |
| 141 return (FX_STRSIZE)wFind; |
| 142 } |
| 98 | 143 |
| 99 } // namespace | 144 } // namespace |
| 100 | 145 |
| 101 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, | |
| 102 int flags) { | |
| 103 return new CPDF_TextPage(pPage, flags); | |
| 104 } | |
| 105 | |
| 106 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( | |
| 107 const IPDF_TextPage* pTextPage) { | |
| 108 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; | |
| 109 } | |
| 110 | |
| 111 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { | |
| 112 return new CPDF_LinkExtract(); | |
| 113 } | |
| 114 | |
| 115 #define TEXT_BLANK_CHAR L' ' | 146 #define TEXT_BLANK_CHAR L' ' |
| 116 #define TEXT_LINEFEED_CHAR L'\n' | 147 #define TEXT_LINEFEED_CHAR L'\n' |
| 117 #define TEXT_RETURN_CHAR L'\r' | 148 #define TEXT_RETURN_CHAR L'\r' |
| 118 #define TEXT_EMPTY L"" | 149 #define TEXT_EMPTY L"" |
| 119 #define TEXT_BLANK L" " | 150 #define TEXT_BLANK L" " |
| 120 #define TEXT_RETURN_LINEFEED L"\r\n" | 151 #define TEXT_RETURN_LINEFEED L"\r\n" |
| 121 #define TEXT_LINEFEED L"\n" | 152 #define TEXT_LINEFEED L"\n" |
| 122 #define TEXT_CHARRATIO_GAPDELTA 0.070 | 153 #define TEXT_CHARRATIO_GAPDELTA 0.070 |
| 123 | 154 |
| 124 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) | 155 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) |
| (...skipping 800 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 925 } | 956 } |
| 926 } | 957 } |
| 927 } | 958 } |
| 928 | 959 |
| 929 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, | 960 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, |
| 930 PAGECHAR_INFO info) { | 961 PAGECHAR_INFO info) { |
| 931 if (!IsControlChar(info)) { | 962 if (!IsControlChar(info)) { |
| 932 info.m_Index = m_TextBuf.GetLength(); | 963 info.m_Index = m_TextBuf.GetLength(); |
| 933 if (wChar >= 0xFB00 && wChar <= 0xFB06) { | 964 if (wChar >= 0xFB00 && wChar <= 0xFB06) { |
| 934 FX_WCHAR* pDst = NULL; | 965 FX_WCHAR* pDst = NULL; |
| 935 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 966 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); |
| 936 if (nCount >= 1) { | 967 if (nCount >= 1) { |
| 937 pDst = FX_Alloc(FX_WCHAR, nCount); | 968 pDst = FX_Alloc(FX_WCHAR, nCount); |
| 938 FX_Unicode_GetNormalization(wChar, pDst); | 969 Unicode_GetNormalization(wChar, pDst); |
| 939 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 970 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| 940 PAGECHAR_INFO info2 = info; | 971 PAGECHAR_INFO info2 = info; |
| 941 info2.m_Unicode = pDst[nIndex]; | 972 info2.m_Unicode = pDst[nIndex]; |
| 942 info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 973 info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
| 943 m_TextBuf.AppendChar(info2.m_Unicode); | 974 m_TextBuf.AppendChar(info2.m_Unicode); |
| 944 m_CharList.push_back(info2); | 975 m_CharList.push_back(info2); |
| 945 } | 976 } |
| 946 FX_Free(pDst); | 977 FX_Free(pDst); |
| 947 return; | 978 return; |
| 948 } | 979 } |
| 949 } | 980 } |
| 950 m_TextBuf.AppendChar(wChar); | 981 m_TextBuf.AppendChar(wChar); |
| 951 } else { | 982 } else { |
| 952 info.m_Index = -1; | 983 info.m_Index = -1; |
| 953 } | 984 } |
| 954 m_CharList.push_back(info); | 985 m_CharList.push_back(info); |
| 955 } | 986 } |
| 956 | 987 |
| 957 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, | 988 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, |
| 958 PAGECHAR_INFO info) { | 989 PAGECHAR_INFO info) { |
| 959 if (!IsControlChar(info)) { | 990 if (!IsControlChar(info)) { |
| 960 info.m_Index = m_TextBuf.GetLength(); | 991 info.m_Index = m_TextBuf.GetLength(); |
| 961 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); | 992 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); |
| 962 FX_WCHAR* pDst = NULL; | 993 FX_WCHAR* pDst = NULL; |
| 963 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 994 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); |
| 964 if (nCount >= 1) { | 995 if (nCount >= 1) { |
| 965 pDst = FX_Alloc(FX_WCHAR, nCount); | 996 pDst = FX_Alloc(FX_WCHAR, nCount); |
| 966 FX_Unicode_GetNormalization(wChar, pDst); | 997 Unicode_GetNormalization(wChar, pDst); |
| 967 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 998 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| 968 PAGECHAR_INFO info2 = info; | 999 PAGECHAR_INFO info2 = info; |
| 969 info2.m_Unicode = pDst[nIndex]; | 1000 info2.m_Unicode = pDst[nIndex]; |
| 970 info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 1001 info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
| 971 m_TextBuf.AppendChar(info2.m_Unicode); | 1002 m_TextBuf.AppendChar(info2.m_Unicode); |
| 972 m_CharList.push_back(info2); | 1003 m_CharList.push_back(info2); |
| 973 } | 1004 } |
| 974 FX_Free(pDst); | 1005 FX_Free(pDst); |
| 975 return; | 1006 return; |
| 976 } | 1007 } |
| (...skipping 393 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1370 } | 1401 } |
| 1371 if (FPDFTEXT_MC_DELAY == bPreMKC) { | 1402 if (FPDFTEXT_MC_DELAY == bPreMKC) { |
| 1372 ProcessMarkedContent(Obj); | 1403 ProcessMarkedContent(Obj); |
| 1373 m_pPreTextObj = pTextObj; | 1404 m_pPreTextObj = pTextObj; |
| 1374 m_perMatrix.Copy(formMatrix); | 1405 m_perMatrix.Copy(formMatrix); |
| 1375 return; | 1406 return; |
| 1376 } | 1407 } |
| 1377 m_pPreTextObj = pTextObj; | 1408 m_pPreTextObj = pTextObj; |
| 1378 m_perMatrix.Copy(formMatrix); | 1409 m_perMatrix.Copy(formMatrix); |
| 1379 int nItems = pTextObj->CountItems(); | 1410 int nItems = pTextObj->CountItems(); |
| 1380 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); | 1411 FX_FLOAT baseSpace = CalculateBaseSpace(pTextObj, matrix); |
| 1381 | 1412 |
| 1382 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); | 1413 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); |
| 1383 const FX_BOOL bIsBidiAndMirrorInverse = | 1414 const FX_BOOL bIsBidiAndMirrorInverse = |
| 1384 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; | 1415 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; |
| 1385 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); | 1416 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); |
| 1386 int32_t iCharListStartAppend = | 1417 int32_t iCharListStartAppend = |
| 1387 pdfium::CollectionSize<int32_t>(m_TempCharList); | 1418 pdfium::CollectionSize<int32_t>(m_TempCharList); |
| 1388 | 1419 |
| 1389 FX_FLOAT spacing = 0; | 1420 FX_FLOAT spacing = 0; |
| 1390 for (int i = 0; i < nItems; i++) { | 1421 for (int i = 0; i < nItems; i++) { |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1423 if (threshold > fontsize_h / 3) { | 1454 if (threshold > fontsize_h / 3) { |
| 1424 threshold = 0; | 1455 threshold = 0; |
| 1425 } else { | 1456 } else { |
| 1426 threshold /= 2; | 1457 threshold /= 2; |
| 1427 } | 1458 } |
| 1428 if (threshold == 0) { | 1459 if (threshold == 0) { |
| 1429 threshold = fontsize_h; | 1460 threshold = fontsize_h; |
| 1430 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); | 1461 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); |
| 1431 threshold = this_width > last_width ? (FX_FLOAT)this_width | 1462 threshold = this_width > last_width ? (FX_FLOAT)this_width |
| 1432 : (FX_FLOAT)last_width; | 1463 : (FX_FLOAT)last_width; |
| 1433 threshold = _NormalizeThreshold(threshold); | 1464 threshold = NormalizeThreshold(threshold); |
| 1434 threshold = fontsize_h * threshold / 1000; | 1465 threshold = fontsize_h * threshold / 1000; |
| 1435 } | 1466 } |
| 1436 if (threshold && (spacing && spacing >= threshold)) { | 1467 if (threshold && (spacing && spacing >= threshold)) { |
| 1437 charinfo.m_Unicode = TEXT_BLANK_CHAR; | 1468 charinfo.m_Unicode = TEXT_BLANK_CHAR; |
| 1438 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; | 1469 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; |
| 1439 charinfo.m_pTextObj = pTextObj; | 1470 charinfo.m_pTextObj = pTextObj; |
| 1440 charinfo.m_Index = m_TextBuf.GetLength(); | 1471 charinfo.m_Index = m_TextBuf.GetLength(); |
| 1441 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); | 1472 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); |
| 1442 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode; | 1473 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode; |
| 1443 charinfo.m_Matrix.Copy(formMatrix); | 1474 charinfo.m_Matrix.Copy(formMatrix); |
| (...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1891 } | 1922 } |
| 1892 if (unicode > L'Z' && unicode < L'a') { | 1923 if (unicode > L'Z' && unicode < L'a') { |
| 1893 return FALSE; | 1924 return FALSE; |
| 1894 } | 1925 } |
| 1895 if (unicode > L'z') { | 1926 if (unicode > L'z') { |
| 1896 return FALSE; | 1927 return FALSE; |
| 1897 } | 1928 } |
| 1898 return TRUE; | 1929 return TRUE; |
| 1899 } | 1930 } |
| 1900 | 1931 |
| 1901 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) | 1932 CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage) |
| 1902 : m_pTextPage(pTextPage), | 1933 : m_pTextPage(pTextPage), |
| 1903 m_flags(0), | 1934 m_flags(0), |
| 1904 m_findNextStart(-1), | 1935 m_findNextStart(-1), |
| 1905 m_findPreStart(-1), | 1936 m_findPreStart(-1), |
| 1906 m_bMatchCase(FALSE), | 1937 m_bMatchCase(FALSE), |
| 1907 m_bMatchWholeWord(FALSE), | 1938 m_bMatchWholeWord(FALSE), |
| 1908 m_resStart(0), | 1939 m_resStart(0), |
| 1909 m_resEnd(-1), | 1940 m_resEnd(-1), |
| 1910 m_IsFind(FALSE) { | 1941 m_IsFind(FALSE) { |
| 1911 m_strText = m_pTextPage->GetPageText(); | 1942 m_strText = m_pTextPage->GetPageText(); |
| (...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2047 if (iWord == 0) { | 2078 if (iWord == 0) { |
| 2048 m_resStart = nResultPos; | 2079 m_resStart = nResultPos; |
| 2049 } | 2080 } |
| 2050 FX_BOOL bMatch = TRUE; | 2081 FX_BOOL bMatch = TRUE; |
| 2051 if (iWord != 0 && !bSpaceStart) { | 2082 if (iWord != 0 && !bSpaceStart) { |
| 2052 int PreResEndPos = nStartPos; | 2083 int PreResEndPos = nStartPos; |
| 2053 int curChar = csWord.GetAt(0); | 2084 int curChar = csWord.GetAt(0); |
| 2054 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; | 2085 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; |
| 2055 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); | 2086 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); |
| 2056 if (nStartPos == nResultPos && | 2087 if (nStartPos == nResultPos && |
| 2057 !(_IsIgnoreSpaceCharacter(lastChar) || | 2088 !(IsIgnoreSpaceCharacter(lastChar) || |
| 2058 _IsIgnoreSpaceCharacter(curChar))) { | 2089 IsIgnoreSpaceCharacter(curChar))) { |
| 2059 bMatch = FALSE; | 2090 bMatch = FALSE; |
| 2060 } | 2091 } |
| 2061 for (int d = PreResEndPos; d < nResultPos; d++) { | 2092 for (int d = PreResEndPos; d < nResultPos; d++) { |
| 2062 FX_WCHAR strInsert = m_strText.GetAt(d); | 2093 FX_WCHAR strInsert = m_strText.GetAt(d); |
| 2063 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && | 2094 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && |
| 2064 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { | 2095 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { |
| 2065 bMatch = FALSE; | 2096 bMatch = FALSE; |
| 2066 break; | 2097 break; |
| 2067 } | 2098 } |
| 2068 } | 2099 } |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2167 index++; | 2198 index++; |
| 2168 continue; | 2199 continue; |
| 2169 } else { | 2200 } else { |
| 2170 break; | 2201 break; |
| 2171 } | 2202 } |
| 2172 } | 2203 } |
| 2173 int pos = 0; | 2204 int pos = 0; |
| 2174 while (pos < csWord.GetLength()) { | 2205 while (pos < csWord.GetLength()) { |
| 2175 CFX_WideString curStr = csWord.Mid(pos, 1); | 2206 CFX_WideString curStr = csWord.Mid(pos, 1); |
| 2176 FX_WCHAR curChar = csWord.GetAt(pos); | 2207 FX_WCHAR curChar = csWord.GetAt(pos); |
| 2177 if (_IsIgnoreSpaceCharacter(curChar)) { | 2208 if (IsIgnoreSpaceCharacter(curChar)) { |
| 2178 if (pos > 0 && curChar == 0x2019) { | 2209 if (pos > 0 && curChar == 0x2019) { |
| 2179 pos++; | 2210 pos++; |
| 2180 continue; | 2211 continue; |
| 2181 } | 2212 } |
| 2182 if (pos > 0) { | 2213 if (pos > 0) { |
| 2183 m_csFindWhatArray.push_back(csWord.Mid(0, pos)); | 2214 m_csFindWhatArray.push_back(csWord.Mid(0, pos)); |
| 2184 } | 2215 } |
| 2185 m_csFindWhatArray.push_back(curStr); | 2216 m_csFindWhatArray.push_back(curStr); |
| 2186 if (pos == csWord.GetLength() - 1) { | 2217 if (pos == csWord.GetLength() - 1) { |
| 2187 csWord.clear(); | 2218 csWord.clear(); |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2299 return resEnd - resStart + 1; | 2330 return resEnd - resStart + 1; |
| 2300 } | 2331 } |
| 2301 | 2332 |
| 2302 CPDF_LinkExtract::CPDF_LinkExtract() | 2333 CPDF_LinkExtract::CPDF_LinkExtract() |
| 2303 : m_pTextPage(nullptr), m_bIsParsed(false) {} | 2334 : m_pTextPage(nullptr), m_bIsParsed(false) {} |
| 2304 | 2335 |
| 2305 CPDF_LinkExtract::~CPDF_LinkExtract() { | 2336 CPDF_LinkExtract::~CPDF_LinkExtract() { |
| 2306 DeleteLinkList(); | 2337 DeleteLinkList(); |
| 2307 } | 2338 } |
| 2308 | 2339 |
| 2309 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { | 2340 FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) { |
| 2310 if (!pTextPage || !pTextPage->IsParsed()) | 2341 if (!pTextPage || !pTextPage->IsParsed()) |
| 2311 return FALSE; | 2342 return FALSE; |
| 2312 | 2343 |
| 2313 m_pTextPage = (const CPDF_TextPage*)pTextPage; | 2344 m_pTextPage = (const CPDF_TextPage*)pTextPage; |
| 2314 m_strPageText = m_pTextPage->GetPageText(0, -1); | 2345 m_strPageText = m_pTextPage->GetPageText(0, -1); |
| 2315 DeleteLinkList(); | 2346 DeleteLinkList(); |
| 2316 if (m_strPageText.IsEmpty()) { | 2347 if (m_strPageText.IsEmpty()) { |
| 2317 return FALSE; | 2348 return FALSE; |
| 2318 } | 2349 } |
| 2319 ParseLink(); | 2350 ParseLink(); |
| (...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2510 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2541 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2511 return; | 2542 return; |
| 2512 } | 2543 } |
| 2513 CPDF_LinkExt* link = NULL; | 2544 CPDF_LinkExt* link = NULL; |
| 2514 link = m_LinkList.GetAt(index); | 2545 link = m_LinkList.GetAt(index); |
| 2515 if (!link) { | 2546 if (!link) { |
| 2516 return; | 2547 return; |
| 2517 } | 2548 } |
| 2518 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2549 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
| 2519 } | 2550 } |
| OLD | NEW |