OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "core/fpdftext/fpdf_text_int.h" | |
8 | |
9 #include <algorithm> | 7 #include <algorithm> |
10 #include <cctype> | 8 #include <cctype> |
11 #include <cwctype> | 9 #include <cwctype> |
12 #include <memory> | 10 #include <memory> |
13 #include <utility> | 11 #include <utility> |
14 #include <vector> | 12 #include <vector> |
15 | 13 |
16 #include "core/fpdfapi/fpdf_font/include/cpdf_font.h" | 14 #include "core/fpdfapi/fpdf_font/include/cpdf_font.h" |
| 15 #include "core/fpdfapi/fpdf_page/include/cpdf_form.h" |
17 #include "core/fpdfapi/fpdf_page/include/cpdf_formobject.h" | 16 #include "core/fpdfapi/fpdf_page/include/cpdf_formobject.h" |
| 17 #include "core/fpdfapi/fpdf_page/include/cpdf_page.h" |
18 #include "core/fpdfapi/fpdf_page/include/cpdf_pageobject.h" | 18 #include "core/fpdfapi/fpdf_page/include/cpdf_pageobject.h" |
19 #include "core/fpdfapi/fpdf_page/include/cpdf_textobject.h" | 19 #include "core/fpdfapi/fpdf_page/include/cpdf_textobject.h" |
20 #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h" | 20 #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h" |
21 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h" | 21 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h" |
22 #include "core/fpdftext/include/ipdf_linkextract.h" | 22 #include "core/fpdftext/include/cpdf_linkextract.h" |
23 #include "core/fpdftext/include/ipdf_textpage.h" | 23 #include "core/fpdftext/include/cpdf_textpage.h" |
24 #include "core/fpdftext/include/ipdf_textpagefind.h" | 24 #include "core/fpdftext/include/cpdf_textpagefind.h" |
25 #include "core/fpdftext/unicodenormalization.h" | 25 #include "core/fpdftext/unicodenormalizationdata.h" |
26 #include "core/fxcrt/fx_bidi.h" | 26 #include "core/fxcrt/fx_bidi.h" |
27 #include "core/fxcrt/include/fx_ext.h" | 27 #include "core/fxcrt/include/fx_ext.h" |
28 #include "core/fxcrt/include/fx_ucd.h" | 28 #include "core/fxcrt/include/fx_ucd.h" |
29 #include "third_party/base/stl_util.h" | 29 #include "third_party/base/stl_util.h" |
30 | 30 |
31 #define FPDFTEXT_RLTB 1 | 31 #define FPDFTEXT_RLTB 1 |
32 #define FPDFTEXT_LEFT -1 | 32 #define FPDFTEXT_LEFT -1 |
33 #define FPDFTEXT_RIGHT 1 | 33 #define FPDFTEXT_RIGHT 1 |
34 | 34 |
35 #define FPDFTEXT_MATCHCASE 0x00000001 | 35 #define FPDFTEXT_MATCHCASE 0x00000001 |
36 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 | 36 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 |
37 #define FPDFTEXT_CONSECUTIVE 0x00000004 | 37 #define FPDFTEXT_CONSECUTIVE 0x00000004 |
38 | 38 |
| 39 #define FPDFTEXT_CHAR_ERROR -1 |
| 40 #define FPDFTEXT_CHAR_NORMAL 0 |
| 41 #define FPDFTEXT_CHAR_GENERATED 1 |
| 42 #define FPDFTEXT_CHAR_UNUNICODE 2 |
| 43 #define FPDFTEXT_CHAR_HYPHEN 3 |
| 44 #define FPDFTEXT_CHAR_PIECE 4 |
| 45 #define FPDFTEXT_MC_PASS 0 |
| 46 #define FPDFTEXT_MC_DONE 1 |
| 47 #define FPDFTEXT_MC_DELAY 2 |
| 48 |
39 namespace { | 49 namespace { |
40 | 50 |
41 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) { | 51 const FX_FLOAT kDefaultFontSize = 1.0f; |
| 52 const uint16_t* const g_UnicodeData_Normalization_Maps[5] = { |
| 53 nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2, |
| 54 g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4}; |
| 55 |
| 56 FX_BOOL IsIgnoreSpaceCharacter(FX_WCHAR curChar) { |
42 if (curChar < 255) { | 57 if (curChar < 255) { |
43 return FALSE; | 58 return FALSE; |
44 } | 59 } |
45 if ((curChar >= 0x0600 && curChar <= 0x06FF) || | 60 if ((curChar >= 0x0600 && curChar <= 0x06FF) || |
46 (curChar >= 0xFE70 && curChar <= 0xFEFF) || | 61 (curChar >= 0xFE70 && curChar <= 0xFEFF) || |
47 (curChar >= 0xFB50 && curChar <= 0xFDFF) || | 62 (curChar >= 0xFB50 && curChar <= 0xFDFF) || |
48 (curChar >= 0x0400 && curChar <= 0x04FF) || | 63 (curChar >= 0x0400 && curChar <= 0x04FF) || |
49 (curChar >= 0x0500 && curChar <= 0x052F) || | 64 (curChar >= 0x0500 && curChar <= 0x052F) || |
50 (curChar >= 0xA640 && curChar <= 0xA69F) || | 65 (curChar >= 0xA640 && curChar <= 0xA69F) || |
51 (curChar >= 0x2DE0 && curChar <= 0x2DFF) || curChar == 8467 || | 66 (curChar >= 0x2DE0 && curChar <= 0x2DFF) || curChar == 8467 || |
52 (curChar >= 0x2000 && curChar <= 0x206F)) { | 67 (curChar >= 0x2000 && curChar <= 0x206F)) { |
53 return FALSE; | 68 return FALSE; |
54 } | 69 } |
55 return TRUE; | 70 return TRUE; |
56 } | 71 } |
57 | 72 |
58 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) { | 73 FX_FLOAT NormalizeThreshold(FX_FLOAT threshold) { |
59 if (threshold < 300) { | 74 if (threshold < 300) { |
60 return threshold / 2.0f; | 75 return threshold / 2.0f; |
61 } | 76 } |
62 if (threshold < 500) { | 77 if (threshold < 500) { |
63 return threshold / 4.0f; | 78 return threshold / 4.0f; |
64 } | 79 } |
65 if (threshold < 700) { | 80 if (threshold < 700) { |
66 return threshold / 5.0f; | 81 return threshold / 5.0f; |
67 } | 82 } |
68 return threshold / 6.0f; | 83 return threshold / 6.0f; |
69 } | 84 } |
70 | 85 |
71 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, | 86 FX_FLOAT CalculateBaseSpace(const CPDF_TextObject* pTextObj, |
72 const CFX_Matrix& matrix) { | 87 const CFX_Matrix& matrix) { |
73 FX_FLOAT baseSpace = 0.0; | 88 FX_FLOAT baseSpace = 0.0; |
74 const int nItems = pTextObj->CountItems(); | 89 const int nItems = pTextObj->CountItems(); |
75 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { | 90 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { |
76 FX_BOOL bAllChar = TRUE; | 91 FX_BOOL bAllChar = TRUE; |
77 FX_FLOAT spacing = matrix.TransformDistance( | 92 FX_FLOAT spacing = matrix.TransformDistance( |
78 pTextObj->m_TextState.GetObject()->m_CharSpace); | 93 pTextObj->m_TextState.GetObject()->m_CharSpace); |
79 baseSpace = spacing; | 94 baseSpace = spacing; |
80 for (int i = 0; i < nItems; i++) { | 95 for (int i = 0; i < nItems; i++) { |
81 CPDF_TextObjectItem item; | 96 CPDF_TextObjectItem item; |
82 pTextObj->GetItemInfo(i, &item); | 97 pTextObj->GetItemInfo(i, &item); |
83 if (item.m_CharCode == (uint32_t)-1) { | 98 if (item.m_CharCode == (uint32_t)-1) { |
84 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); | 99 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); |
85 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; | 100 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; |
86 baseSpace = std::min(baseSpace, kerning + spacing); | 101 baseSpace = std::min(baseSpace, kerning + spacing); |
87 bAllChar = FALSE; | 102 bAllChar = FALSE; |
88 } | 103 } |
89 } | 104 } |
90 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { | 105 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { |
91 baseSpace = 0.0; | 106 baseSpace = 0.0; |
92 } | 107 } |
93 } | 108 } |
94 return baseSpace; | 109 return baseSpace; |
95 } | 110 } |
96 | 111 |
97 const FX_FLOAT kDefaultFontSize = 1.0f; | 112 FX_STRSIZE Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst) { |
| 113 wch = wch & 0xFFFF; |
| 114 FX_WCHAR wFind = g_UnicodeData_Normalization[wch]; |
| 115 if (!wFind) { |
| 116 if (pDst) { |
| 117 *pDst = wch; |
| 118 } |
| 119 return 1; |
| 120 } |
| 121 if (wFind >= 0x8000) { |
| 122 wch = wFind - 0x8000; |
| 123 wFind = 1; |
| 124 } else { |
| 125 wch = wFind & 0x0FFF; |
| 126 wFind >>= 12; |
| 127 } |
| 128 const uint16_t* pMap = g_UnicodeData_Normalization_Maps[wFind]; |
| 129 if (pMap == g_UnicodeData_Normalization_Map4) { |
| 130 pMap = g_UnicodeData_Normalization_Map4 + wch; |
| 131 wFind = (FX_WCHAR)(*pMap++); |
| 132 } else { |
| 133 pMap += wch; |
| 134 } |
| 135 if (pDst) { |
| 136 FX_WCHAR n = wFind; |
| 137 while (n--) { |
| 138 *pDst++ = *pMap++; |
| 139 } |
| 140 } |
| 141 return (FX_STRSIZE)wFind; |
| 142 } |
98 | 143 |
99 } // namespace | 144 } // namespace |
100 | 145 |
101 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, | |
102 int flags) { | |
103 return new CPDF_TextPage(pPage, flags); | |
104 } | |
105 | |
106 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( | |
107 const IPDF_TextPage* pTextPage) { | |
108 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; | |
109 } | |
110 | |
111 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { | |
112 return new CPDF_LinkExtract(); | |
113 } | |
114 | |
115 #define TEXT_BLANK_CHAR L' ' | 146 #define TEXT_BLANK_CHAR L' ' |
116 #define TEXT_LINEFEED_CHAR L'\n' | 147 #define TEXT_LINEFEED_CHAR L'\n' |
117 #define TEXT_RETURN_CHAR L'\r' | 148 #define TEXT_RETURN_CHAR L'\r' |
118 #define TEXT_EMPTY L"" | 149 #define TEXT_EMPTY L"" |
119 #define TEXT_BLANK L" " | 150 #define TEXT_BLANK L" " |
120 #define TEXT_RETURN_LINEFEED L"\r\n" | 151 #define TEXT_RETURN_LINEFEED L"\r\n" |
121 #define TEXT_LINEFEED L"\n" | 152 #define TEXT_LINEFEED L"\n" |
122 #define TEXT_CHARRATIO_GAPDELTA 0.070 | 153 #define TEXT_CHARRATIO_GAPDELTA 0.070 |
123 | 154 |
124 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) | 155 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) |
(...skipping 800 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
925 } | 956 } |
926 } | 957 } |
927 } | 958 } |
928 | 959 |
929 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, | 960 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, |
930 PAGECHAR_INFO info) { | 961 PAGECHAR_INFO info) { |
931 if (!IsControlChar(info)) { | 962 if (!IsControlChar(info)) { |
932 info.m_Index = m_TextBuf.GetLength(); | 963 info.m_Index = m_TextBuf.GetLength(); |
933 if (wChar >= 0xFB00 && wChar <= 0xFB06) { | 964 if (wChar >= 0xFB00 && wChar <= 0xFB06) { |
934 FX_WCHAR* pDst = NULL; | 965 FX_WCHAR* pDst = NULL; |
935 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 966 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); |
936 if (nCount >= 1) { | 967 if (nCount >= 1) { |
937 pDst = FX_Alloc(FX_WCHAR, nCount); | 968 pDst = FX_Alloc(FX_WCHAR, nCount); |
938 FX_Unicode_GetNormalization(wChar, pDst); | 969 Unicode_GetNormalization(wChar, pDst); |
939 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 970 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
940 PAGECHAR_INFO info2 = info; | 971 PAGECHAR_INFO info2 = info; |
941 info2.m_Unicode = pDst[nIndex]; | 972 info2.m_Unicode = pDst[nIndex]; |
942 info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 973 info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
943 m_TextBuf.AppendChar(info2.m_Unicode); | 974 m_TextBuf.AppendChar(info2.m_Unicode); |
944 m_CharList.push_back(info2); | 975 m_CharList.push_back(info2); |
945 } | 976 } |
946 FX_Free(pDst); | 977 FX_Free(pDst); |
947 return; | 978 return; |
948 } | 979 } |
949 } | 980 } |
950 m_TextBuf.AppendChar(wChar); | 981 m_TextBuf.AppendChar(wChar); |
951 } else { | 982 } else { |
952 info.m_Index = -1; | 983 info.m_Index = -1; |
953 } | 984 } |
954 m_CharList.push_back(info); | 985 m_CharList.push_back(info); |
955 } | 986 } |
956 | 987 |
957 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, | 988 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, |
958 PAGECHAR_INFO info) { | 989 PAGECHAR_INFO info) { |
959 if (!IsControlChar(info)) { | 990 if (!IsControlChar(info)) { |
960 info.m_Index = m_TextBuf.GetLength(); | 991 info.m_Index = m_TextBuf.GetLength(); |
961 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); | 992 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); |
962 FX_WCHAR* pDst = NULL; | 993 FX_WCHAR* pDst = NULL; |
963 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 994 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); |
964 if (nCount >= 1) { | 995 if (nCount >= 1) { |
965 pDst = FX_Alloc(FX_WCHAR, nCount); | 996 pDst = FX_Alloc(FX_WCHAR, nCount); |
966 FX_Unicode_GetNormalization(wChar, pDst); | 997 Unicode_GetNormalization(wChar, pDst); |
967 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 998 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
968 PAGECHAR_INFO info2 = info; | 999 PAGECHAR_INFO info2 = info; |
969 info2.m_Unicode = pDst[nIndex]; | 1000 info2.m_Unicode = pDst[nIndex]; |
970 info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 1001 info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
971 m_TextBuf.AppendChar(info2.m_Unicode); | 1002 m_TextBuf.AppendChar(info2.m_Unicode); |
972 m_CharList.push_back(info2); | 1003 m_CharList.push_back(info2); |
973 } | 1004 } |
974 FX_Free(pDst); | 1005 FX_Free(pDst); |
975 return; | 1006 return; |
976 } | 1007 } |
(...skipping 393 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1370 } | 1401 } |
1371 if (FPDFTEXT_MC_DELAY == bPreMKC) { | 1402 if (FPDFTEXT_MC_DELAY == bPreMKC) { |
1372 ProcessMarkedContent(Obj); | 1403 ProcessMarkedContent(Obj); |
1373 m_pPreTextObj = pTextObj; | 1404 m_pPreTextObj = pTextObj; |
1374 m_perMatrix.Copy(formMatrix); | 1405 m_perMatrix.Copy(formMatrix); |
1375 return; | 1406 return; |
1376 } | 1407 } |
1377 m_pPreTextObj = pTextObj; | 1408 m_pPreTextObj = pTextObj; |
1378 m_perMatrix.Copy(formMatrix); | 1409 m_perMatrix.Copy(formMatrix); |
1379 int nItems = pTextObj->CountItems(); | 1410 int nItems = pTextObj->CountItems(); |
1380 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); | 1411 FX_FLOAT baseSpace = CalculateBaseSpace(pTextObj, matrix); |
1381 | 1412 |
1382 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); | 1413 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); |
1383 const FX_BOOL bIsBidiAndMirrorInverse = | 1414 const FX_BOOL bIsBidiAndMirrorInverse = |
1384 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; | 1415 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; |
1385 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); | 1416 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); |
1386 int32_t iCharListStartAppend = | 1417 int32_t iCharListStartAppend = |
1387 pdfium::CollectionSize<int32_t>(m_TempCharList); | 1418 pdfium::CollectionSize<int32_t>(m_TempCharList); |
1388 | 1419 |
1389 FX_FLOAT spacing = 0; | 1420 FX_FLOAT spacing = 0; |
1390 for (int i = 0; i < nItems; i++) { | 1421 for (int i = 0; i < nItems; i++) { |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1423 if (threshold > fontsize_h / 3) { | 1454 if (threshold > fontsize_h / 3) { |
1424 threshold = 0; | 1455 threshold = 0; |
1425 } else { | 1456 } else { |
1426 threshold /= 2; | 1457 threshold /= 2; |
1427 } | 1458 } |
1428 if (threshold == 0) { | 1459 if (threshold == 0) { |
1429 threshold = fontsize_h; | 1460 threshold = fontsize_h; |
1430 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); | 1461 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); |
1431 threshold = this_width > last_width ? (FX_FLOAT)this_width | 1462 threshold = this_width > last_width ? (FX_FLOAT)this_width |
1432 : (FX_FLOAT)last_width; | 1463 : (FX_FLOAT)last_width; |
1433 threshold = _NormalizeThreshold(threshold); | 1464 threshold = NormalizeThreshold(threshold); |
1434 threshold = fontsize_h * threshold / 1000; | 1465 threshold = fontsize_h * threshold / 1000; |
1435 } | 1466 } |
1436 if (threshold && (spacing && spacing >= threshold)) { | 1467 if (threshold && (spacing && spacing >= threshold)) { |
1437 charinfo.m_Unicode = TEXT_BLANK_CHAR; | 1468 charinfo.m_Unicode = TEXT_BLANK_CHAR; |
1438 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; | 1469 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; |
1439 charinfo.m_pTextObj = pTextObj; | 1470 charinfo.m_pTextObj = pTextObj; |
1440 charinfo.m_Index = m_TextBuf.GetLength(); | 1471 charinfo.m_Index = m_TextBuf.GetLength(); |
1441 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); | 1472 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); |
1442 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode; | 1473 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode; |
1443 charinfo.m_Matrix.Copy(formMatrix); | 1474 charinfo.m_Matrix.Copy(formMatrix); |
(...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1891 } | 1922 } |
1892 if (unicode > L'Z' && unicode < L'a') { | 1923 if (unicode > L'Z' && unicode < L'a') { |
1893 return FALSE; | 1924 return FALSE; |
1894 } | 1925 } |
1895 if (unicode > L'z') { | 1926 if (unicode > L'z') { |
1896 return FALSE; | 1927 return FALSE; |
1897 } | 1928 } |
1898 return TRUE; | 1929 return TRUE; |
1899 } | 1930 } |
1900 | 1931 |
1901 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) | 1932 CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage) |
1902 : m_pTextPage(pTextPage), | 1933 : m_pTextPage(pTextPage), |
1903 m_flags(0), | 1934 m_flags(0), |
1904 m_findNextStart(-1), | 1935 m_findNextStart(-1), |
1905 m_findPreStart(-1), | 1936 m_findPreStart(-1), |
1906 m_bMatchCase(FALSE), | 1937 m_bMatchCase(FALSE), |
1907 m_bMatchWholeWord(FALSE), | 1938 m_bMatchWholeWord(FALSE), |
1908 m_resStart(0), | 1939 m_resStart(0), |
1909 m_resEnd(-1), | 1940 m_resEnd(-1), |
1910 m_IsFind(FALSE) { | 1941 m_IsFind(FALSE) { |
1911 m_strText = m_pTextPage->GetPageText(); | 1942 m_strText = m_pTextPage->GetPageText(); |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2047 if (iWord == 0) { | 2078 if (iWord == 0) { |
2048 m_resStart = nResultPos; | 2079 m_resStart = nResultPos; |
2049 } | 2080 } |
2050 FX_BOOL bMatch = TRUE; | 2081 FX_BOOL bMatch = TRUE; |
2051 if (iWord != 0 && !bSpaceStart) { | 2082 if (iWord != 0 && !bSpaceStart) { |
2052 int PreResEndPos = nStartPos; | 2083 int PreResEndPos = nStartPos; |
2053 int curChar = csWord.GetAt(0); | 2084 int curChar = csWord.GetAt(0); |
2054 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; | 2085 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; |
2055 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); | 2086 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); |
2056 if (nStartPos == nResultPos && | 2087 if (nStartPos == nResultPos && |
2057 !(_IsIgnoreSpaceCharacter(lastChar) || | 2088 !(IsIgnoreSpaceCharacter(lastChar) || |
2058 _IsIgnoreSpaceCharacter(curChar))) { | 2089 IsIgnoreSpaceCharacter(curChar))) { |
2059 bMatch = FALSE; | 2090 bMatch = FALSE; |
2060 } | 2091 } |
2061 for (int d = PreResEndPos; d < nResultPos; d++) { | 2092 for (int d = PreResEndPos; d < nResultPos; d++) { |
2062 FX_WCHAR strInsert = m_strText.GetAt(d); | 2093 FX_WCHAR strInsert = m_strText.GetAt(d); |
2063 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && | 2094 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && |
2064 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { | 2095 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { |
2065 bMatch = FALSE; | 2096 bMatch = FALSE; |
2066 break; | 2097 break; |
2067 } | 2098 } |
2068 } | 2099 } |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2167 index++; | 2198 index++; |
2168 continue; | 2199 continue; |
2169 } else { | 2200 } else { |
2170 break; | 2201 break; |
2171 } | 2202 } |
2172 } | 2203 } |
2173 int pos = 0; | 2204 int pos = 0; |
2174 while (pos < csWord.GetLength()) { | 2205 while (pos < csWord.GetLength()) { |
2175 CFX_WideString curStr = csWord.Mid(pos, 1); | 2206 CFX_WideString curStr = csWord.Mid(pos, 1); |
2176 FX_WCHAR curChar = csWord.GetAt(pos); | 2207 FX_WCHAR curChar = csWord.GetAt(pos); |
2177 if (_IsIgnoreSpaceCharacter(curChar)) { | 2208 if (IsIgnoreSpaceCharacter(curChar)) { |
2178 if (pos > 0 && curChar == 0x2019) { | 2209 if (pos > 0 && curChar == 0x2019) { |
2179 pos++; | 2210 pos++; |
2180 continue; | 2211 continue; |
2181 } | 2212 } |
2182 if (pos > 0) { | 2213 if (pos > 0) { |
2183 m_csFindWhatArray.push_back(csWord.Mid(0, pos)); | 2214 m_csFindWhatArray.push_back(csWord.Mid(0, pos)); |
2184 } | 2215 } |
2185 m_csFindWhatArray.push_back(curStr); | 2216 m_csFindWhatArray.push_back(curStr); |
2186 if (pos == csWord.GetLength() - 1) { | 2217 if (pos == csWord.GetLength() - 1) { |
2187 csWord.clear(); | 2218 csWord.clear(); |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2299 return resEnd - resStart + 1; | 2330 return resEnd - resStart + 1; |
2300 } | 2331 } |
2301 | 2332 |
2302 CPDF_LinkExtract::CPDF_LinkExtract() | 2333 CPDF_LinkExtract::CPDF_LinkExtract() |
2303 : m_pTextPage(nullptr), m_bIsParsed(false) {} | 2334 : m_pTextPage(nullptr), m_bIsParsed(false) {} |
2304 | 2335 |
2305 CPDF_LinkExtract::~CPDF_LinkExtract() { | 2336 CPDF_LinkExtract::~CPDF_LinkExtract() { |
2306 DeleteLinkList(); | 2337 DeleteLinkList(); |
2307 } | 2338 } |
2308 | 2339 |
2309 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { | 2340 FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) { |
2310 if (!pTextPage || !pTextPage->IsParsed()) | 2341 if (!pTextPage || !pTextPage->IsParsed()) |
2311 return FALSE; | 2342 return FALSE; |
2312 | 2343 |
2313 m_pTextPage = (const CPDF_TextPage*)pTextPage; | 2344 m_pTextPage = (const CPDF_TextPage*)pTextPage; |
2314 m_strPageText = m_pTextPage->GetPageText(0, -1); | 2345 m_strPageText = m_pTextPage->GetPageText(0, -1); |
2315 DeleteLinkList(); | 2346 DeleteLinkList(); |
2316 if (m_strPageText.IsEmpty()) { | 2347 if (m_strPageText.IsEmpty()) { |
2317 return FALSE; | 2348 return FALSE; |
2318 } | 2349 } |
2319 ParseLink(); | 2350 ParseLink(); |
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2510 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2541 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2511 return; | 2542 return; |
2512 } | 2543 } |
2513 CPDF_LinkExt* link = NULL; | 2544 CPDF_LinkExt* link = NULL; |
2514 link = m_LinkList.GetAt(index); | 2545 link = m_LinkList.GetAt(index); |
2515 if (!link) { | 2546 if (!link) { |
2516 return; | 2547 return; |
2517 } | 2548 } |
2518 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2549 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2519 } | 2550 } |
OLD | NEW |