Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(139)

Side by Side Diff: core/fpdftext/fpdf_text_int.cpp

Issue 1897993002: Remove IPDF_TextPage, IPDF_TextPageFind and IPDF_LinkExtract interfaces. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « core/fpdftext/fpdf_text_int.h ('k') | core/fpdftext/fpdf_text_int_unittest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/fpdftext/fpdf_text_int.h"
8
9 #include <algorithm> 7 #include <algorithm>
10 #include <cctype> 8 #include <cctype>
11 #include <cwctype> 9 #include <cwctype>
12 #include <memory> 10 #include <memory>
13 #include <utility> 11 #include <utility>
14 #include <vector> 12 #include <vector>
15 13
16 #include "core/fpdfapi/fpdf_font/include/cpdf_font.h" 14 #include "core/fpdfapi/fpdf_font/include/cpdf_font.h"
15 #include "core/fpdfapi/fpdf_page/include/cpdf_form.h"
17 #include "core/fpdfapi/fpdf_page/include/cpdf_formobject.h" 16 #include "core/fpdfapi/fpdf_page/include/cpdf_formobject.h"
17 #include "core/fpdfapi/fpdf_page/include/cpdf_page.h"
18 #include "core/fpdfapi/fpdf_page/include/cpdf_pageobject.h" 18 #include "core/fpdfapi/fpdf_page/include/cpdf_pageobject.h"
19 #include "core/fpdfapi/fpdf_page/include/cpdf_textobject.h" 19 #include "core/fpdfapi/fpdf_page/include/cpdf_textobject.h"
20 #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h" 20 #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h"
21 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h" 21 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h"
22 #include "core/fpdftext/include/ipdf_linkextract.h" 22 #include "core/fpdftext/include/cpdf_linkextract.h"
23 #include "core/fpdftext/include/ipdf_textpage.h" 23 #include "core/fpdftext/include/cpdf_textpage.h"
24 #include "core/fpdftext/include/ipdf_textpagefind.h" 24 #include "core/fpdftext/include/cpdf_textpagefind.h"
25 #include "core/fpdftext/unicodenormalization.h" 25 #include "core/fpdftext/unicodenormalizationdata.h"
26 #include "core/fxcrt/fx_bidi.h" 26 #include "core/fxcrt/fx_bidi.h"
27 #include "core/fxcrt/include/fx_ext.h" 27 #include "core/fxcrt/include/fx_ext.h"
28 #include "core/fxcrt/include/fx_ucd.h" 28 #include "core/fxcrt/include/fx_ucd.h"
29 #include "third_party/base/stl_util.h" 29 #include "third_party/base/stl_util.h"
30 30
31 #define FPDFTEXT_RLTB 1 31 #define FPDFTEXT_RLTB 1
32 #define FPDFTEXT_LEFT -1 32 #define FPDFTEXT_LEFT -1
33 #define FPDFTEXT_RIGHT 1 33 #define FPDFTEXT_RIGHT 1
34 34
35 #define FPDFTEXT_MATCHCASE 0x00000001 35 #define FPDFTEXT_MATCHCASE 0x00000001
36 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 36 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002
37 #define FPDFTEXT_CONSECUTIVE 0x00000004 37 #define FPDFTEXT_CONSECUTIVE 0x00000004
38 38
39 #define FPDFTEXT_CHAR_ERROR -1
40 #define FPDFTEXT_CHAR_NORMAL 0
41 #define FPDFTEXT_CHAR_GENERATED 1
42 #define FPDFTEXT_CHAR_UNUNICODE 2
43 #define FPDFTEXT_CHAR_HYPHEN 3
44 #define FPDFTEXT_CHAR_PIECE 4
45 #define FPDFTEXT_MC_PASS 0
46 #define FPDFTEXT_MC_DONE 1
47 #define FPDFTEXT_MC_DELAY 2
48
39 namespace { 49 namespace {
40 50
41 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) { 51 const FX_FLOAT kDefaultFontSize = 1.0f;
52 const uint16_t* const g_UnicodeData_Normalization_Maps[5] = {
53 nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2,
54 g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4};
55
56 FX_BOOL IsIgnoreSpaceCharacter(FX_WCHAR curChar) {
42 if (curChar < 255) { 57 if (curChar < 255) {
43 return FALSE; 58 return FALSE;
44 } 59 }
45 if ((curChar >= 0x0600 && curChar <= 0x06FF) || 60 if ((curChar >= 0x0600 && curChar <= 0x06FF) ||
46 (curChar >= 0xFE70 && curChar <= 0xFEFF) || 61 (curChar >= 0xFE70 && curChar <= 0xFEFF) ||
47 (curChar >= 0xFB50 && curChar <= 0xFDFF) || 62 (curChar >= 0xFB50 && curChar <= 0xFDFF) ||
48 (curChar >= 0x0400 && curChar <= 0x04FF) || 63 (curChar >= 0x0400 && curChar <= 0x04FF) ||
49 (curChar >= 0x0500 && curChar <= 0x052F) || 64 (curChar >= 0x0500 && curChar <= 0x052F) ||
50 (curChar >= 0xA640 && curChar <= 0xA69F) || 65 (curChar >= 0xA640 && curChar <= 0xA69F) ||
51 (curChar >= 0x2DE0 && curChar <= 0x2DFF) || curChar == 8467 || 66 (curChar >= 0x2DE0 && curChar <= 0x2DFF) || curChar == 8467 ||
52 (curChar >= 0x2000 && curChar <= 0x206F)) { 67 (curChar >= 0x2000 && curChar <= 0x206F)) {
53 return FALSE; 68 return FALSE;
54 } 69 }
55 return TRUE; 70 return TRUE;
56 } 71 }
57 72
58 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) { 73 FX_FLOAT NormalizeThreshold(FX_FLOAT threshold) {
59 if (threshold < 300) { 74 if (threshold < 300) {
60 return threshold / 2.0f; 75 return threshold / 2.0f;
61 } 76 }
62 if (threshold < 500) { 77 if (threshold < 500) {
63 return threshold / 4.0f; 78 return threshold / 4.0f;
64 } 79 }
65 if (threshold < 700) { 80 if (threshold < 700) {
66 return threshold / 5.0f; 81 return threshold / 5.0f;
67 } 82 }
68 return threshold / 6.0f; 83 return threshold / 6.0f;
69 } 84 }
70 85
71 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, 86 FX_FLOAT CalculateBaseSpace(const CPDF_TextObject* pTextObj,
72 const CFX_Matrix& matrix) { 87 const CFX_Matrix& matrix) {
73 FX_FLOAT baseSpace = 0.0; 88 FX_FLOAT baseSpace = 0.0;
74 const int nItems = pTextObj->CountItems(); 89 const int nItems = pTextObj->CountItems();
75 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { 90 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) {
76 FX_BOOL bAllChar = TRUE; 91 FX_BOOL bAllChar = TRUE;
77 FX_FLOAT spacing = matrix.TransformDistance( 92 FX_FLOAT spacing = matrix.TransformDistance(
78 pTextObj->m_TextState.GetObject()->m_CharSpace); 93 pTextObj->m_TextState.GetObject()->m_CharSpace);
79 baseSpace = spacing; 94 baseSpace = spacing;
80 for (int i = 0; i < nItems; i++) { 95 for (int i = 0; i < nItems; i++) {
81 CPDF_TextObjectItem item; 96 CPDF_TextObjectItem item;
82 pTextObj->GetItemInfo(i, &item); 97 pTextObj->GetItemInfo(i, &item);
83 if (item.m_CharCode == (uint32_t)-1) { 98 if (item.m_CharCode == (uint32_t)-1) {
84 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); 99 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
85 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; 100 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000;
86 baseSpace = std::min(baseSpace, kerning + spacing); 101 baseSpace = std::min(baseSpace, kerning + spacing);
87 bAllChar = FALSE; 102 bAllChar = FALSE;
88 } 103 }
89 } 104 }
90 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { 105 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) {
91 baseSpace = 0.0; 106 baseSpace = 0.0;
92 } 107 }
93 } 108 }
94 return baseSpace; 109 return baseSpace;
95 } 110 }
96 111
97 const FX_FLOAT kDefaultFontSize = 1.0f; 112 FX_STRSIZE Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst) {
113 wch = wch & 0xFFFF;
114 FX_WCHAR wFind = g_UnicodeData_Normalization[wch];
115 if (!wFind) {
116 if (pDst) {
117 *pDst = wch;
118 }
119 return 1;
120 }
121 if (wFind >= 0x8000) {
122 wch = wFind - 0x8000;
123 wFind = 1;
124 } else {
125 wch = wFind & 0x0FFF;
126 wFind >>= 12;
127 }
128 const uint16_t* pMap = g_UnicodeData_Normalization_Maps[wFind];
129 if (pMap == g_UnicodeData_Normalization_Map4) {
130 pMap = g_UnicodeData_Normalization_Map4 + wch;
131 wFind = (FX_WCHAR)(*pMap++);
132 } else {
133 pMap += wch;
134 }
135 if (pDst) {
136 FX_WCHAR n = wFind;
137 while (n--) {
138 *pDst++ = *pMap++;
139 }
140 }
141 return (FX_STRSIZE)wFind;
142 }
98 143
99 } // namespace 144 } // namespace
100 145
101 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage,
102 int flags) {
103 return new CPDF_TextPage(pPage, flags);
104 }
105
106 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind(
107 const IPDF_TextPage* pTextPage) {
108 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr;
109 }
110
111 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() {
112 return new CPDF_LinkExtract();
113 }
114
115 #define TEXT_BLANK_CHAR L' ' 146 #define TEXT_BLANK_CHAR L' '
116 #define TEXT_LINEFEED_CHAR L'\n' 147 #define TEXT_LINEFEED_CHAR L'\n'
117 #define TEXT_RETURN_CHAR L'\r' 148 #define TEXT_RETURN_CHAR L'\r'
118 #define TEXT_EMPTY L"" 149 #define TEXT_EMPTY L""
119 #define TEXT_BLANK L" " 150 #define TEXT_BLANK L" "
120 #define TEXT_RETURN_LINEFEED L"\r\n" 151 #define TEXT_RETURN_LINEFEED L"\r\n"
121 #define TEXT_LINEFEED L"\n" 152 #define TEXT_LINEFEED L"\n"
122 #define TEXT_CHARRATIO_GAPDELTA 0.070 153 #define TEXT_CHARRATIO_GAPDELTA 0.070
123 154
124 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) 155 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags)
(...skipping 800 matching lines...) Expand 10 before | Expand all | Expand 10 after
925 } 956 }
926 } 957 }
927 } 958 }
928 959
929 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, 960 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
930 PAGECHAR_INFO info) { 961 PAGECHAR_INFO info) {
931 if (!IsControlChar(info)) { 962 if (!IsControlChar(info)) {
932 info.m_Index = m_TextBuf.GetLength(); 963 info.m_Index = m_TextBuf.GetLength();
933 if (wChar >= 0xFB00 && wChar <= 0xFB06) { 964 if (wChar >= 0xFB00 && wChar <= 0xFB06) {
934 FX_WCHAR* pDst = NULL; 965 FX_WCHAR* pDst = NULL;
935 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); 966 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
936 if (nCount >= 1) { 967 if (nCount >= 1) {
937 pDst = FX_Alloc(FX_WCHAR, nCount); 968 pDst = FX_Alloc(FX_WCHAR, nCount);
938 FX_Unicode_GetNormalization(wChar, pDst); 969 Unicode_GetNormalization(wChar, pDst);
939 for (int nIndex = 0; nIndex < nCount; nIndex++) { 970 for (int nIndex = 0; nIndex < nCount; nIndex++) {
940 PAGECHAR_INFO info2 = info; 971 PAGECHAR_INFO info2 = info;
941 info2.m_Unicode = pDst[nIndex]; 972 info2.m_Unicode = pDst[nIndex];
942 info2.m_Flag = FPDFTEXT_CHAR_PIECE; 973 info2.m_Flag = FPDFTEXT_CHAR_PIECE;
943 m_TextBuf.AppendChar(info2.m_Unicode); 974 m_TextBuf.AppendChar(info2.m_Unicode);
944 m_CharList.push_back(info2); 975 m_CharList.push_back(info2);
945 } 976 }
946 FX_Free(pDst); 977 FX_Free(pDst);
947 return; 978 return;
948 } 979 }
949 } 980 }
950 m_TextBuf.AppendChar(wChar); 981 m_TextBuf.AppendChar(wChar);
951 } else { 982 } else {
952 info.m_Index = -1; 983 info.m_Index = -1;
953 } 984 }
954 m_CharList.push_back(info); 985 m_CharList.push_back(info);
955 } 986 }
956 987
957 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, 988 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
958 PAGECHAR_INFO info) { 989 PAGECHAR_INFO info) {
959 if (!IsControlChar(info)) { 990 if (!IsControlChar(info)) {
960 info.m_Index = m_TextBuf.GetLength(); 991 info.m_Index = m_TextBuf.GetLength();
961 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); 992 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
962 FX_WCHAR* pDst = NULL; 993 FX_WCHAR* pDst = NULL;
963 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); 994 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
964 if (nCount >= 1) { 995 if (nCount >= 1) {
965 pDst = FX_Alloc(FX_WCHAR, nCount); 996 pDst = FX_Alloc(FX_WCHAR, nCount);
966 FX_Unicode_GetNormalization(wChar, pDst); 997 Unicode_GetNormalization(wChar, pDst);
967 for (int nIndex = 0; nIndex < nCount; nIndex++) { 998 for (int nIndex = 0; nIndex < nCount; nIndex++) {
968 PAGECHAR_INFO info2 = info; 999 PAGECHAR_INFO info2 = info;
969 info2.m_Unicode = pDst[nIndex]; 1000 info2.m_Unicode = pDst[nIndex];
970 info2.m_Flag = FPDFTEXT_CHAR_PIECE; 1001 info2.m_Flag = FPDFTEXT_CHAR_PIECE;
971 m_TextBuf.AppendChar(info2.m_Unicode); 1002 m_TextBuf.AppendChar(info2.m_Unicode);
972 m_CharList.push_back(info2); 1003 m_CharList.push_back(info2);
973 } 1004 }
974 FX_Free(pDst); 1005 FX_Free(pDst);
975 return; 1006 return;
976 } 1007 }
(...skipping 393 matching lines...) Expand 10 before | Expand all | Expand 10 after
1370 } 1401 }
1371 if (FPDFTEXT_MC_DELAY == bPreMKC) { 1402 if (FPDFTEXT_MC_DELAY == bPreMKC) {
1372 ProcessMarkedContent(Obj); 1403 ProcessMarkedContent(Obj);
1373 m_pPreTextObj = pTextObj; 1404 m_pPreTextObj = pTextObj;
1374 m_perMatrix.Copy(formMatrix); 1405 m_perMatrix.Copy(formMatrix);
1375 return; 1406 return;
1376 } 1407 }
1377 m_pPreTextObj = pTextObj; 1408 m_pPreTextObj = pTextObj;
1378 m_perMatrix.Copy(formMatrix); 1409 m_perMatrix.Copy(formMatrix);
1379 int nItems = pTextObj->CountItems(); 1410 int nItems = pTextObj->CountItems();
1380 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); 1411 FX_FLOAT baseSpace = CalculateBaseSpace(pTextObj, matrix);
1381 1412
1382 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); 1413 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems);
1383 const FX_BOOL bIsBidiAndMirrorInverse = 1414 const FX_BOOL bIsBidiAndMirrorInverse =
1384 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; 1415 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0;
1385 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); 1416 int32_t iBufStartAppend = m_TempTextBuf.GetLength();
1386 int32_t iCharListStartAppend = 1417 int32_t iCharListStartAppend =
1387 pdfium::CollectionSize<int32_t>(m_TempCharList); 1418 pdfium::CollectionSize<int32_t>(m_TempCharList);
1388 1419
1389 FX_FLOAT spacing = 0; 1420 FX_FLOAT spacing = 0;
1390 for (int i = 0; i < nItems; i++) { 1421 for (int i = 0; i < nItems; i++) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1423 if (threshold > fontsize_h / 3) { 1454 if (threshold > fontsize_h / 3) {
1424 threshold = 0; 1455 threshold = 0;
1425 } else { 1456 } else {
1426 threshold /= 2; 1457 threshold /= 2;
1427 } 1458 }
1428 if (threshold == 0) { 1459 if (threshold == 0) {
1429 threshold = fontsize_h; 1460 threshold = fontsize_h;
1430 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); 1461 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont));
1431 threshold = this_width > last_width ? (FX_FLOAT)this_width 1462 threshold = this_width > last_width ? (FX_FLOAT)this_width
1432 : (FX_FLOAT)last_width; 1463 : (FX_FLOAT)last_width;
1433 threshold = _NormalizeThreshold(threshold); 1464 threshold = NormalizeThreshold(threshold);
1434 threshold = fontsize_h * threshold / 1000; 1465 threshold = fontsize_h * threshold / 1000;
1435 } 1466 }
1436 if (threshold && (spacing && spacing >= threshold)) { 1467 if (threshold && (spacing && spacing >= threshold)) {
1437 charinfo.m_Unicode = TEXT_BLANK_CHAR; 1468 charinfo.m_Unicode = TEXT_BLANK_CHAR;
1438 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; 1469 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED;
1439 charinfo.m_pTextObj = pTextObj; 1470 charinfo.m_pTextObj = pTextObj;
1440 charinfo.m_Index = m_TextBuf.GetLength(); 1471 charinfo.m_Index = m_TextBuf.GetLength();
1441 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); 1472 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
1442 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode; 1473 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode;
1443 charinfo.m_Matrix.Copy(formMatrix); 1474 charinfo.m_Matrix.Copy(formMatrix);
(...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after
1891 } 1922 }
1892 if (unicode > L'Z' && unicode < L'a') { 1923 if (unicode > L'Z' && unicode < L'a') {
1893 return FALSE; 1924 return FALSE;
1894 } 1925 }
1895 if (unicode > L'z') { 1926 if (unicode > L'z') {
1896 return FALSE; 1927 return FALSE;
1897 } 1928 }
1898 return TRUE; 1929 return TRUE;
1899 } 1930 }
1900 1931
1901 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) 1932 CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage)
1902 : m_pTextPage(pTextPage), 1933 : m_pTextPage(pTextPage),
1903 m_flags(0), 1934 m_flags(0),
1904 m_findNextStart(-1), 1935 m_findNextStart(-1),
1905 m_findPreStart(-1), 1936 m_findPreStart(-1),
1906 m_bMatchCase(FALSE), 1937 m_bMatchCase(FALSE),
1907 m_bMatchWholeWord(FALSE), 1938 m_bMatchWholeWord(FALSE),
1908 m_resStart(0), 1939 m_resStart(0),
1909 m_resEnd(-1), 1940 m_resEnd(-1),
1910 m_IsFind(FALSE) { 1941 m_IsFind(FALSE) {
1911 m_strText = m_pTextPage->GetPageText(); 1942 m_strText = m_pTextPage->GetPageText();
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after
2047 if (iWord == 0) { 2078 if (iWord == 0) {
2048 m_resStart = nResultPos; 2079 m_resStart = nResultPos;
2049 } 2080 }
2050 FX_BOOL bMatch = TRUE; 2081 FX_BOOL bMatch = TRUE;
2051 if (iWord != 0 && !bSpaceStart) { 2082 if (iWord != 0 && !bSpaceStart) {
2052 int PreResEndPos = nStartPos; 2083 int PreResEndPos = nStartPos;
2053 int curChar = csWord.GetAt(0); 2084 int curChar = csWord.GetAt(0);
2054 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; 2085 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1];
2055 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); 2086 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1);
2056 if (nStartPos == nResultPos && 2087 if (nStartPos == nResultPos &&
2057 !(_IsIgnoreSpaceCharacter(lastChar) || 2088 !(IsIgnoreSpaceCharacter(lastChar) ||
2058 _IsIgnoreSpaceCharacter(curChar))) { 2089 IsIgnoreSpaceCharacter(curChar))) {
2059 bMatch = FALSE; 2090 bMatch = FALSE;
2060 } 2091 }
2061 for (int d = PreResEndPos; d < nResultPos; d++) { 2092 for (int d = PreResEndPos; d < nResultPos; d++) {
2062 FX_WCHAR strInsert = m_strText.GetAt(d); 2093 FX_WCHAR strInsert = m_strText.GetAt(d);
2063 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && 2094 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR &&
2064 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { 2095 strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
2065 bMatch = FALSE; 2096 bMatch = FALSE;
2066 break; 2097 break;
2067 } 2098 }
2068 } 2099 }
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
2167 index++; 2198 index++;
2168 continue; 2199 continue;
2169 } else { 2200 } else {
2170 break; 2201 break;
2171 } 2202 }
2172 } 2203 }
2173 int pos = 0; 2204 int pos = 0;
2174 while (pos < csWord.GetLength()) { 2205 while (pos < csWord.GetLength()) {
2175 CFX_WideString curStr = csWord.Mid(pos, 1); 2206 CFX_WideString curStr = csWord.Mid(pos, 1);
2176 FX_WCHAR curChar = csWord.GetAt(pos); 2207 FX_WCHAR curChar = csWord.GetAt(pos);
2177 if (_IsIgnoreSpaceCharacter(curChar)) { 2208 if (IsIgnoreSpaceCharacter(curChar)) {
2178 if (pos > 0 && curChar == 0x2019) { 2209 if (pos > 0 && curChar == 0x2019) {
2179 pos++; 2210 pos++;
2180 continue; 2211 continue;
2181 } 2212 }
2182 if (pos > 0) { 2213 if (pos > 0) {
2183 m_csFindWhatArray.push_back(csWord.Mid(0, pos)); 2214 m_csFindWhatArray.push_back(csWord.Mid(0, pos));
2184 } 2215 }
2185 m_csFindWhatArray.push_back(curStr); 2216 m_csFindWhatArray.push_back(curStr);
2186 if (pos == csWord.GetLength() - 1) { 2217 if (pos == csWord.GetLength() - 1) {
2187 csWord.clear(); 2218 csWord.clear();
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
2299 return resEnd - resStart + 1; 2330 return resEnd - resStart + 1;
2300 } 2331 }
2301 2332
2302 CPDF_LinkExtract::CPDF_LinkExtract() 2333 CPDF_LinkExtract::CPDF_LinkExtract()
2303 : m_pTextPage(nullptr), m_bIsParsed(false) {} 2334 : m_pTextPage(nullptr), m_bIsParsed(false) {}
2304 2335
2305 CPDF_LinkExtract::~CPDF_LinkExtract() { 2336 CPDF_LinkExtract::~CPDF_LinkExtract() {
2306 DeleteLinkList(); 2337 DeleteLinkList();
2307 } 2338 }
2308 2339
2309 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { 2340 FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) {
2310 if (!pTextPage || !pTextPage->IsParsed()) 2341 if (!pTextPage || !pTextPage->IsParsed())
2311 return FALSE; 2342 return FALSE;
2312 2343
2313 m_pTextPage = (const CPDF_TextPage*)pTextPage; 2344 m_pTextPage = (const CPDF_TextPage*)pTextPage;
2314 m_strPageText = m_pTextPage->GetPageText(0, -1); 2345 m_strPageText = m_pTextPage->GetPageText(0, -1);
2315 DeleteLinkList(); 2346 DeleteLinkList();
2316 if (m_strPageText.IsEmpty()) { 2347 if (m_strPageText.IsEmpty()) {
2317 return FALSE; 2348 return FALSE;
2318 } 2349 }
2319 ParseLink(); 2350 ParseLink();
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after
2510 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { 2541 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2511 return; 2542 return;
2512 } 2543 }
2513 CPDF_LinkExt* link = NULL; 2544 CPDF_LinkExt* link = NULL;
2514 link = m_LinkList.GetAt(index); 2545 link = m_LinkList.GetAt(index);
2515 if (!link) { 2546 if (!link) {
2516 return; 2547 return;
2517 } 2548 }
2518 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2549 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2519 } 2550 }
OLDNEW
« no previous file with comments | « core/fpdftext/fpdf_text_int.h ('k') | core/fpdftext/fpdf_text_int_unittest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698