Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(55)

Side by Side Diff: core/fpdftext/fpdf_text_int.cpp

Issue 1897993002: Remove IPDF_TextPage, IPDF_TextPageFind and IPDF_LinkExtract interfaces. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Fix? Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « core/fpdftext/fpdf_text_int.h ('k') | core/fpdftext/fpdf_text_int_unittest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/fpdftext/fpdf_text_int.h"
8
9 #include <algorithm> 7 #include <algorithm>
10 #include <cctype> 8 #include <cctype>
11 #include <cwctype> 9 #include <cwctype>
12 #include <memory> 10 #include <memory>
13 #include <utility> 11 #include <utility>
14 #include <vector> 12 #include <vector>
15 13
16 #include "core/fpdfapi/fpdf_font/include/cpdf_font.h" 14 #include "core/fpdfapi/fpdf_font/include/cpdf_font.h"
15 #include "core/fpdfapi/fpdf_page/include/cpdf_form.h"
17 #include "core/fpdfapi/fpdf_page/include/cpdf_formobject.h" 16 #include "core/fpdfapi/fpdf_page/include/cpdf_formobject.h"
17 #include "core/fpdfapi/fpdf_page/include/cpdf_page.h"
18 #include "core/fpdfapi/fpdf_page/include/cpdf_pageobject.h" 18 #include "core/fpdfapi/fpdf_page/include/cpdf_pageobject.h"
19 #include "core/fpdfapi/fpdf_page/include/cpdf_textobject.h" 19 #include "core/fpdfapi/fpdf_page/include/cpdf_textobject.h"
20 #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h" 20 #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h"
21 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h" 21 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h"
22 #include "core/fpdftext/include/ipdf_linkextract.h" 22 #include "core/fpdftext/include/cpdf_linkextract.h"
23 #include "core/fpdftext/include/ipdf_textpage.h" 23 #include "core/fpdftext/include/cpdf_textpage.h"
24 #include "core/fpdftext/include/ipdf_textpagefind.h" 24 #include "core/fpdftext/include/cpdf_textpagefind.h"
25 #include "core/fpdftext/unicodenormalization.h" 25 #include "core/fpdftext/unicodenormalizationdata.h"
26 #include "core/fxcrt/fx_bidi.h" 26 #include "core/fxcrt/fx_bidi.h"
27 #include "core/fxcrt/include/fx_ext.h" 27 #include "core/fxcrt/include/fx_ext.h"
28 #include "core/fxcrt/include/fx_ucd.h" 28 #include "core/fxcrt/include/fx_ucd.h"
29 #include "third_party/base/stl_util.h" 29 #include "third_party/base/stl_util.h"
30 30
31 #define FPDFTEXT_RLTB 1 31 #define FPDFTEXT_RLTB 1
32 #define FPDFTEXT_LEFT -1 32 #define FPDFTEXT_LEFT -1
33 #define FPDFTEXT_RIGHT 1 33 #define FPDFTEXT_RIGHT 1
34 34
35 #define FPDFTEXT_MATCHCASE 0x00000001 35 #define FPDFTEXT_MATCHCASE 0x00000001
36 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 36 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002
37 #define FPDFTEXT_CONSECUTIVE 0x00000004 37 #define FPDFTEXT_CONSECUTIVE 0x00000004
38 38
39 #define FPDFTEXT_CHAR_ERROR -1
40 #define FPDFTEXT_CHAR_NORMAL 0
41 #define FPDFTEXT_CHAR_GENERATED 1
42 #define FPDFTEXT_CHAR_UNUNICODE 2
43 #define FPDFTEXT_CHAR_HYPHEN 3
44 #define FPDFTEXT_CHAR_PIECE 4
45 #define FPDFTEXT_MC_PASS 0
46 #define FPDFTEXT_MC_DONE 1
47 #define FPDFTEXT_MC_DELAY 2
48
39 namespace { 49 namespace {
40 50
41 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) { 51 FX_BOOL IsIgnoreSpaceCharacter(FX_WCHAR curChar) {
42 if (curChar < 255) { 52 if (curChar < 255) {
43 return FALSE; 53 return FALSE;
44 } 54 }
45 if ((curChar >= 0x0600 && curChar <= 0x06FF) || 55 if ((curChar >= 0x0600 && curChar <= 0x06FF) ||
46 (curChar >= 0xFE70 && curChar <= 0xFEFF) || 56 (curChar >= 0xFE70 && curChar <= 0xFEFF) ||
47 (curChar >= 0xFB50 && curChar <= 0xFDFF) || 57 (curChar >= 0xFB50 && curChar <= 0xFDFF) ||
48 (curChar >= 0x0400 && curChar <= 0x04FF) || 58 (curChar >= 0x0400 && curChar <= 0x04FF) ||
49 (curChar >= 0x0500 && curChar <= 0x052F) || 59 (curChar >= 0x0500 && curChar <= 0x052F) ||
50 (curChar >= 0xA640 && curChar <= 0xA69F) || 60 (curChar >= 0xA640 && curChar <= 0xA69F) ||
51 (curChar >= 0x2DE0 && curChar <= 0x2DFF) || curChar == 8467 || 61 (curChar >= 0x2DE0 && curChar <= 0x2DFF) || curChar == 8467 ||
52 (curChar >= 0x2000 && curChar <= 0x206F)) { 62 (curChar >= 0x2000 && curChar <= 0x206F)) {
53 return FALSE; 63 return FALSE;
54 } 64 }
55 return TRUE; 65 return TRUE;
56 } 66 }
57 67
58 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) { 68 FX_FLOAT NormalizeThreshold(FX_FLOAT threshold) {
59 if (threshold < 300) { 69 if (threshold < 300) {
60 return threshold / 2.0f; 70 return threshold / 2.0f;
61 } 71 }
62 if (threshold < 500) { 72 if (threshold < 500) {
63 return threshold / 4.0f; 73 return threshold / 4.0f;
64 } 74 }
65 if (threshold < 700) { 75 if (threshold < 700) {
66 return threshold / 5.0f; 76 return threshold / 5.0f;
67 } 77 }
68 return threshold / 6.0f; 78 return threshold / 6.0f;
69 } 79 }
70 80
71 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, 81 FX_FLOAT CalculateBaseSpace(const CPDF_TextObject* pTextObj,
72 const CFX_Matrix& matrix) { 82 const CFX_Matrix& matrix) {
73 FX_FLOAT baseSpace = 0.0; 83 FX_FLOAT baseSpace = 0.0;
74 const int nItems = pTextObj->CountItems(); 84 const int nItems = pTextObj->CountItems();
75 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { 85 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) {
76 FX_BOOL bAllChar = TRUE; 86 FX_BOOL bAllChar = TRUE;
77 FX_FLOAT spacing = matrix.TransformDistance( 87 FX_FLOAT spacing = matrix.TransformDistance(
78 pTextObj->m_TextState.GetObject()->m_CharSpace); 88 pTextObj->m_TextState.GetObject()->m_CharSpace);
79 baseSpace = spacing; 89 baseSpace = spacing;
80 for (int i = 0; i < nItems; i++) { 90 for (int i = 0; i < nItems; i++) {
81 CPDF_TextObjectItem item; 91 CPDF_TextObjectItem item;
82 pTextObj->GetItemInfo(i, &item); 92 pTextObj->GetItemInfo(i, &item);
83 if (item.m_CharCode == (uint32_t)-1) { 93 if (item.m_CharCode == (uint32_t)-1) {
84 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); 94 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
85 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; 95 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000;
86 baseSpace = std::min(baseSpace, kerning + spacing); 96 baseSpace = std::min(baseSpace, kerning + spacing);
87 bAllChar = FALSE; 97 bAllChar = FALSE;
88 } 98 }
89 } 99 }
90 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { 100 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) {
91 baseSpace = 0.0; 101 baseSpace = 0.0;
92 } 102 }
93 } 103 }
94 return baseSpace; 104 return baseSpace;
95 } 105 }
96 106
107 const uint16_t* const g_UnicodeData_Normalization_Maps[5] = {
Tom Sepez 2016/04/18 22:50:13 nit: data should probably go ahead of the function
dsinclair 2016/04/19 13:08:07 Done.
108 nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2,
109 g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4};
110
111 FX_STRSIZE Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst) {
112 wch = wch & 0xFFFF;
113 FX_WCHAR wFind = g_UnicodeData_Normalization[wch];
114 if (!wFind) {
115 if (pDst) {
116 *pDst = wch;
117 }
118 return 1;
119 }
120 if (wFind >= 0x8000) {
121 wch = wFind - 0x8000;
122 wFind = 1;
123 } else {
124 wch = wFind & 0x0FFF;
125 wFind >>= 12;
126 }
127 const uint16_t* pMap = g_UnicodeData_Normalization_Maps[wFind];
128 if (pMap == g_UnicodeData_Normalization_Map4) {
129 pMap = g_UnicodeData_Normalization_Map4 + wch;
130 wFind = (FX_WCHAR)(*pMap++);
131 } else {
132 pMap += wch;
133 }
134 if (pDst) {
135 FX_WCHAR n = wFind;
136 while (n--) {
137 *pDst++ = *pMap++;
138 }
139 }
140 return (FX_STRSIZE)wFind;
141 }
142
97 const FX_FLOAT kDefaultFontSize = 1.0f; 143 const FX_FLOAT kDefaultFontSize = 1.0f;
98 144
99 } // namespace 145 } // namespace
100 146
101 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage,
102 int flags) {
103 return new CPDF_TextPage(pPage, flags);
104 }
105
106 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind(
107 const IPDF_TextPage* pTextPage) {
108 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr;
109 }
110
111 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() {
112 return new CPDF_LinkExtract();
113 }
114
115 #define TEXT_BLANK_CHAR L' ' 147 #define TEXT_BLANK_CHAR L' '
116 #define TEXT_LINEFEED_CHAR L'\n' 148 #define TEXT_LINEFEED_CHAR L'\n'
117 #define TEXT_RETURN_CHAR L'\r' 149 #define TEXT_RETURN_CHAR L'\r'
118 #define TEXT_EMPTY L"" 150 #define TEXT_EMPTY L""
119 #define TEXT_BLANK L" " 151 #define TEXT_BLANK L" "
120 #define TEXT_RETURN_LINEFEED L"\r\n" 152 #define TEXT_RETURN_LINEFEED L"\r\n"
121 #define TEXT_LINEFEED L"\n" 153 #define TEXT_LINEFEED L"\n"
122 #define TEXT_CHARRATIO_GAPDELTA 0.070 154 #define TEXT_CHARRATIO_GAPDELTA 0.070
123 155
124 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) 156 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags)
(...skipping 800 matching lines...) Expand 10 before | Expand all | Expand 10 after
925 } 957 }
926 } 958 }
927 } 959 }
928 960
929 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, 961 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
930 PAGECHAR_INFO info) { 962 PAGECHAR_INFO info) {
931 if (!IsControlChar(info)) { 963 if (!IsControlChar(info)) {
932 info.m_Index = m_TextBuf.GetLength(); 964 info.m_Index = m_TextBuf.GetLength();
933 if (wChar >= 0xFB00 && wChar <= 0xFB06) { 965 if (wChar >= 0xFB00 && wChar <= 0xFB06) {
934 FX_WCHAR* pDst = NULL; 966 FX_WCHAR* pDst = NULL;
935 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); 967 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
936 if (nCount >= 1) { 968 if (nCount >= 1) {
937 pDst = FX_Alloc(FX_WCHAR, nCount); 969 pDst = FX_Alloc(FX_WCHAR, nCount);
938 FX_Unicode_GetNormalization(wChar, pDst); 970 Unicode_GetNormalization(wChar, pDst);
939 for (int nIndex = 0; nIndex < nCount; nIndex++) { 971 for (int nIndex = 0; nIndex < nCount; nIndex++) {
940 PAGECHAR_INFO info2 = info; 972 PAGECHAR_INFO info2 = info;
941 info2.m_Unicode = pDst[nIndex]; 973 info2.m_Unicode = pDst[nIndex];
942 info2.m_Flag = FPDFTEXT_CHAR_PIECE; 974 info2.m_Flag = FPDFTEXT_CHAR_PIECE;
943 m_TextBuf.AppendChar(info2.m_Unicode); 975 m_TextBuf.AppendChar(info2.m_Unicode);
944 m_CharList.push_back(info2); 976 m_CharList.push_back(info2);
945 } 977 }
946 FX_Free(pDst); 978 FX_Free(pDst);
947 return; 979 return;
948 } 980 }
949 } 981 }
950 m_TextBuf.AppendChar(wChar); 982 m_TextBuf.AppendChar(wChar);
951 } else { 983 } else {
952 info.m_Index = -1; 984 info.m_Index = -1;
953 } 985 }
954 m_CharList.push_back(info); 986 m_CharList.push_back(info);
955 } 987 }
956 988
957 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, 989 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
958 PAGECHAR_INFO info) { 990 PAGECHAR_INFO info) {
959 if (!IsControlChar(info)) { 991 if (!IsControlChar(info)) {
960 info.m_Index = m_TextBuf.GetLength(); 992 info.m_Index = m_TextBuf.GetLength();
961 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); 993 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
962 FX_WCHAR* pDst = NULL; 994 FX_WCHAR* pDst = NULL;
963 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); 995 FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
964 if (nCount >= 1) { 996 if (nCount >= 1) {
965 pDst = FX_Alloc(FX_WCHAR, nCount); 997 pDst = FX_Alloc(FX_WCHAR, nCount);
966 FX_Unicode_GetNormalization(wChar, pDst); 998 Unicode_GetNormalization(wChar, pDst);
967 for (int nIndex = 0; nIndex < nCount; nIndex++) { 999 for (int nIndex = 0; nIndex < nCount; nIndex++) {
968 PAGECHAR_INFO info2 = info; 1000 PAGECHAR_INFO info2 = info;
969 info2.m_Unicode = pDst[nIndex]; 1001 info2.m_Unicode = pDst[nIndex];
970 info2.m_Flag = FPDFTEXT_CHAR_PIECE; 1002 info2.m_Flag = FPDFTEXT_CHAR_PIECE;
971 m_TextBuf.AppendChar(info2.m_Unicode); 1003 m_TextBuf.AppendChar(info2.m_Unicode);
972 m_CharList.push_back(info2); 1004 m_CharList.push_back(info2);
973 } 1005 }
974 FX_Free(pDst); 1006 FX_Free(pDst);
975 return; 1007 return;
976 } 1008 }
(...skipping 393 matching lines...) Expand 10 before | Expand all | Expand 10 after
1370 } 1402 }
1371 if (FPDFTEXT_MC_DELAY == bPreMKC) { 1403 if (FPDFTEXT_MC_DELAY == bPreMKC) {
1372 ProcessMarkedContent(Obj); 1404 ProcessMarkedContent(Obj);
1373 m_pPreTextObj = pTextObj; 1405 m_pPreTextObj = pTextObj;
1374 m_perMatrix.Copy(formMatrix); 1406 m_perMatrix.Copy(formMatrix);
1375 return; 1407 return;
1376 } 1408 }
1377 m_pPreTextObj = pTextObj; 1409 m_pPreTextObj = pTextObj;
1378 m_perMatrix.Copy(formMatrix); 1410 m_perMatrix.Copy(formMatrix);
1379 int nItems = pTextObj->CountItems(); 1411 int nItems = pTextObj->CountItems();
1380 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); 1412 FX_FLOAT baseSpace = CalculateBaseSpace(pTextObj, matrix);
1381 1413
1382 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); 1414 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems);
1383 const FX_BOOL bIsBidiAndMirrorInverse = 1415 const FX_BOOL bIsBidiAndMirrorInverse =
1384 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; 1416 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0;
1385 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); 1417 int32_t iBufStartAppend = m_TempTextBuf.GetLength();
1386 int32_t iCharListStartAppend = 1418 int32_t iCharListStartAppend =
1387 pdfium::CollectionSize<int32_t>(m_TempCharList); 1419 pdfium::CollectionSize<int32_t>(m_TempCharList);
1388 1420
1389 FX_FLOAT spacing = 0; 1421 FX_FLOAT spacing = 0;
1390 for (int i = 0; i < nItems; i++) { 1422 for (int i = 0; i < nItems; i++) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1423 if (threshold > fontsize_h / 3) { 1455 if (threshold > fontsize_h / 3) {
1424 threshold = 0; 1456 threshold = 0;
1425 } else { 1457 } else {
1426 threshold /= 2; 1458 threshold /= 2;
1427 } 1459 }
1428 if (threshold == 0) { 1460 if (threshold == 0) {
1429 threshold = fontsize_h; 1461 threshold = fontsize_h;
1430 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); 1462 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont));
1431 threshold = this_width > last_width ? (FX_FLOAT)this_width 1463 threshold = this_width > last_width ? (FX_FLOAT)this_width
1432 : (FX_FLOAT)last_width; 1464 : (FX_FLOAT)last_width;
1433 threshold = _NormalizeThreshold(threshold); 1465 threshold = NormalizeThreshold(threshold);
1434 threshold = fontsize_h * threshold / 1000; 1466 threshold = fontsize_h * threshold / 1000;
1435 } 1467 }
1436 if (threshold && (spacing && spacing >= threshold)) { 1468 if (threshold && (spacing && spacing >= threshold)) {
1437 charinfo.m_Unicode = TEXT_BLANK_CHAR; 1469 charinfo.m_Unicode = TEXT_BLANK_CHAR;
1438 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; 1470 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED;
1439 charinfo.m_pTextObj = pTextObj; 1471 charinfo.m_pTextObj = pTextObj;
1440 charinfo.m_Index = m_TextBuf.GetLength(); 1472 charinfo.m_Index = m_TextBuf.GetLength();
1441 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); 1473 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
1442 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode; 1474 charinfo.m_CharCode = CPDF_Font::kInvalidCharCode;
1443 charinfo.m_Matrix.Copy(formMatrix); 1475 charinfo.m_Matrix.Copy(formMatrix);
(...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after
1891 } 1923 }
1892 if (unicode > L'Z' && unicode < L'a') { 1924 if (unicode > L'Z' && unicode < L'a') {
1893 return FALSE; 1925 return FALSE;
1894 } 1926 }
1895 if (unicode > L'z') { 1927 if (unicode > L'z') {
1896 return FALSE; 1928 return FALSE;
1897 } 1929 }
1898 return TRUE; 1930 return TRUE;
1899 } 1931 }
1900 1932
1901 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) 1933 CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage)
1902 : m_pTextPage(pTextPage), 1934 : m_pTextPage(pTextPage),
1903 m_flags(0), 1935 m_flags(0),
1904 m_findNextStart(-1), 1936 m_findNextStart(-1),
1905 m_findPreStart(-1), 1937 m_findPreStart(-1),
1906 m_bMatchCase(FALSE), 1938 m_bMatchCase(FALSE),
1907 m_bMatchWholeWord(FALSE), 1939 m_bMatchWholeWord(FALSE),
1908 m_resStart(0), 1940 m_resStart(0),
1909 m_resEnd(-1), 1941 m_resEnd(-1),
1910 m_IsFind(FALSE) { 1942 m_IsFind(FALSE) {
1911 m_strText = m_pTextPage->GetPageText(); 1943 m_strText = m_pTextPage->GetPageText();
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after
2047 if (iWord == 0) { 2079 if (iWord == 0) {
2048 m_resStart = nResultPos; 2080 m_resStart = nResultPos;
2049 } 2081 }
2050 FX_BOOL bMatch = TRUE; 2082 FX_BOOL bMatch = TRUE;
2051 if (iWord != 0 && !bSpaceStart) { 2083 if (iWord != 0 && !bSpaceStart) {
2052 int PreResEndPos = nStartPos; 2084 int PreResEndPos = nStartPos;
2053 int curChar = csWord.GetAt(0); 2085 int curChar = csWord.GetAt(0);
2054 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; 2086 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1];
2055 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); 2087 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1);
2056 if (nStartPos == nResultPos && 2088 if (nStartPos == nResultPos &&
2057 !(_IsIgnoreSpaceCharacter(lastChar) || 2089 !(IsIgnoreSpaceCharacter(lastChar) ||
2058 _IsIgnoreSpaceCharacter(curChar))) { 2090 IsIgnoreSpaceCharacter(curChar))) {
2059 bMatch = FALSE; 2091 bMatch = FALSE;
2060 } 2092 }
2061 for (int d = PreResEndPos; d < nResultPos; d++) { 2093 for (int d = PreResEndPos; d < nResultPos; d++) {
2062 FX_WCHAR strInsert = m_strText.GetAt(d); 2094 FX_WCHAR strInsert = m_strText.GetAt(d);
2063 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && 2095 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR &&
2064 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { 2096 strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
2065 bMatch = FALSE; 2097 bMatch = FALSE;
2066 break; 2098 break;
2067 } 2099 }
2068 } 2100 }
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
2167 index++; 2199 index++;
2168 continue; 2200 continue;
2169 } else { 2201 } else {
2170 break; 2202 break;
2171 } 2203 }
2172 } 2204 }
2173 int pos = 0; 2205 int pos = 0;
2174 while (pos < csWord.GetLength()) { 2206 while (pos < csWord.GetLength()) {
2175 CFX_WideString curStr = csWord.Mid(pos, 1); 2207 CFX_WideString curStr = csWord.Mid(pos, 1);
2176 FX_WCHAR curChar = csWord.GetAt(pos); 2208 FX_WCHAR curChar = csWord.GetAt(pos);
2177 if (_IsIgnoreSpaceCharacter(curChar)) { 2209 if (IsIgnoreSpaceCharacter(curChar)) {
2178 if (pos > 0 && curChar == 0x2019) { 2210 if (pos > 0 && curChar == 0x2019) {
2179 pos++; 2211 pos++;
2180 continue; 2212 continue;
2181 } 2213 }
2182 if (pos > 0) { 2214 if (pos > 0) {
2183 m_csFindWhatArray.push_back(csWord.Mid(0, pos)); 2215 m_csFindWhatArray.push_back(csWord.Mid(0, pos));
2184 } 2216 }
2185 m_csFindWhatArray.push_back(curStr); 2217 m_csFindWhatArray.push_back(curStr);
2186 if (pos == csWord.GetLength() - 1) { 2218 if (pos == csWord.GetLength() - 1) {
2187 csWord.clear(); 2219 csWord.clear();
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
2299 return resEnd - resStart + 1; 2331 return resEnd - resStart + 1;
2300 } 2332 }
2301 2333
2302 CPDF_LinkExtract::CPDF_LinkExtract() 2334 CPDF_LinkExtract::CPDF_LinkExtract()
2303 : m_pTextPage(nullptr), m_bIsParsed(false) {} 2335 : m_pTextPage(nullptr), m_bIsParsed(false) {}
2304 2336
2305 CPDF_LinkExtract::~CPDF_LinkExtract() { 2337 CPDF_LinkExtract::~CPDF_LinkExtract() {
2306 DeleteLinkList(); 2338 DeleteLinkList();
2307 } 2339 }
2308 2340
2309 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { 2341 FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) {
2310 if (!pTextPage || !pTextPage->IsParsed()) 2342 if (!pTextPage || !pTextPage->IsParsed())
2311 return FALSE; 2343 return FALSE;
2312 2344
2313 m_pTextPage = (const CPDF_TextPage*)pTextPage; 2345 m_pTextPage = (const CPDF_TextPage*)pTextPage;
2314 m_strPageText = m_pTextPage->GetPageText(0, -1); 2346 m_strPageText = m_pTextPage->GetPageText(0, -1);
2315 DeleteLinkList(); 2347 DeleteLinkList();
2316 if (m_strPageText.IsEmpty()) { 2348 if (m_strPageText.IsEmpty()) {
2317 return FALSE; 2349 return FALSE;
2318 } 2350 }
2319 ParseLink(); 2351 ParseLink();
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after
2510 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { 2542 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2511 return; 2543 return;
2512 } 2544 }
2513 CPDF_LinkExt* link = NULL; 2545 CPDF_LinkExt* link = NULL;
2514 link = m_LinkList.GetAt(index); 2546 link = m_LinkList.GetAt(index);
2515 if (!link) { 2547 if (!link) {
2516 return; 2548 return;
2517 } 2549 }
2518 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2550 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2519 } 2551 }
OLD | NEW
« no previous file with comments | « core/fpdftext/fpdf_text_int.h ('k') | core/fpdftext/fpdf_text_int_unittest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698