OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "core/fpdftext/text_int.h" | 7 #include "core/fpdftext/text_int.h" |
8 | 8 |
9 #include <algorithm> | 9 #include <algorithm> |
10 #include <cctype> | 10 #include <cctype> |
11 #include <cwctype> | 11 #include <cwctype> |
12 #include <memory> | 12 #include <memory> |
13 #include <utility> | 13 #include <utility> |
14 #include <vector> | 14 #include <vector> |
15 | 15 |
| 16 #include "core/fpdftext/include/ipdf_linkextract.h" |
| 17 #include "core/fpdftext/include/ipdf_textpage.h" |
| 18 #include "core/fpdftext/include/ipdf_textpagefind.h" |
16 #include "core/include/fpdfapi/cpdf_dictionary.h" | 19 #include "core/include/fpdfapi/cpdf_dictionary.h" |
17 #include "core/include/fpdfapi/cpdf_string.h" | 20 #include "core/include/fpdfapi/cpdf_string.h" |
18 #include "core/include/fpdfapi/fpdf_module.h" | 21 #include "core/include/fpdfapi/fpdf_module.h" |
19 #include "core/include/fpdfapi/fpdf_page.h" | 22 #include "core/include/fpdfapi/fpdf_page.h" |
20 #include "core/include/fpdfapi/fpdf_pageobj.h" | 23 #include "core/include/fpdfapi/fpdf_pageobj.h" |
21 #include "core/include/fpdfapi/fpdf_resource.h" | 24 #include "core/include/fpdfapi/fpdf_resource.h" |
22 #include "core/include/fpdftext/fpdf_text.h" | |
23 #include "core/include/fxcrt/fx_bidi.h" | 25 #include "core/include/fxcrt/fx_bidi.h" |
24 #include "core/include/fxcrt/fx_ext.h" | 26 #include "core/include/fxcrt/fx_ext.h" |
25 #include "core/include/fxcrt/fx_ucd.h" | 27 #include "core/include/fxcrt/fx_ucd.h" |
26 #include "third_party/base/stl_util.h" | 28 #include "third_party/base/stl_util.h" |
27 | 29 |
| 30 #define FPDFTEXT_RLTB 1 |
| 31 #define FPDFTEXT_LEFT -1 |
| 32 #define FPDFTEXT_RIGHT 1 |
| 33 |
| 34 #define FPDFTEXT_MATCHCASE 0x00000001 |
| 35 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 |
| 36 #define FPDFTEXT_CONSECUTIVE 0x00000004 |
| 37 |
28 namespace { | 38 namespace { |
29 | 39 |
30 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) { | 40 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) { |
31 if (curChar < 255) { | 41 if (curChar < 255) { |
32 return FALSE; | 42 return FALSE; |
33 } | 43 } |
34 if ((curChar >= 0x0600 && curChar <= 0x06FF) || | 44 if ((curChar >= 0x0600 && curChar <= 0x06FF) || |
35 (curChar >= 0xFE70 && curChar <= 0xFEFF) || | 45 (curChar >= 0xFE70 && curChar <= 0xFEFF) || |
36 (curChar >= 0xFB50 && curChar <= 0xFDFF) || | 46 (curChar >= 0xFB50 && curChar <= 0xFDFF) || |
37 (curChar >= 0x0400 && curChar <= 0x04FF) || | 47 (curChar >= 0x0400 && curChar <= 0x04FF) || |
(...skipping 1854 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1892 m_IsFind(FALSE) { | 1902 m_IsFind(FALSE) { |
1893 m_strText = m_pTextPage->GetPageText(); | 1903 m_strText = m_pTextPage->GetPageText(); |
1894 int nCount = pTextPage->CountChars(); | 1904 int nCount = pTextPage->CountChars(); |
1895 if (nCount) { | 1905 if (nCount) { |
1896 m_CharIndex.push_back(0); | 1906 m_CharIndex.push_back(0); |
1897 } | 1907 } |
1898 for (int i = 0; i < nCount; i++) { | 1908 for (int i = 0; i < nCount; i++) { |
1899 FPDF_CHAR_INFO info; | 1909 FPDF_CHAR_INFO info; |
1900 pTextPage->GetCharInfo(i, &info); | 1910 pTextPage->GetCharInfo(i, &info); |
1901 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 1911 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
1902 if (info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) { | 1912 if (info.m_Flag == FPDFTEXT_CHAR_NORMAL || |
| 1913 info.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
1903 if (indexSize % 2) { | 1914 if (indexSize % 2) { |
1904 m_CharIndex.push_back(1); | 1915 m_CharIndex.push_back(1); |
1905 } else { | 1916 } else { |
1906 if (indexSize <= 0) { | 1917 if (indexSize <= 0) { |
1907 continue; | 1918 continue; |
1908 } | 1919 } |
1909 m_CharIndex[indexSize - 1] += 1; | 1920 m_CharIndex[indexSize - 1] += 1; |
1910 } | 1921 } |
1911 } else { | 1922 } else { |
1912 if (indexSize % 2) { | 1923 if (indexSize % 2) { |
(...skipping 405 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2318 } | 2329 } |
2319 return m_LinkList.GetSize(); | 2330 return m_LinkList.GetSize(); |
2320 } | 2331 } |
2321 | 2332 |
2322 void CPDF_LinkExtract::ParseLink() { | 2333 void CPDF_LinkExtract::ParseLink() { |
2323 int start = 0, pos = 0; | 2334 int start = 0, pos = 0; |
2324 int TotalChar = m_pTextPage->CountChars(); | 2335 int TotalChar = m_pTextPage->CountChars(); |
2325 while (pos < TotalChar) { | 2336 while (pos < TotalChar) { |
2326 FPDF_CHAR_INFO pageChar; | 2337 FPDF_CHAR_INFO pageChar; |
2327 m_pTextPage->GetCharInfo(pos, &pageChar); | 2338 m_pTextPage->GetCharInfo(pos, &pageChar); |
2328 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || | 2339 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED || |
2329 pos == TotalChar - 1) { | 2340 pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { |
2330 int nCount = pos - start; | 2341 int nCount = pos - start; |
2331 if (pos == TotalChar - 1) { | 2342 if (pos == TotalChar - 1) { |
2332 nCount++; | 2343 nCount++; |
2333 } | 2344 } |
2334 CFX_WideString strBeCheck; | 2345 CFX_WideString strBeCheck; |
2335 strBeCheck = m_pTextPage->GetPageText(start, nCount); | 2346 strBeCheck = m_pTextPage->GetPageText(start, nCount); |
2336 if (strBeCheck.GetLength() > 5) { | 2347 if (strBeCheck.GetLength() > 5) { |
2337 while (strBeCheck.GetLength() > 0) { | 2348 while (strBeCheck.GetLength() > 0) { |
2338 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); | 2349 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); |
2339 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { | 2350 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { |
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2491 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2502 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2492 return; | 2503 return; |
2493 } | 2504 } |
2494 CPDF_LinkExt* link = NULL; | 2505 CPDF_LinkExt* link = NULL; |
2495 link = m_LinkList.GetAt(index); | 2506 link = m_LinkList.GetAt(index); |
2496 if (!link) { | 2507 if (!link) { |
2497 return; | 2508 return; |
2498 } | 2509 } |
2499 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2510 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2500 } | 2511 } |
OLD | NEW |