OLD | NEW |
---|---|
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <cctype> | 8 #include <cctype> |
9 #include <cwctype> | 9 #include <cwctype> |
10 #include <memory> | 10 #include <memory> |
(...skipping 2312 matching lines...) | (...skipping 2312 matching lines...) |
2323 int CPDF_TextPageFind::GetCurOrder() const { | 2323 int CPDF_TextPageFind::GetCurOrder() const { |
2324 return GetCharIndex(m_resStart); | 2324 return GetCharIndex(m_resStart); |
2325 } | 2325 } |
2326 | 2326 |
2327 int CPDF_TextPageFind::GetMatchedCount() const { | 2327 int CPDF_TextPageFind::GetMatchedCount() const { |
2328 int resStart = GetCharIndex(m_resStart); | 2328 int resStart = GetCharIndex(m_resStart); |
2329 int resEnd = GetCharIndex(m_resEnd); | 2329 int resEnd = GetCharIndex(m_resEnd); |
2330 return resEnd - resStart + 1; | 2330 return resEnd - resStart + 1; |
2331 } | 2331 } |
2332 | 2332 |
2333 CPDF_LinkExtract::CPDF_LinkExtract() | 2333 CPDF_LinkExtract::CPDF_LinkExtract(const CPDF_TextPage* pTextPage) |
2334 : m_pTextPage(nullptr), m_bIsParsed(false) {} | 2334 : m_pTextPage(pTextPage), m_bIsParsed(false) {} |
2335 | 2335 |
2336 CPDF_LinkExtract::~CPDF_LinkExtract() { | 2336 CPDF_LinkExtract::~CPDF_LinkExtract() { |
2337 DeleteLinkList(); | |
2338 } | 2337 } |
2339 | 2338 |
2340 FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) { | 2339 FX_BOOL CPDF_LinkExtract::ExtractLinks() { |
2341 if (!pTextPage || !pTextPage->IsParsed()) | 2340 if (!m_pTextPage->IsParsed()) |
2342 return FALSE; | 2341 return FALSE; |
2343 | 2342 |
2344 m_pTextPage = (const CPDF_TextPage*)pTextPage; | 2343 m_LinkList.clear(); |
2345 m_strPageText = m_pTextPage->GetPageText(0, -1); | 2344 m_strPageText = m_pTextPage->GetPageText(0, -1); |
2346 DeleteLinkList(); | 2345 if (m_strPageText.IsEmpty()) |
2347 if (m_strPageText.IsEmpty()) { | |
2348 return FALSE; | 2346 return FALSE; |
2349 } | 2347 |
2350 ParseLink(); | 2348 ParseLink(); |
2351 m_bIsParsed = true; | 2349 m_bIsParsed = true; |
2352 return TRUE; | 2350 return TRUE; |
2353 } | 2351 } |
2354 | 2352 |
2355 void CPDF_LinkExtract::DeleteLinkList() { | |
2356 while (m_LinkList.GetSize()) { | |
2357 CPDF_LinkExt* linkinfo = NULL; | |
2358 linkinfo = m_LinkList.GetAt(0); | |
2359 m_LinkList.RemoveAt(0); | |
2360 delete linkinfo; | |
2361 } | |
2362 m_LinkList.RemoveAll(); | |
2363 } | |
2364 | |
2365 int CPDF_LinkExtract::CountLinks() const { | 2353 int CPDF_LinkExtract::CountLinks() const { |
2366 if (!m_bIsParsed) { | 2354 return m_bIsParsed ? pdfium::CollectionSize<int>(m_LinkList) : -1; |
2367 return -1; | |
2368 } | |
2369 return m_LinkList.GetSize(); | |
2370 } | 2355 } |
2371 | 2356 |
2372 void CPDF_LinkExtract::ParseLink() { | 2357 void CPDF_LinkExtract::ParseLink() { |
2373 int start = 0, pos = 0; | 2358 int start = 0, pos = 0; |
2374 int TotalChar = m_pTextPage->CountChars(); | 2359 int TotalChar = m_pTextPage->CountChars(); |
2375 while (pos < TotalChar) { | 2360 while (pos < TotalChar) { |
2376 FPDF_CHAR_INFO pageChar; | 2361 FPDF_CHAR_INFO pageChar; |
2377 m_pTextPage->GetCharInfo(pos, &pageChar); | 2362 m_pTextPage->GetCharInfo(pos, &pageChar); |
2378 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED || | 2363 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED || |
2379 pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { | 2364 pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { |
2380 int nCount = pos - start; | 2365 int nCount = pos - start; |
2381 if (pos == TotalChar - 1) { | 2366 if (pos == TotalChar - 1) { |
2382 nCount++; | 2367 nCount++; |
2383 } | 2368 } |
2384 CFX_WideString strBeCheck; | 2369 CFX_WideString strBeCheck; |
2385 strBeCheck = m_pTextPage->GetPageText(start, nCount); | 2370 strBeCheck = m_pTextPage->GetPageText(start, nCount); |
2386 if (strBeCheck.GetLength() > 5) { | 2371 if (strBeCheck.GetLength() > 5) { |
2387 while (strBeCheck.GetLength() > 0) { | 2372 while (strBeCheck.GetLength() > 0) { |
2388 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); | 2373 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); |
2389 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { | 2374 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { |
2390 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1); | 2375 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1); |
2391 nCount--; | 2376 nCount--; |
2392 } else { | 2377 } else { |
2393 break; | 2378 break; |
2394 } | 2379 } |
2395 } | 2380 } |
2396 if (nCount > 5 && | 2381 if (nCount > 5 && |
2397 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { | 2382 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { |
2398 AppendToLinkList(start, nCount, strBeCheck); | 2383 m_LinkList.push_back( |
2384 std::unique_ptr<Item>(new Item(start, nCount, strBeCheck))); | |
2399 } | 2385 } |
2400 } | 2386 } |
2401 start = ++pos; | 2387 start = ++pos; |
2402 } else { | 2388 } else { |
2403 pos++; | 2389 pos++; |
2404 } | 2390 } |
2405 } | 2391 } |
2406 } | 2392 } |
2407 | 2393 |
2408 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { | 2394 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { |
(...skipping 85 matching lines...) | (...skipping 85 matching lines...) |
2494 } | 2480 } |
2495 pPos = i; | 2481 pPos = i; |
2496 } | 2482 } |
2497 | 2483 |
2498 if (str.Find(L"mailto:") == -1) { | 2484 if (str.Find(L"mailto:") == -1) { |
2499 str = L"mailto:" + str; | 2485 str = L"mailto:" + str; |
2500 } | 2486 } |
2501 return TRUE; | 2487 return TRUE; |
2502 } | 2488 } |
2503 | 2489 |
2504 void CPDF_LinkExtract::AppendToLinkList(int start, | 2490 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { |
dsinclair
2016/04/20 13:05:19
Can index be changed from an int to a size_t so we can drop the negative-index check?
Tom Sepez
2016/04/20 19:07:39
Done. Pushed negative value checking up to API callers.
| |
2505 int count, | 2491 if (!m_bIsParsed || index < 0 || |
Lei Zhang
2016/04/19 23:52:06
Helper function instead of repeating this thrice?
Tom Sepez
2016/04/20 19:07:39
Nah, short enough to prefer transparency over conciseness.
| |
2506 const CFX_WideString& strUrl) { | 2492 index >= pdfium::CollectionSize<int>(m_LinkList)) { |
2507 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; | 2493 return L""; |
2508 linkInfo->m_strUrl = strUrl; | 2494 } |
2509 linkInfo->m_Start = start; | 2495 return m_LinkList[index]->m_strUrl; |
2510 linkInfo->m_Count = count; | |
2511 m_LinkList.Add(linkInfo); | |
2512 } | 2496 } |
2513 | 2497 |
2514 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { | |
2515 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | |
2516 return L""; | |
2517 } | |
2518 CPDF_LinkExt* link = NULL; | |
2519 link = m_LinkList.GetAt(index); | |
2520 if (!link) { | |
2521 return L""; | |
2522 } | |
2523 return link->m_strUrl; | |
2524 } | |
2525 void CPDF_LinkExtract::GetBoundedSegment(int index, | 2498 void CPDF_LinkExtract::GetBoundedSegment(int index, |
dsinclair
2016/04/20 13:05:19
size_t?
Tom Sepez
2016/04/20 19:07:39
Done.
| |
2526 int& start, | 2499 int& start, |
2527 int& count) const { | 2500 int& count) const { |
2528 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2501 if (!m_bIsParsed || index < 0 || |
2502 index >= pdfium::CollectionSize<int>(m_LinkList)) { | |
2529 return; | 2503 return; |
2530 } | 2504 } |
2531 CPDF_LinkExt* link = NULL; | 2505 start = m_LinkList[index]->m_Start; |
2532 link = m_LinkList.GetAt(index); | 2506 count = m_LinkList[index]->m_Count; |
2533 if (!link) { | |
2534 return; | |
2535 } | |
2536 start = link->m_Start; | |
2537 count = link->m_Count; | |
2538 } | 2507 } |
2539 | 2508 |
2540 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { | 2509 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { |
dsinclair
2016/04/20 13:05:19
size_t?
Tom Sepez
2016/04/20 19:07:39
Done.
| |
2541 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2510 if (!m_bIsParsed || index < 0 || |
2511 index >= pdfium::CollectionSize<int>(m_LinkList)) { | |
2542 return; | 2512 return; |
2543 } | 2513 } |
2544 CPDF_LinkExt* link = NULL; | 2514 m_pTextPage->GetRectArray(m_LinkList[index]->m_Start, |
2545 link = m_LinkList.GetAt(index); | 2515 m_LinkList[index]->m_Count, rects); |
2546 if (!link) { | |
2547 return; | |
2548 } | |
2549 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | |
2550 } | 2516 } |
OLD | NEW |