Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <cctype> | 8 #include <cctype> |
| 9 #include <cwctype> | 9 #include <cwctype> |
| 10 #include <memory> | 10 #include <memory> |
| (...skipping 2312 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 2323 int CPDF_TextPageFind::GetCurOrder() const { | 2323 int CPDF_TextPageFind::GetCurOrder() const { |
| 2324 return GetCharIndex(m_resStart); | 2324 return GetCharIndex(m_resStart); |
| 2325 } | 2325 } |
| 2326 | 2326 |
| 2327 int CPDF_TextPageFind::GetMatchedCount() const { | 2327 int CPDF_TextPageFind::GetMatchedCount() const { |
| 2328 int resStart = GetCharIndex(m_resStart); | 2328 int resStart = GetCharIndex(m_resStart); |
| 2329 int resEnd = GetCharIndex(m_resEnd); | 2329 int resEnd = GetCharIndex(m_resEnd); |
| 2330 return resEnd - resStart + 1; | 2330 return resEnd - resStart + 1; |
| 2331 } | 2331 } |
| 2332 | 2332 |
| 2333 CPDF_LinkExtract::CPDF_LinkExtract() | 2333 CPDF_LinkExtract::CPDF_LinkExtract(const CPDF_TextPage* pTextPage) |
| 2334 : m_pTextPage(nullptr), m_bIsParsed(false) {} | 2334 : m_pTextPage(pTextPage), m_bIsParsed(false) {} |
| 2335 | 2335 |
| 2336 CPDF_LinkExtract::~CPDF_LinkExtract() { | 2336 CPDF_LinkExtract::~CPDF_LinkExtract() { |
| 2337 DeleteLinkList(); | |
| 2338 } | 2337 } |
| 2339 | 2338 |
| 2340 FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) { | 2339 FX_BOOL CPDF_LinkExtract::ExtractLinks() { |
| 2341 if (!pTextPage || !pTextPage->IsParsed()) | 2340 if (!m_pTextPage->IsParsed()) |
| 2342 return FALSE; | 2341 return FALSE; |
| 2343 | 2342 |
| 2344 m_pTextPage = (const CPDF_TextPage*)pTextPage; | 2343 m_LinkList.clear(); |
| 2345 m_strPageText = m_pTextPage->GetPageText(0, -1); | 2344 m_strPageText = m_pTextPage->GetPageText(0, -1); |
| 2346 DeleteLinkList(); | 2345 if (m_strPageText.IsEmpty()) |
| 2347 if (m_strPageText.IsEmpty()) { | |
| 2348 return FALSE; | 2346 return FALSE; |
| 2349 } | 2347 |
| 2350 ParseLink(); | 2348 ParseLink(); |
| 2351 m_bIsParsed = true; | 2349 m_bIsParsed = true; |
| 2352 return TRUE; | 2350 return TRUE; |
| 2353 } | 2351 } |
| 2354 | 2352 |
| 2355 void CPDF_LinkExtract::DeleteLinkList() { | |
| 2356 while (m_LinkList.GetSize()) { | |
| 2357 CPDF_LinkExt* linkinfo = NULL; | |
| 2358 linkinfo = m_LinkList.GetAt(0); | |
| 2359 m_LinkList.RemoveAt(0); | |
| 2360 delete linkinfo; | |
| 2361 } | |
| 2362 m_LinkList.RemoveAll(); | |
| 2363 } | |
| 2364 | |
| 2365 int CPDF_LinkExtract::CountLinks() const { | 2353 int CPDF_LinkExtract::CountLinks() const { |
| 2366 if (!m_bIsParsed) { | 2354 return m_bIsParsed ? pdfium::CollectionSize<int>(m_LinkList) : -1; |
| 2367 return -1; | |
| 2368 } | |
| 2369 return m_LinkList.GetSize(); | |
| 2370 } | 2355 } |
| 2371 | 2356 |
| 2372 void CPDF_LinkExtract::ParseLink() { | 2357 void CPDF_LinkExtract::ParseLink() { |
| 2373 int start = 0, pos = 0; | 2358 int start = 0, pos = 0; |
| 2374 int TotalChar = m_pTextPage->CountChars(); | 2359 int TotalChar = m_pTextPage->CountChars(); |
| 2375 while (pos < TotalChar) { | 2360 while (pos < TotalChar) { |
| 2376 FPDF_CHAR_INFO pageChar; | 2361 FPDF_CHAR_INFO pageChar; |
| 2377 m_pTextPage->GetCharInfo(pos, &pageChar); | 2362 m_pTextPage->GetCharInfo(pos, &pageChar); |
| 2378 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED || | 2363 if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED || |
| 2379 pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { | 2364 pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { |
| 2380 int nCount = pos - start; | 2365 int nCount = pos - start; |
| 2381 if (pos == TotalChar - 1) { | 2366 if (pos == TotalChar - 1) { |
| 2382 nCount++; | 2367 nCount++; |
| 2383 } | 2368 } |
| 2384 CFX_WideString strBeCheck; | 2369 CFX_WideString strBeCheck; |
| 2385 strBeCheck = m_pTextPage->GetPageText(start, nCount); | 2370 strBeCheck = m_pTextPage->GetPageText(start, nCount); |
| 2386 if (strBeCheck.GetLength() > 5) { | 2371 if (strBeCheck.GetLength() > 5) { |
| 2387 while (strBeCheck.GetLength() > 0) { | 2372 while (strBeCheck.GetLength() > 0) { |
| 2388 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); | 2373 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); |
| 2389 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { | 2374 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { |
| 2390 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1); | 2375 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1); |
| 2391 nCount--; | 2376 nCount--; |
| 2392 } else { | 2377 } else { |
| 2393 break; | 2378 break; |
| 2394 } | 2379 } |
| 2395 } | 2380 } |
| 2396 if (nCount > 5 && | 2381 if (nCount > 5 && |
| 2397 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { | 2382 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { |
| 2398 AppendToLinkList(start, nCount, strBeCheck); | 2383 m_LinkList.push_back( |
| 2384 std::unique_ptr<Item>(new Item(start, nCount, strBeCheck))); | |
| 2399 } | 2385 } |
| 2400 } | 2386 } |
| 2401 start = ++pos; | 2387 start = ++pos; |
| 2402 } else { | 2388 } else { |
| 2403 pos++; | 2389 pos++; |
| 2404 } | 2390 } |
| 2405 } | 2391 } |
| 2406 } | 2392 } |
| 2407 | 2393 |
| 2408 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { | 2394 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { |
| (...skipping 85 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 2494 } | 2480 } |
| 2495 pPos = i; | 2481 pPos = i; |
| 2496 } | 2482 } |
| 2497 | 2483 |
| 2498 if (str.Find(L"mailto:") == -1) { | 2484 if (str.Find(L"mailto:") == -1) { |
| 2499 str = L"mailto:" + str; | 2485 str = L"mailto:" + str; |
| 2500 } | 2486 } |
| 2501 return TRUE; | 2487 return TRUE; |
| 2502 } | 2488 } |
| 2503 | 2489 |
| 2504 void CPDF_LinkExtract::AppendToLinkList(int start, | 2490 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { |
|
dsinclair
2016/04/20 13:05:19
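Can index be changed from an int to a size_t so we [don't need the negative-value check]? [comment truncated in page capture; bracketed text reconstructed]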
Tom Sepez
2016/04/20 19:07:39
Done. Pushed negative value checking up to API ca[llers]. [comment truncated in page capture; bracketed text reconstructed]
| |
| 2505 int count, | 2491 if (!m_bIsParsed || index < 0 || |
|
Lei Zhang
2016/04/19 23:52:06
Helper function instead of repeating this thrice?
Tom Sepez
2016/04/20 19:07:39
Nah, short enough to prefer transparency over cons[…] [comment truncated in page capture]
| |
| 2506 const CFX_WideString& strUrl) { | 2492 index >= pdfium::CollectionSize<int>(m_LinkList)) { |
| 2507 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; | 2493 return L""; |
| 2508 linkInfo->m_strUrl = strUrl; | 2494 } |
| 2509 linkInfo->m_Start = start; | 2495 return m_LinkList[index]->m_strUrl; |
| 2510 linkInfo->m_Count = count; | |
| 2511 m_LinkList.Add(linkInfo); | |
| 2512 } | 2496 } |
| 2513 | 2497 |
| 2514 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { | |
| 2515 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | |
| 2516 return L""; | |
| 2517 } | |
| 2518 CPDF_LinkExt* link = NULL; | |
| 2519 link = m_LinkList.GetAt(index); | |
| 2520 if (!link) { | |
| 2521 return L""; | |
| 2522 } | |
| 2523 return link->m_strUrl; | |
| 2524 } | |
| 2525 void CPDF_LinkExtract::GetBoundedSegment(int index, | 2498 void CPDF_LinkExtract::GetBoundedSegment(int index, |
|
dsinclair
2016/04/20 13:05:19
size_t?
Tom Sepez
2016/04/20 19:07:39
Done.
| |
| 2526 int& start, | 2499 int& start, |
| 2527 int& count) const { | 2500 int& count) const { |
| 2528 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2501 if (!m_bIsParsed || index < 0 || |
| 2502 index >= pdfium::CollectionSize<int>(m_LinkList)) { | |
| 2529 return; | 2503 return; |
| 2530 } | 2504 } |
| 2531 CPDF_LinkExt* link = NULL; | 2505 start = m_LinkList[index]->m_Start; |
| 2532 link = m_LinkList.GetAt(index); | 2506 count = m_LinkList[index]->m_Count; |
| 2533 if (!link) { | |
| 2534 return; | |
| 2535 } | |
| 2536 start = link->m_Start; | |
| 2537 count = link->m_Count; | |
| 2538 } | 2507 } |
| 2539 | 2508 |
| 2540 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { | 2509 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { |
|
dsinclair
2016/04/20 13:05:19
size_t?
Tom Sepez
2016/04/20 19:07:39
Done.
| |
| 2541 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2510 if (!m_bIsParsed || index < 0 || |
| 2511 index >= pdfium::CollectionSize<int>(m_LinkList)) { | |
| 2542 return; | 2512 return; |
| 2543 } | 2513 } |
| 2544 CPDF_LinkExt* link = NULL; | 2514 m_pTextPage->GetRectArray(m_LinkList[index]->m_Start, |
| 2545 link = m_LinkList.GetAt(index); | 2515 m_LinkList[index]->m_Count, rects); |
| 2546 if (!link) { | |
| 2547 return; | |
| 2548 } | |
| 2549 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | |
| 2550 } | 2516 } |
| OLD | NEW |