OLD | NEW |
---|---|
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <cctype> | 7 #include <cctype> |
8 #include <cwctype> | 8 #include <cwctype> |
9 #include <algorithm> | 9 #include <algorithm> |
10 | 10 |
(...skipping 2592 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2603 if (str.Find(L"www.") != -1) { | 2603 if (str.Find(L"www.") != -1) { |
2604 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); | 2604 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); |
2605 strBeCheck = L"http://" + strBeCheck; | 2605 strBeCheck = L"http://" + strBeCheck; |
2606 return TRUE; | 2606 return TRUE; |
2607 } | 2607 } |
2608 return FALSE; | 2608 return FALSE; |
2609 } | 2609 } |
2610 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { | 2610 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { |
2611 str.MakeLower(); | 2611 str.MakeLower(); |
2612 int aPos = str.Find(L'@'); | 2612 int aPos = str.Find(L'@'); |
2613 // Invalid when no '@'. | |
2613 if (aPos < 1) { | 2614 if (aPos < 1) { |
2614 return FALSE; | 2615 return FALSE; |
2615 } | 2616 } |
2616 if (str.GetAt(aPos - 1) == L'.' || str.GetAt(aPos - 1) == L'_') { | |
2617 return FALSE; | |
2618 } | |
2619 int i; | 2617 int i; |
jun_fang
2015/12/17 13:30:45
nit: prefer "for (int i = aPos - 1; i >= 0; i--)"
Wei Li
2015/12/17 17:41:41
Done.
I was not sure what style I should follow w… [comment truncated in the review view]
| |
2618 int pPos = aPos; // Used to track the position of '@' or '.'. | |
2620 for (i = aPos - 1; i >= 0; i--) { | 2619 for (i = aPos - 1; i >= 0; i--) { |
2621 FX_WCHAR ch = str.GetAt(i); | 2620 FX_WCHAR ch = str.GetAt(i); |
2622 if (ch == L'_' || ch == L'.' || (ch >= L'a' && ch <= L'z') || | 2621 if (ch == L'_' || ch == L'-' || (ch >= L'a' && ch <= L'z') || |
jun_fang
2015/12/17 13:30:45
Should we handle upper-case letters in email addresses?
Wei Li
2015/12/17 17:41:41
Done.
I don't know why the original code chose to… [comment truncated in the review view]
| |
2623 (ch >= L'0' && ch <= L'9')) { | 2622 (ch >= L'0' && ch <= L'9')) { |
2624 continue; | 2623 continue; |
2625 } else { | 2624 } |
2625 if (ch != L'.' || i == pPos - 1 || i == 0) { | |
2626 if (i == aPos - 1) { | 2626 if (i == aPos - 1) { |
2627 // There is '.' or invalid char before '@'. | |
2627 return FALSE; | 2628 return FALSE; |
2628 } | 2629 } |
2629 str = str.Right(str.GetLength() - i - 1); | 2630 // End extracting for other invalid chars, '.' at the beginning, or |
2631 // consecutive '.'. | |
2632 int removed_len = i == pPos - 1 ? i + 2 : i + 1; | |
2633 str = str.Right(str.GetLength() - removed_len); | |
2630 break; | 2634 break; |
2635 } else { | |
2636 // Found a valid '.'. | |
2637 pPos = i; | |
2631 } | 2638 } |
2632 } | 2639 } |
2633 aPos = str.Find(L'@'); | 2640 |
2634 if (aPos < 1) { | 2641 // Check the host name part. |
2635 return FALSE; | |
2636 } | |
2637 CFX_WideString strtemp = L""; | |
2638 for (i = 0; i < aPos; i++) { | |
2639 FX_WCHAR wch = str.GetAt(i); | |
2640 if (wch >= L'a' && wch <= L'z') { | |
2641 break; | |
2642 } else { | |
2643 strtemp = str.Right(str.GetLength() - i + 1); | |
2644 } | |
2645 } | |
2646 if (strtemp != L"") { | |
2647 str = strtemp; | |
2648 } | |
2649 aPos = str.Find(L'@'); | 2642 aPos = str.Find(L'@'); |
2650 if (aPos < 1) { | 2643 if (aPos < 1) { |
2651 return FALSE; | 2644 return FALSE; |
2652 } | 2645 } |
2653 str.TrimRight(L'.'); | 2646 str.TrimRight(L'.'); |
2654 strtemp = str; | 2647 CFX_WideString strtemp = str; |
2655 int ePos = str.Find(L'.'); | 2648 // At least one '.' in host name, but not at the beginning. |
2656 if (ePos == -1) { | 2649 int ePos = str.Find(L'.', aPos + 1); |
2650 if (ePos == -1 || ePos == aPos + 1) { | |
2657 return FALSE; | 2651 return FALSE; |
2658 } | 2652 } |
2659 while (ePos != -1) { | 2653 // Validate all other chars in host name. |
2660 strtemp = strtemp.Right(strtemp.GetLength() - ePos - 1); | 2654 int nLen = str.GetLength(); |
2661 ePos = strtemp.Find('.'); | 2655 pPos = 0; // Used to track the position of '.'. |
2662 } | 2656 for (i = aPos + 1; i < nLen; i++) { |
jun_fang
2015/12/17 13:30:45
nit: for (int i = aPos + 1; i < nLen; i++).
Wei Li
2015/12/17 17:41:41
Done.
| |
2663 ePos = strtemp.GetLength(); | |
2664 for (i = 0; i < ePos; i++) { | |
2665 FX_WCHAR wch = str.GetAt(i); | 2657 FX_WCHAR wch = str.GetAt(i); |
2666 if ((wch >= L'a' && wch <= L'z') || (wch >= L'0' && wch <= L'9')) { | 2658 if (wch == L'-' || (wch >= L'a' && wch <= L'z') || |
jun_fang
2015/12/17 13:30:45
How about upper-case letters?
Wei Li
2015/12/17 17:41:41
See above.
| |
2659 (wch >= L'0' && wch <= L'9')) { | |
2667 continue; | 2660 continue; |
2661 } | |
2662 if (wch != L'.' || i == pPos + 1) { | |
2663 // Host name should end before invalid char. | |
2664 int host_end = i == pPos + 1 ? i - 2 : i - 1; | |
2665 if (pPos > 0 && host_end - aPos >= 3) { | |
2666 // Trim the ending invalid chars if there is at least one '.' and name. | |
2667 str = str.Left(host_end + 1); | |
2668 break; | |
2669 } | |
2670 return FALSE; | |
2668 } else { | 2671 } else { |
2669 str = str.Left(str.GetLength() - ePos + i + 1); | 2672 pPos = i; |
2670 ePos = ePos - i - 1; | |
2671 break; | |
2672 } | 2673 } |
2673 } | 2674 } |
2674 int nLen = str.GetLength(); | 2675 |
2675 for (i = aPos + 1; i < nLen - ePos; i++) { | |
2676 FX_WCHAR wch = str.GetAt(i); | |
2677 if (wch == L'-' || wch == L'.' || (wch >= L'a' && wch <= L'z') || | |
2678 (wch >= L'0' && wch <= L'9')) { | |
2679 continue; | |
2680 } else { | |
2681 return FALSE; | |
2682 } | |
2683 } | |
2684 if (str.Find(L"mailto:") == -1) { | 2676 if (str.Find(L"mailto:") == -1) { |
2685 str = L"mailto:" + str; | 2677 str = L"mailto:" + str; |
2686 } | 2678 } |
2687 return TRUE; | 2679 return TRUE; |
2688 } | 2680 } |
2689 | 2681 |
2690 void CPDF_LinkExtract::AppendToLinkList(int start, | 2682 void CPDF_LinkExtract::AppendToLinkList(int start, |
2691 int count, | 2683 int count, |
2692 const CFX_WideString& strUrl) { | 2684 const CFX_WideString& strUrl) { |
2693 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; | 2685 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2726 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2718 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2727 return; | 2719 return; |
2728 } | 2720 } |
2729 CPDF_LinkExt* link = NULL; | 2721 CPDF_LinkExt* link = NULL; |
2730 link = m_LinkList.GetAt(index); | 2722 link = m_LinkList.GetAt(index); |
2731 if (!link) { | 2723 if (!link) { |
2732 return; | 2724 return; |
2733 } | 2725 } |
2734 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2726 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2735 } | 2727 } |
OLD | NEW |