OLD | NEW |
---|---|
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <cctype> | 7 #include <cctype> |
8 #include <cwctype> | 8 #include <cwctype> |
9 #include <algorithm> | 9 #include <algorithm> |
10 | 10 |
(...skipping 2590 matching lines...) | |
2601 return TRUE; | 2601 return TRUE; |
2602 } | 2602 } |
2603 if (str.Find(L"www.") != -1) { | 2603 if (str.Find(L"www.") != -1) { |
2604 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); | 2604 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); |
2605 strBeCheck = L"http://" + strBeCheck; | 2605 strBeCheck = L"http://" + strBeCheck; |
2606 return TRUE; | 2606 return TRUE; |
2607 } | 2607 } |
2608 return FALSE; | 2608 return FALSE; |
2609 } | 2609 } |
2610 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { | 2610 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { |
2611 str.MakeLower(); | |
2612 int aPos = str.Find(L'@'); | 2611 int aPos = str.Find(L'@'); |
2612 // Invalid when no '@'. | |
2613 if (aPos < 1) { | 2613 if (aPos < 1) { |
2614 return FALSE; | 2614 return FALSE; |
2615 } | 2615 } |
2616 if (str.GetAt(aPos - 1) == L'.' || str.GetAt(aPos - 1) == L'_') { | 2616 |
[Review comment on NEW line 2616] Lei Zhang (2015/12/17 19:13:29): Maybe add a comment to say checking the local part… [comment preview truncated]
[Reply] Wei Li (2015/12/17 19:42:27): Done.
| |
2617 return FALSE; | 2617 int pPos = aPos; // Used to track the position of '@' or '.'. |
2618 } | 2618 for (int i = aPos - 1; i >= 0; i--) { |
2619 int i; | |
2620 for (i = aPos - 1; i >= 0; i--) { | |
2621 FX_WCHAR ch = str.GetAt(i); | 2619 FX_WCHAR ch = str.GetAt(i); |
2622 if (ch == L'_' || ch == L'.' || (ch >= L'a' && ch <= L'z') || | 2620 if (ch == L'_' || ch == L'-' || (ch >= L'A' && ch <= L'Z') || |
2623 (ch >= L'0' && ch <= L'9')) { | 2621 (ch >= L'a' && ch <= L'z') || (ch >= L'0' && ch <= L'9')) { |
2624 continue; | 2622 continue; |
2625 } else { | 2623 } |
2624 if (ch != L'.' || i == pPos - 1 || i == 0) { | |
2626 if (i == aPos - 1) { | 2625 if (i == aPos - 1) { |
2626 // There is '.' or invalid char before '@'. | |
2627 return FALSE; | 2627 return FALSE; |
2628 } | 2628 } |
2629 str = str.Right(str.GetLength() - i - 1); | 2629 // End extracting for other invalid chars, '.' at the beginning, or |
2630 // consecutive '.'. | |
2631 int removed_len = i == pPos - 1 ? i + 2 : i + 1; | |
2632 str = str.Right(str.GetLength() - removed_len); | |
2630 break; | 2633 break; |
2634 } else { | |
[Review comment on NEW line 2634] Lei Zhang (2015/12/17 19:13:29): No need for else after a break.
[Reply] Wei Li (2015/12/17 19:42:27): Done.
| |
2635 // Found a valid '.'. | |
2636 pPos = i; | |
2631 } | 2637 } |
2632 } | 2638 } |
2633 aPos = str.Find(L'@'); | 2639 |
2634 if (aPos < 1) { | 2640 // Check the host name part. |
2635 return FALSE; | |
2636 } | |
2637 CFX_WideString strtemp = L""; | |
2638 for (i = 0; i < aPos; i++) { | |
2639 FX_WCHAR wch = str.GetAt(i); | |
2640 if (wch >= L'a' && wch <= L'z') { | |
2641 break; | |
2642 } else { | |
2643 strtemp = str.Right(str.GetLength() - i + 1); | |
2644 } | |
2645 } | |
2646 if (strtemp != L"") { | |
2647 str = strtemp; | |
2648 } | |
2649 aPos = str.Find(L'@'); | 2641 aPos = str.Find(L'@'); |
2650 if (aPos < 1) { | 2642 if (aPos < 1) { |
2651 return FALSE; | 2643 return FALSE; |
2652 } | 2644 } |
2653 str.TrimRight(L'.'); | 2645 str.TrimRight(L'.'); |
2654 strtemp = str; | 2646 CFX_WideString strtemp = str; |
2655 int ePos = str.Find(L'.'); | 2647 // At least one '.' in host name, but not at the beginning. |
2656 if (ePos == -1) { | 2648 int ePos = str.Find(L'.', aPos + 1); |
2649 if (ePos == -1 || ePos == aPos + 1) { | |
2657 return FALSE; | 2650 return FALSE; |
2658 } | 2651 } |
2659 while (ePos != -1) { | 2652 // Validate all other chars in host name. |
2660 strtemp = strtemp.Right(strtemp.GetLength() - ePos - 1); | 2653 int nLen = str.GetLength(); |
2661 ePos = strtemp.Find('.'); | 2654 pPos = 0; // Used to track the position of '.'. |
2662 } | 2655 for (int i = aPos + 1; i < nLen; i++) { |
2663 ePos = strtemp.GetLength(); | |
2664 for (i = 0; i < ePos; i++) { | |
2665 FX_WCHAR wch = str.GetAt(i); | 2656 FX_WCHAR wch = str.GetAt(i); |
2666 if ((wch >= L'a' && wch <= L'z') || (wch >= L'0' && wch <= L'9')) { | 2657 if (wch == L'-' || (wch >= L'A' && wch <= L'Z') || |
[Review comment on NEW line 2657] Lei Zhang (2015/12/17 19:13:29): Maybe add some helpers like IsLetter() and IsNumbe… [comment preview truncated]
[Reply] Wei Li (2015/12/17 19:42:27): Done.
| |
2658 (wch >= L'a' && wch <= L'z') || (wch >= L'0' && wch <= L'9')) { | |
2667 continue; | 2659 continue; |
2660 } | |
2661 if (wch != L'.' || i == pPos + 1) { | |
2662 // Host name should end before invalid char. | |
2663 int host_end = i == pPos + 1 ? i - 2 : i - 1; | |
2664 if (pPos > 0 && host_end - aPos >= 3) { | |
2665 // Trim the ending invalid chars if there is at least one '.' and name. | |
2666 str = str.Left(host_end + 1); | |
2667 break; | |
2668 } | |
2669 return FALSE; | |
2668 } else { | 2670 } else { |
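[Review comment on NEW line 2670] Lei Zhang (2015/12/17 19:13:29): ditto [presumably: no need for else after the preceding return/break]
[Reply] Wei Li (2015/12/17 19:42:27): Done.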
| |
2669 str = str.Left(str.GetLength() - ePos + i + 1); | 2671 pPos = i; |
2670 ePos = ePos - i - 1; | |
2671 break; | |
2672 } | 2672 } |
2673 } | 2673 } |
2674 int nLen = str.GetLength(); | 2674 |
2675 for (i = aPos + 1; i < nLen - ePos; i++) { | |
2676 FX_WCHAR wch = str.GetAt(i); | |
2677 if (wch == L'-' || wch == L'.' || (wch >= L'a' && wch <= L'z') || | |
2678 (wch >= L'0' && wch <= L'9')) { | |
2679 continue; | |
2680 } else { | |
2681 return FALSE; | |
2682 } | |
2683 } | |
2684 if (str.Find(L"mailto:") == -1) { | 2675 if (str.Find(L"mailto:") == -1) { |
2685 str = L"mailto:" + str; | 2676 str = L"mailto:" + str; |
2686 } | 2677 } |
2687 return TRUE; | 2678 return TRUE; |
2688 } | 2679 } |
2689 | 2680 |
2690 void CPDF_LinkExtract::AppendToLinkList(int start, | 2681 void CPDF_LinkExtract::AppendToLinkList(int start, |
2691 int count, | 2682 int count, |
2692 const CFX_WideString& strUrl) { | 2683 const CFX_WideString& strUrl) { |
2693 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; | 2684 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; |
(...skipping 32 matching lines...) | |
2726 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2717 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2727 return; | 2718 return; |
2728 } | 2719 } |
2729 CPDF_LinkExt* link = NULL; | 2720 CPDF_LinkExt* link = NULL; |
2730 link = m_LinkList.GetAt(index); | 2721 link = m_LinkList.GetAt(index); |
2731 if (!link) { | 2722 if (!link) { |
2732 return; | 2723 return; |
2733 } | 2724 } |
2734 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2725 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2735 } | 2726 } |
OLD | NEW |