Index: core/src/fpdftext/fpdf_text_int.cpp |
diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp |
index 1e6d54d13398eff87710b2747300d9ad45f0b3f5..d7a9c47519e74db436647b1eed4907e7ccb313c3 100644 |
--- a/core/src/fpdftext/fpdf_text_int.cpp |
+++ b/core/src/fpdftext/fpdf_text_int.cpp |
@@ -14,6 +14,7 @@ |
#include "core/include/fpdfapi/fpdf_resource.h" |
#include "core/include/fpdftext/fpdf_text.h" |
#include "core/include/fxcrt/fx_bidi.h" |
+#include "core/include/fxcrt/fx_ext.h" |
#include "core/include/fxcrt/fx_ucd.h" |
#include "text_int.h" |
#include "third_party/base/nonstd_unique_ptr.h" |
@@ -2607,80 +2608,69 @@ FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { |
} |
return FALSE; |
} |
-FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { |
- str.MakeLower(); |
+bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { |
int aPos = str.Find(L'@'); |
+ // Invalid when no '@'. |
if (aPos < 1) { |
return FALSE; |
} |
- if (str.GetAt(aPos - 1) == L'.' || str.GetAt(aPos - 1) == L'_') { |
- return FALSE; |
- } |
- int i; |
- for (i = aPos - 1; i >= 0; i--) { |
+ |
+ // Check the local part. |
+ int pPos = aPos; // Used to track the position of '@' or '.'. |
+ for (int i = aPos - 1; i >= 0; i--) { |
FX_WCHAR ch = str.GetAt(i); |
- if (ch == L'_' || ch == L'.' || (ch >= L'a' && ch <= L'z') || |
- (ch >= L'0' && ch <= L'9')) { |
+ if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) { |
continue; |
- } else { |
+ } |
+ if (ch != L'.' || i == pPos - 1 || i == 0) { |
if (i == aPos - 1) { |
+ // There is '.' or invalid char before '@'. |
return FALSE; |
} |
- str = str.Right(str.GetLength() - i - 1); |
+ // End extracting for other invalid chars, '.' at the beginning, or |
+ // consecutive '.'. |
+ int removed_len = i == pPos - 1 ? i + 2 : i + 1; |
+ str = str.Right(str.GetLength() - removed_len); |
break; |
} |
+ // Found a valid '.'. |
+ pPos = i; |
} |
- aPos = str.Find(L'@'); |
- if (aPos < 1) { |
- return FALSE; |
- } |
- CFX_WideString strtemp = L""; |
- for (i = 0; i < aPos; i++) { |
- FX_WCHAR wch = str.GetAt(i); |
- if (wch >= L'a' && wch <= L'z') { |
- break; |
- } else { |
- strtemp = str.Right(str.GetLength() - i + 1); |
- } |
- } |
- if (strtemp != L"") { |
- str = strtemp; |
- } |
+ |
+ // Check the domain name part. |
aPos = str.Find(L'@'); |
if (aPos < 1) { |
return FALSE; |
} |
str.TrimRight(L'.'); |
- strtemp = str; |
- int ePos = str.Find(L'.'); |
- if (ePos == -1) { |
+ // At least one '.' in domain name, but not at the beginning. |
+ // TODO(weili): RFC5322 allows domain names to be a local name without '.'. |
+ // Check whether we should remove this check. |
+ int ePos = str.Find(L'.', aPos + 1); |
+ if (ePos == -1 || ePos == aPos + 1) { |
return FALSE; |
} |
- while (ePos != -1) { |
- strtemp = strtemp.Right(strtemp.GetLength() - ePos - 1); |
- ePos = strtemp.Find('.'); |
- } |
- ePos = strtemp.GetLength(); |
- for (i = 0; i < ePos; i++) { |
- FX_WCHAR wch = str.GetAt(i); |
- if ((wch >= L'a' && wch <= L'z') || (wch >= L'0' && wch <= L'9')) { |
- continue; |
- } else { |
- str = str.Left(str.GetLength() - ePos + i + 1); |
- ePos = ePos - i - 1; |
- break; |
- } |
- } |
+ // Validate all other chars in domain name. |
int nLen = str.GetLength(); |
- for (i = aPos + 1; i < nLen - ePos; i++) { |
+ pPos = 0; // Used to track the position of '.'. |
+ for (int i = aPos + 1; i < nLen; i++) { |
FX_WCHAR wch = str.GetAt(i); |
- if (wch == L'-' || wch == L'.' || (wch >= L'a' && wch <= L'z') || |
- (wch >= L'0' && wch <= L'9')) { |
+ if (wch == L'-' || FXSYS_iswalnum(wch)) { |
continue; |
- } else { |
+ } |
+ if (wch != L'.' || i == pPos + 1) { |
+ // Domain name should end before invalid char. |
+ int host_end = i == pPos + 1 ? i - 2 : i - 1; |
+ if (pPos > 0 && host_end - aPos >= 3) { |
+ // Trim the ending invalid chars if there is at least one '.' and name. |
+ str = str.Left(host_end + 1); |
+ break; |
+ } |
return FALSE; |
} |
+ pPos = i; |
} |
+ |
if (str.Find(L"mailto:") == -1) { |
str = L"mailto:" + str; |
} |