| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "content/renderer/android/email_detector.h" | |
| 6 | |
| 7 #include <memory> | |
| 8 | |
| 9 #include "base/logging.h" | |
| 10 #include "base/strings/utf_string_conversions.h" | |
| 11 #include "content/public/renderer/android_content_detection_prefixes.h" | |
| 12 #include "net/base/escape.h" | |
| 13 #include "third_party/icu/source/i18n/unicode/regex.h" | |
| 14 | |
namespace {

// Maximum length of an email address. This is the value handed back by
// EmailDetector::GetMaximumContentLength().
constexpr size_t kMaximumEmailLength = 254;

// Pattern used to recognize an email address.
// It is intentionally stricter than RFC 2822: uncommon special characters are
// rejected so that fewer non-addresses are matched.
// The pattern is delimited by word boundaries (\b) so that an address wrapped
// in punctuation, quote marks etc. is still found.
// Only upper-case ranges are spelled out; the matcher that consumes this
// pattern is created with UREGEX_CASE_INSENSITIVE.
constexpr char kEmailRegex[] =
    "\\b[A-Z0-9._%+-]+@[A-Z0-9-]+(\\.[A-Z0-9-]+)*(\\.[A-Z]{2,6})\\b";

}  // namespace
| 29 | |
| 30 namespace content { | |
| 31 | |
| 32 EmailDetector::EmailDetector() { | |
| 33 } | |
| 34 | |
| 35 size_t EmailDetector::GetMaximumContentLength() { | |
| 36 return kMaximumEmailLength; | |
| 37 } | |
| 38 | |
| 39 GURL EmailDetector::GetIntentURL(const std::string& content_text) { | |
| 40 if (content_text.empty()) | |
| 41 return GURL(); | |
| 42 | |
| 43 return GURL(kEmailPrefix + | |
| 44 net::EscapeQueryParamValue(content_text, true)); | |
| 45 } | |
| 46 | |
| 47 bool EmailDetector::FindContent(const base::string16::const_iterator& begin, | |
| 48 const base::string16::const_iterator& end, | |
| 49 size_t* start_pos, | |
| 50 size_t* end_pos, | |
| 51 std::string* content_text) { | |
| 52 base::string16 utf16_input = base::string16(begin, end); | |
| 53 icu::UnicodeString pattern(kEmailRegex); | |
| 54 icu::UnicodeString input(utf16_input.data(), utf16_input.length()); | |
| 55 UErrorCode status = U_ZERO_ERROR; | |
| 56 std::unique_ptr<icu::RegexMatcher> matcher( | |
| 57 new icu::RegexMatcher(pattern, input, UREGEX_CASE_INSENSITIVE, status)); | |
| 58 if (matcher->find()) { | |
| 59 *start_pos = matcher->start(status); | |
| 60 DCHECK(U_SUCCESS(status)); | |
| 61 *end_pos = matcher->end(status); | |
| 62 DCHECK(U_SUCCESS(status)); | |
| 63 icu::UnicodeString content_ustr(matcher->group(status)); | |
| 64 DCHECK(U_SUCCESS(status)); | |
| 65 content_text->clear(); | |
| 66 content_ustr.toUTF8String(*content_text); | |
| 67 return true; | |
| 68 } | |
| 69 | |
| 70 return false; | |
| 71 } | |
| 72 | |
| 73 } // namespace content | |
| OLD | NEW |