Index: components/url_formatter/url_formatter.cc |
diff --git a/components/url_formatter/url_formatter.cc b/components/url_formatter/url_formatter.cc |
index b8a802c96f0a9fff647364645bf903fdf92c2bf3..d54b67913ce76d400907266963475bae6920cdd9 100644 |
--- a/components/url_formatter/url_formatter.cc |
+++ b/components/url_formatter/url_formatter.cc |
@@ -324,8 +324,9 @@ IDNSpoofChecker::IDNSpoofChecker() { |
non_ascii_latin_letters_.freeze(); |
// These letters are parts of |dangerous_patterns_|. |
- kana_letters_exceptions_ = icu::UnicodeSet(UNICODE_STRING_SIMPLE( |
- "[\\u3078-\\u307a\\u30d8-\\u30da\\u30fb\\u30fc]"), status); |
+ kana_letters_exceptions_ = icu::UnicodeSet( |
+ UNICODE_STRING_SIMPLE("[\\u3078-\\u307a\\u30d8-\\u30da\\u30fb-\\u30fe]"), |
+ status); |
kana_letters_exceptions_.freeze(); |
// These Cyrillic letters look like Latin. A domain label entirely made of |
@@ -406,6 +407,8 @@ bool IDNSpoofChecker::Check(base::StringPiece16 label, bool is_tld_ascii) { |
// TODO(jshin): adjust the pattern once the above ICU bug is fixed. |
// - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Hiragana-Katakana |
// Prolonged Sound) used out-of-context. |
+ // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) |
+ // unless they're preceded by a Katakana. |
// - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters |
// (U+30D[8-A]) that look exactly like each other when they're used in a |
// label otherwise entirely in Katakna or Hiragana. |
@@ -417,15 +420,16 @@ bool IDNSpoofChecker::Check(base::StringPiece16 label, bool is_tld_ascii) { |
"[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]" |
"[\\u30ce\\u30f3\\u30bd\\u30be]" |
"[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]|" |
- "[^\\p{scx=kana}\\p{scx=hira}]\\u30fc|" |
- "\\u30fc[^\\p{scx=kana}\\p{scx=hira}]|" |
+ "[^\\p{scx=kana}\\p{scx=hira}]\\u30fc|^\\u30fc|" |
+ "[^\\p{scx=kana}][\\u30fd\\u30fe]|^[\\u30fd\\u30fe]|" |
"^[\\p{scx=kana}]+[\\u3078-\\u307a][\\p{scx=kana}]+$|" |
"^[\\p{scx=hira}]+[\\u30d8-\\u30da][\\p{scx=hira}]+$|" |
"[a-z]\\u30fb|\\u30fb[a-z]|" |
"^[\\u0585\\u0581]+[a-z]|[a-z][\\u0585\\u0581]+$|" |
"[a-z][\\u0585\\u0581]+[a-z]|" |
"^[og]+[\\p{scx=armn}]|[\\p{scx=armn}][og]+$|" |
- "[\\p{scx=armn}][og]+[\\p{scx=armn}]", -1, US_INV), |
+ "[\\p{scx=armn}][og]+[\\p{scx=armn}]", |
+ -1, US_INV), |
0, status); |
tls_index.Set(dangerous_pattern); |
} |