Chromium Code Reviews| Index: components/url_formatter/idn_spoof_checker.cc |
| diff --git a/components/url_formatter/idn_spoof_checker.cc b/components/url_formatter/idn_spoof_checker.cc |
| index c3209cc6d4a699c012225de86f165a7fd3203613..f6a1a51b3d43ef2c8b394e15d7d5da984fb48cfa 100644 |
| --- a/components/url_formatter/idn_spoof_checker.cc |
| +++ b/components/url_formatter/idn_spoof_checker.cc |
| @@ -232,11 +232,10 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label, |
| // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters |
| // (U+30D[8-A]) that look exactly like each other when they're used in a |
| // label otherwise entirely in Katakna or Hiragana. |
| - // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small |
| - // Letter Co) to be next to Latin. |
| - // - Disallow Latin 'o' and 'g' next to Armenian. |
| + // - Disallow mixing of Latin and Armenian. |
|
Peter Kasting
2017/08/29 04:08:30
Nit: Period at end (2 places)
|
| // - Disalow mixing of Latin and Canadian Syllabary. |
| // - Disalow mixing of Latin and Tifinagh. |
| + // - Disallow mixing of Latin and Miao. |
| // - Disallow combining diacritical mark (U+0300-U+0339) after a non-LGC |
| // character. Other combining diacritical marks are not in the allowed |
| // character set. |
| @@ -250,14 +249,11 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label, |
| R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" |
| R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" |
| R"([a-z]\u30fb|\u30fb[a-z]|)" |
| - R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" |
| - R"([a-z][\u0585\u0581]+[a-z]|)" |
| - R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" |
| - R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" |
| + R"([\p{sc=armn}].*[a-z]|[a-z].*[\p{sc=armn}]|)" |
| R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}]|)" |
| R"([\p{sc=tfng}].*[a-z]|[a-z].*[\p{sc=tfng}]|)" |
| - R"([^\p{scx=latn}\p{scx=grek}\p{scx=cyrl}][\u0300-\u0339])", |
| - -1, US_INV), |
|
Peter Kasting
2017/08/29 04:08:29
Were these just default values?
|
| + R"([\p{sc=miao}].*[a-z]|[a-z].*[\p{sc=miao}]|)" |
| + R"([^\p{scx=latn}\p{scx=grek}\p{scx=cyrl}][\u0300-\u0339])"), |
| 0, status); |
| tls_index.Set(dangerous_pattern); |
| } |
| @@ -330,17 +326,16 @@ void IDNSpoofChecker::SetAllowedUnicodeSet(UErrorCode* status) { |
| // identifiers. Therefore, only characters belonging to |
| // [:Identifier_Type=Aspirational:] (listed in 'Status/Type=Aspirational' |
| // section at |
| -// http://www.unicode.org/Public/security/latest/xidmodifications.txt) are |
| -// are added to the allowed set. The list has to be updated when a new |
| +// http://www.unicode.org/Public/security/9.0.0/IdentifierType.txt) are |
| +// added to the allowed set. The list has to be updated when a new |
| // version of Unicode is released. The current version is 9.0.0 and ICU 60 |
| // will have Unicode 10.0 data. |
| +// Note that Mongolian is dropped because it's written vertically. |
| #if U_ICU_VERSION_MAJOR_NUM < 60 |
| const icu::UnicodeSet aspirational_scripts( |
| icu::UnicodeString( |
| // Unified Canadian Syllabics |
| "[\\u1401-\\u166C\\u166F-\\u167F" |
| - // Mongolian |
| - "\\u1810-\\u1819\\u1820-\\u1877\\u1880-\\u18AA" |
| // Unified Canadian Syllabics |
| "\\u18B0-\\u18F5" |
| // Tifinagh |