| OLD | NEW |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/idn_spoof_checker.h" | 5 #include "components/url_formatter/idn_spoof_checker.h" |
| 6 | 6 |
| 7 #include "base/numerics/safe_conversions.h" | 7 #include "base/numerics/safe_conversions.h" |
| 8 #include "base/strings/string_split.h" | 8 #include "base/strings/string_split.h" |
| 9 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
| 10 #include "base/threading/thread_local_storage.h" | 10 #include "base/threading/thread_local_storage.h" |
| (...skipping 241 matching lines...) | (...skipping 241 matching lines...) |
| 252 // Miao | 252 // Miao |
| 253 "\\U00016F00-\\U00016F44\\U00016F50-\\U00016F7E" | 253 "\\U00016F00-\\U00016F44\\U00016F50-\\U00016F7E" |
| 254 "\\U00016F8F-\\U00016F9F]", | 254 "\\U00016F8F-\\U00016F9F]", |
| 255 -1, US_INV), | 255 -1, US_INV), |
| 256 *status); | 256 *status); |
| 257 allowed_set.addAll(aspirational_scripts); | 257 allowed_set.addAll(aspirational_scripts); |
| 258 #else | 258 #else |
| 259 #error "Update aspirational_scripts per Unicode 10.0" | 259 #error "Update aspirational_scripts per Unicode 10.0" |
| 260 #endif | 260 #endif |
| 261 | 261 |
| 262 // U+0338 is included in the recommended set, while U+05F4 and U+2027 are in | 262 // The sections below refer to Mozilla's IDN blacklist: |
| 263 // the inclusion set. However, they are blacklisted as a part of Mozilla's | 263 // http://kb.mozillazine.org/Network.IDN.blacklist_chars |
| 264 // IDN blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars). | 264 // |
| 265 // U+2010 is in the inclusion set, but we drop it because it can be confused | 265 // U+0338 (Combining Long Solidus Overlay) is included in the recommended set, |
| 266 // with an ASCII U+002D (Hyphen-Minus). | 266 // but is blacklisted by Mozilla. It is dropped because it can look like a |
| 267 // U+0338 and U+2027 are dropped; the former can look like a slash when | 267 // slash when rendered with a broken font. |
| 268 // rendered with a broken font, and the latter can be confused with U+30FB | 268 allowed_set.remove(0x338u); |
| 269 // (Katakana Middle Dot). U+05F4 (Hebrew Punctuation Gershayim) is kept, | 269 // U+05F4 (Hebrew Punctuation Gershayim) is in the inclusion set, but is |
| 270 // even though it can look like a double quotation mark. Using it in Hebrew | 270 // blacklisted by Mozilla. We keep it, even though it can look like a double |
| 271 // should be safe. When used with a non-Hebrew script, it'd be filtered by | 271 // quotation mark. Using it in Hebrew should be safe. When used with a |
| 272 // other checks in place. | 272 // non-Hebrew script, it'd be filtered by other checks in place. |
| 273 allowed_set.remove(0x338u); // Combining Long Solidus Overlay | 273 // |
| 274 allowed_set.remove(0x2010u); // Hyphen | 274 // U+2010 (Hyphen) is in the inclusion set, but we drop it because it can be |
| 275 allowed_set.remove(0x2027u); // Hyphenation Point | 275 // confused with an ASCII U+002D (Hyphen-Minus). |
| | 276 allowed_set.remove(0x2010u); |
| | 277 // U+2027 (Hyphenation Point) is in the inclusion set, but is blacklisted by |
| | 278 // Mozilla. It is dropped, as it can be confused with U+30FB (Katakana Middle |
| | 279 // Dot). |
| | 280 allowed_set.remove(0x2027u); |
| 276 | 281 |
| 277 #if defined(OS_MACOSX) | 282 #if defined(OS_MACOSX) |
| 278 // The following characters are reported as present in the default macOS | 283 // The following characters are reported as present in the default macOS |
| 279 // system UI font, but they render as blank. Remove them from the allowed | 284 // system UI font, but they render as blank. Remove them from the allowed |
| 280 // set to prevent spoofing. | 285 // set to prevent spoofing. |
| 281 // Tibetan characters used for transliteration of ancient texts: | 286 // Tibetan characters used for transliteration of ancient texts: |
| 282 allowed_set.remove(0x0F8Cu); | 287 allowed_set.remove(0x0F8Cu); |
| 283 allowed_set.remove(0x0F8Du); | 288 allowed_set.remove(0x0F8Du); |
| 284 allowed_set.remove(0x0F8Eu); | 289 allowed_set.remove(0x0F8Eu); |
| 285 allowed_set.remove(0x0F8Fu); | 290 allowed_set.remove(0x0F8Fu); |
| 286 #endif | 291 #endif |
| 287 | 292 |
| 288 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); | 293 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); |
| 289 } | 294 } |
| 290 | 295 |
| 291 } // namespace url_formatter | 296 } // namespace url_formatter |
| OLD | NEW |