| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <utility> | 8 #include <utility> |
| 9 | 9 |
| 10 #include "base/lazy_instance.h" | 10 #include "base/lazy_instance.h" |
| (...skipping 444 matching lines...) | (...skipping 444 matching lines...) |
| 455 -1, US_INV), | 455 -1, US_INV), |
| 456 *status); | 456 *status); |
| 457 allowed_set.addAll(aspirational_scripts); | 457 allowed_set.addAll(aspirational_scripts); |
| 458 #else | 458 #else |
| 459 #error "Update aspirational_scripts per Unicode 10.0" | 459 #error "Update aspirational_scripts per Unicode 10.0" |
| 460 #endif | 460 #endif |
| 461 | 461 |
| 462 // U+0338 is included in the recommended set, while U+05F4 and U+2027 are in | 462 // U+0338 is included in the recommended set, while U+05F4 and U+2027 are in |
| 463 // the inclusion set. However, they are blacklisted as a part of Mozilla's | 463 // the inclusion set. However, they are blacklisted as a part of Mozilla's |
| 464 // IDN blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars). | 464 // IDN blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars). |
| | 465 // U+2010 is in the inclusion set, but we drop it because it can be confused |
| | 466 // with an ASCII U+002D (Hyphen-Minus). |
| 465 // U+0338 and U+2027 are dropped; the former can look like a slash when | 467 // U+0338 and U+2027 are dropped; the former can look like a slash when |
| 466 // rendered with a broken font, and the latter can be confused with U+30FB | 468 // rendered with a broken font, and the latter can be confused with U+30FB |
| 467 // (Katakana Middle Dot). U+05F4 (Hebrew Punctuation Gershayim) is kept, | 469 // (Katakana Middle Dot). U+05F4 (Hebrew Punctuation Gershayim) is kept, |
| 468 // even though it can look like a double quotation mark. Using it in Hebrew | 470 // even though it can look like a double quotation mark. Using it in Hebrew |
| 469 // should be safe. When used with a non-Hebrew script, it'd be filtered by | 471 // should be safe. When used with a non-Hebrew script, it'd be filtered by |
| 470 // other checks in place. | 472 // other checks in place. |
| 471 allowed_set.remove(0x338u); // Combining Long Solidus Overlay | 473 allowed_set.remove(0x338u); // Combining Long Solidus Overlay |
| | 474 allowed_set.remove(0x2010u); // Hyphen |
| 472 allowed_set.remove(0x2027u); // Hyphenation Point | 475 allowed_set.remove(0x2027u); // Hyphenation Point |
| 473 | 476 |
| 474 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); | 477 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); |
| 475 } | 478 } |
| 476 | 479 |
| 477 // Returns true if the given Unicode host component is safe to display to the | 480 // Returns true if the given Unicode host component is safe to display to the |
| 478 // user. Note that this function does not deal with pure ASCII domain labels at | 481 // user. Note that this function does not deal with pure ASCII domain labels at |
| 479 // all even though it's possible to make up look-alike labels with ASCII | 482 // all even though it's possible to make up look-alike labels with ASCII |
| 480 // characters alone. | 483 // characters alone. |
| 481 bool IsIDNComponentSafe(base::StringPiece16 label) { | 484 bool IsIDNComponentSafe(base::StringPiece16 label) { |
| (...skipping 314 matching lines...) | (...skipping 314 matching lines...) |
| 796 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) | 799 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) |
| 797 ? text.substr(www.length()) : text; | 800 ? text.substr(www.length()) : text; |
| 798 } | 801 } |
| 799 | 802 |
| 800 base::string16 StripWWWFromHost(const GURL& url) { | 803 base::string16 StripWWWFromHost(const GURL& url) { |
| 801 DCHECK(url.is_valid()); | 804 DCHECK(url.is_valid()); |
| 802 return StripWWW(base::ASCIIToUTF16(url.host_piece())); | 805 return StripWWW(base::ASCIIToUTF16(url.host_piece())); |
| 803 } | 806 } |
| 804 | 807 |
| 805 } // namespace url_formatter | 808 } // namespace url_formatter |
| OLD | NEW |