Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(244)

Side by Side Diff: components/url_formatter/url_formatter.cc

Issue 2436113003: Update aspirational_scripts per Unicode 9 (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_formatter/url_formatter.h" 5 #include "components/url_formatter/url_formatter.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <utility> 8 #include <utility>
9 9
10 #include "base/lazy_instance.h" 10 #include "base/lazy_instance.h"
(...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after
421 allowed_set.addAll(*inclusion_set); 421 allowed_set.addAll(*inclusion_set);
422 422
423 // Five aspirational scripts are taken from UTR 31 Table 6 at 423 // Five aspirational scripts are taken from UTR 31 Table 6 at
424 // http://www.unicode.org/reports/tr31/#Aspirational_Use_Scripts . 424 // http://www.unicode.org/reports/tr31/#Aspirational_Use_Scripts .
425 // Not all the characters of aspirational scripts are suitable for 425 // Not all the characters of aspirational scripts are suitable for
426 // identifiers. Therefore, only characters belonging to 426 // identifiers. Therefore, only characters belonging to
427 // [:Identifier_Type=Aspirational:] (listed in 'Status/Type=Aspirational' 427 // [:Identifier_Type=Aspirational:] (listed in 'Status/Type=Aspirational'
428 // section at 428 // section at
429 // http://www.unicode.org/Public/security/latest/xidmodifications.txt) are 429 // http://www.unicode.org/Public/security/latest/xidmodifications.txt) are
430 // added to the allowed set. The list has to be updated when a new 430 // added to the allowed set. The list has to be updated when a new
431 // version of Unicode is released. The current version is 8.0.0 and ICU 58 431 // version of Unicode is released. The current version is 9.0.0 and ICU 60
432 // will have Unicode 9.0 data. 432 // will have Unicode 10.0 data.
433 #if U_ICU_VERSION_MAJOR_NUM < 58 433 #if U_ICU_VERSION_MAJOR_NUM < 60
434 const icu::UnicodeSet aspirational_scripts( 434 const icu::UnicodeSet aspirational_scripts(
435 icu::UnicodeString( 435 icu::UnicodeString(
436 // Unified Canadian Syllabics 436 // Unified Canadian Syllabics
437 "[\\u1401-\\u166C\\u166F-\\u167F" 437 "[\\u1401-\\u166C\\u166F-\\u167F"
438 // Mongolian 438 // Mongolian
439 "\\u1810-\\u1819\\u1820-\\u1877\\u1880-\\u18AA" 439 "\\u1810-\\u1819\\u1820-\\u1877\\u1880-\\u18AA"
440 // Unified Canadian Syllabics 440 // Unified Canadian Syllabics
441 "\\u18B0-\\u18F5" 441 "\\u18B0-\\u18F5"
442 // Tifinagh 442 // Tifinagh
443 "\\u2D30-\\u2D67\\u2D7F" 443 "\\u2D30-\\u2D67\\u2D7F"
444 // Yi 444 // Yi
445 "\\uA000-\\uA48C" 445 "\\uA000-\\uA48C"
446 // Miao 446 // Miao
447 "\\U00016F00-\\U00016F44\\U00016F50-\\U00016F7F" 447 "\\U00016F00-\\U00016F44\\U00016F50-\\U00016F7E"
448 "\\U00016F8F-\\U00016F9F]", 448 "\\U00016F8F-\\U00016F9F]",
449 -1, US_INV), 449 -1, US_INV),
450 *status); 450 *status);
451 allowed_set.addAll(aspirational_scripts); 451 allowed_set.addAll(aspirational_scripts);
452 #else 452 #else
453 #error "Update aspirational_scripts per Unicode 9.0" 453 #error "Update aspirational_scripts per Unicode 10.0"
454 #endif 454 #endif
455 455
456 // U+0338 is included in the recommended set, while U+05F4 and U+2027 are in 456 // U+0338 is included in the recommended set, while U+05F4 and U+2027 are in
457 // the inclusion set. However, they are blacklisted as a part of Mozilla's 457 // the inclusion set. However, they are blacklisted as a part of Mozilla's
458 // IDN blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars). 458 // IDN blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars).
459 // U+0338 and U+2027 are dropped; the former can look like a slash when 459 // U+0338 and U+2027 are dropped; the former can look like a slash when
460 // rendered with a broken font, and the latter can be confused with U+30FB 460 // rendered with a broken font, and the latter can be confused with U+30FB
461 // (Katakana Middle Dot). U+05F4 (Hebrew Punctuation Gershayim) is kept, 461 // (Katakana Middle Dot). U+05F4 (Hebrew Punctuation Gershayim) is kept,
462 // even though it can look like a double quotation mark. Using it in Hebrew 462 // even though it can look like a double quotation mark. Using it in Hebrew
463 // should be safe. When used with a non-Hebrew script, it'd be filtered by 463 // should be safe. When used with a non-Hebrew script, it'd be filtered by
(...skipping 326 matching lines...) Expand 10 before | Expand all | Expand 10 after
790 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) 790 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)
791 ? text.substr(www.length()) : text; 791 ? text.substr(www.length()) : text;
792 } 792 }
793 793
794 base::string16 StripWWWFromHost(const GURL& url) { 794 base::string16 StripWWWFromHost(const GURL& url) {
795 DCHECK(url.is_valid()); 795 DCHECK(url.is_valid());
796 return StripWWW(base::ASCIIToUTF16(url.host_piece())); 796 return StripWWW(base::ASCIIToUTF16(url.host_piece()));
797 } 797 }
798 798
799 } // namespace url_formatter 799 } // namespace url_formatter
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698