Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(91)

Side by Side Diff: net/base/net_util.cc

Issue 171012: Use 'icu::' namespace explicitly (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/common/time_format.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <algorithm> 5 #include <algorithm>
6 #include <map> 6 #include <map>
7 #include <unicode/ucnv.h> 7 #include <unicode/ucnv.h>
8 #include <unicode/uidna.h> 8 #include <unicode/uidna.h>
9 #include <unicode/ulocdata.h> 9 #include <unicode/ulocdata.h>
10 #include <unicode/uniset.h> 10 #include <unicode/uniset.h>
(...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after
487 // Latin letters in the ASCII range. 487 // Latin letters in the ASCII range.
488 bool IsCompatibleWithASCIILetters(const std::string& lang) { 488 bool IsCompatibleWithASCIILetters(const std::string& lang) {
489 // For now, just list Chinese, Japanese and Korean (positive list). 489 // For now, just list Chinese, Japanese and Korean (positive list).
490 // An alternative is negative-listing (languages using Greek and 490 // An alternative is negative-listing (languages using Greek and
491 // Cyrillic letters), but it can be more dangerous. 491 // Cyrillic letters), but it can be more dangerous.
492 return !lang.substr(0, 2).compare("zh") || 492 return !lang.substr(0, 2).compare("zh") ||
493 !lang.substr(0, 2).compare("ja") || 493 !lang.substr(0, 2).compare("ja") ||
494 !lang.substr(0, 2).compare("ko"); 494 !lang.substr(0, 2).compare("ko");
495 } 495 }
496 496
497 typedef std::map<std::string, UnicodeSet*> LangToExemplarSetMap; 497 typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap;
498 498
499 class LangToExemplarSet { 499 class LangToExemplarSet {
500 private: 500 private:
501 LangToExemplarSetMap map; 501 LangToExemplarSetMap map;
502 LangToExemplarSet() { } 502 LangToExemplarSet() { }
503 ~LangToExemplarSet() { 503 ~LangToExemplarSet() {
504 STLDeleteContainerPairSecondPointers(map.begin(), map.end()); 504 STLDeleteContainerPairSecondPointers(map.begin(), map.end());
505 } 505 }
506 506
507 friend class Singleton<LangToExemplarSet>; 507 friend class Singleton<LangToExemplarSet>;
508 friend struct DefaultSingletonTraits<LangToExemplarSet>; 508 friend struct DefaultSingletonTraits<LangToExemplarSet>;
509 friend bool GetExemplarSetForLang(const std::string&, UnicodeSet**); 509 friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**);
510 friend void SetExemplarSetForLang(const std::string&, UnicodeSet*); 510 friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*);
511 511
512 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet); 512 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet);
513 }; 513 };
514 514
515 bool GetExemplarSetForLang(const std::string& lang, UnicodeSet** lang_set) { 515 bool GetExemplarSetForLang(const std::string& lang,
516 icu::UnicodeSet** lang_set) {
516 const LangToExemplarSetMap& map = Singleton<LangToExemplarSet>()->map; 517 const LangToExemplarSetMap& map = Singleton<LangToExemplarSet>()->map;
517 LangToExemplarSetMap::const_iterator pos = map.find(lang); 518 LangToExemplarSetMap::const_iterator pos = map.find(lang);
518 if (pos != map.end()) { 519 if (pos != map.end()) {
519 *lang_set = pos->second; 520 *lang_set = pos->second;
520 return true; 521 return true;
521 } 522 }
522 return false; 523 return false;
523 } 524 }
524 525
525 void SetExemplarSetForLang(const std::string& lang, UnicodeSet* lang_set) { 526 void SetExemplarSetForLang(const std::string& lang,
527 icu::UnicodeSet* lang_set) {
526 LangToExemplarSetMap& map = Singleton<LangToExemplarSet>()->map; 528 LangToExemplarSetMap& map = Singleton<LangToExemplarSet>()->map;
527 map.insert(std::make_pair(lang, lang_set)); 529 map.insert(std::make_pair(lang, lang_set));
528 } 530 }
529 531
530 static Lock lang_set_lock; 532 static Lock lang_set_lock;
531 533
532 // Returns true if all the characters in component_characters are used by 534 // Returns true if all the characters in component_characters are used by
533 // the language |lang|. 535 // the language |lang|.
534 bool IsComponentCoveredByLang(const UnicodeSet& component_characters, 536 bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters,
535 const std::string& lang) { 537 const std::string& lang) {
536 static const UnicodeSet kASCIILetters(0x61, 0x7a); // [a-z] 538 static const icu::UnicodeSet kASCIILetters(0x61, 0x7a); // [a-z]
537 UnicodeSet* lang_set; 539 icu::UnicodeSet* lang_set;
538 // We're called from both the UI thread and the history thread. 540 // We're called from both the UI thread and the history thread.
539 { 541 {
540 AutoLock lock(lang_set_lock); 542 AutoLock lock(lang_set_lock);
541 if (!GetExemplarSetForLang(lang, &lang_set)) { 543 if (!GetExemplarSetForLang(lang, &lang_set)) {
542 UErrorCode status = U_ZERO_ERROR; 544 UErrorCode status = U_ZERO_ERROR;
543 ULocaleData* uld = ulocdata_open(lang.c_str(), &status); 545 ULocaleData* uld = ulocdata_open(lang.c_str(), &status);
544 // TODO(jungshik) Turn this check on when the ICU data file is 546 // TODO(jungshik) Turn this check on when the ICU data file is
545 // rebuilt with the minimal subset of locale data for languages 547 // rebuilt with the minimal subset of locale data for languages
546 // to which Chrome is not localized but which we offer in the list 548 // to which Chrome is not localized but which we offer in the list
547 // of languages selectable for Accept-Languages. With the rebuilt ICU 549 // of languages selectable for Accept-Languages. With the rebuilt ICU
548 // data, ulocdata_open never should fall back to the default locale. 550 // data, ulocdata_open never should fall back to the default locale.
549 // (issue 2078) 551 // (issue 2078)
550 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING); 552 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING);
551 if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) { 553 if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) {
552 lang_set = reinterpret_cast<UnicodeSet *>( 554 lang_set = reinterpret_cast<icu::UnicodeSet *>(
553 ulocdata_getExemplarSet(uld, NULL, 0, 555 ulocdata_getExemplarSet(uld, NULL, 0,
554 ULOCDATA_ES_STANDARD, &status)); 556 ULOCDATA_ES_STANDARD, &status));
555 // If |lang| is compatible with ASCII Latin letters, add them. 557 // If |lang| is compatible with ASCII Latin letters, add them.
556 if (IsCompatibleWithASCIILetters(lang)) 558 if (IsCompatibleWithASCIILetters(lang))
557 lang_set->addAll(kASCIILetters); 559 lang_set->addAll(kASCIILetters);
558 } else { 560 } else {
559 lang_set = new UnicodeSet(1, 0); 561 lang_set = new icu::UnicodeSet(1, 0);
560 } 562 }
561 lang_set->freeze(); 563 lang_set->freeze();
562 SetExemplarSetForLang(lang, lang_set); 564 SetExemplarSetForLang(lang, lang_set);
563 ulocdata_close(uld); 565 ulocdata_close(uld);
564 } 566 }
565 } 567 }
566 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); 568 return !lang_set->isEmpty() && lang_set->containsAll(component_characters);
567 } 569 }
568 570
569 // Returns true if the given Unicode host component is safe to display to the 571 // Returns true if the given Unicode host component is safe to display to the
(...skipping 21 matching lines...) Expand all
591 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" 593 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338"
592 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" 594 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"
593 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" 595 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"
594 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" 596 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"
595 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" 597 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"
596 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" 598 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"
597 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" 599 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"
598 L"[\ufffa-\ufffd]]"), status); 600 L"[\ufffa-\ufffd]]"), status);
599 #else 601 #else
600 UnicodeSet dangerous_characters(UnicodeString( 602 UnicodeSet dangerous_characters(UnicodeString(
601 "[[\\ \\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" 603 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
602 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" 604 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"
603 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" 605 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"
604 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" 606 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"
605 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" 607 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"
606 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" 608 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"
607 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" 609 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"
608 "[\\ufffa-\\ufffd]]", -1, US_INV), status); 610 "[\\ufffa-\\ufffd]]", -1, US_INV), status);
609 #endif 611 #endif
610 DCHECK(U_SUCCESS(status)); 612 DCHECK(U_SUCCESS(status));
611 UnicodeSet component_characters; 613 icu::UnicodeSet component_characters;
612 component_characters.addAll(UnicodeString(str, str_len)); 614 component_characters.addAll(icu::UnicodeString(str, str_len));
613 if (dangerous_characters.containsSome(component_characters)) 615 if (dangerous_characters.containsSome(component_characters))
614 return false; 616 return false;
615 617
616 // If the language list is empty, the result is completely determined 618 // If the language list is empty, the result is completely determined
617 // by whether a component is a single script or not. This will block 619 // by whether a component is a single script or not. This will block
618 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are 620 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are
619 // allowed with |languages| (while it blocks Chinese + Latin letters with 621 // allowed with |languages| (while it blocks Chinese + Latin letters with
620 // an accent as should be the case), but we want to err on the safe side 622 // an accent as should be the case), but we want to err on the safe side
621 // when |languages| is empty. 623 // when |languages| is empty.
622 if (languages.empty()) 624 if (languages.empty())
623 return IsIDNComponentInSingleScript(str, str_len); 625 return IsIDNComponentInSingleScript(str, str_len);
624 626
625 // |common_characters| is made up of ASCII numbers, hyphen, plus and 627 // |common_characters| is made up of ASCII numbers, hyphen, plus and
626 // underscore that are used across scripts and allowed in domain names. 628 // underscore that are used across scripts and allowed in domain names.
627 // (sync'd with characters allowed in url_canon_host with square 629 // (sync'd with characters allowed in url_canon_host with square
628 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. 630 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc.
629 UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), 631 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"),
630 status); 632 status);
631 DCHECK(U_SUCCESS(status)); 633 DCHECK(U_SUCCESS(status));
632 // Subtract common characters because they're always allowed so that 634 // Subtract common characters because they're always allowed so that
633 // we just have to check if a language-specific set contains 635 // we just have to check if a language-specific set contains
634 // the remainder. 636 // the remainder.
635 component_characters.removeAll(common_characters); 637 component_characters.removeAll(common_characters);
636 638
637 std::string languages_list(WideToASCII(languages)); 639 std::string languages_list(WideToASCII(languages));
638 StringTokenizer t(languages_list, ","); 640 StringTokenizer t(languages_list, ",");
639 while (t.GetNext()) { 641 while (t.GetNext()) {
640 if (IsComponentCoveredByLang(component_characters, t.token())) 642 if (IsComponentCoveredByLang(component_characters, t.token()))
(...skipping 659 matching lines...) Expand 10 before | Expand all | Expand 10 after
1300 GURL SimplifyUrlForRequest(const GURL& url) { 1302 GURL SimplifyUrlForRequest(const GURL& url) {
1301 DCHECK(url.is_valid()); 1303 DCHECK(url.is_valid());
1302 GURL::Replacements replacements; 1304 GURL::Replacements replacements;
1303 replacements.ClearUsername(); 1305 replacements.ClearUsername();
1304 replacements.ClearPassword(); 1306 replacements.ClearPassword();
1305 replacements.ClearRef(); 1307 replacements.ClearRef();
1306 return url.ReplaceComponents(replacements); 1308 return url.ReplaceComponents(replacements);
1307 } 1309 }
1308 1310
1309 } // namespace net 1311 } // namespace net
OLD | NEW
« no previous file with comments | « chrome/common/time_format.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698