OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <algorithm> | 5 #include <algorithm> |
6 #include <map> | 6 #include <map> |
7 #include <unicode/ucnv.h> | 7 #include <unicode/ucnv.h> |
8 #include <unicode/uidna.h> | 8 #include <unicode/uidna.h> |
9 #include <unicode/ulocdata.h> | 9 #include <unicode/ulocdata.h> |
10 #include <unicode/uniset.h> | 10 #include <unicode/uniset.h> |
(...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
487 // Latin letters in the ASCII range. | 487 // Latin letters in the ASCII range. |
bool IsCompatibleWithASCIILetters(const std::string& lang) {
  // For now, just list Chinese, Japanese and Korean (positive list).
  // An alternative is negative-listing (languages using Greek and
  // Cyrillic letters), but it can be more dangerous.
  //
  // Compare the primary language subtag as a whole (everything before the
  // first '-' or '_') rather than the first two characters: a plain prefix
  // test would also accept unrelated codes that merely start with the same
  // two letters, such as "kok" (Konkani).  When |lang| has no separator,
  // find_first_of() returns npos and substr() yields the entire string.
  const std::string primary_subtag = lang.substr(0, lang.find_first_of("-_"));
  return primary_subtag == "zh" ||
         primary_subtag == "ja" ||
         primary_subtag == "ko";
}
496 | 496 |
497 typedef std::map<std::string, UnicodeSet*> LangToExemplarSetMap; | 497 typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap; |
498 | 498 |
499 class LangToExemplarSet { | 499 class LangToExemplarSet { |
500 private: | 500 private: |
501 LangToExemplarSetMap map; | 501 LangToExemplarSetMap map; |
502 LangToExemplarSet() { } | 502 LangToExemplarSet() { } |
503 ~LangToExemplarSet() { | 503 ~LangToExemplarSet() { |
504 STLDeleteContainerPairSecondPointers(map.begin(), map.end()); | 504 STLDeleteContainerPairSecondPointers(map.begin(), map.end()); |
505 } | 505 } |
506 | 506 |
507 friend class Singleton<LangToExemplarSet>; | 507 friend class Singleton<LangToExemplarSet>; |
508 friend struct DefaultSingletonTraits<LangToExemplarSet>; | 508 friend struct DefaultSingletonTraits<LangToExemplarSet>; |
509 friend bool GetExemplarSetForLang(const std::string&, UnicodeSet**); | 509 friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**); |
510 friend void SetExemplarSetForLang(const std::string&, UnicodeSet*); | 510 friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*); |
511 | 511 |
512 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet); | 512 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet); |
513 }; | 513 }; |
514 | 514 |
515 bool GetExemplarSetForLang(const std::string& lang, UnicodeSet** lang_set) { | 515 bool GetExemplarSetForLang(const std::string& lang, |
| 516 icu::UnicodeSet** lang_set) { |
516 const LangToExemplarSetMap& map = Singleton<LangToExemplarSet>()->map; | 517 const LangToExemplarSetMap& map = Singleton<LangToExemplarSet>()->map; |
517 LangToExemplarSetMap::const_iterator pos = map.find(lang); | 518 LangToExemplarSetMap::const_iterator pos = map.find(lang); |
518 if (pos != map.end()) { | 519 if (pos != map.end()) { |
519 *lang_set = pos->second; | 520 *lang_set = pos->second; |
520 return true; | 521 return true; |
521 } | 522 } |
522 return false; | 523 return false; |
523 } | 524 } |
524 | 525 |
525 void SetExemplarSetForLang(const std::string& lang, UnicodeSet* lang_set) { | 526 void SetExemplarSetForLang(const std::string& lang, |
| 527 icu::UnicodeSet* lang_set) { |
526 LangToExemplarSetMap& map = Singleton<LangToExemplarSet>()->map; | 528 LangToExemplarSetMap& map = Singleton<LangToExemplarSet>()->map; |
527 map.insert(std::make_pair(lang, lang_set)); | 529 map.insert(std::make_pair(lang, lang_set)); |
528 } | 530 } |
529 | 531 |
530 static Lock lang_set_lock; | 532 static Lock lang_set_lock; |
531 | 533 |
532 // Returns true if all the characters in component_characters are used by | 534 // Returns true if all the characters in component_characters are used by |
533 // the language |lang|. | 535 // the language |lang|. |
534 bool IsComponentCoveredByLang(const UnicodeSet& component_characters, | 536 bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters, |
535 const std::string& lang) { | 537 const std::string& lang) { |
536 static const UnicodeSet kASCIILetters(0x61, 0x7a); // [a-z] | 538 static const icu::UnicodeSet kASCIILetters(0x61, 0x7a); // [a-z] |
537 UnicodeSet* lang_set; | 539 icu::UnicodeSet* lang_set; |
538 // We're called from both the UI thread and the history thread. | 540 // We're called from both the UI thread and the history thread. |
539 { | 541 { |
540 AutoLock lock(lang_set_lock); | 542 AutoLock lock(lang_set_lock); |
541 if (!GetExemplarSetForLang(lang, &lang_set)) { | 543 if (!GetExemplarSetForLang(lang, &lang_set)) { |
542 UErrorCode status = U_ZERO_ERROR; | 544 UErrorCode status = U_ZERO_ERROR; |
543 ULocaleData* uld = ulocdata_open(lang.c_str(), &status); | 545 ULocaleData* uld = ulocdata_open(lang.c_str(), &status); |
544 // TODO(jungshik) Turn this check on when the ICU data file is | 546 // TODO(jungshik) Turn this check on when the ICU data file is |
545 // rebuilt with the minimal subset of locale data for languages | 547 // rebuilt with the minimal subset of locale data for languages |
546 // to which Chrome is not localized but which we offer in the list | 548 // to which Chrome is not localized but which we offer in the list |
547 // of languages selectable for Accept-Languages. With the rebuilt ICU | 549 // of languages selectable for Accept-Languages. With the rebuilt ICU |
548 // data, ulocdata_open never should fall back to the default locale. | 550 // data, ulocdata_open never should fall back to the default locale. |
549 // (issue 2078) | 551 // (issue 2078) |
550 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING); | 552 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING); |
551 if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) { | 553 if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) { |
552 lang_set = reinterpret_cast<UnicodeSet *>( | 554 lang_set = reinterpret_cast<icu::UnicodeSet *>( |
553 ulocdata_getExemplarSet(uld, NULL, 0, | 555 ulocdata_getExemplarSet(uld, NULL, 0, |
554 ULOCDATA_ES_STANDARD, &status)); | 556 ULOCDATA_ES_STANDARD, &status)); |
555 // If |lang| is compatible with ASCII Latin letters, add them. | 557 // If |lang| is compatible with ASCII Latin letters, add them. |
556 if (IsCompatibleWithASCIILetters(lang)) | 558 if (IsCompatibleWithASCIILetters(lang)) |
557 lang_set->addAll(kASCIILetters); | 559 lang_set->addAll(kASCIILetters); |
558 } else { | 560 } else { |
559 lang_set = new UnicodeSet(1, 0); | 561 lang_set = new icu::UnicodeSet(1, 0); |
560 } | 562 } |
561 lang_set->freeze(); | 563 lang_set->freeze(); |
562 SetExemplarSetForLang(lang, lang_set); | 564 SetExemplarSetForLang(lang, lang_set); |
563 ulocdata_close(uld); | 565 ulocdata_close(uld); |
564 } | 566 } |
565 } | 567 } |
566 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); | 568 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); |
567 } | 569 } |
568 | 570 |
569 // Returns true if the given Unicode host component is safe to display to the | 571 // Returns true if the given Unicode host component is safe to display to the |
(...skipping 21 matching lines...) Expand all Loading... |
591 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" | 593 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" |
592 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" | 594 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" |
593 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" | 595 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" |
594 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" | 596 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" |
595 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" | 597 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" |
596 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" | 598 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" |
597 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" | 599 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" |
598 L"[\ufffa-\ufffd]]"), status); | 600 L"[\ufffa-\ufffd]]"), status); |
599 #else | 601 #else |
600 UnicodeSet dangerous_characters(UnicodeString( | 602 UnicodeSet dangerous_characters(UnicodeString( |
601 "[[\\ \\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" | 603 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" |
602 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" | 604 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" |
603 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" | 605 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" |
604 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" | 606 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" |
605 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" | 607 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" |
606 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" | 608 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" |
607 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" | 609 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" |
608 "[\\ufffa-\\ufffd]]", -1, US_INV), status); | 610 "[\\ufffa-\\ufffd]]", -1, US_INV), status); |
609 #endif | 611 #endif |
610 DCHECK(U_SUCCESS(status)); | 612 DCHECK(U_SUCCESS(status)); |
611 UnicodeSet component_characters; | 613 icu::UnicodeSet component_characters; |
612 component_characters.addAll(UnicodeString(str, str_len)); | 614 component_characters.addAll(icu::UnicodeString(str, str_len)); |
613 if (dangerous_characters.containsSome(component_characters)) | 615 if (dangerous_characters.containsSome(component_characters)) |
614 return false; | 616 return false; |
615 | 617 |
616 // If the language list is empty, the result is completely determined | 618 // If the language list is empty, the result is completely determined |
617 // by whether a component is a single script or not. This will block | 619 // by whether a component is a single script or not. This will block |
618 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are | 620 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are |
619 // allowed with |languages| (while it blocks Chinese + Latin letters with | 621 // allowed with |languages| (while it blocks Chinese + Latin letters with |
620 // an accent as should be the case), but we want to err on the safe side | 622 // an accent as should be the case), but we want to err on the safe side |
621 // when |languages| is empty. | 623 // when |languages| is empty. |
622 if (languages.empty()) | 624 if (languages.empty()) |
623 return IsIDNComponentInSingleScript(str, str_len); | 625 return IsIDNComponentInSingleScript(str, str_len); |
624 | 626 |
625 // |common_characters| is made up of ASCII numbers, hyphen, plus and | 627 // |common_characters| is made up of ASCII numbers, hyphen, plus and |
626 // underscore that are used across scripts and allowed in domain names. | 628 // underscore that are used across scripts and allowed in domain names. |
627 // (sync'd with characters allowed in url_canon_host with square | 629 // (sync'd with characters allowed in url_canon_host with square |
628 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. | 630 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. |
629 UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), | 631 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), |
630 status); | 632 status); |
631 DCHECK(U_SUCCESS(status)); | 633 DCHECK(U_SUCCESS(status)); |
632 // Subtract common characters because they're always allowed so that | 634 // Subtract common characters because they're always allowed so that |
633 // we just have to check if a language-specific set contains | 635 // we just have to check if a language-specific set contains |
634 // the remainder. | 636 // the remainder. |
635 component_characters.removeAll(common_characters); | 637 component_characters.removeAll(common_characters); |
636 | 638 |
637 std::string languages_list(WideToASCII(languages)); | 639 std::string languages_list(WideToASCII(languages)); |
638 StringTokenizer t(languages_list, ","); | 640 StringTokenizer t(languages_list, ","); |
639 while (t.GetNext()) { | 641 while (t.GetNext()) { |
640 if (IsComponentCoveredByLang(component_characters, t.token())) | 642 if (IsComponentCoveredByLang(component_characters, t.token())) |
(...skipping 659 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1300 GURL SimplifyUrlForRequest(const GURL& url) { | 1302 GURL SimplifyUrlForRequest(const GURL& url) { |
1301 DCHECK(url.is_valid()); | 1303 DCHECK(url.is_valid()); |
1302 GURL::Replacements replacements; | 1304 GURL::Replacements replacements; |
1303 replacements.ClearUsername(); | 1305 replacements.ClearUsername(); |
1304 replacements.ClearPassword(); | 1306 replacements.ClearPassword(); |
1305 replacements.ClearRef(); | 1307 replacements.ClearRef(); |
1306 return url.ReplaceComponents(replacements); | 1308 return url.ReplaceComponents(replacements); |
1307 } | 1309 } |
1308 | 1310 |
1309 } // namespace net | 1311 } // namespace net |
OLD | NEW |