| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <utility> | 8 #include <utility> |
| 9 #include <vector> | 9 #include <vector> |
| 10 | 10 |
| 11 #include "base/lazy_instance.h" | 11 #include "base/lazy_instance.h" |
| 12 #include "base/macros.h" | 12 #include "base/macros.h" |
| 13 #include "base/numerics/safe_conversions.h" | 13 #include "base/numerics/safe_conversions.h" |
| 14 #include "base/strings/string_piece.h" | 14 #include "base/strings/string_piece.h" |
| 15 #include "base/strings/string_tokenizer.h" |
| 15 #include "base/strings/string_util.h" | 16 #include "base/strings/string_util.h" |
| 16 #include "base/strings/utf_offset_string_conversions.h" | 17 #include "base/strings/utf_offset_string_conversions.h" |
| 17 #include "base/strings/utf_string_conversions.h" | 18 #include "base/strings/utf_string_conversions.h" |
| 18 #include "base/threading/thread_local_storage.h" | 19 #include "base/threading/thread_local_storage.h" |
| 19 #include "components/url_formatter/idn_spoof_checker.h" | 20 #include "components/url_formatter/idn_spoof_checker.h" |
| 21 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| 20 #include "third_party/icu/source/common/unicode/uidna.h" | 22 #include "third_party/icu/source/common/unicode/uidna.h" |
| 21 #include "third_party/icu/source/common/unicode/utypes.h" | 23 #include "third_party/icu/source/common/unicode/utypes.h" |
| 22 #include "url/gurl.h" | 24 #include "url/gurl.h" |
| 23 #include "url/third_party/mozilla/url_parse.h" | 25 #include "url/third_party/mozilla/url_parse.h" |
| 24 | 26 |
| 25 namespace url_formatter { | 27 namespace url_formatter { |
| 26 | 28 |
| 27 namespace { | 29 namespace { |
| 28 | 30 |
| 29 base::string16 IDNToUnicodeWithAdjustments( | 31 base::string16 IDNToUnicodeWithAdjustments( |
| (...skipping 13 matching lines...) Expand all Loading... |
| 43 const std::string& component_text, | 45 const std::string& component_text, |
| 44 base::OffsetAdjuster::Adjustments* adjustments) const = 0; | 46 base::OffsetAdjuster::Adjustments* adjustments) const = 0; |
| 45 | 47 |
| 46 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an | 48 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an |
| 47 // accessible copy constructor in order to call AppendFormattedComponent() | 49 // accessible copy constructor in order to call AppendFormattedComponent() |
| 48 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). | 50 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). |
| 49 }; | 51 }; |
| 50 | 52 |
| 51 class HostComponentTransform : public AppendComponentTransform { | 53 class HostComponentTransform : public AppendComponentTransform { |
| 52 public: | 54 public: |
| 53 HostComponentTransform() {} | 55 HostComponentTransform(bool trim_trivial_subdomains) |
| 56 : trim_trivial_subdomains_(trim_trivial_subdomains) {} |
| 54 | 57 |
| 55 private: | 58 private: |
| 56 base::string16 Execute( | 59 base::string16 Execute( |
| 57 const std::string& component_text, | 60 const std::string& component_text, |
| 58 base::OffsetAdjuster::Adjustments* adjustments) const override { | 61 base::OffsetAdjuster::Adjustments* adjustments) const override { |
| 59 return IDNToUnicodeWithAdjustments(component_text, adjustments); | 62 if (!trim_trivial_subdomains_) |
| 63 return IDNToUnicodeWithAdjustments(component_text, adjustments); |
| 64 |
| 65 // Exclude the registry and domain from trivial subdomain stripping. |
| 66 // To get the adjustment offset calculations correct, we need to transform |
| 67 // the registry and domain portion of the host as well. |
| 68 std::string domain_and_registry = |
| 69 net::registry_controlled_domains::GetDomainAndRegistry( |
| 70 component_text, |
| 71 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); |
| 72 |
| 73 base::OffsetAdjuster::Adjustments trivial_subdomains_adjustments; |
| 74 base::StringTokenizer tokenizer( |
| 75 component_text.begin(), |
| 76 component_text.end() - domain_and_registry.length(), "."); |
| 77 tokenizer.set_options(base::StringTokenizer::RETURN_DELIMS); |
| 78 |
| 79 std::string transformed_subdomain; |
| 80 while (tokenizer.GetNext()) { |
| 81 // Append delimiters and non-trivial subdomains to the new subdomain part. |
| 82 if (tokenizer.token_is_delim() || |
| 83 (tokenizer.token() != "m" && tokenizer.token() != "www")) { |
| 84 transformed_subdomain += tokenizer.token(); |
| 85 continue; |
| 86 } |
| 87 |
| 88 // We found a trivial subdomain, so we add an adjustment accounting for |
| 89 // the subdomain and the following consumed delimiter. |
| 90 size_t trivial_subdomain_begin = |
| 91 tokenizer.token_begin() - component_text.begin(); |
| 92 trivial_subdomains_adjustments.push_back(base::OffsetAdjuster::Adjustment( |
| 93 trivial_subdomain_begin, tokenizer.token().length() + 1, 0)); |
| 94 |
| 95 // Consume the next token, which must be a delimiter. |
| 96 bool next_delimiter_found = tokenizer.GetNext(); |
| 97 DCHECK(next_delimiter_found); |
| 98 DCHECK(tokenizer.token_is_delim()); |
| 99 } |
| 100 |
| 101 base::string16 unicode_result = IDNToUnicodeWithAdjustments( |
| 102 transformed_subdomain + domain_and_registry, adjustments); |
| 103 base::OffsetAdjuster::MergeSequentialAdjustments( |
| 104 trivial_subdomains_adjustments, adjustments); |
| 105 return unicode_result; |
| 60 } | 106 } |
| 107 |
| 108 bool trim_trivial_subdomains_; |
| 61 }; | 109 }; |
| 62 | 110 |
| 63 class NonHostComponentTransform : public AppendComponentTransform { | 111 class NonHostComponentTransform : public AppendComponentTransform { |
| 64 public: | 112 public: |
| 65 explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules) | 113 explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules) |
| 66 : unescape_rules_(unescape_rules) {} | 114 : unescape_rules_(unescape_rules) {} |
| 67 | 115 |
| 68 private: | 116 private: |
| 69 base::string16 Execute( | 117 base::string16 Execute( |
| 70 const std::string& component_text, | 118 const std::string& component_text, |
| (...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 355 | 403 |
// Bit flags that can be combined into a FormatUrlTypes value and passed to
// FormatUrl(). Each flag occupies its own bit.
const FormatUrlType kFormatUrlOmitNothing = 0;
const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;
const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;
const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;
// Union of the three non-experimental omission flags above; the experimental
// flags below are not part of it.
const FormatUrlType kFormatUrlOmitAll =
    kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP |
    kFormatUrlOmitTrailingSlashOnBareHostname;
const FormatUrlType kFormatUrlExperimentalElideAfterHost = 1 << 3;
const FormatUrlType kFormatUrlExperimentalOmitHTTPS = 1 << 4;
// When set, the host is formatted with trivial subdomain trimming enabled
// (see HostComponentTransform).
const FormatUrlType kFormatUrlExperimentalOmitTrivialSubdomains = 1 << 5;
| 365 | 414 |
| 366 base::string16 FormatUrl(const GURL& url, | 415 base::string16 FormatUrl(const GURL& url, |
| 367 FormatUrlTypes format_types, | 416 FormatUrlTypes format_types, |
| 368 net::UnescapeRule::Type unescape_rules, | 417 net::UnescapeRule::Type unescape_rules, |
| 369 url::Parsed* new_parsed, | 418 url::Parsed* new_parsed, |
| 370 size_t* prefix_end, | 419 size_t* prefix_end, |
| 371 size_t* offset_for_adjustment) { | 420 size_t* offset_for_adjustment) { |
| 372 std::vector<size_t> offsets; | 421 std::vector<size_t> offsets; |
| 373 if (offset_for_adjustment) | 422 if (offset_for_adjustment) |
| 374 offsets.push_back(*offset_for_adjustment); | 423 offsets.push_back(*offset_for_adjustment); |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 474 AppendFormattedComponent(spec, parsed.password, | 523 AppendFormattedComponent(spec, parsed.password, |
| 475 NonHostComponentTransform(unescape_rules), | 524 NonHostComponentTransform(unescape_rules), |
| 476 &url_string, &new_parsed->password, adjustments); | 525 &url_string, &new_parsed->password, adjustments); |
| 477 if (parsed.username.is_valid() || parsed.password.is_valid()) | 526 if (parsed.username.is_valid() || parsed.password.is_valid()) |
| 478 url_string.push_back('@'); | 527 url_string.push_back('@'); |
| 479 } | 528 } |
| 480 if (prefix_end) | 529 if (prefix_end) |
| 481 *prefix_end = static_cast<size_t>(url_string.length()); | 530 *prefix_end = static_cast<size_t>(url_string.length()); |
| 482 | 531 |
| 483 // Host. | 532 // Host. |
| 484 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(), | 533 bool trim_trivial_subdomains = |
| 534 (format_types & kFormatUrlExperimentalOmitTrivialSubdomains) != 0; |
| 535 AppendFormattedComponent(spec, parsed.host, |
| 536 HostComponentTransform(trim_trivial_subdomains), |
| 485 &url_string, &new_parsed->host, adjustments); | 537 &url_string, &new_parsed->host, adjustments); |
| 486 | 538 |
| 487 // Port. | 539 // Port. |
| 488 if (parsed.port.is_nonempty()) { | 540 if (parsed.port.is_nonempty()) { |
| 489 url_string.push_back(':'); | 541 url_string.push_back(':'); |
| 490 new_parsed->port.begin = url_string.length(); | 542 new_parsed->port.begin = url_string.length(); |
| 491 url_string.insert(url_string.end(), spec.begin() + parsed.port.begin, | 543 url_string.insert(url_string.end(), spec.begin() + parsed.port.begin, |
| 492 spec.begin() + parsed.port.end()); | 544 spec.begin() + parsed.port.end()); |
| 493 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | 545 new_parsed->port.len = url_string.length() - new_parsed->port.begin; |
| 494 } else { | 546 } else { |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 587 bool CanStripTrailingSlash(const GURL& url) { | 639 bool CanStripTrailingSlash(const GURL& url) { |
| 588 // Omit the path only for standard, non-file URLs with nothing but "/" after | 640 // Omit the path only for standard, non-file URLs with nothing but "/" after |
| 589 // the hostname. | 641 // the hostname. |
| 590 return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() && | 642 return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() && |
| 591 !url.has_query() && !url.has_ref() && url.path_piece() == "/"; | 643 !url.has_query() && !url.has_ref() && url.path_piece() == "/"; |
| 592 } | 644 } |
| 593 | 645 |
| 594 void AppendFormattedHost(const GURL& url, base::string16* output) { | 646 void AppendFormattedHost(const GURL& url, base::string16* output) { |
| 595 AppendFormattedComponent( | 647 AppendFormattedComponent( |
| 596 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host, | 648 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host, |
| 597 HostComponentTransform(), output, NULL, NULL); | 649 HostComponentTransform(false), output, nullptr, nullptr); |
| 598 } | 650 } |
| 599 | 651 |
| 600 base::string16 IDNToUnicode(base::StringPiece host) { | 652 base::string16 IDNToUnicode(base::StringPiece host) { |
| 601 return IDNToUnicodeWithAdjustments(host, nullptr); | 653 return IDNToUnicodeWithAdjustments(host, nullptr); |
| 602 } | 654 } |
| 603 | 655 |
| 604 base::string16 StripWWW(const base::string16& text) { | 656 base::string16 StripWWW(const base::string16& text) { |
| 605 const base::string16 www(base::ASCIIToUTF16("www.")); | 657 const base::string16 www(base::ASCIIToUTF16("www.")); |
| 606 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) | 658 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) |
| 607 ? text.substr(www.length()) : text; | 659 ? text.substr(www.length()) : text; |
| 608 } | 660 } |
| 609 | 661 |
| 610 base::string16 StripWWWFromHost(const GURL& url) { | 662 base::string16 StripWWWFromHost(const GURL& url) { |
| 611 DCHECK(url.is_valid()); | 663 DCHECK(url.is_valid()); |
| 612 return StripWWW(base::ASCIIToUTF16(url.host_piece())); | 664 return StripWWW(base::ASCIIToUTF16(url.host_piece())); |
| 613 } | 665 } |
| 614 | 666 |
| 615 } // namespace url_formatter | 667 } // namespace url_formatter |
| OLD | NEW |