Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <utility> | 8 #include <utility> |
| 9 #include <vector> | 9 #include <vector> |
| 10 | 10 |
| 11 #include "base/lazy_instance.h" | 11 #include "base/lazy_instance.h" |
| 12 #include "base/macros.h" | 12 #include "base/macros.h" |
| 13 #include "base/numerics/safe_conversions.h" | 13 #include "base/numerics/safe_conversions.h" |
| 14 #include "base/strings/string_piece.h" | 14 #include "base/strings/string_piece.h" |
| 15 #include "base/strings/string_tokenizer.h" | |
| 15 #include "base/strings/string_util.h" | 16 #include "base/strings/string_util.h" |
| 16 #include "base/strings/utf_offset_string_conversions.h" | 17 #include "base/strings/utf_offset_string_conversions.h" |
| 17 #include "base/strings/utf_string_conversions.h" | 18 #include "base/strings/utf_string_conversions.h" |
| 18 #include "base/threading/thread_local_storage.h" | 19 #include "base/threading/thread_local_storage.h" |
| 19 #include "components/url_formatter/idn_spoof_checker.h" | 20 #include "components/url_formatter/idn_spoof_checker.h" |
| 21 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" | |
| 20 #include "third_party/icu/source/common/unicode/uidna.h" | 22 #include "third_party/icu/source/common/unicode/uidna.h" |
| 21 #include "third_party/icu/source/common/unicode/utypes.h" | 23 #include "third_party/icu/source/common/unicode/utypes.h" |
| 22 #include "url/gurl.h" | 24 #include "url/gurl.h" |
| 23 #include "url/third_party/mozilla/url_parse.h" | 25 #include "url/third_party/mozilla/url_parse.h" |
| 24 | 26 |
| 25 namespace url_formatter { | 27 namespace url_formatter { |
| 26 | 28 |
| 27 namespace { | 29 namespace { |
| 28 | 30 |
| 29 base::string16 IDNToUnicodeWithAdjustments( | 31 base::string16 IDNToUnicodeWithAdjustments( |
| (...skipping 13 matching lines...) | |
| 43 const std::string& component_text, | 45 const std::string& component_text, |
| 44 base::OffsetAdjuster::Adjustments* adjustments) const = 0; | 46 base::OffsetAdjuster::Adjustments* adjustments) const = 0; |
| 45 | 47 |
| 46 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an | 48 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an |
| 47 // accessible copy constructor in order to call AppendFormattedComponent() | 49 // accessible copy constructor in order to call AppendFormattedComponent() |
| 48 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). | 50 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). |
| 49 }; | 51 }; |
| 50 | 52 |
| 51 class HostComponentTransform : public AppendComponentTransform { | 53 class HostComponentTransform : public AppendComponentTransform { |
| 52 public: | 54 public: |
| 53 HostComponentTransform() {} | 55 HostComponentTransform(bool trim_trivial_subdomains) |
| 56 : trim_trivial_subdomains_(trim_trivial_subdomains) {} | |
| 54 | 57 |
| 55 private: | 58 private: |
| 56 base::string16 Execute( | 59 base::string16 Execute( |
| 57 const std::string& component_text, | 60 const std::string& component_text, |
| 58 base::OffsetAdjuster::Adjustments* adjustments) const override { | 61 base::OffsetAdjuster::Adjustments* adjustments) const override { |
| 59 return IDNToUnicodeWithAdjustments(component_text, adjustments); | 62 if (!trim_trivial_subdomains_) |
| 63 return IDNToUnicodeWithAdjustments(component_text, adjustments); | |
| 64 | |
| 65 // Exclude the registry and domain from trivial subdomain stripping. | |
| 66 // To get the adjustment offset calculations correct, we need to transform | |
| 67 // the registry and domain portion of the host as well. | |
| 68 std::string domain_and_registry = | |
| 69 net::registry_controlled_domains::GetDomainAndRegistry( | |
| 70 component_text, | |
| 71 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); | |
| 72 | |
| 73 base::OffsetAdjuster::Adjustments trivial_subdomains_adjustments; | |
| 74 base::StringTokenizer tokenizer( | |
| 75 component_text.begin(), | |
| 76 component_text.end() - domain_and_registry.length(), "."); | |
| 77 tokenizer.set_options(base::StringTokenizer::RETURN_DELIMS); | |
| 78 | |
| 79 std::string transformed_subdomain; | |
| 80 while (tokenizer.GetNext()) { | |
| 81 // Append delimiters and non-trivial subdomains to the new subdomain part. | |
| 82 if (tokenizer.token_is_delim() || | |
| 83 (tokenizer.token() != "m" && tokenizer.token() != "www")) { | |
| 84 transformed_subdomain += tokenizer.token(); | |
| 85 continue; | |
| 86 } | |
| 87 | |
| 88 // We found a trivial subdomain, so we add an adjustment accounting for | |
| 89 // the subdomain and the following consumed delimiter. | |
| 90 size_t trivial_subdomain_begin = | |
| 91 tokenizer.token_begin() - component_text.begin(); | |
| 92 trivial_subdomains_adjustments.push_back(base::OffsetAdjuster::Adjustment( | |
| 93 trivial_subdomain_begin, tokenizer.token().length() + 1, 0)); | |
| 94 | |
| 95 // Consume the next token, which must be a delimiter. | |
| 96 bool next_delimiter_found = tokenizer.GetNext(); | |
| 97 DCHECK(next_delimiter_found); | |
| 98 DCHECK(tokenizer.token_is_delim()); | |
| 99 } | |
| 100 | |
| 101 std::string new_component_text = | |
| 102 transformed_subdomain + domain_and_registry; | |
|
Peter Kasting
2017/07/06 16:31:47
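Nit: Or optionally just inline this into the state… [comment preview truncated by the review page; presumably "statement below", i.e. the following IDNToUnicodeWithAdjustments() call]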
tommycli
2017/07/06 17:24:59
Done.
| |
| 103 base::string16 unicode_result = | |
| 104 IDNToUnicodeWithAdjustments(new_component_text, adjustments); | |
| 105 | |
| 106 base::OffsetAdjuster::MergeSequentialAdjustments( | |
| 107 trivial_subdomains_adjustments, adjustments); | |
| 108 return unicode_result; | |
| 60 } | 109 } |
| 110 | |
| 111 bool trim_trivial_subdomains_; | |
| 61 }; | 112 }; |
| 62 | 113 |
| 63 class NonHostComponentTransform : public AppendComponentTransform { | 114 class NonHostComponentTransform : public AppendComponentTransform { |
| 64 public: | 115 public: |
| 65 explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules) | 116 explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules) |
| 66 : unescape_rules_(unescape_rules) {} | 117 : unescape_rules_(unescape_rules) {} |
| 67 | 118 |
| 68 private: | 119 private: |
| 69 base::string16 Execute( | 120 base::string16 Execute( |
| 70 const std::string& component_text, | 121 const std::string& component_text, |
| (...skipping 284 matching lines...) | |
| 355 | 406 |
| 356 const FormatUrlType kFormatUrlOmitNothing = 0; | 407 const FormatUrlType kFormatUrlOmitNothing = 0; |
| 357 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; | 408 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
| 358 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; | 409 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
| 359 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; | 410 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; |
| 360 const FormatUrlType kFormatUrlOmitAll = | 411 const FormatUrlType kFormatUrlOmitAll = |
| 361 kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP | | 412 kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP | |
| 362 kFormatUrlOmitTrailingSlashOnBareHostname; | 413 kFormatUrlOmitTrailingSlashOnBareHostname; |
| 363 const FormatUrlType kFormatUrlExperimentalElideAfterHost = 1 << 3; | 414 const FormatUrlType kFormatUrlExperimentalElideAfterHost = 1 << 3; |
| 364 const FormatUrlType kFormatUrlExperimentalOmitHTTPS = 1 << 4; | 415 const FormatUrlType kFormatUrlExperimentalOmitHTTPS = 1 << 4; |
| 416 const FormatUrlType kFormatUrlExperimentalOmitTrivialSubdomains = 1 << 5; | |
| 365 | 417 |
| 366 base::string16 FormatUrl(const GURL& url, | 418 base::string16 FormatUrl(const GURL& url, |
| 367 FormatUrlTypes format_types, | 419 FormatUrlTypes format_types, |
| 368 net::UnescapeRule::Type unescape_rules, | 420 net::UnescapeRule::Type unescape_rules, |
| 369 url::Parsed* new_parsed, | 421 url::Parsed* new_parsed, |
| 370 size_t* prefix_end, | 422 size_t* prefix_end, |
| 371 size_t* offset_for_adjustment) { | 423 size_t* offset_for_adjustment) { |
| 372 std::vector<size_t> offsets; | 424 std::vector<size_t> offsets; |
| 373 if (offset_for_adjustment) | 425 if (offset_for_adjustment) |
| 374 offsets.push_back(*offset_for_adjustment); | 426 offsets.push_back(*offset_for_adjustment); |
| (...skipping 99 matching lines...) | |
| 474 AppendFormattedComponent(spec, parsed.password, | 526 AppendFormattedComponent(spec, parsed.password, |
| 475 NonHostComponentTransform(unescape_rules), | 527 NonHostComponentTransform(unescape_rules), |
| 476 &url_string, &new_parsed->password, adjustments); | 528 &url_string, &new_parsed->password, adjustments); |
| 477 if (parsed.username.is_valid() || parsed.password.is_valid()) | 529 if (parsed.username.is_valid() || parsed.password.is_valid()) |
| 478 url_string.push_back('@'); | 530 url_string.push_back('@'); |
| 479 } | 531 } |
| 480 if (prefix_end) | 532 if (prefix_end) |
| 481 *prefix_end = static_cast<size_t>(url_string.length()); | 533 *prefix_end = static_cast<size_t>(url_string.length()); |
| 482 | 534 |
| 483 // Host. | 535 // Host. |
| 484 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(), | 536 bool trim_trivial_subdomains = |
| 537 (format_types & kFormatUrlExperimentalOmitTrivialSubdomains) != 0; | |
| 538 AppendFormattedComponent(spec, parsed.host, | |
| 539 HostComponentTransform(trim_trivial_subdomains), | |
| 485 &url_string, &new_parsed->host, adjustments); | 540 &url_string, &new_parsed->host, adjustments); |
| 486 | 541 |
| 487 // Port. | 542 // Port. |
| 488 if (parsed.port.is_nonempty()) { | 543 if (parsed.port.is_nonempty()) { |
| 489 url_string.push_back(':'); | 544 url_string.push_back(':'); |
| 490 new_parsed->port.begin = url_string.length(); | 545 new_parsed->port.begin = url_string.length(); |
| 491 url_string.insert(url_string.end(), spec.begin() + parsed.port.begin, | 546 url_string.insert(url_string.end(), spec.begin() + parsed.port.begin, |
| 492 spec.begin() + parsed.port.end()); | 547 spec.begin() + parsed.port.end()); |
| 493 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | 548 new_parsed->port.len = url_string.length() - new_parsed->port.begin; |
| 494 } else { | 549 } else { |
| (...skipping 92 matching lines...) | |
| 587 bool CanStripTrailingSlash(const GURL& url) { | 642 bool CanStripTrailingSlash(const GURL& url) { |
| 588 // Omit the path only for standard, non-file URLs with nothing but "/" after | 643 // Omit the path only for standard, non-file URLs with nothing but "/" after |
| 589 // the hostname. | 644 // the hostname. |
| 590 return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() && | 645 return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() && |
| 591 !url.has_query() && !url.has_ref() && url.path_piece() == "/"; | 646 !url.has_query() && !url.has_ref() && url.path_piece() == "/"; |
| 592 } | 647 } |
| 593 | 648 |
| 594 void AppendFormattedHost(const GURL& url, base::string16* output) { | 649 void AppendFormattedHost(const GURL& url, base::string16* output) { |
| 595 AppendFormattedComponent( | 650 AppendFormattedComponent( |
| 596 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host, | 651 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host, |
| 597 HostComponentTransform(), output, NULL, NULL); | 652 HostComponentTransform(false), output, nullptr, nullptr); |
| 598 } | 653 } |
| 599 | 654 |
| 600 base::string16 IDNToUnicode(base::StringPiece host) { | 655 base::string16 IDNToUnicode(base::StringPiece host) { |
| 601 return IDNToUnicodeWithAdjustments(host, nullptr); | 656 return IDNToUnicodeWithAdjustments(host, nullptr); |
| 602 } | 657 } |
| 603 | 658 |
| 604 base::string16 StripWWW(const base::string16& text) { | 659 base::string16 StripWWW(const base::string16& text) { |
| 605 const base::string16 www(base::ASCIIToUTF16("www.")); | 660 const base::string16 www(base::ASCIIToUTF16("www.")); |
| 606 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) | 661 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) |
| 607 ? text.substr(www.length()) : text; | 662 ? text.substr(www.length()) : text; |
| 608 } | 663 } |
| 609 | 664 |
| 610 base::string16 StripWWWFromHost(const GURL& url) { | 665 base::string16 StripWWWFromHost(const GURL& url) { |
| 611 DCHECK(url.is_valid()); | 666 DCHECK(url.is_valid()); |
| 612 return StripWWW(base::ASCIIToUTF16(url.host_piece())); | 667 return StripWWW(base::ASCIIToUTF16(url.host_piece())); |
| 613 } | 668 } |
| 614 | 669 |
| 615 } // namespace url_formatter | 670 } // namespace url_formatter |
| OLD | NEW |