Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(120)

Side by Side Diff: components/url_formatter/url_formatter.cc

Issue 2966233002: Omnibox UI Experiments: Strip trivial subdomains (Closed)
Patch Set: fix Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_formatter/url_formatter.h" 5 #include "components/url_formatter/url_formatter.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <utility> 8 #include <utility>
9 #include <vector> 9 #include <vector>
10 10
11 #include "base/lazy_instance.h" 11 #include "base/lazy_instance.h"
12 #include "base/macros.h" 12 #include "base/macros.h"
13 #include "base/numerics/safe_conversions.h" 13 #include "base/numerics/safe_conversions.h"
14 #include "base/strings/string_piece.h" 14 #include "base/strings/string_piece.h"
15 #include "base/strings/string_tokenizer.h"
15 #include "base/strings/string_util.h" 16 #include "base/strings/string_util.h"
16 #include "base/strings/utf_offset_string_conversions.h" 17 #include "base/strings/utf_offset_string_conversions.h"
17 #include "base/strings/utf_string_conversions.h" 18 #include "base/strings/utf_string_conversions.h"
18 #include "base/threading/thread_local_storage.h" 19 #include "base/threading/thread_local_storage.h"
19 #include "components/url_formatter/idn_spoof_checker.h" 20 #include "components/url_formatter/idn_spoof_checker.h"
21 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
20 #include "third_party/icu/source/common/unicode/uidna.h" 22 #include "third_party/icu/source/common/unicode/uidna.h"
21 #include "third_party/icu/source/common/unicode/utypes.h" 23 #include "third_party/icu/source/common/unicode/utypes.h"
22 #include "url/gurl.h" 24 #include "url/gurl.h"
23 #include "url/third_party/mozilla/url_parse.h" 25 #include "url/third_party/mozilla/url_parse.h"
24 26
25 namespace url_formatter { 27 namespace url_formatter {
26 28
27 namespace { 29 namespace {
28 30
29 base::string16 IDNToUnicodeWithAdjustments( 31 base::string16 IDNToUnicodeWithAdjustments(
(...skipping 13 matching lines...) Expand all
43 const std::string& component_text, 45 const std::string& component_text,
44 base::OffsetAdjuster::Adjustments* adjustments) const = 0; 46 base::OffsetAdjuster::Adjustments* adjustments) const = 0;
45 47
46 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an 48 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an
47 // accessible copy constructor in order to call AppendFormattedComponent() 49 // accessible copy constructor in order to call AppendFormattedComponent()
48 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). 50 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).
49 }; 51 };
50 52
51 class HostComponentTransform : public AppendComponentTransform { 53 class HostComponentTransform : public AppendComponentTransform {
52 public: 54 public:
53 HostComponentTransform() {} 55 HostComponentTransform(bool trim_trivial_subdomains)
56 : trim_trivial_subdomains_(trim_trivial_subdomains) {}
54 57
55 private: 58 private:
56 base::string16 Execute( 59 base::string16 Execute(
57 const std::string& component_text, 60 const std::string& component_text,
58 base::OffsetAdjuster::Adjustments* adjustments) const override { 61 base::OffsetAdjuster::Adjustments* adjustments) const override {
59 return IDNToUnicodeWithAdjustments(component_text, adjustments); 62 if (!trim_trivial_subdomains_)
63 return IDNToUnicodeWithAdjustments(component_text, adjustments);
64
65 // Exclude the registry and domain from trivial subdomain stripping.
66 // To get the adjustment offset calculations correct, we need to transform
67 // the registry and domain portion of the host as well.
68 std::string domain_and_registry =
69 net::registry_controlled_domains::GetDomainAndRegistry(
70 component_text,
71 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
72
73 base::OffsetAdjuster::Adjustments trivial_subdomains_adjustments;
74 base::StringTokenizer tokenizer(
75 component_text.begin(),
76 component_text.end() - domain_and_registry.length(), ".");
77 tokenizer.set_options(base::StringTokenizer::RETURN_DELIMS);
78
79 std::string transformed_subdomain;
80 while (tokenizer.GetNext()) {
81 // Append delimiters and non-trivial subdomains to the new subdomain part.
82 if (tokenizer.token_is_delim() ||
83 (tokenizer.token() != "m" && tokenizer.token() != "www")) {
84 transformed_subdomain += tokenizer.token();
85 continue;
86 }
87
88 // We found a trivial subdomain, so we add an adjustment accounting for
89 // the subdomain and the following consumed delimiter.
90 size_t trivial_subdomain_begin =
91 tokenizer.token_begin() - component_text.begin();
92 trivial_subdomains_adjustments.push_back(base::OffsetAdjuster::Adjustment(
93 trivial_subdomain_begin, tokenizer.token().length() + 1, 0));
94
95 // Consume the next token, which must be a delimiter.
96 bool next_delimiter_found = tokenizer.GetNext();
97 DCHECK(next_delimiter_found);
98 DCHECK(tokenizer.token_is_delim());
99 }
100
101 std::string new_component_text =
102 transformed_subdomain + domain_and_registry;
Peter Kasting 2017/07/06 16:31:47 Nit: Or optionally just inline this into the state… [comment preview truncated]
tommycli 2017/07/06 17:24:59 Done.
103 base::string16 unicode_result =
104 IDNToUnicodeWithAdjustments(new_component_text, adjustments);
105
106 base::OffsetAdjuster::MergeSequentialAdjustments(
107 trivial_subdomains_adjustments, adjustments);
108 return unicode_result;
60 } 109 }
110
111 bool trim_trivial_subdomains_;
61 }; 112 };
62 113
63 class NonHostComponentTransform : public AppendComponentTransform { 114 class NonHostComponentTransform : public AppendComponentTransform {
64 public: 115 public:
65 explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules) 116 explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules)
66 : unescape_rules_(unescape_rules) {} 117 : unescape_rules_(unescape_rules) {}
67 118
68 private: 119 private:
69 base::string16 Execute( 120 base::string16 Execute(
70 const std::string& component_text, 121 const std::string& component_text,
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after
355 406
356 const FormatUrlType kFormatUrlOmitNothing = 0; 407 const FormatUrlType kFormatUrlOmitNothing = 0;
357 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; 408 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;
358 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; 409 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;
359 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; 410 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;
360 const FormatUrlType kFormatUrlOmitAll = 411 const FormatUrlType kFormatUrlOmitAll =
361 kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP | 412 kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP |
362 kFormatUrlOmitTrailingSlashOnBareHostname; 413 kFormatUrlOmitTrailingSlashOnBareHostname;
363 const FormatUrlType kFormatUrlExperimentalElideAfterHost = 1 << 3; 414 const FormatUrlType kFormatUrlExperimentalElideAfterHost = 1 << 3;
364 const FormatUrlType kFormatUrlExperimentalOmitHTTPS = 1 << 4; 415 const FormatUrlType kFormatUrlExperimentalOmitHTTPS = 1 << 4;
416 const FormatUrlType kFormatUrlExperimentalOmitTrivialSubdomains = 1 << 5;
365 417
366 base::string16 FormatUrl(const GURL& url, 418 base::string16 FormatUrl(const GURL& url,
367 FormatUrlTypes format_types, 419 FormatUrlTypes format_types,
368 net::UnescapeRule::Type unescape_rules, 420 net::UnescapeRule::Type unescape_rules,
369 url::Parsed* new_parsed, 421 url::Parsed* new_parsed,
370 size_t* prefix_end, 422 size_t* prefix_end,
371 size_t* offset_for_adjustment) { 423 size_t* offset_for_adjustment) {
372 std::vector<size_t> offsets; 424 std::vector<size_t> offsets;
373 if (offset_for_adjustment) 425 if (offset_for_adjustment)
374 offsets.push_back(*offset_for_adjustment); 426 offsets.push_back(*offset_for_adjustment);
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
474 AppendFormattedComponent(spec, parsed.password, 526 AppendFormattedComponent(spec, parsed.password,
475 NonHostComponentTransform(unescape_rules), 527 NonHostComponentTransform(unescape_rules),
476 &url_string, &new_parsed->password, adjustments); 528 &url_string, &new_parsed->password, adjustments);
477 if (parsed.username.is_valid() || parsed.password.is_valid()) 529 if (parsed.username.is_valid() || parsed.password.is_valid())
478 url_string.push_back('@'); 530 url_string.push_back('@');
479 } 531 }
480 if (prefix_end) 532 if (prefix_end)
481 *prefix_end = static_cast<size_t>(url_string.length()); 533 *prefix_end = static_cast<size_t>(url_string.length());
482 534
483 // Host. 535 // Host.
484 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(), 536 bool trim_trivial_subdomains =
537 (format_types & kFormatUrlExperimentalOmitTrivialSubdomains) != 0;
538 AppendFormattedComponent(spec, parsed.host,
539 HostComponentTransform(trim_trivial_subdomains),
485 &url_string, &new_parsed->host, adjustments); 540 &url_string, &new_parsed->host, adjustments);
486 541
487 // Port. 542 // Port.
488 if (parsed.port.is_nonempty()) { 543 if (parsed.port.is_nonempty()) {
489 url_string.push_back(':'); 544 url_string.push_back(':');
490 new_parsed->port.begin = url_string.length(); 545 new_parsed->port.begin = url_string.length();
491 url_string.insert(url_string.end(), spec.begin() + parsed.port.begin, 546 url_string.insert(url_string.end(), spec.begin() + parsed.port.begin,
492 spec.begin() + parsed.port.end()); 547 spec.begin() + parsed.port.end());
493 new_parsed->port.len = url_string.length() - new_parsed->port.begin; 548 new_parsed->port.len = url_string.length() - new_parsed->port.begin;
494 } else { 549 } else {
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
587 bool CanStripTrailingSlash(const GURL& url) { 642 bool CanStripTrailingSlash(const GURL& url) {
588 // Omit the path only for standard, non-file URLs with nothing but "/" after 643 // Omit the path only for standard, non-file URLs with nothing but "/" after
589 // the hostname. 644 // the hostname.
590 return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() && 645 return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() &&
591 !url.has_query() && !url.has_ref() && url.path_piece() == "/"; 646 !url.has_query() && !url.has_ref() && url.path_piece() == "/";
592 } 647 }
593 648
594 void AppendFormattedHost(const GURL& url, base::string16* output) { 649 void AppendFormattedHost(const GURL& url, base::string16* output) {
595 AppendFormattedComponent( 650 AppendFormattedComponent(
596 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host, 651 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host,
597 HostComponentTransform(), output, NULL, NULL); 652 HostComponentTransform(false), output, nullptr, nullptr);
598 } 653 }
599 654
600 base::string16 IDNToUnicode(base::StringPiece host) { 655 base::string16 IDNToUnicode(base::StringPiece host) {
601 return IDNToUnicodeWithAdjustments(host, nullptr); 656 return IDNToUnicodeWithAdjustments(host, nullptr);
602 } 657 }
603 658
604 base::string16 StripWWW(const base::string16& text) { 659 base::string16 StripWWW(const base::string16& text) {
605 const base::string16 www(base::ASCIIToUTF16("www.")); 660 const base::string16 www(base::ASCIIToUTF16("www."));
606 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) 661 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)
607 ? text.substr(www.length()) : text; 662 ? text.substr(www.length()) : text;
608 } 663 }
609 664
610 base::string16 StripWWWFromHost(const GURL& url) { 665 base::string16 StripWWWFromHost(const GURL& url) {
611 DCHECK(url.is_valid()); 666 DCHECK(url.is_valid());
612 return StripWWW(base::ASCIIToUTF16(url.host_piece())); 667 return StripWWW(base::ASCIIToUTF16(url.host_piece()));
613 } 668 }
614 669
615 } // namespace url_formatter 670 } // namespace url_formatter
OLD | NEW
« no previous file with comments | « components/url_formatter/url_formatter.h ('k') | components/url_formatter/url_formatter_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698