Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(693)

Side by Side Diff: components/url_formatter/url_formatter.cc

Issue 2966233002: Omnibox UI Experiments: Strip trivial subdomains (Closed)
Patch Set: address one more comment Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_formatter/url_formatter.h" 5 #include "components/url_formatter/url_formatter.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <utility> 8 #include <utility>
9 #include <vector> 9 #include <vector>
10 10
11 #include "base/lazy_instance.h" 11 #include "base/lazy_instance.h"
12 #include "base/macros.h" 12 #include "base/macros.h"
13 #include "base/numerics/safe_conversions.h" 13 #include "base/numerics/safe_conversions.h"
14 #include "base/strings/string_piece.h" 14 #include "base/strings/string_piece.h"
15 #include "base/strings/string_tokenizer.h"
15 #include "base/strings/string_util.h" 16 #include "base/strings/string_util.h"
16 #include "base/strings/utf_offset_string_conversions.h" 17 #include "base/strings/utf_offset_string_conversions.h"
17 #include "base/strings/utf_string_conversions.h" 18 #include "base/strings/utf_string_conversions.h"
18 #include "base/threading/thread_local_storage.h" 19 #include "base/threading/thread_local_storage.h"
19 #include "components/url_formatter/idn_spoof_checker.h" 20 #include "components/url_formatter/idn_spoof_checker.h"
21 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
20 #include "third_party/icu/source/common/unicode/uidna.h" 22 #include "third_party/icu/source/common/unicode/uidna.h"
21 #include "third_party/icu/source/common/unicode/utypes.h" 23 #include "third_party/icu/source/common/unicode/utypes.h"
22 #include "url/gurl.h" 24 #include "url/gurl.h"
23 #include "url/third_party/mozilla/url_parse.h" 25 #include "url/third_party/mozilla/url_parse.h"
24 26
25 namespace url_formatter { 27 namespace url_formatter {
26 28
27 namespace { 29 namespace {
28 30
29 base::string16 IDNToUnicodeWithAdjustments( 31 base::string16 IDNToUnicodeWithAdjustments(
(...skipping 13 matching lines...) Expand all
43 const std::string& component_text, 45 const std::string& component_text,
44 base::OffsetAdjuster::Adjustments* adjustments) const = 0; 46 base::OffsetAdjuster::Adjustments* adjustments) const = 0;
45 47
46 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an 48 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an
47 // accessible copy constructor in order to call AppendFormattedComponent() 49 // accessible copy constructor in order to call AppendFormattedComponent()
48 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). 50 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).
49 }; 51 };
50 52
51 class HostComponentTransform : public AppendComponentTransform { 53 class HostComponentTransform : public AppendComponentTransform {
52 public: 54 public:
53 HostComponentTransform() {} 55 HostComponentTransform(bool trim_trivial_subdomains)
56 : trim_trivial_subdomains_(trim_trivial_subdomains) {}
54 57
55 private: 58 private:
56 base::string16 Execute( 59 base::string16 Execute(
57 const std::string& component_text, 60 const std::string& component_text,
58 base::OffsetAdjuster::Adjustments* adjustments) const override { 61 base::OffsetAdjuster::Adjustments* adjustments) const override {
59 return IDNToUnicodeWithAdjustments(component_text, adjustments); 62 if (!trim_trivial_subdomains_)
63 return IDNToUnicodeWithAdjustments(component_text, adjustments);
64
65 // Exclude the registry and domain from trivial subdomain stripping.
66 // To get the adjustment offset calculations correct, we need to transform
67 // the registry and domain portion of the host as well.
68 std::string domain_and_registry =
69 net::registry_controlled_domains::GetDomainAndRegistry(
70 component_text,
71 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
72
73 base::OffsetAdjuster::Adjustments trivial_subdomains_adjustments;
74 base::StringTokenizer tokenizer(
75 component_text.begin(),
76 component_text.end() - domain_and_registry.length(), ".");
77 tokenizer.set_options(base::StringTokenizer::RETURN_DELIMS);
78
79 std::string transformed_subdomain;
80 while (tokenizer.GetNext()) {
81 // Append delimiters and non-trivial subdomains to the new subdomain part.
82 if (tokenizer.token_is_delim() ||
83 (tokenizer.token() != "m" && tokenizer.token() != "www")) {
84 transformed_subdomain += tokenizer.token();
85 continue;
86 }
87
88 // We found a trivial subdomain, so we add an adjustment accounting for
89 // the subdomain and the following consumed delimiter.
90 size_t trivial_subdomain_begin =
91 tokenizer.token_begin() - component_text.begin();
92 trivial_subdomains_adjustments.push_back(base::OffsetAdjuster::Adjustment(
93 trivial_subdomain_begin, tokenizer.token().length() + 1, 0));
94
95 // Consume the next token, which must be a delimiter.
96 bool next_delimiter_found = tokenizer.GetNext();
97 DCHECK(next_delimiter_found);
98 DCHECK(tokenizer.token_is_delim());
99 }
100
101 base::string16 unicode_result = IDNToUnicodeWithAdjustments(
102 transformed_subdomain + domain_and_registry, adjustments);
103 base::OffsetAdjuster::MergeSequentialAdjustments(
104 trivial_subdomains_adjustments, adjustments);
105 return unicode_result;
60 } 106 }
107
108 bool trim_trivial_subdomains_;
61 }; 109 };
62 110
63 class NonHostComponentTransform : public AppendComponentTransform { 111 class NonHostComponentTransform : public AppendComponentTransform {
64 public: 112 public:
65 explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules) 113 explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules)
66 : unescape_rules_(unescape_rules) {} 114 : unescape_rules_(unescape_rules) {}
67 115
68 private: 116 private:
69 base::string16 Execute( 117 base::string16 Execute(
70 const std::string& component_text, 118 const std::string& component_text,
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after
355 403
// Flag values for FormatUrl()'s |format_types| argument. Each non-zero flag
// occupies its own bit so that flags can be combined with |.
const FormatUrlType kFormatUrlOmitNothing = 0;
const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;
const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;
const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;
// Union of the three flags defined above; the experimental flags below are
// not part of it.
const FormatUrlType kFormatUrlOmitAll =
    kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP |
    kFormatUrlOmitTrailingSlashOnBareHostname;
const FormatUrlType kFormatUrlExperimentalElideAfterHost = 1 << 3;
const FormatUrlType kFormatUrlExperimentalOmitHTTPS = 1 << 4;
// When set, FormatUrl() constructs its HostComponentTransform with trivial
// subdomain trimming enabled.
const FormatUrlType kFormatUrlExperimentalOmitTrivialSubdomains = 1 << 5;
365 414
366 base::string16 FormatUrl(const GURL& url, 415 base::string16 FormatUrl(const GURL& url,
367 FormatUrlTypes format_types, 416 FormatUrlTypes format_types,
368 net::UnescapeRule::Type unescape_rules, 417 net::UnescapeRule::Type unescape_rules,
369 url::Parsed* new_parsed, 418 url::Parsed* new_parsed,
370 size_t* prefix_end, 419 size_t* prefix_end,
371 size_t* offset_for_adjustment) { 420 size_t* offset_for_adjustment) {
372 std::vector<size_t> offsets; 421 std::vector<size_t> offsets;
373 if (offset_for_adjustment) 422 if (offset_for_adjustment)
374 offsets.push_back(*offset_for_adjustment); 423 offsets.push_back(*offset_for_adjustment);
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
474 AppendFormattedComponent(spec, parsed.password, 523 AppendFormattedComponent(spec, parsed.password,
475 NonHostComponentTransform(unescape_rules), 524 NonHostComponentTransform(unescape_rules),
476 &url_string, &new_parsed->password, adjustments); 525 &url_string, &new_parsed->password, adjustments);
477 if (parsed.username.is_valid() || parsed.password.is_valid()) 526 if (parsed.username.is_valid() || parsed.password.is_valid())
478 url_string.push_back('@'); 527 url_string.push_back('@');
479 } 528 }
480 if (prefix_end) 529 if (prefix_end)
481 *prefix_end = static_cast<size_t>(url_string.length()); 530 *prefix_end = static_cast<size_t>(url_string.length());
482 531
483 // Host. 532 // Host.
484 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(), 533 bool trim_trivial_subdomains =
534 (format_types & kFormatUrlExperimentalOmitTrivialSubdomains) != 0;
535 AppendFormattedComponent(spec, parsed.host,
536 HostComponentTransform(trim_trivial_subdomains),
485 &url_string, &new_parsed->host, adjustments); 537 &url_string, &new_parsed->host, adjustments);
486 538
487 // Port. 539 // Port.
488 if (parsed.port.is_nonempty()) { 540 if (parsed.port.is_nonempty()) {
489 url_string.push_back(':'); 541 url_string.push_back(':');
490 new_parsed->port.begin = url_string.length(); 542 new_parsed->port.begin = url_string.length();
491 url_string.insert(url_string.end(), spec.begin() + parsed.port.begin, 543 url_string.insert(url_string.end(), spec.begin() + parsed.port.begin,
492 spec.begin() + parsed.port.end()); 544 spec.begin() + parsed.port.end());
493 new_parsed->port.len = url_string.length() - new_parsed->port.begin; 545 new_parsed->port.len = url_string.length() - new_parsed->port.begin;
494 } else { 546 } else {
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
587 bool CanStripTrailingSlash(const GURL& url) { 639 bool CanStripTrailingSlash(const GURL& url) {
588 // Omit the path only for standard, non-file URLs with nothing but "/" after 640 // Omit the path only for standard, non-file URLs with nothing but "/" after
589 // the hostname. 641 // the hostname.
590 return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() && 642 return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() &&
591 !url.has_query() && !url.has_ref() && url.path_piece() == "/"; 643 !url.has_query() && !url.has_ref() && url.path_piece() == "/";
592 } 644 }
593 645
594 void AppendFormattedHost(const GURL& url, base::string16* output) { 646 void AppendFormattedHost(const GURL& url, base::string16* output) {
595 AppendFormattedComponent( 647 AppendFormattedComponent(
596 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host, 648 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host,
597 HostComponentTransform(), output, NULL, NULL); 649 HostComponentTransform(false), output, nullptr, nullptr);
598 } 650 }
599 651
600 base::string16 IDNToUnicode(base::StringPiece host) { 652 base::string16 IDNToUnicode(base::StringPiece host) {
601 return IDNToUnicodeWithAdjustments(host, nullptr); 653 return IDNToUnicodeWithAdjustments(host, nullptr);
602 } 654 }
603 655
604 base::string16 StripWWW(const base::string16& text) { 656 base::string16 StripWWW(const base::string16& text) {
605 const base::string16 www(base::ASCIIToUTF16("www.")); 657 const base::string16 www(base::ASCIIToUTF16("www."));
606 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) 658 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)
607 ? text.substr(www.length()) : text; 659 ? text.substr(www.length()) : text;
608 } 660 }
609 661
610 base::string16 StripWWWFromHost(const GURL& url) { 662 base::string16 StripWWWFromHost(const GURL& url) {
611 DCHECK(url.is_valid()); 663 DCHECK(url.is_valid());
612 return StripWWW(base::ASCIIToUTF16(url.host_piece())); 664 return StripWWW(base::ASCIIToUTF16(url.host_piece()));
613 } 665 }
614 666
615 } // namespace url_formatter 667 } // namespace url_formatter
OLDNEW
« no previous file with comments | « components/url_formatter/url_formatter.h ('k') | components/url_formatter/url_formatter_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698