Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(16)

Side by Side Diff: components/url_formatter/url_formatter.cc

Issue 2966233002: Omnibox UI Experiments: Strip trivial subdomains (Closed)
Patch Set: make cast explicit for windows Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_formatter/url_formatter.h" 5 #include "components/url_formatter/url_formatter.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <utility> 8 #include <utility>
9 #include <vector> 9 #include <vector>
10 10
11 #include "base/lazy_instance.h" 11 #include "base/lazy_instance.h"
12 #include "base/macros.h" 12 #include "base/macros.h"
13 #include "base/numerics/safe_conversions.h" 13 #include "base/numerics/safe_conversions.h"
14 #include "base/strings/string_piece.h" 14 #include "base/strings/string_piece.h"
15 #include "base/strings/string_tokenizer.h"
15 #include "base/strings/string_util.h" 16 #include "base/strings/string_util.h"
16 #include "base/strings/utf_offset_string_conversions.h" 17 #include "base/strings/utf_offset_string_conversions.h"
17 #include "base/strings/utf_string_conversions.h" 18 #include "base/strings/utf_string_conversions.h"
18 #include "base/threading/thread_local_storage.h" 19 #include "base/threading/thread_local_storage.h"
19 #include "components/url_formatter/idn_spoof_checker.h" 20 #include "components/url_formatter/idn_spoof_checker.h"
21 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
20 #include "third_party/icu/source/common/unicode/uidna.h" 22 #include "third_party/icu/source/common/unicode/uidna.h"
21 #include "third_party/icu/source/common/unicode/utypes.h" 23 #include "third_party/icu/source/common/unicode/utypes.h"
22 #include "url/gurl.h" 24 #include "url/gurl.h"
23 #include "url/third_party/mozilla/url_parse.h" 25 #include "url/third_party/mozilla/url_parse.h"
24 26
25 namespace url_formatter { 27 namespace url_formatter {
26 28
27 namespace { 29 namespace {
28 30
29 base::string16 IDNToUnicodeWithAdjustments( 31 base::string16 IDNToUnicodeWithAdjustments(
30 base::StringPiece host, 32 base::StringPiece host,
31 base::OffsetAdjuster::Adjustments* adjustments); 33 base::OffsetAdjuster::Adjustments* adjustments);
32 bool IDNToUnicodeOneComponent(const base::char16* comp, 34 bool IDNToUnicodeOneComponent(const base::char16* comp,
33 size_t comp_len, 35 size_t comp_len,
34 bool is_tld_ascii, 36 bool is_tld_ascii,
35 base::string16* out); 37 base::string16* out);
36 38
37 class AppendComponentTransform { 39 class AppendComponentTransform {
38 public: 40 public:
39 AppendComponentTransform() {} 41 AppendComponentTransform() {}
40 virtual ~AppendComponentTransform() {} 42 virtual ~AppendComponentTransform() {}
41 43
42 virtual base::string16 Execute( 44 virtual base::string16 Execute(
43 const std::string& component_text, 45 const std::string& component_text,
44 base::OffsetAdjuster::Adjustments* adjustments) const = 0; 46 base::OffsetAdjuster::Adjustments* adjustments) const = 0;
45 47
46 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an 48 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an
47 // accessible copy constructor in order to call AppendFormattedComponent() 49 // accessible copy constructor in order to call AppendFormattedComponent()
48 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). 50 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).
Peter Kasting 2017/07/06 06:01:21 FWIW, I think we could probably replace this comme
tommycli 2017/07/06 16:25:46 Acknowledged.
49 }; 51 };
50 52
51 class HostComponentTransform : public AppendComponentTransform { 53 class HostComponentTransform : public AppendComponentTransform {
52 public: 54 public:
53 HostComponentTransform() {} 55 HostComponentTransform(bool trim_trivial_subdomains)
56 : trim_trivial_subdomains_(trim_trivial_subdomains) {}
54 57
55 private: 58 private:
56 base::string16 Execute( 59 base::string16 Execute(
57 const std::string& component_text, 60 const std::string& component_text,
58 base::OffsetAdjuster::Adjustments* adjustments) const override { 61 base::OffsetAdjuster::Adjustments* adjustments) const override {
59 return IDNToUnicodeWithAdjustments(component_text, adjustments); 62 if (!trim_trivial_subdomains_)
63 return IDNToUnicodeWithAdjustments(component_text, adjustments);
64
65 // Exclude the registry and domain from trivial subdomain stripping.
66 // To get the adjustment offset calculations correct, we need to transform
67 // the registry and domain portion of the host as well.
68 std::string domain_and_registry =
69 net::registry_controlled_domains::GetDomainAndRegistry(
70 component_text,
71 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
72
73 base::OffsetAdjuster::Adjustments trivial_subdomains_adjustments;
74 base::StringTokenizer t(component_text.begin(),
Peter Kasting 2017/07/06 06:01:21 Nit: t -> tokenizer (avoid abbreviation in general
tommycli 2017/07/06 16:25:46 Done.
75 component_text.end() - domain_and_registry.length(),
76 ".");
77 t.set_options(base::StringTokenizer::RETURN_DELIMS);
78
79 std::string new_subdomain_string;
Peter Kasting 2017/07/06 06:01:21 Nit: |transformed_subdomain|?
tommycli 2017/07/06 16:25:46 Done.
80 size_t offset = 0;
81 while (t.GetNext()) {
82 // Append delimiters and non-trivial subdomains to the new subdomain part.
83 if (t.token_is_delim() || (t.token() != "m" && t.token() != "www")) {
84 new_subdomain_string.append(t.token());
Peter Kasting 2017/07/06 06:01:21 Nit: append() is fine, I find += a little more idi
tommycli 2017/07/06 16:25:46 Done. Ah, I did not realize that string had operato
85 offset += t.token().length();
86 continue;
87 }
88
89 // When we find a trivial subdomain, we simply do not append to
90 // |new_subdomain_string|. We also consume the next token, which must be
91 // a delimiter.
92 size_t trivial_subdomain_length = t.token().length();
93 bool next_delimiter_found = t.GetNext();
94 DCHECK(next_delimiter_found);
95 DCHECK(t.token_is_delim());
96
97 // Add an adjustment accounting for the consumed subdomain and delimiter.
98 trivial_subdomains_adjustments.push_back(base::OffsetAdjuster::Adjustment(
99 offset, trivial_subdomain_length + 1, 0));
100 offset += t.token().length() + 1;
Peter Kasting 2017/07/06 06:01:21 Wait, is this right? I think t.token() is pointin
tommycli 2017/07/06 16:25:46 Done. Good suggestion on re-using the tokenizer it
Peter Kasting 2017/07/06 16:31:47 Yeah, the new version feels a lot cleaner to me no
101 }
102
103 std::string new_component_text = new_subdomain_string + domain_and_registry;
104 base::string16 unicode_result =
105 IDNToUnicodeWithAdjustments(new_component_text, adjustments);
106
107 base::OffsetAdjuster::MergeSequentialAdjustments(
108 trivial_subdomains_adjustments, adjustments);
109 return unicode_result;
60 } 110 }
111
112 bool trim_trivial_subdomains_;
61 }; 113 };
62 114
63 class NonHostComponentTransform : public AppendComponentTransform { 115 class NonHostComponentTransform : public AppendComponentTransform {
64 public: 116 public:
65 explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules) 117 explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules)
66 : unescape_rules_(unescape_rules) {} 118 : unescape_rules_(unescape_rules) {}
67 119
68 private: 120 private:
69 base::string16 Execute( 121 base::string16 Execute(
70 const std::string& component_text, 122 const std::string& component_text,
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after
355 407
356 const FormatUrlType kFormatUrlOmitNothing = 0; 408 const FormatUrlType kFormatUrlOmitNothing = 0;
357 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; 409 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;
358 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; 410 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;
359 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; 411 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;
360 const FormatUrlType kFormatUrlOmitAll = 412 const FormatUrlType kFormatUrlOmitAll =
361 kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP | 413 kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP |
362 kFormatUrlOmitTrailingSlashOnBareHostname; 414 kFormatUrlOmitTrailingSlashOnBareHostname;
363 const FormatUrlType kFormatUrlExperimentalElideAfterHost = 1 << 3; 415 const FormatUrlType kFormatUrlExperimentalElideAfterHost = 1 << 3;
364 const FormatUrlType kFormatUrlExperimentalOmitHTTPS = 1 << 4; 416 const FormatUrlType kFormatUrlExperimentalOmitHTTPS = 1 << 4;
417 const FormatUrlType kFormatUrlExperimentalOmitTrivialSubdomains = 1 << 5;
365 418
366 base::string16 FormatUrl(const GURL& url, 419 base::string16 FormatUrl(const GURL& url,
367 FormatUrlTypes format_types, 420 FormatUrlTypes format_types,
368 net::UnescapeRule::Type unescape_rules, 421 net::UnescapeRule::Type unescape_rules,
369 url::Parsed* new_parsed, 422 url::Parsed* new_parsed,
370 size_t* prefix_end, 423 size_t* prefix_end,
371 size_t* offset_for_adjustment) { 424 size_t* offset_for_adjustment) {
372 std::vector<size_t> offsets; 425 std::vector<size_t> offsets;
373 if (offset_for_adjustment) 426 if (offset_for_adjustment)
374 offsets.push_back(*offset_for_adjustment); 427 offsets.push_back(*offset_for_adjustment);
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
474 AppendFormattedComponent(spec, parsed.password, 527 AppendFormattedComponent(spec, parsed.password,
475 NonHostComponentTransform(unescape_rules), 528 NonHostComponentTransform(unescape_rules),
476 &url_string, &new_parsed->password, adjustments); 529 &url_string, &new_parsed->password, adjustments);
477 if (parsed.username.is_valid() || parsed.password.is_valid()) 530 if (parsed.username.is_valid() || parsed.password.is_valid())
478 url_string.push_back('@'); 531 url_string.push_back('@');
479 } 532 }
480 if (prefix_end) 533 if (prefix_end)
481 *prefix_end = static_cast<size_t>(url_string.length()); 534 *prefix_end = static_cast<size_t>(url_string.length());
482 535
483 // Host. 536 // Host.
484 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(), 537 bool trim_trivial_subdomains =
538 (format_types & kFormatUrlExperimentalOmitTrivialSubdomains) != 0;
539 AppendFormattedComponent(spec, parsed.host,
540 HostComponentTransform(trim_trivial_subdomains),
485 &url_string, &new_parsed->host, adjustments); 541 &url_string, &new_parsed->host, adjustments);
486 542
487 // Port. 543 // Port.
488 if (parsed.port.is_nonempty()) { 544 if (parsed.port.is_nonempty()) {
489 url_string.push_back(':'); 545 url_string.push_back(':');
490 new_parsed->port.begin = url_string.length(); 546 new_parsed->port.begin = url_string.length();
491 url_string.insert(url_string.end(), spec.begin() + parsed.port.begin, 547 url_string.insert(url_string.end(), spec.begin() + parsed.port.begin,
492 spec.begin() + parsed.port.end()); 548 spec.begin() + parsed.port.end());
493 new_parsed->port.len = url_string.length() - new_parsed->port.begin; 549 new_parsed->port.len = url_string.length() - new_parsed->port.begin;
494 } else { 550 } else {
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
587 bool CanStripTrailingSlash(const GURL& url) { 643 bool CanStripTrailingSlash(const GURL& url) {
588 // Omit the path only for standard, non-file URLs with nothing but "/" after 644 // Omit the path only for standard, non-file URLs with nothing but "/" after
589 // the hostname. 645 // the hostname.
590 return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() && 646 return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() &&
591 !url.has_query() && !url.has_ref() && url.path_piece() == "/"; 647 !url.has_query() && !url.has_ref() && url.path_piece() == "/";
592 } 648 }
593 649
594 void AppendFormattedHost(const GURL& url, base::string16* output) { 650 void AppendFormattedHost(const GURL& url, base::string16* output) {
595 AppendFormattedComponent( 651 AppendFormattedComponent(
596 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host, 652 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host,
597 HostComponentTransform(), output, NULL, NULL); 653 HostComponentTransform(false /* trim_trivial_subdomains */), output,
Peter Kasting 2017/07/06 06:01:21 (I still dislike these kinda comments but whatever
tommycli 2017/07/06 16:25:46 Done.
654 nullptr, nullptr);
598 } 655 }
599 656
600 base::string16 IDNToUnicode(base::StringPiece host) { 657 base::string16 IDNToUnicode(base::StringPiece host) {
601 return IDNToUnicodeWithAdjustments(host, nullptr); 658 return IDNToUnicodeWithAdjustments(host, nullptr);
602 } 659 }
603 660
604 base::string16 StripWWW(const base::string16& text) { 661 base::string16 StripWWW(const base::string16& text) {
605 const base::string16 www(base::ASCIIToUTF16("www.")); 662 const base::string16 www(base::ASCIIToUTF16("www."));
606 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) 663 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)
607 ? text.substr(www.length()) : text; 664 ? text.substr(www.length()) : text;
608 } 665 }
609 666
610 base::string16 StripWWWFromHost(const GURL& url) { 667 base::string16 StripWWWFromHost(const GURL& url) {
611 DCHECK(url.is_valid()); 668 DCHECK(url.is_valid());
612 return StripWWW(base::ASCIIToUTF16(url.host_piece())); 669 return StripWWW(base::ASCIIToUTF16(url.host_piece()));
613 } 670 }
614 671
615 } // namespace url_formatter 672 } // namespace url_formatter
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698