components/url_formatter/url_formatter.cc - Issue 2966233002: Omnibox UI Experiments: Strip trivial subdomains

Unified Diff: components/url_formatter/url_formatter.cc

Issue 2966233002: Omnibox UI Experiments: Strip trivial subdomains (Closed)

Patch Set: make cast explicit for windows Created 3 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« components/omnibox/browser/autocomplete_match_unittest.cc ('K') | « components/url_formatter/url_formatter.h ('k') | components/url_formatter/url_formatter_unittest.cc » ('j') | components/url_formatter/url_formatter_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: components/url_formatter/url_formatter.cc

diff --git a/components/url_formatter/url_formatter.cc b/components/url_formatter/url_formatter.cc

index 936482843a390f8bc4b17f31f7742fbb25b2f7a4..2e1e829de3b9af1b2941a36f068d0d28952c10c0 100644

--- a/components/url_formatter/url_formatter.cc

+++ b/components/url_formatter/url_formatter.cc

@@ -12,11 +12,13 @@

#include "base/macros.h"

#include "base/numerics/safe_conversions.h"

#include "base/strings/string_piece.h"

+#include "base/strings/string_tokenizer.h"

#include "base/strings/string_util.h"

#include "base/strings/utf_offset_string_conversions.h"

#include "base/strings/utf_string_conversions.h"

#include "base/threading/thread_local_storage.h"

#include "components/url_formatter/idn_spoof_checker.h"

+#include "net/base/registry_controlled_domains/registry_controlled_domain.h"

#include "third_party/icu/source/common/unicode/uidna.h"

#include "third_party/icu/source/common/unicode/utypes.h"

#include "url/gurl.h"

@@ -50,14 +52,64 @@ class AppendComponentTransform {

class HostComponentTransform : public AppendComponentTransform {

public:

- HostComponentTransform() {}

+ HostComponentTransform(bool trim_trivial_subdomains)

+ : trim_trivial_subdomains_(trim_trivial_subdomains) {}

private:

base::string16 Execute(

const std::string& component_text,

base::OffsetAdjuster::Adjustments* adjustments) const override {

- return IDNToUnicodeWithAdjustments(component_text, adjustments);

+ if (!trim_trivial_subdomains_)

+ return IDNToUnicodeWithAdjustments(component_text, adjustments);

+ // Exclude the registry and domain from trivial subdomain stripping.

+ // To get the adjustment offset calculations correct, we need to transform

+ // the registry and domain portion of the host as well.

+ std::string domain_and_registry =

+ net::registry_controlled_domains::GetDomainAndRegistry(

+ component_text,

+ net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);

+ base::OffsetAdjuster::Adjustments trivial_subdomains_adjustments;

+ base::StringTokenizer t(component_text.begin(),

Peter Kasting 2017/07/06 06:01:21 Nit: t -> tokenizer (avoid abbreviation in general

tommycli 2017/07/06 16:25:46 Done.

+ component_text.end() - domain_and_registry.length(),

+ ".");

+ t.set_options(base::StringTokenizer::RETURN_DELIMS);

+ std::string new_subdomain_string;

Peter Kasting 2017/07/06 06:01:21 Nit: |transformed_subdomain|?

tommycli 2017/07/06 16:25:46 Done.

+ size_t offset = 0;

+ while (t.GetNext()) {

+ // Append delimiters and non-trivial subdomains to the new subdomain part.

+ if (t.token_is_delim() || (t.token() != "m" && t.token() != "www")) {

+ new_subdomain_string.append(t.token());

Peter Kasting 2017/07/06 06:01:21 Nit: append() is fine, I find += a little more idi

tommycli 2017/07/06 16:25:46 Done. Ah i did not realize that string had operato

+ offset += t.token().length();

+ continue;

+ }

+ // When we find a trivial subdomain, we simply do not append to

+ // |new_subdomain_string|. We also consume the next token, which must be

+ // a delimiter.

+ size_t trivial_subdomain_length = t.token().length();

+ bool next_delimiter_found = t.GetNext();

+ DCHECK(next_delimiter_found);

+ DCHECK(t.token_is_delim());

+ // Add an adjustment accounting for the consumed subdomain and delimiter.

+ trivial_subdomains_adjustments.push_back(base::OffsetAdjuster::Adjustment(

+ offset, trivial_subdomain_length + 1, 0));

+ offset += t.token().length() + 1;

Peter Kasting 2017/07/06 06:01:21 Wait, is this right? I think t.token() is pointin

tommycli 2017/07/06 16:25:46 Done. Good suggestion on re-using the tokenizer it

Peter Kasting 2017/07/06 16:31:47 Yeah, the new version feels a lot cleaner to me no

+ }

+ std::string new_component_text = new_subdomain_string + domain_and_registry;

+ base::string16 unicode_result =

+ IDNToUnicodeWithAdjustments(new_component_text, adjustments);

+ base::OffsetAdjuster::MergeSequentialAdjustments(

+ trivial_subdomains_adjustments, adjustments);

+ return unicode_result;

}

+ bool trim_trivial_subdomains_;

};

class NonHostComponentTransform : public AppendComponentTransform {

@@ -362,6 +414,7 @@ const FormatUrlType kFormatUrlOmitAll =

kFormatUrlOmitTrailingSlashOnBareHostname;

const FormatUrlType kFormatUrlExperimentalElideAfterHost = 1 << 3;

const FormatUrlType kFormatUrlExperimentalOmitHTTPS = 1 << 4;

+const FormatUrlType kFormatUrlExperimentalOmitTrivialSubdomains = 1 << 5;

base::string16 FormatUrl(const GURL& url,

FormatUrlTypes format_types,

@@ -481,7 +534,10 @@ base::string16 FormatUrlWithAdjustments(

*prefix_end = static_cast<size_t>(url_string.length());

// Host.

- AppendFormattedComponent(spec, parsed.host, HostComponentTransform(),

+ bool trim_trivial_subdomains =

+ (format_types & kFormatUrlExperimentalOmitTrivialSubdomains) != 0;

+ AppendFormattedComponent(spec, parsed.host,

+ HostComponentTransform(trim_trivial_subdomains),

&url_string, &new_parsed->host, adjustments);

// Port.

@@ -594,7 +650,8 @@ bool CanStripTrailingSlash(const GURL& url) {

void AppendFormattedHost(const GURL& url, base::string16* output) {

AppendFormattedComponent(

url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host,

- HostComponentTransform(), output, NULL, NULL);

+ HostComponentTransform(false /* trim_trivial_subdomains */), output,

Peter Kasting 2017/07/06 06:01:21 (I still dislike these kinda comments but whatever

tommycli 2017/07/06 16:25:46 Done.

+ nullptr, nullptr);

}

base::string16 IDNToUnicode(base::StringPiece host) {