Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(674)

Unified Diff: components/url_formatter/url_formatter.cc

Issue 2966233002: Omnibox UI Experiments: Strip trivial subdomains (Closed)
Patch Set: make cast explicit for windows Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/url_formatter/url_formatter.cc
diff --git a/components/url_formatter/url_formatter.cc b/components/url_formatter/url_formatter.cc
index 936482843a390f8bc4b17f31f7742fbb25b2f7a4..2e1e829de3b9af1b2941a36f068d0d28952c10c0 100644
--- a/components/url_formatter/url_formatter.cc
+++ b/components/url_formatter/url_formatter.cc
@@ -12,11 +12,13 @@
#include "base/macros.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/string_piece.h"
+#include "base/strings/string_tokenizer.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_offset_string_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "base/threading/thread_local_storage.h"
#include "components/url_formatter/idn_spoof_checker.h"
+#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "third_party/icu/source/common/unicode/uidna.h"
#include "third_party/icu/source/common/unicode/utypes.h"
#include "url/gurl.h"
@@ -50,14 +52,64 @@ class AppendComponentTransform {
class HostComponentTransform : public AppendComponentTransform {
public:
- HostComponentTransform() {}
+ HostComponentTransform(bool trim_trivial_subdomains)
+ : trim_trivial_subdomains_(trim_trivial_subdomains) {}
private:
base::string16 Execute(
const std::string& component_text,
base::OffsetAdjuster::Adjustments* adjustments) const override {
- return IDNToUnicodeWithAdjustments(component_text, adjustments);
+ if (!trim_trivial_subdomains_)
+ return IDNToUnicodeWithAdjustments(component_text, adjustments);
+
+ // Exclude the registry and domain from trivial subdomain stripping.
+ // To get the adjustment offset calculations correct, we need to transform
+ // the registry and domain portion of the host as well.
+ std::string domain_and_registry =
+ net::registry_controlled_domains::GetDomainAndRegistry(
+ component_text,
+ net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
+
+ base::OffsetAdjuster::Adjustments trivial_subdomains_adjustments;
+ base::StringTokenizer t(component_text.begin(),
Peter Kasting 2017/07/06 06:01:21 Nit: t -> tokenizer (avoid abbreviation in general
tommycli 2017/07/06 16:25:46 Done.
+ component_text.end() - domain_and_registry.length(),
+ ".");
+ t.set_options(base::StringTokenizer::RETURN_DELIMS);
+
+ std::string new_subdomain_string;
Peter Kasting 2017/07/06 06:01:21 Nit: |transformed_subdomain|?
tommycli 2017/07/06 16:25:46 Done.
+ size_t offset = 0;
+ while (t.GetNext()) {
+ // Append delimiters and non-trivial subdomains to the new subdomain part.
+ if (t.token_is_delim() || (t.token() != "m" && t.token() != "www")) {
+ new_subdomain_string.append(t.token());
Peter Kasting 2017/07/06 06:01:21 Nit: append() is fine, I find += a little more idi
tommycli 2017/07/06 16:25:46 Done. Ah i did not realize that string had operato
+ offset += t.token().length();
+ continue;
+ }
+
+ // When we find a trivial subdomain, we simply do not append to
+ // |new_subdomain_string|. We also consume the next token, which must be
+ // a delimiter.
+ size_t trivial_subdomain_length = t.token().length();
+ bool next_delimiter_found = t.GetNext();
+ DCHECK(next_delimiter_found);
+ DCHECK(t.token_is_delim());
+
+ // Add an adjustment accounting for the consumed subdomain and delimiter.
+ trivial_subdomains_adjustments.push_back(base::OffsetAdjuster::Adjustment(
+ offset, trivial_subdomain_length + 1, 0));
+ offset += t.token().length() + 1;
Peter Kasting 2017/07/06 06:01:21 Wait, is this right? I think t.token() is pointin
tommycli 2017/07/06 16:25:46 Done. Good suggestion on re-using the tokenizer it
Peter Kasting 2017/07/06 16:31:47 Yeah, the new version feels a lot cleaner to me no
+ }
+
+ std::string new_component_text = new_subdomain_string + domain_and_registry;
+ base::string16 unicode_result =
+ IDNToUnicodeWithAdjustments(new_component_text, adjustments);
+
+ base::OffsetAdjuster::MergeSequentialAdjustments(
+ trivial_subdomains_adjustments, adjustments);
+ return unicode_result;
}
+
+ bool trim_trivial_subdomains_;
};
class NonHostComponentTransform : public AppendComponentTransform {
@@ -362,6 +414,7 @@ const FormatUrlType kFormatUrlOmitAll =
kFormatUrlOmitTrailingSlashOnBareHostname;
const FormatUrlType kFormatUrlExperimentalElideAfterHost = 1 << 3;
const FormatUrlType kFormatUrlExperimentalOmitHTTPS = 1 << 4;
+const FormatUrlType kFormatUrlExperimentalOmitTrivialSubdomains = 1 << 5;
base::string16 FormatUrl(const GURL& url,
FormatUrlTypes format_types,
@@ -481,7 +534,10 @@ base::string16 FormatUrlWithAdjustments(
*prefix_end = static_cast<size_t>(url_string.length());
// Host.
- AppendFormattedComponent(spec, parsed.host, HostComponentTransform(),
+ bool trim_trivial_subdomains =
+ (format_types & kFormatUrlExperimentalOmitTrivialSubdomains) != 0;
+ AppendFormattedComponent(spec, parsed.host,
+ HostComponentTransform(trim_trivial_subdomains),
&url_string, &new_parsed->host, adjustments);
// Port.
@@ -594,7 +650,8 @@ bool CanStripTrailingSlash(const GURL& url) {
void AppendFormattedHost(const GURL& url, base::string16* output) {
AppendFormattedComponent(
url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host,
- HostComponentTransform(), output, NULL, NULL);
+ HostComponentTransform(false /* trim_trivial_subdomains */), output,
Peter Kasting 2017/07/06 06:01:21 (I still dislike these kinda comments but whatever
tommycli 2017/07/06 16:25:46 Done.
+ nullptr, nullptr);
}
base::string16 IDNToUnicode(base::StringPiece host) {

Powered by Google App Engine
This is Rietveld 408576698