Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(157)

Side by Side Diff: components/google/core/browser/google_util.cc

Issue 2861183002: Google search subdomains included for Safesearch (Closed)
Patch Set: Fixed review comments Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | components/google/core/browser/google_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/google/core/browser/google_util.h" 5 #include "components/google/core/browser/google_util.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include <string> 9 #include <string>
10 #include <vector> 10 #include <vector>
(...skipping 27 matching lines...) Expand all
38 // Helpers -------------------------------------------------------------------- 38 // Helpers --------------------------------------------------------------------
39 39
40 namespace { 40 namespace {
41 41
42 bool gUseMockLinkDoctorBaseURLForTesting = false; 42 bool gUseMockLinkDoctorBaseURLForTesting = false;
43 43
44 bool IsPathHomePageBase(base::StringPiece path) { 44 bool IsPathHomePageBase(base::StringPiece path) {
45 return (path == "/") || (path == "/webhp"); 45 return (path == "/") || (path == "/webhp");
46 } 46 }
47 47
48 // Removes a single trailing dot if present in |host|.
49 void StripTrailingDot(base::StringPiece* host) {
50 if (host->ends_with("."))
51 host->remove_suffix(1);
52 }
53
48 // True if the given canonical |host| is "[www.]<domain_in_lower_case>.<TLD>" 54 // True if the given canonical |host| is "[www.]<domain_in_lower_case>.<TLD>"
49 // with a valid TLD. If |subdomain_permission| is ALLOW_SUBDOMAIN, we check 55 // with a valid TLD. If |subdomain_permission| is ALLOW_SUBDOMAIN, we check
50 // against host "*.<domain_in_lower_case>.<TLD>" instead. Will return the TLD 56 // against host "*.<domain_in_lower_case>.<TLD>" instead. Will return the TLD
51 // string in |tld|, if specified and the |host| can be parsed. 57 // string in |tld|, if specified and the |host| can be parsed.
52 bool IsValidHostName(base::StringPiece host, 58 bool IsValidHostName(base::StringPiece host,
53 base::StringPiece domain_in_lower_case, 59 base::StringPiece domain_in_lower_case,
54 SubdomainPermission subdomain_permission, 60 SubdomainPermission subdomain_permission,
55 base::StringPiece* tld) { 61 base::StringPiece* tld) {
56 // Fast path to avoid searching the registry set. 62 // Fast path to avoid searching the registry set.
57 if (host.find(domain_in_lower_case) == base::StringPiece::npos) 63 if (host.find(domain_in_lower_case) == base::StringPiece::npos)
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
97 bool IsCanonicalHostGoogleHostname(base::StringPiece canonical_host, 103 bool IsCanonicalHostGoogleHostname(base::StringPiece canonical_host,
98 SubdomainPermission subdomain_permission) { 104 SubdomainPermission subdomain_permission) {
99 const GURL& base_url(CommandLineGoogleBaseURL()); 105 const GURL& base_url(CommandLineGoogleBaseURL());
100 if (base_url.is_valid() && (canonical_host == base_url.host_piece())) 106 if (base_url.is_valid() && (canonical_host == base_url.host_piece()))
101 return true; 107 return true;
102 108
103 base::StringPiece tld; 109 base::StringPiece tld;
104 if (!IsValidHostName(canonical_host, "google", subdomain_permission, &tld)) 110 if (!IsValidHostName(canonical_host, "google", subdomain_permission, &tld))
105 return false; 111 return false;
106 112
113 // Remove the trailing dot from |tld| if present; for Google domains, the
114 // host with and without the trailing dot is the same page.
115 StripTrailingDot(&tld);
116
107 CR_DEFINE_STATIC_LOCAL(std::set<std::string>, google_tlds, 117 CR_DEFINE_STATIC_LOCAL(std::set<std::string>, google_tlds,
108 ({GOOGLE_TLD_LIST})); 118 ({GOOGLE_TLD_LIST}));
109 return base::ContainsKey(google_tlds, tld.as_string()); 119 return base::ContainsKey(google_tlds, tld.as_string());
110 } 120 }
111 121
122 // True if |url| is a valid URL with a host that is in the static list of
123 // Google subdomains for google search, and an HTTP or HTTPS scheme. Requires
124 // |url| to use the standard port for its scheme (80 for HTTP, 443 for HTTPS).
125 bool IsGoogleSearchSubdomainUrl(const GURL& url) {
126 if (!IsValidURL(url, PortPermission::DISALLOW_NON_STANDARD_PORTS))
127 return false;
128
129 base::StringPiece host(url.host_piece());
130 StripTrailingDot(&host);
131
132 CR_DEFINE_STATIC_LOCAL(std::set<std::string>, google_subdomains,
133 ({"ipv4.google.com", "ipv6.google.com"}));
134
135 return base::ContainsKey(google_subdomains, host.as_string());
136 }
137
112 } // namespace 138 } // namespace
113 139
114 // Global functions ----------------------------------------------------------- 140 // Global functions -----------------------------------------------------------
115 141
116 bool HasGoogleSearchQueryParam(base::StringPiece str) { 142 bool HasGoogleSearchQueryParam(base::StringPiece str) {
117 url::Component query(0, static_cast<int>(str.length())), key, value; 143 url::Component query(0, static_cast<int>(str.length())), key, value;
118 while (url::ExtractQueryKeyValue(str.data(), &query, &key, &value)) { 144 while (url::ExtractQueryKeyValue(str.data(), &query, &key, &value)) {
119 if (value.is_nonempty()) { 145 if (value.is_nonempty()) {
120 base::StringPiece key_str = str.substr(key.begin, key.len); 146 base::StringPiece key_str = str.substr(key.begin, key.len);
121 if (key_str == "q" || key_str == "as_q") 147 if (key_str == "q" || key_str == "as_q")
(...skipping 19 matching lines...) Expand all
141 } 167 }
142 168
143 GURL AppendGoogleLocaleParam(const GURL& url, 169 GURL AppendGoogleLocaleParam(const GURL& url,
144 const std::string& application_locale) { 170 const std::string& application_locale) {
145 return net::AppendQueryParameter( 171 return net::AppendQueryParameter(
146 url, "hl", GetGoogleLocale(application_locale)); 172 url, "hl", GetGoogleLocale(application_locale));
147 } 173 }
148 174
149 std::string GetGoogleCountryCode(const GURL& google_homepage_url) { 175 std::string GetGoogleCountryCode(const GURL& google_homepage_url) {
150 base::StringPiece google_hostname = google_homepage_url.host_piece(); 176 base::StringPiece google_hostname = google_homepage_url.host_piece();
177 // TODO(igorcov): This needs a fix for the case when the host has a trailing
178 // dot, like "google.com./". https://crbug.com/720295.
151 const size_t last_dot = google_hostname.find_last_of('.'); 179 const size_t last_dot = google_hostname.find_last_of('.');
152 if (last_dot == std::string::npos) { 180 if (last_dot == std::string::npos) {
153 NOTREACHED(); 181 NOTREACHED();
Peter Kasting 2017/05/11 15:37:59 So, is this truly NOTREACHED today, or is it reach
igorcov 2017/05/11 16:38:16 This is truly NOTREACHED. The hostname has to have
154 } 182 }
155 base::StringPiece country_code = google_hostname.substr(last_dot + 1); 183 base::StringPiece country_code = google_hostname.substr(last_dot + 1);
156 // Assume the com TLD implies the US. 184 // Assume the com TLD implies the US.
157 if (country_code == "com") 185 if (country_code == "com")
158 return "us"; 186 return "us";
159 // Google uses the Unicode Common Locale Data Repository (CLDR), and the CLDR 187 // Google uses the Unicode Common Locale Data Repository (CLDR), and the CLDR
160 // code for the UK is "gb". 188 // code for the UK is "gb".
161 if (country_code == "uk") 189 if (country_code == "uk")
162 return "gb"; 190 return "gb";
163 // Catalonia does not have a CLDR country code, since it's a region in Spain, 191 // Catalonia does not have a CLDR country code, since it's a region in Spain,
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
210 238
211 bool IsGoogleDomainUrl(const GURL& url, 239 bool IsGoogleDomainUrl(const GURL& url,
212 SubdomainPermission subdomain_permission, 240 SubdomainPermission subdomain_permission,
213 PortPermission port_permission) { 241 PortPermission port_permission) {
214 return IsValidURL(url, port_permission) && 242 return IsValidURL(url, port_permission) &&
215 IsCanonicalHostGoogleHostname(url.host_piece(), subdomain_permission); 243 IsCanonicalHostGoogleHostname(url.host_piece(), subdomain_permission);
216 } 244 }
217 245
218 bool IsGoogleHomePageUrl(const GURL& url) { 246 bool IsGoogleHomePageUrl(const GURL& url) {
219 // First check to see if this has a Google domain. 247 // First check to see if this has a Google domain.
220 if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS)) 248 if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN,
249 DISALLOW_NON_STANDARD_PORTS) &&
250 !IsGoogleSearchSubdomainUrl(url)) {
221 return false; 251 return false;
252 }
222 253
223 // Make sure the path is a known home page path. 254 // Make sure the path is a known home page path.
224 base::StringPiece path(url.path_piece()); 255 base::StringPiece path(url.path_piece());
225 return IsPathHomePageBase(path) || 256 return IsPathHomePageBase(path) ||
226 base::StartsWith(path, "/ig", base::CompareCase::INSENSITIVE_ASCII); 257 base::StartsWith(path, "/ig", base::CompareCase::INSENSITIVE_ASCII);
227 } 258 }
228 259
229 bool IsGoogleSearchUrl(const GURL& url) { 260 bool IsGoogleSearchUrl(const GURL& url) {
230 // First check to see if this has a Google domain. 261 // First check to see if this has a Google domain.
231 if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS)) 262 if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN,
263 DISALLOW_NON_STANDARD_PORTS) &&
264 !IsGoogleSearchSubdomainUrl(url)) {
232 return false; 265 return false;
266 }
233 267
234 // Make sure the path is a known search path. 268 // Make sure the path is a known search path.
235 base::StringPiece path(url.path_piece()); 269 base::StringPiece path(url.path_piece());
236 bool is_home_page_base = IsPathHomePageBase(path); 270 bool is_home_page_base = IsPathHomePageBase(path);
237 if (!is_home_page_base && (path != "/search")) 271 if (!is_home_page_base && (path != "/search"))
238 return false; 272 return false;
239 273
240 // Check for query parameter in URL parameter and hash fragment, depending on 274 // Check for query parameter in URL parameter and hash fragment, depending on
241 // the path type. 275 // the path type.
242 return HasGoogleSearchQueryParam(url.ref_piece()) || 276 return HasGoogleSearchQueryParam(url.ref_piece()) ||
243 (!is_home_page_base && HasGoogleSearchQueryParam(url.query_piece())); 277 (!is_home_page_base && HasGoogleSearchQueryParam(url.query_piece()));
244 } 278 }
245 279
246 bool IsYoutubeDomainUrl(const GURL& url, 280 bool IsYoutubeDomainUrl(const GURL& url,
247 SubdomainPermission subdomain_permission, 281 SubdomainPermission subdomain_permission,
248 PortPermission port_permission) { 282 PortPermission port_permission) {
249 return IsValidURL(url, port_permission) && 283 return IsValidURL(url, port_permission) &&
250 IsValidHostName(url.host_piece(), "youtube", subdomain_permission, 284 IsValidHostName(url.host_piece(), "youtube", subdomain_permission,
251 nullptr); 285 nullptr);
252 } 286 }
253 287
254 } // namespace google_util 288 } // namespace google_util
OLDNEW
« no previous file with comments | « no previous file | components/google/core/browser/google_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698