Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/google/core/browser/google_util.h" | 5 #include "components/google/core/browser/google_util.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 | 8 |
| 9 #include <string> | 9 #include <string> |
| 10 #include <vector> | 10 #include <vector> |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 38 // Helpers -------------------------------------------------------------------- | 38 // Helpers -------------------------------------------------------------------- |
| 39 | 39 |
| 40 namespace { | 40 namespace { |
| 41 | 41 |
| 42 bool gUseMockLinkDoctorBaseURLForTesting = false; | 42 bool gUseMockLinkDoctorBaseURLForTesting = false; |
| 43 | 43 |
| 44 bool IsPathHomePageBase(base::StringPiece path) { | 44 bool IsPathHomePageBase(base::StringPiece path) { |
| 45 return (path == "/") || (path == "/webhp"); | 45 return (path == "/") || (path == "/webhp"); |
| 46 } | 46 } |
| 47 | 47 |
| | 48 // Removes a single trailing dot if present in |host|. |
| | 49 void StripTrailingDot(base::StringPiece* host) { |
| | 50 if (host->ends_with(".")) |
| | 51 host->remove_suffix(1); |
| | 52 } |
| | 53 |
| 48 // True if the given canonical |host| is "[www.]<domain_in_lower_case>.<TLD>" | 54 // True if the given canonical |host| is "[www.]<domain_in_lower_case>.<TLD>" |
| 49 // with a valid TLD. If |subdomain_permission| is ALLOW_SUBDOMAIN, we check | 55 // with a valid TLD. If |subdomain_permission| is ALLOW_SUBDOMAIN, we check |
| 50 // against host "*.<domain_in_lower_case>.<TLD>" instead. Will return the TLD | 56 // against host "*.<domain_in_lower_case>.<TLD>" instead. Will return the TLD |
| 51 // string in |tld|, if specified and the |host| can be parsed. | 57 // string in |tld|, if specified and the |host| can be parsed. |
| 52 bool IsValidHostName(base::StringPiece host, | 58 bool IsValidHostName(base::StringPiece host, |
| 53 base::StringPiece domain_in_lower_case, | 59 base::StringPiece domain_in_lower_case, |
| 54 SubdomainPermission subdomain_permission, | 60 SubdomainPermission subdomain_permission, |
| 55 base::StringPiece* tld) { | 61 base::StringPiece* tld) { |
| 56 // Fast path to avoid searching the registry set. | 62 // Fast path to avoid searching the registry set. |
| 57 if (host.find(domain_in_lower_case) == base::StringPiece::npos) | 63 if (host.find(domain_in_lower_case) == base::StringPiece::npos) |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 97 bool IsCanonicalHostGoogleHostname(base::StringPiece canonical_host, | 103 bool IsCanonicalHostGoogleHostname(base::StringPiece canonical_host, |
| 98 SubdomainPermission subdomain_permission) { | 104 SubdomainPermission subdomain_permission) { |
| 99 const GURL& base_url(CommandLineGoogleBaseURL()); | 105 const GURL& base_url(CommandLineGoogleBaseURL()); |
| 100 if (base_url.is_valid() && (canonical_host == base_url.host_piece())) | 106 if (base_url.is_valid() && (canonical_host == base_url.host_piece())) |
| 101 return true; | 107 return true; |
| 102 | 108 |
| 103 base::StringPiece tld; | 109 base::StringPiece tld; |
| 104 if (!IsValidHostName(canonical_host, "google", subdomain_permission, &tld)) | 110 if (!IsValidHostName(canonical_host, "google", subdomain_permission, &tld)) |
| 105 return false; | 111 return false; |
| 106 | 112 |
| | 113 // Remove the trailing dot from tld if present, as for google domain it's the |
| | 114 // same page. |
| | 115 StripTrailingDot(&tld); |
| | 116 |
| 107 CR_DEFINE_STATIC_LOCAL(std::set<std::string>, google_tlds, | 117 CR_DEFINE_STATIC_LOCAL(std::set<std::string>, google_tlds, |
| 108 ({GOOGLE_TLD_LIST})); | 118 ({GOOGLE_TLD_LIST})); |
| 109 return base::ContainsKey(google_tlds, tld.as_string()); | 119 return base::ContainsKey(google_tlds, tld.as_string()); |
| 110 } | 120 } |
| 111 | 121 |
| | 122 // True if |url| is a valid URL with a host that is in the static list of |
| | 123 // Google subdomains for google search, and an HTTP or HTTPS scheme. Requires |
| | 124 // |url| to use the standard port for its scheme (80 for HTTP, 443 for HTTPS). |
| | 125 bool IsGoogleSearchSubdomainUrl(const GURL& url) { |
| | 126 if (!IsValidURL(url, PortPermission::DISALLOW_NON_STANDARD_PORTS)) |
| | 127 return false; |
| | 128 |
| | 129 base::StringPiece host(url.host_piece()); |
| | 130 StripTrailingDot(&host); |
| | 131 |
| | 132 CR_DEFINE_STATIC_LOCAL(std::set<std::string>, google_subdomains, |
| | 133 ({"ipv4.google.com", "ipv6.google.com"})); |
| | 134 |
| | 135 return base::ContainsKey(google_subdomains, host.as_string()); |
| | 136 } |
| | 137 |
| 112 } // namespace | 138 } // namespace |
| 113 | 139 |
| 114 // Global functions ----------------------------------------------------------- | 140 // Global functions ----------------------------------------------------------- |
| 115 | 141 |
| 116 bool HasGoogleSearchQueryParam(base::StringPiece str) { | 142 bool HasGoogleSearchQueryParam(base::StringPiece str) { |
| 117 url::Component query(0, static_cast<int>(str.length())), key, value; | 143 url::Component query(0, static_cast<int>(str.length())), key, value; |
| 118 while (url::ExtractQueryKeyValue(str.data(), &query, &key, &value)) { | 144 while (url::ExtractQueryKeyValue(str.data(), &query, &key, &value)) { |
| 119 if (value.is_nonempty()) { | 145 if (value.is_nonempty()) { |
| 120 base::StringPiece key_str = str.substr(key.begin, key.len); | 146 base::StringPiece key_str = str.substr(key.begin, key.len); |
| 121 if (key_str == "q" || key_str == "as_q") | 147 if (key_str == "q" || key_str == "as_q") |
| (...skipping 19 matching lines...) Expand all Loading... | |
| 141 } | 167 } |
| 142 | 168 |
| 143 GURL AppendGoogleLocaleParam(const GURL& url, | 169 GURL AppendGoogleLocaleParam(const GURL& url, |
| 144 const std::string& application_locale) { | 170 const std::string& application_locale) { |
| 145 return net::AppendQueryParameter( | 171 return net::AppendQueryParameter( |
| 146 url, "hl", GetGoogleLocale(application_locale)); | 172 url, "hl", GetGoogleLocale(application_locale)); |
| 147 } | 173 } |
| 148 | 174 |
| 149 std::string GetGoogleCountryCode(const GURL& google_homepage_url) { | 175 std::string GetGoogleCountryCode(const GURL& google_homepage_url) { |
| 150 base::StringPiece google_hostname = google_homepage_url.host_piece(); | 176 base::StringPiece google_hostname = google_homepage_url.host_piece(); |
| | 177 // TODO(igorcov): This needs a fix for case when the host has a trailing dot, |
| | 178 // like "google.com./". https://crbug.com/720295. |
| 151 const size_t last_dot = google_hostname.find_last_of('.'); | 179 const size_t last_dot = google_hostname.find_last_of('.'); |
| 152 if (last_dot == std::string::npos) { | 180 if (last_dot == std::string::npos) { |
| 153 NOTREACHED(); | 181 NOTREACHED(); |
|
> **Peter Kasting** (2017/05/11 15:37:59): So, is this truly NOTREACHED today, or is it reach… *(comment truncated in this view)*
>
> **igorcov** (2017/05/11 16:38:16): This is truly NOTREACHED. The hostname has to have… *(comment truncated in this view)*

| 154 } | 182 } |
| 155 base::StringPiece country_code = google_hostname.substr(last_dot + 1); | 183 base::StringPiece country_code = google_hostname.substr(last_dot + 1); |
| 156 // Assume the com TLD implies the US. | 184 // Assume the com TLD implies the US. |
| 157 if (country_code == "com") | 185 if (country_code == "com") |
| 158 return "us"; | 186 return "us"; |
| 159 // Google uses the Unicode Common Locale Data Repository (CLDR), and the CLDR | 187 // Google uses the Unicode Common Locale Data Repository (CLDR), and the CLDR |
| 160 // code for the UK is "gb". | 188 // code for the UK is "gb". |
| 161 if (country_code == "uk") | 189 if (country_code == "uk") |
| 162 return "gb"; | 190 return "gb"; |
| 163 // Catalonia does not have a CLDR country code, since it's a region in Spain, | 191 // Catalonia does not have a CLDR country code, since it's a region in Spain, |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 210 | 238 |
| 211 bool IsGoogleDomainUrl(const GURL& url, | 239 bool IsGoogleDomainUrl(const GURL& url, |
| 212 SubdomainPermission subdomain_permission, | 240 SubdomainPermission subdomain_permission, |
| 213 PortPermission port_permission) { | 241 PortPermission port_permission) { |
| 214 return IsValidURL(url, port_permission) && | 242 return IsValidURL(url, port_permission) && |
| 215 IsCanonicalHostGoogleHostname(url.host_piece(), subdomain_permission); | 243 IsCanonicalHostGoogleHostname(url.host_piece(), subdomain_permission); |
| 216 } | 244 } |
| 217 | 245 |
| 218 bool IsGoogleHomePageUrl(const GURL& url) { | 246 bool IsGoogleHomePageUrl(const GURL& url) { |
| 219 // First check to see if this has a Google domain. | 247 // First check to see if this has a Google domain. |
| 220 if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS)) | 248 if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, |
| | 249 DISALLOW_NON_STANDARD_PORTS) && |
| | 250 !IsGoogleSearchSubdomainUrl(url)) { |
| 221 return false; | 251 return false; |
| | 252 } |
| 222 | 253 |
| 223 // Make sure the path is a known home page path. | 254 // Make sure the path is a known home page path. |
| 224 base::StringPiece path(url.path_piece()); | 255 base::StringPiece path(url.path_piece()); |
| 225 return IsPathHomePageBase(path) || | 256 return IsPathHomePageBase(path) || |
| 226 base::StartsWith(path, "/ig", base::CompareCase::INSENSITIVE_ASCII); | 257 base::StartsWith(path, "/ig", base::CompareCase::INSENSITIVE_ASCII); |
| 227 } | 258 } |
| 228 | 259 |
| 229 bool IsGoogleSearchUrl(const GURL& url) { | 260 bool IsGoogleSearchUrl(const GURL& url) { |
| 230 // First check to see if this has a Google domain. | 261 // First check to see if this has a Google domain. |
| 231 if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS)) | 262 if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, |
| | 263 DISALLOW_NON_STANDARD_PORTS) && |
| | 264 !IsGoogleSearchSubdomainUrl(url)) { |
| 232 return false; | 265 return false; |
| | 266 } |
| 233 | 267 |
| 234 // Make sure the path is a known search path. | 268 // Make sure the path is a known search path. |
| 235 base::StringPiece path(url.path_piece()); | 269 base::StringPiece path(url.path_piece()); |
| 236 bool is_home_page_base = IsPathHomePageBase(path); | 270 bool is_home_page_base = IsPathHomePageBase(path); |
| 237 if (!is_home_page_base && (path != "/search")) | 271 if (!is_home_page_base && (path != "/search")) |
| 238 return false; | 272 return false; |
| 239 | 273 |
| 240 // Check for query parameter in URL parameter and hash fragment, depending on | 274 // Check for query parameter in URL parameter and hash fragment, depending on |
| 241 // the path type. | 275 // the path type. |
| 242 return HasGoogleSearchQueryParam(url.ref_piece()) || | 276 return HasGoogleSearchQueryParam(url.ref_piece()) || |
| 243 (!is_home_page_base && HasGoogleSearchQueryParam(url.query_piece())); | 277 (!is_home_page_base && HasGoogleSearchQueryParam(url.query_piece())); |
| 244 } | 278 } |
| 245 | 279 |
| 246 bool IsYoutubeDomainUrl(const GURL& url, | 280 bool IsYoutubeDomainUrl(const GURL& url, |
| 247 SubdomainPermission subdomain_permission, | 281 SubdomainPermission subdomain_permission, |
| 248 PortPermission port_permission) { | 282 PortPermission port_permission) { |
| 249 return IsValidURL(url, port_permission) && | 283 return IsValidURL(url, port_permission) && |
| 250 IsValidHostName(url.host_piece(), "youtube", subdomain_permission, | 284 IsValidHostName(url.host_piece(), "youtube", subdomain_permission, |
| 251 nullptr); | 285 nullptr); |
| 252 } | 286 } |
| 253 | 287 |
| 254 } // namespace google_util | 288 } // namespace google_util |
| OLD | NEW |