Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(145)

Side by Side Diff: components/google/core/browser/google_util.cc

Issue 2498113003: Add verification that google URL has a valid TLD. (Closed)
Patch Set: Move the list of Google TLDs into a separate header file. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/google/core/browser/google_util.h" 5 #include "components/google/core/browser/google_util.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include <string> 9 #include <string>
10 #include <vector> 10 #include <vector>
11 11
12 #include "base/command_line.h" 12 #include "base/command_line.h"
13 #include "base/macros.h" 13 #include "base/macros.h"
14 #include "base/strings/string16.h" 14 #include "base/strings/string16.h"
15 #include "base/strings/string_number_conversions.h" 15 #include "base/strings/string_number_conversions.h"
16 #include "base/strings/string_split.h" 16 #include "base/strings/string_split.h"
17 #include "base/strings/string_util.h" 17 #include "base/strings/string_util.h"
18 #include "base/strings/utf_string_conversions.h" 18 #include "base/strings/utf_string_conversions.h"
19 #include "components/google/core/browser/google_switches.h" 19 #include "components/google/core/browser/google_switches.h"
20 #include "components/google/core/browser/google_tld_list.h"
20 #include "components/google/core/browser/google_url_tracker.h" 21 #include "components/google/core/browser/google_url_tracker.h"
21 #include "components/url_formatter/url_fixer.h" 22 #include "components/url_formatter/url_fixer.h"
22 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 23 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
23 #include "net/base/url_util.h" 24 #include "net/base/url_util.h"
24 #include "url/gurl.h" 25 #include "url/gurl.h"
25 26
26 // Only use Link Doctor on official builds. It uses an API key, too, but 27 // Only use Link Doctor on official builds. It uses an API key, too, but
27 // seems best to just disable it, for more responsive error pages and to reduce 28 // seems best to just disable it, for more responsive error pages and to reduce
28 // server load. 29 // server load.
29 #if defined(GOOGLE_CHROME_BUILD) 30 #if defined(GOOGLE_CHROME_BUILD)
30 #define LINKDOCTOR_SERVER_REQUEST_URL "https://www.googleapis.com/rpc" 31 #define LINKDOCTOR_SERVER_REQUEST_URL "https://www.googleapis.com/rpc"
31 #else 32 #else
32 #define LINKDOCTOR_SERVER_REQUEST_URL "" 33 #define LINKDOCTOR_SERVER_REQUEST_URL ""
33 #endif 34 #endif
34 35
35 namespace google_util { 36 namespace google_util {
36 37
37 // Helpers -------------------------------------------------------------------- 38 // Helpers --------------------------------------------------------------------
38 39
39 namespace { 40 namespace {
40 41
41 bool gUseMockLinkDoctorBaseURLForTesting = false; 42 bool gUseMockLinkDoctorBaseURLForTesting = false;
42 43
43 bool IsPathHomePageBase(base::StringPiece path) { 44 bool IsPathHomePageBase(base::StringPiece path) {
44 return (path == "/") || (path == "/webhp"); 45 return (path == "/") || (path == "/webhp");
45 } 46 }
46 47
47 // True if the given canonical |host| is "[www.]<domain_in_lower_case>.<TLD>" 48 // True if the given canonical |host| is "[www.]<domain_in_lower_case>.<TLD>"
48 // with a valid TLD. If |subdomain_permission| is ALLOW_SUBDOMAIN, we check 49 // with a valid TLD. If |subdomain_permission| is ALLOW_SUBDOMAIN, we check
49 // against host "*.<domain_in_lower_case>.<TLD>" instead. 50 // against host "*.<domain_in_lower_case>.<TLD>" instead. Will return the TLD
51 // string in |tld|, if specified and the |host| can be parsed.
50 bool IsValidHostName(base::StringPiece host, 52 bool IsValidHostName(base::StringPiece host,
51 base::StringPiece domain_in_lower_case, 53 base::StringPiece domain_in_lower_case,
52 SubdomainPermission subdomain_permission) { 54 SubdomainPermission subdomain_permission,
55 base::StringPiece* tld) {
53 // Fast path to avoid searching the registry set. 56 // Fast path to avoid searching the registry set.
54 if (host.find(domain_in_lower_case) == base::StringPiece::npos) 57 if (host.find(domain_in_lower_case) == base::StringPiece::npos)
55 return false; 58 return false;
59
56 size_t tld_length = 60 size_t tld_length =
57 net::registry_controlled_domains::GetCanonicalHostRegistryLength( 61 net::registry_controlled_domains::GetCanonicalHostRegistryLength(
58 host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 62 host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
59 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 63 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
60 if ((tld_length == 0) || (tld_length == std::string::npos)) 64 if ((tld_length == 0) || (tld_length == std::string::npos))
61 return false; 65 return false;
62 66
63 // Removes the tld and the preceding dot. 67 // Removes the tld and the preceding dot.
64 base::StringPiece host_minus_tld = 68 base::StringPiece host_minus_tld =
65 host.substr(0, host.length() - tld_length - 1); 69 host.substr(0, host.length() - tld_length - 1);
70
71 if (tld)
72 *tld = host.substr(host.length() - tld_length);
73
66 if (base::LowerCaseEqualsASCII(host_minus_tld, domain_in_lower_case)) 74 if (base::LowerCaseEqualsASCII(host_minus_tld, domain_in_lower_case))
67 return true; 75 return true;
68 76
69 if (subdomain_permission == ALLOW_SUBDOMAIN) { 77 if (subdomain_permission == ALLOW_SUBDOMAIN) {
70 std::string dot_domain("."); 78 std::string dot_domain(".");
71 domain_in_lower_case.AppendToString(&dot_domain); 79 domain_in_lower_case.AppendToString(&dot_domain);
72 return base::EndsWith(host_minus_tld, dot_domain, 80 return base::EndsWith(host_minus_tld, dot_domain,
73 base::CompareCase::INSENSITIVE_ASCII); 81 base::CompareCase::INSENSITIVE_ASCII);
74 } 82 }
75 83
76 std::string www_domain("www."); 84 std::string www_domain("www.");
77 domain_in_lower_case.AppendToString(&www_domain); 85 domain_in_lower_case.AppendToString(&www_domain);
78 return base::LowerCaseEqualsASCII(host_minus_tld, www_domain); 86 return base::LowerCaseEqualsASCII(host_minus_tld, www_domain);
79 } 87 }
80 88
81 // True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission| 89 // True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission|
82 // is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard 90 // is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard
83 // port for its scheme (80 for HTTP, 443 for HTTPS). 91 // port for its scheme (80 for HTTP, 443 for HTTPS).
84 bool IsValidURL(const GURL& url, PortPermission port_permission) { 92 bool IsValidURL(const GURL& url, PortPermission port_permission) {
85 return url.is_valid() && url.SchemeIsHTTPOrHTTPS() && 93 return url.is_valid() && url.SchemeIsHTTPOrHTTPS() &&
86 (url.port().empty() || (port_permission == ALLOW_NON_STANDARD_PORTS)); 94 (url.port().empty() || (port_permission == ALLOW_NON_STANDARD_PORTS));
87 } 95 }
88 96
89 bool IsCanonicalHostGoogleHostname(base::StringPiece canonical_host, 97 bool IsCanonicalHostGoogleHostname(base::StringPiece canonical_host,
90 SubdomainPermission subdomain_permission) { 98 SubdomainPermission subdomain_permission) {
91 const GURL& base_url(CommandLineGoogleBaseURL()); 99 const GURL& base_url(CommandLineGoogleBaseURL());
92 if (base_url.is_valid() && (canonical_host == base_url.host_piece())) 100 if (base_url.is_valid() && (canonical_host == base_url.host_piece()))
93 return true; 101 return true;
94 102
95 return IsValidHostName(canonical_host, "google", subdomain_permission); 103 base::StringPiece tld;
104 if (!IsValidHostName(canonical_host, "google", subdomain_permission, &tld))
105 return false;
106
107 CR_DEFINE_STATIC_LOCAL(std::set<std::string>, google_tlds,
108 ({GOOGLE_TLD_LIST}));
109
Peter Kasting 2016/12/16 01:35:58 Nit: Blank line here now probably unnecessary? I…
Maria 2017/01/04 17:58:52 Done.
110 return base::ContainsKey(google_tlds, tld.as_string());
96 } 111 }
97 112
98 } // namespace 113 } // namespace
99 114
100 // Global functions ----------------------------------------------------------- 115 // Global functions -----------------------------------------------------------
101 116
102 bool HasGoogleSearchQueryParam(base::StringPiece str) { 117 bool HasGoogleSearchQueryParam(base::StringPiece str) {
103 url::Component query(0, static_cast<int>(str.length())), key, value; 118 url::Component query(0, static_cast<int>(str.length())), key, value;
104 while (url::ExtractQueryKeyValue(str.data(), &query, &key, &value)) { 119 while (url::ExtractQueryKeyValue(str.data(), &query, &key, &value)) {
105 if (value.is_nonempty()) { 120 if (value.is_nonempty()) {
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
226 // Check for query parameter in URL parameter and hash fragment, depending on 241 // Check for query parameter in URL parameter and hash fragment, depending on
227 // the path type. 242 // the path type.
228 return HasGoogleSearchQueryParam(url.ref_piece()) || 243 return HasGoogleSearchQueryParam(url.ref_piece()) ||
229 (!is_home_page_base && HasGoogleSearchQueryParam(url.query_piece())); 244 (!is_home_page_base && HasGoogleSearchQueryParam(url.query_piece()));
230 } 245 }
231 246
232 bool IsYoutubeDomainUrl(const GURL& url, 247 bool IsYoutubeDomainUrl(const GURL& url,
233 SubdomainPermission subdomain_permission, 248 SubdomainPermission subdomain_permission,
234 PortPermission port_permission) { 249 PortPermission port_permission) {
235 return IsValidURL(url, port_permission) && 250 return IsValidURL(url, port_permission) &&
236 IsValidHostName(url.host_piece(), "youtube", subdomain_permission); 251 IsValidHostName(url.host_piece(), "youtube", subdomain_permission,
252 nullptr);
237 } 253 }
238 254
239 } // namespace google_util 255 } // namespace google_util
OLD | NEW
« no previous file with comments | « components/google/core/browser/google_tld_list.h ('k') | components/google/core/browser/google_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698