Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1468)

Side by Side Diff: components/google/core/browser/google_util.cc

Issue 2498113003: Add verification that google URL has a valid TLD. (Closed)
Patch Set: Change implementation to set. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | components/google/core/browser/google_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/google/core/browser/google_util.h" 5 #include "components/google/core/browser/google_util.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include <string> 9 #include <string>
10 #include <vector> 10 #include <vector>
(...skipping 28 matching lines...) Expand all
39 namespace { 39 namespace {
40 40
41 bool gUseMockLinkDoctorBaseURLForTesting = false; 41 bool gUseMockLinkDoctorBaseURLForTesting = false;
42 42
43 bool IsPathHomePageBase(base::StringPiece path) { 43 bool IsPathHomePageBase(base::StringPiece path) {
44 return (path == "/") || (path == "/webhp"); 44 return (path == "/") || (path == "/webhp");
45 } 45 }
46 46
47 // True if the given canonical |host| is "[www.]<domain_in_lower_case>.<TLD>" 47 // True if the given canonical |host| is "[www.]<domain_in_lower_case>.<TLD>"
48 // with a valid TLD. If |subdomain_permission| is ALLOW_SUBDOMAIN, we check 48 // with a valid TLD. If |subdomain_permission| is ALLOW_SUBDOMAIN, we check
49 // against host "*.<domain_in_lower_case>.<TLD>" instead. 49 // against host "*.<domain_in_lower_case>.<TLD>" instead. Will return the TLD
50 // string in |tld|, if specified and the |host| can be parsed.
50 bool IsValidHostName(base::StringPiece host, 51 bool IsValidHostName(base::StringPiece host,
51 base::StringPiece domain_in_lower_case, 52 base::StringPiece domain_in_lower_case,
52 SubdomainPermission subdomain_permission) { 53 SubdomainPermission subdomain_permission,
54 base::StringPiece* tld) {
55
Peter Kasting 2016/12/15 01:21:59 Nit: Don't add a blank line here.
Maria 2016/12/15 17:56:33 Done.
53 // Fast path to avoid searching the registry set. 56 // Fast path to avoid searching the registry set.
54 if (host.find(domain_in_lower_case) == base::StringPiece::npos) 57 if (host.find(domain_in_lower_case) == base::StringPiece::npos)
55 return false; 58 return false;
59
56 size_t tld_length = 60 size_t tld_length =
57 net::registry_controlled_domains::GetCanonicalHostRegistryLength( 61 net::registry_controlled_domains::GetCanonicalHostRegistryLength(
58 host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 62 host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
59 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 63 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
60 if ((tld_length == 0) || (tld_length == std::string::npos)) 64 if ((tld_length == 0) || (tld_length == std::string::npos))
61 return false; 65 return false;
62 66
63 // Removes the tld and the preceding dot. 67 // Removes the tld and the preceding dot.
64 base::StringPiece host_minus_tld = 68 base::StringPiece host_minus_tld =
65 host.substr(0, host.length() - tld_length - 1); 69 host.substr(0, host.length() - tld_length - 1);
70
71 if (tld)
72 *tld = host.substr(host.length() - tld_length);
73
66 if (base::LowerCaseEqualsASCII(host_minus_tld, domain_in_lower_case)) 74 if (base::LowerCaseEqualsASCII(host_minus_tld, domain_in_lower_case))
67 return true; 75 return true;
68 76
69 if (subdomain_permission == ALLOW_SUBDOMAIN) { 77 if (subdomain_permission == ALLOW_SUBDOMAIN) {
70 std::string dot_domain("."); 78 std::string dot_domain(".");
71 domain_in_lower_case.AppendToString(&dot_domain); 79 domain_in_lower_case.AppendToString(&dot_domain);
72 return base::EndsWith(host_minus_tld, dot_domain, 80 return base::EndsWith(host_minus_tld, dot_domain,
73 base::CompareCase::INSENSITIVE_ASCII); 81 base::CompareCase::INSENSITIVE_ASCII);
74 } 82 }
75 83
76 std::string www_domain("www."); 84 std::string www_domain("www.");
77 domain_in_lower_case.AppendToString(&www_domain); 85 domain_in_lower_case.AppendToString(&www_domain);
78 return base::LowerCaseEqualsASCII(host_minus_tld, www_domain); 86 return base::LowerCaseEqualsASCII(host_minus_tld, www_domain);
79 } 87 }
80 88
81 // True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission| 89 // True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission|
82 // is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard 90 // is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard
83 // port for its scheme (80 for HTTP, 443 for HTTPS). 91 // port for its scheme (80 for HTTP, 443 for HTTPS).
84 bool IsValidURL(const GURL& url, PortPermission port_permission) { 92 bool IsValidURL(const GURL& url, PortPermission port_permission) {
85 return url.is_valid() && url.SchemeIsHTTPOrHTTPS() && 93 return url.is_valid() && url.SchemeIsHTTPOrHTTPS() &&
86 (url.port().empty() || (port_permission == ALLOW_NON_STANDARD_PORTS)); 94 (url.port().empty() || (port_permission == ALLOW_NON_STANDARD_PORTS));
87 } 95 }
88 96
89 bool IsCanonicalHostGoogleHostname(base::StringPiece canonical_host, 97 bool IsCanonicalHostGoogleHostname(base::StringPiece canonical_host,
90 SubdomainPermission subdomain_permission) { 98 SubdomainPermission subdomain_permission) {
91 const GURL& base_url(CommandLineGoogleBaseURL()); 99 const GURL& base_url(CommandLineGoogleBaseURL());
92 if (base_url.is_valid() && (canonical_host == base_url.host_piece())) 100 if (base_url.is_valid() && (canonical_host == base_url.host_piece()))
93 return true; 101 return true;
94 102
95 return IsValidHostName(canonical_host, "google", subdomain_permission); 103 base::StringPiece tld;
104 bool valid = IsValidHostName(canonical_host, "google", subdomain_permission,
105 &tld);
106 if (!valid || tld.empty())
107 return valid;
Peter Kasting 2016/12/15 01:21:59 Why check "|| tld.empty()" here? It seems like th… [comment truncated in collapsed view]
Maria 2016/12/15 17:56:33 Done.
108
109 static std::set<std::string> google_tlds = {"ac", "ad", "ae", "af", "ag",
Peter Kasting 2016/12/15 01:21:59 Use CR_DEFINE_STATIC_LOCAL for this.
Maria 2016/12/15 17:56:33 I don't think I can because CR_DEFINE_STATIC_LOCAL… [comment truncated in collapsed view]
Peter Kasting 2016/12/15 19:00:25 I think this should work: CR_DEFINE_STATIC_LOCAL(… [comment truncated in collapsed view]
110 "al", "am", "as", "at", "aw", "az", "ba", "be", "bf", "bg", "bi", "biz",
111 "bj", "bm", "bn", "bo", "bs", "bt", "by", "bz", "ca", "cat", "cc", "cd",
112 "cf", "cg", "ch", "ci", "cl", "cm", "cn", "co", "co.ao", "co.at", "co.ba",
113 "co.bi", "co.bw", "co.ci", "co.ck", "co.cr", "co.gg", "co.gl", "co.gy",
114 "co.hu", "co.id", "co.il", "co.im", "co.in", "co.it", "co.je", "co.jp",
115 "co.ke", "co.kr", "co.ls", "co.ma", "co.mu", "co.mw", "co.mz", "co.nz",
116 "co.pn", "co.rs", "co.th", "co.tt", "co.tz", "co.ua", "co.ug", "co.uk",
117 "co.uz", "co.ve", "co.vi", "co.za", "co.zm", "co.zw", "com", "com.af",
118 "com.ag", "com.ai", "com.ar", "com.au", "com.az", "com.bd", "com.bh",
119 "com.bi", "com.bn", "com.bo", "com.br", "com.bs", "com.by", "com.bz",
120 "com.cn", "com.co", "com.cu", "com.cy", "com.do", "com.dz", "com.ec",
121 "com.eg", "com.er", "com.et", "com.fj", "com.ge", "com.gh", "com.gi",
122 "com.gl", "com.gp", "com.gr", "com.gt", "com.gy", "com.hk", "com.hn",
123 "com.hr", "com.ht", "com.iq", "com.jm", "com.jo", "com.kg", "com.kh",
124 "com.ki", "com.kw", "com.kz", "com.lb", "com.lc", "com.lk", "com.lv",
125 "com.ly", "com.mk", "com.mm", "com.mt", "com.mu", "com.mw", "com.mx",
126 "com.my", "com.na", "com.nc", "com.nf", "com.ng", "com.ni", "com.np",
127 "com.nr", "com.om", "com.pa", "com.pe", "com.pg", "com.ph", "com.pk",
128 "com.pl", "com.pr", "com.ps", "com.pt", "com.py", "com.qa", "com.ru",
129 "com.sa", "com.sb", "com.sc", "com.sg", "com.sl", "com.sv", "com.tj",
130 "com.tm", "com.tn", "com.tr", "com.tt", "com.tw", "com.ua", "com.uy",
131 "com.uz", "com.vc", "com.ve", "com.vi", "com.vn", "com.ws", "cv", "cx",
132 "cz", "de", "dj", "dk", "dm", "do", "dz", "ec", "ee", "es", "eu", "fi",
133 "fm", "fr", "ga", "gd", "ge", "gf", "gg", "gl", "gm", "gp", "gr", "gw",
134 "gy", "hk", "hn", "hr", "ht", "hu", "ie", "im", "in", "info", "in.rs",
135 "io", "iq", "is", "it", "it.ao", "je", "jo", "jobs", "jp", "kg", "ki",
136 "kids.us", "km", "kn", "kr", "kz", "la", "li", "lk", "lt", "lu", "lv",
137 "ma", "md", "me", "mg", "mh", "mk", "ml", "mn", "mobi", "mr", "ms", "mu",
138 "mv", "mw", "mx", "name", "ne", "ne.jp", "net", "net.in", "net.nz", "nf",
139 "ng", "nl", "no", "nom.es", "nr", "nu", "off.ai", "org", "org.af",
140 "org.es", "org.in", "org.nz", "org.uk", "pf", "ph", "pk", "pl", "pn",
141 "pr", "pro", "ps", "pt", "qa", "re", "ro", "rs", "ru", "rw", "sc", "se",
142 "sg", "sh", "si", "sk", "sl", "sm", "sn", "so", "sr", "st", "sz", "td",
143 "tel", "tg", "tk", "tl", "tm", "tn", "to", "tt", "tv", "tw", "ua", "ug",
144 "us", "uz", "vc", "vg", "vn", "vu", "ws", "yt"};
145
146 return google_tlds.find(tld.as_string()) != google_tlds.end();
Peter Kasting 2016/12/15 01:21:59 Nit: Reads more clearly: return base::ContainsK… [comment truncated in collapsed view; presumably base::ContainsKey(...)]
Maria 2016/12/15 17:56:33 Done.
96 } 147 }
97 148
98 } // namespace 149 } // namespace
99 150
100 // Global functions ----------------------------------------------------------- 151 // Global functions -----------------------------------------------------------
101 152
102 bool HasGoogleSearchQueryParam(base::StringPiece str) { 153 bool HasGoogleSearchQueryParam(base::StringPiece str) {
103 url::Component query(0, static_cast<int>(str.length())), key, value; 154 url::Component query(0, static_cast<int>(str.length())), key, value;
104 while (url::ExtractQueryKeyValue(str.data(), &query, &key, &value)) { 155 while (url::ExtractQueryKeyValue(str.data(), &query, &key, &value)) {
105 if (value.is_nonempty()) { 156 if (value.is_nonempty()) {
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
226 // Check for query parameter in URL parameter and hash fragment, depending on 277 // Check for query parameter in URL parameter and hash fragment, depending on
227 // the path type. 278 // the path type.
228 return HasGoogleSearchQueryParam(url.ref_piece()) || 279 return HasGoogleSearchQueryParam(url.ref_piece()) ||
229 (!is_home_page_base && HasGoogleSearchQueryParam(url.query_piece())); 280 (!is_home_page_base && HasGoogleSearchQueryParam(url.query_piece()));
230 } 281 }
231 282
232 bool IsYoutubeDomainUrl(const GURL& url, 283 bool IsYoutubeDomainUrl(const GURL& url,
233 SubdomainPermission subdomain_permission, 284 SubdomainPermission subdomain_permission,
234 PortPermission port_permission) { 285 PortPermission port_permission) {
235 return IsValidURL(url, port_permission) && 286 return IsValidURL(url, port_permission) &&
236 IsValidHostName(url.host_piece(), "youtube", subdomain_permission); 287 IsValidHostName(url.host_piece(), "youtube", subdomain_permission,
288 nullptr);
237 } 289 }
238 290
239 } // namespace google_util 291 } // namespace google_util
OLDNEW
« no previous file with comments | « no previous file | components/google/core/browser/google_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698