Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(12)

Side by Side Diff: net/base/registry_controlled_domains/registry_controlled_domain.cc

Issue 2433583002: Reduce buggy usage of the registry controlled domain service. (Closed)
Patch Set: Review comments Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, 5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene,
6 // later modified by others), but almost entirely rewritten for Chrome. 6 // later modified by others), but almost entirely rewritten for Chrome.
7 // (netwerk/dns/src/nsEffectiveTLDService.cpp) 7 // (netwerk/dns/src/nsEffectiveTLDService.cpp)
8 /* ***** BEGIN LICENSE BLOCK ***** 8 /* ***** BEGIN LICENSE BLOCK *****
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10 * 10 *
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 namespace registry_controlled_domains { 59 namespace registry_controlled_domains {
60 60
61 namespace { 61 namespace {
62 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc" 62 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc"
63 63
64 // See make_dafsa.py for documentation of the generated dafsa byte array. 64 // See make_dafsa.py for documentation of the generated dafsa byte array.
65 65
66 const unsigned char* g_graph = kDafsa; 66 const unsigned char* g_graph = kDafsa;
67 size_t g_graph_length = sizeof(kDafsa); 67 size_t g_graph_length = sizeof(kDafsa);
68 68
69 struct MappedHostComponent {
70 size_t original_begin;
71 size_t original_end;
72
73 size_t canonical_begin;
74 size_t canonical_end;
75 };
76
69 size_t GetRegistryLengthImpl(base::StringPiece host, 77 size_t GetRegistryLengthImpl(base::StringPiece host,
70 UnknownRegistryFilter unknown_filter, 78 UnknownRegistryFilter unknown_filter,
71 PrivateRegistryFilter private_filter) { 79 PrivateRegistryFilter private_filter) {
72 DCHECK(!host.empty()); 80 if (host.empty())
81 return std::string::npos;
73 82
74 // Skip leading dots. 83 // Skip leading dots.
75 const size_t host_check_begin = host.find_first_not_of('.'); 84 const size_t host_check_begin = host.find_first_not_of('.');
76 if (host_check_begin == std::string::npos) 85 if (host_check_begin == std::string::npos)
77 return 0; // Host is only dots. 86 return 0; // Host is only dots.
78 87
79 // A single trailing dot isn't relevant in this determination, but does need 88 // A single trailing dot isn't relevant in this determination, but does need
80 // to be included in the final returned length. 89 // to be included in the final returned length.
81 size_t host_check_len = host.length(); 90 size_t host_check_len = host.length();
82 if (host[host_check_len - 1] == '.') { 91 if (host[host_check_len - 1] == '.') {
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
182 // interface type for all the APIs. 191 // interface type for all the APIs.
183 base::StringPiece GetDomainAndRegistryAsStringPiece( 192 base::StringPiece GetDomainAndRegistryAsStringPiece(
184 const GURL& gurl, 193 const GURL& gurl,
185 PrivateRegistryFilter filter) { 194 PrivateRegistryFilter filter) {
186 base::StringPiece host = gurl.host_piece(); 195 base::StringPiece host = gurl.host_piece();
187 if (host.empty() || gurl.HostIsIPAddress()) 196 if (host.empty() || gurl.HostIsIPAddress())
188 return base::StringPiece(); 197 return base::StringPiece();
189 return GetDomainAndRegistryImpl(host, filter); 198 return GetDomainAndRegistryImpl(host, filter);
190 } 199 }
191 200
201 // These two functions append the given string as-is to the given output,
202 // converting to UTF-8 if necessary.
203 void AppendInvalidString(base::StringPiece str, url::CanonOutput* output) {
204 output->Append(str.data(), static_cast<int>(str.length()));
205 }
206 void AppendInvalidString(base::StringPiece16 str, url::CanonOutput* output) {
207 std::string utf8 = base::UTF16ToUTF8(str);
208 output->Append(utf8.data(), static_cast<int>(utf8.length()));
209 }
210
211 // Backend for PermissiveGetHostRegistryLength that handles both UTF-8 and
212 // UTF-16 input. The template type is the std::string type to use (it makes the
213 // typedefs easier than using the character type).
214 template <typename Str>
215 size_t DoPermissiveGetHostRegistryLength(base::BasicStringPiece<Str> host,
216 UnknownRegistryFilter unknown_filter,
217 PrivateRegistryFilter private_filter) {
218 std::string canonical_host; // Do not modify outside of canon_output.
219 canonical_host.reserve(host.length());
220 url::StdStringCanonOutput canon_output(&canonical_host);
221
222 std::vector<MappedHostComponent> components;
223
224 for (size_t current = 0; current < host.length(); current++) {
225 size_t begin = current;
226
227 // Advance to next "." or end.
228 while (current < host.length() && host[current] != '.')
229 current++;
230
231 MappedHostComponent mapping;
232 mapping.original_begin = begin;
233 mapping.original_end = current;
234 mapping.canonical_begin = static_cast<size_t>(canon_output.length());
235
236 // Try to append the canonicalized version of this component.
237 int current_len = static_cast<int>(current - begin);
238 if (!url::CanonicalizeHostSubstring(
239 host.data(), url::Component(static_cast<int>(begin), current_len),
240 &canon_output)) {
241 // Failed to canonicalize this component; append as-is.
242 AppendInvalidString(host.substr(begin, current_len), &canon_output);
243 }
244
245 mapping.canonical_end = static_cast<size_t>(canon_output.length());
246 components.push_back(mapping);
247
248 if (current < host.length())
249 canon_output.push_back('.');
250 }
251 canon_output.Complete();
252
253 size_t canonical_rcd_len =
254 GetRegistryLengthImpl(canonical_host, unknown_filter, private_filter);
255 if (canonical_rcd_len == 0 || canonical_rcd_len == std::string::npos)
256 return canonical_rcd_len; // Error or no registry controlled domain.
257
258 // Find which host component the result started in.
259 size_t canonical_rcd_begin = canonical_host.length() - canonical_rcd_len;
260 for (const auto& mapping : components) {
261 // In the common case, GetRegistryLengthImpl will identify the beginning
262 // of a component and we can just return where that component was in the
263 // original string.
264 if (canonical_rcd_begin == mapping.canonical_begin)
265 return host.length() - mapping.original_begin;
266
267 if (canonical_rcd_begin >= mapping.canonical_end)
268 continue;
269
270 // The registry controlled domain begin was identified as being in the
271 // middle of this dot-separated domain component in the non-canonical
272 // input. This indicates some form of escaped dot, or a non-ASCII
273 // character that was canonicalized to a dot.
274 //
275 // Brute-force search from the end by repeatedly canonicalizing longer
276 // substrings until we get a match for the canonicalized version. This
277 // depends on the canonicalization process not changing the order of the
278 // characters. Punycode can change the order of characters, but it
279 // doesn't work across dots so this is safe.
280
281 // Expected canonical registry controlled domain.
282 base::StringPiece canonical_rcd(&canonical_host[canonical_rcd_begin],
283 canonical_rcd_len);
284
285 for (int current_try = static_cast<int>(mapping.original_end) - 1;
286 current_try >= static_cast<int>(mapping.original_begin);
287 current_try--) {
288 std::string try_string;
289 url::StdStringCanonOutput try_output(&try_string);
290
291 if (!url::CanonicalizeHostSubstring(
292 host.data(),
293 url::Component(
294 current_try,
295 static_cast<int>(mapping.original_end) - current_try),
296 &try_output))
297 continue; // Invalid substring, skip.
298
299 try_output.Complete();
300 if (try_string == canonical_rcd)
301 return host.length() - current_try;
302 }
303 }
304
305 NOTREACHED();
306 return canonical_rcd_len;
307 }
308
192 } // namespace 309 } // namespace
193 310
194 std::string GetDomainAndRegistry(const GURL& gurl, 311 std::string GetDomainAndRegistry(const GURL& gurl,
195 PrivateRegistryFilter filter) { 312 PrivateRegistryFilter filter) {
196 return GetDomainAndRegistryAsStringPiece(gurl, filter).as_string(); 313 return GetDomainAndRegistryAsStringPiece(gurl, filter).as_string();
197 } 314 }
198 315
199 std::string GetDomainAndRegistry(base::StringPiece host, 316 std::string GetDomainAndRegistry(base::StringPiece host,
200 PrivateRegistryFilter filter) { 317 PrivateRegistryFilter filter) {
201 url::CanonHostInfo host_info; 318 url::CanonHostInfo host_info;
(...skipping 29 matching lines...) Expand all
231 bool SameDomainOrHost(const url::Origin& origin1, 348 bool SameDomainOrHost(const url::Origin& origin1,
232 const url::Origin& origin2, 349 const url::Origin& origin2,
233 PrivateRegistryFilter filter) { 350 PrivateRegistryFilter filter) {
234 return SameDomainOrHost(origin1.GetURL(), origin2.GetURL(), filter); 351 return SameDomainOrHost(origin1.GetURL(), origin2.GetURL(), filter);
235 } 352 }
236 353
237 size_t GetRegistryLength( 354 size_t GetRegistryLength(
238 const GURL& gurl, 355 const GURL& gurl,
239 UnknownRegistryFilter unknown_filter, 356 UnknownRegistryFilter unknown_filter,
240 PrivateRegistryFilter private_filter) { 357 PrivateRegistryFilter private_filter) {
241 base::StringPiece host = gurl.host_piece(); 358 return GetRegistryLengthImpl(gurl.host_piece(), unknown_filter,
242 if (host.empty()) 359 private_filter);
243 return std::string::npos;
244 if (gurl.HostIsIPAddress())
245 return 0;
246 return GetRegistryLengthImpl(host, unknown_filter, private_filter);
247 } 360 }
248 361
249 size_t GetRegistryLength(base::StringPiece host, 362 bool HostHasRegistryControlledDomain(base::StringPiece host,
250 UnknownRegistryFilter unknown_filter, 363 UnknownRegistryFilter unknown_filter,
251 PrivateRegistryFilter private_filter) { 364 PrivateRegistryFilter private_filter) {
252 url::CanonHostInfo host_info; 365 url::CanonHostInfo host_info;
253 const std::string canon_host(CanonicalizeHost(host, &host_info)); 366 const std::string canon_host(CanonicalizeHost(host, &host_info));
254 if (canon_host.empty()) 367
255 return std::string::npos; 368 size_t rcd_length;
256 if (host_info.IsIPAddress()) 369 switch (host_info.family) {
257 return 0; 370 case url::CanonHostInfo::IPV4:
371 case url::CanonHostInfo::IPV6:
372 // IP addresses don't have R.C.D.'s.
373 return false;
374 case url::CanonHostInfo::BROKEN:
375 // Host is not canonicalizable. Fall back to the slower "permissive"
376 // version.
377 rcd_length =
378 PermissiveGetHostRegistryLength(host, unknown_filter, private_filter);
379 break;
380 case url::CanonHostInfo::NEUTRAL:
381 rcd_length =
382 GetRegistryLengthImpl(canon_host, unknown_filter, private_filter);
383 break;
384 default:
385 NOTREACHED();
386 return false;
387 }
388 if (rcd_length == std::string::npos)
389 return false;
390 return rcd_length > 0;
Peter Kasting 2016/10/25 01:33:32 Nit: Shorter and seems slightly simpler?: retur
brettw 2016/10/25 20:28:17 Done.
391 }
392
393 size_t GetCanonicalHostRegistryLength(base::StringPiece canon_host,
394 UnknownRegistryFilter unknown_filter,
395 PrivateRegistryFilter private_filter) {
396 #ifndef NDEBUG
397 // Ensure passed-in host name is canonical.
398 url::CanonHostInfo host_info;
399 DCHECK_EQ(net::CanonicalizeHost(canon_host, &host_info), canon_host);
400 #endif
401
258 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter); 402 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter);
259 } 403 }
260 404
405 size_t PermissiveGetHostRegistryLength(base::StringPiece host,
406 UnknownRegistryFilter unknown_filter,
407 PrivateRegistryFilter private_filter) {
408 return DoPermissiveGetHostRegistryLength<std::string>(host, unknown_filter,
409 private_filter);
410 }
411
412 size_t PermissiveGetHostRegistryLength(base::StringPiece16 host,
413 UnknownRegistryFilter unknown_filter,
414 PrivateRegistryFilter private_filter) {
415 return DoPermissiveGetHostRegistryLength<base::string16>(host, unknown_filter,
416 private_filter);
417 }
418
261 void SetFindDomainGraph() { 419 void SetFindDomainGraph() {
262 g_graph = kDafsa; 420 g_graph = kDafsa;
263 g_graph_length = sizeof(kDafsa); 421 g_graph_length = sizeof(kDafsa);
264 } 422 }
265 423
266 void SetFindDomainGraph(const unsigned char* domains, size_t length) { 424 void SetFindDomainGraph(const unsigned char* domains, size_t length) {
267 CHECK(domains); 425 CHECK(domains);
268 CHECK_NE(length, 0u); 426 CHECK_NE(length, 0u);
269 g_graph = domains; 427 g_graph = domains;
270 g_graph_length = length; 428 g_graph_length = length;
271 } 429 }
272 430
273 } // namespace registry_controlled_domains 431 } // namespace registry_controlled_domains
274 } // namespace net 432 } // namespace net
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698