Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(86)

Side by Side Diff: net/base/registry_controlled_domains/registry_controlled_domain.cc

Issue 2433583002: Reduce buggy usage of the registry controlled domain service. (Closed)
Patch Set: Fix Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, 5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene,
6 // later modified by others), but almost entirely rewritten for Chrome. 6 // later modified by others), but almost entirely rewritten for Chrome.
7 // (netwerk/dns/src/nsEffectiveTLDService.cpp) 7 // (netwerk/dns/src/nsEffectiveTLDService.cpp)
8 /* ***** BEGIN LICENSE BLOCK ***** 8 /* ***** BEGIN LICENSE BLOCK *****
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10 * 10 *
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 namespace registry_controlled_domains { 59 namespace registry_controlled_domains {
60 60
61 namespace { 61 namespace {
62 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc" 62 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc"
63 63
64 // See make_dafsa.py for documentation of the generated dafsa byte array. 64 // See make_dafsa.py for documentation of the generated dafsa byte array.
65 65
66 const unsigned char* g_graph = kDafsa; 66 const unsigned char* g_graph = kDafsa;
67 size_t g_graph_length = sizeof(kDafsa); 67 size_t g_graph_length = sizeof(kDafsa);
68 68
69 struct MappedHostComponent {
70 size_t original_begin;
71 size_t original_end;
72
73 size_t canonical_begin;
74 size_t canonical_end;
75 };
76
69 size_t GetRegistryLengthImpl(base::StringPiece host, 77 size_t GetRegistryLengthImpl(base::StringPiece host,
70 UnknownRegistryFilter unknown_filter, 78 UnknownRegistryFilter unknown_filter,
71 PrivateRegistryFilter private_filter) { 79 PrivateRegistryFilter private_filter) {
72 DCHECK(!host.empty()); 80 if (host.empty())
81 return std::string::npos;
73 82
74 // Skip leading dots. 83 // Skip leading dots.
75 const size_t host_check_begin = host.find_first_not_of('.'); 84 const size_t host_check_begin = host.find_first_not_of('.');
76 if (host_check_begin == std::string::npos) 85 if (host_check_begin == std::string::npos)
77 return 0; // Host is only dots. 86 return 0; // Host is only dots.
78 87
79 // A single trailing dot isn't relevant in this determination, but does need 88 // A single trailing dot isn't relevant in this determination, but does need
80 // to be included in the final returned length. 89 // to be included in the final returned length.
81 size_t host_check_len = host.length(); 90 size_t host_check_len = host.length();
82 if (host[host_check_len - 1] == '.') { 91 if (host[host_check_len - 1] == '.') {
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
182 // interface type for all the APIs. 191 // interface type for all the APIs.
183 base::StringPiece GetDomainAndRegistryAsStringPiece( 192 base::StringPiece GetDomainAndRegistryAsStringPiece(
184 const GURL& gurl, 193 const GURL& gurl,
185 PrivateRegistryFilter filter) { 194 PrivateRegistryFilter filter) {
186 base::StringPiece host = gurl.host_piece(); 195 base::StringPiece host = gurl.host_piece();
187 if (host.empty() || gurl.HostIsIPAddress()) 196 if (host.empty() || gurl.HostIsIPAddress())
188 return base::StringPiece(); 197 return base::StringPiece();
189 return GetDomainAndRegistryImpl(host, filter); 198 return GetDomainAndRegistryImpl(host, filter);
190 } 199 }
191 200
201 // Backend for PermissiveGetHostRegistryLength that handles both UTF-8 and
202 // UTF-16 input. The template type is the std::string type to use (it makes the
203 // typedefs easier than using the character type).
204 template <typename Str>
205 size_t DoPermissiveGetHostRegistryLength(base::BasicStringPiece<Str> host,
206 UnknownRegistryFilter unknown_filter,
207 PrivateRegistryFilter private_filter) {
208 std::string canonical_host; // No not modify outside of canon_output.
Peter Kasting 2016/10/22 05:04:19 Nit: No -> Do ?
209 canonical_host.reserve(host.size());
Peter Kasting 2016/10/22 05:04:19 Nit: The rest of this file, and the majority of Ch
brettw 2016/10/24 21:45:24 I've always written size() but will use length() f
210 url::StdStringCanonOutput canon_output(&canonical_host);
211
212 std::vector<MappedHostComponent> components;
213
214 size_t current = 0;
215 while (current < host.size()) {
Peter Kasting 2016/10/22 05:04:19 Nit: Shorter, narrows scope of |current|: for (
brettw 2016/10/24 21:45:24 Done.
216 // Advance to next "." or end.
Peter Kasting 2016/10/22 05:04:19 Nit: Technically, this comment belongs on the whil
brettw 2016/10/24 21:45:24 Done.
217 size_t begin = current;
Peter Kasting 2016/10/22 05:04:19 Nit: Can be const
brettw 2016/10/24 21:45:24 I prefer not to mark random local variables const
Peter Kasting 2016/10/24 23:04:32 There's no Chromium-wide pattern, so that's fine.
218 while (current < host.size() && host[current] != '.')
219 current++;
Peter Kasting 2016/10/22 05:04:19 Nit: Shorter: current = std::min(host.find('.
brettw 2016/10/24 21:45:24 This seems less readable to me.
Peter Kasting 2016/10/24 23:04:32 What about: current = host.find('.', begin); if (
brettw 2016/10/25 20:28:17 I can live with this.
220
221 MappedHostComponent mapping;
222 mapping.original_begin = begin;
223 mapping.original_end = current;
224 mapping.canonical_begin = static_cast<size_t>(canon_output.length());
Peter Kasting 2016/10/22 05:04:19 Nit: If you save this in a temp here instead of cr
brettw 2016/10/24 21:45:24 This feels icky to me, I don't like implicitly dep
225
226 // Append the canonicalized version of this component.
Peter Kasting 2016/10/22 05:04:19 Nit: Append -> Try to append ?
brettw 2016/10/24 21:45:24 Done.
227 if (!url::CanonicalizeHostSubstring(
228 host.data(), url::Component(static_cast<int>(begin),
229 static_cast<int>(current - begin)),
230 &canon_output)) {
231 // Failure to canonicalize this component, append as-is.
Peter Kasting 2016/10/22 05:04:19 Nit: Failure -> Failed; comma -> semicolon
232 for (size_t i = begin; i < current; i++)
233 canon_output.push_back(host[i]);
Peter Kasting 2016/10/22 05:04:19 Nit: Shorter and more efficient: canon_outp
brettw 2016/10/24 21:45:24 Actually this old code was wrong in the UTF-16 cas
234 }
235
236 mapping.canonical_end = static_cast<size_t>(canon_output.length());
237 components.push_back(mapping);
238
239 if (current < host.size()) {
240 canon_output.push_back('.');
241 current++; // Skip over '.' in input.
242 }
243 }
244 canon_output.Complete();
245
246 size_t canonical_rcd_len =
247 GetRegistryLengthImpl(canonical_host, unknown_filter, private_filter);
248 if (canonical_rcd_len == 0 || canonical_rcd_len == std::string::npos)
249 return canonical_rcd_len; // Error or no registry controlled domain.
250
251 // Find which host component the result started in.
252 size_t canonical_rcd_begin = canonical_host.size() - canonical_rcd_len;
253 for (const auto& mapping : components) {
254 // In the common case, GetRegistryLengthImpl will identify the beginning
255 // of a component and we can just return where that component was in the
256 // original string.
257 if (canonical_rcd_begin == mapping.canonical_begin)
258 return host.size() - mapping.original_begin;
259
260 if (canonical_rcd_begin < mapping.canonical_end) {
Peter Kasting 2016/10/22 05:04:19 Nit: If you change this to this, at the top of the
brettw 2016/10/24 21:45:24 Done.
261 // The registry controlled domain begin was identified as being in the
262 // middle of this dot-separated domain component in the non-canonical
263 // input. This indicates some form of escaped dot, or a non-ASCII
264 // character that was canonicalized to a dot.
265 //
266 // Brute-force search from the end by repeatedly canonicalizing longer
267 // substrings until we get a match for the canonicalized version. This
268 // depends on the canonicalization process not changing the order of the
269 // characters. Punycode can change the order of characters, but it
270 // doesn't work across dots so this is safe.
271
272 // Expected canonical registry controlled domain.
273 base::StringPiece canonical_rcd(&canonical_host[canonical_rcd_begin],
274 canonical_rcd_len);
275
276 for (int current_try = static_cast<int>(mapping.original_end) - 1;
Peter Kasting 2016/10/22 05:04:19 Can't we binary-search this instead of linear-sear
brettw 2016/10/24 21:45:24 That doesn't work. Canonicalization can make the r
Peter Kasting 2016/10/24 23:04:32 Yeah, you're right. I thought about this some but
brettw 2016/10/25 20:28:17 Done.
277 current_try >= static_cast<int>(mapping.original_begin);
Peter Kasting 2016/10/22 05:04:19 Seems like we could get by with ">" instead of ">=
Peter Kasting 2016/10/25 01:33:32 Still wondering about this.
brettw 2016/10/25 20:28:17 You're correct, but in the context of a loop count
Peter Kasting 2016/10/25 20:37:53 Maybe write '>' with a comment saying we already c
278 current_try--) {
Peter Kasting 2016/10/22 05:04:19 Nit: Predecrement
brettw 2016/10/24 21:45:24 I don't understand this comment. Are you asking be
Peter Kasting 2016/10/24 23:04:32 There's no logic problem, I was suggesting using p
brettw 2016/10/25 20:28:17 I always write postfix for scalars because I think
Peter Kasting 2016/10/25 20:37:53 This surprised me so I tried to look myself: http
279 std::string try_string;
280 url::StdStringCanonOutput try_output(&try_string);
281
282 if (!url::CanonicalizeHostSubstring(
283 host.data(),
284 url::Component(
285 current_try,
286 static_cast<int>(mapping.original_end) - current_try),
287 &try_output))
288 continue; // Invalid substring, skip.
289
290 try_output.Complete();
291 if (try_string == canonical_rcd)
292 return host.size() - current_try;
293 }
294 }
295 }
296
297 NOTREACHED();
298 return canonical_rcd_len;
299 }
300
192 } // namespace 301 } // namespace
193 302
194 std::string GetDomainAndRegistry(const GURL& gurl, 303 std::string GetDomainAndRegistry(const GURL& gurl,
195 PrivateRegistryFilter filter) { 304 PrivateRegistryFilter filter) {
196 return GetDomainAndRegistryAsStringPiece(gurl, filter).as_string(); 305 return GetDomainAndRegistryAsStringPiece(gurl, filter).as_string();
197 } 306 }
198 307
199 std::string GetDomainAndRegistry(base::StringPiece host, 308 std::string GetDomainAndRegistry(base::StringPiece host,
200 PrivateRegistryFilter filter) { 309 PrivateRegistryFilter filter) {
201 url::CanonHostInfo host_info; 310 url::CanonHostInfo host_info;
(...skipping 29 matching lines...) Expand all
231 bool SameDomainOrHost(const url::Origin& origin1, 340 bool SameDomainOrHost(const url::Origin& origin1,
232 const url::Origin& origin2, 341 const url::Origin& origin2,
233 PrivateRegistryFilter filter) { 342 PrivateRegistryFilter filter) {
234 return SameDomainOrHost(origin1.GetURL(), origin2.GetURL(), filter); 343 return SameDomainOrHost(origin1.GetURL(), origin2.GetURL(), filter);
235 } 344 }
236 345
237 size_t GetRegistryLength( 346 size_t GetRegistryLength(
238 const GURL& gurl, 347 const GURL& gurl,
239 UnknownRegistryFilter unknown_filter, 348 UnknownRegistryFilter unknown_filter,
240 PrivateRegistryFilter private_filter) { 349 PrivateRegistryFilter private_filter) {
241 base::StringPiece host = gurl.host_piece(); 350 return GetRegistryLengthImpl(gurl.host_piece(), unknown_filter,
242 if (host.empty()) 351 private_filter);
243 return std::string::npos;
244 if (gurl.HostIsIPAddress())
Peter Kasting 2016/10/22 05:04:19 I assume deleting this is not a behavior change be
brettw 2016/10/24 21:45:24 I did this because it can never match, and decodin
245 return 0;
246 return GetRegistryLengthImpl(host, unknown_filter, private_filter);
247 } 352 }
248 353
249 size_t GetRegistryLength(base::StringPiece host, 354 bool HostHasRegistryControlledDomain(base::StringPiece host,
250 UnknownRegistryFilter unknown_filter, 355 UnknownRegistryFilter unknown_filter,
251 PrivateRegistryFilter private_filter) { 356 PrivateRegistryFilter private_filter) {
252 url::CanonHostInfo host_info; 357 url::CanonHostInfo host_info;
253 const std::string canon_host(CanonicalizeHost(host, &host_info)); 358 const std::string canon_host(CanonicalizeHost(host, &host_info));
254 if (canon_host.empty()) 359 switch (host_info.family) {
255 return std::string::npos; 360 case url::CanonHostInfo::IPV4:
256 if (host_info.IsIPAddress()) 361 case url::CanonHostInfo::IPV6:
257 return 0; 362 // IP addresses don't have R.C.D.'s.
363 return false;
364 case url::CanonHostInfo::BROKEN:
365 // Host is not canonicalizable. Fall back to the slower "permissive"
366 // version.
367 return PermissiveGetHostRegistryLength(host, unknown_filter,
368 private_filter) > 0;
Peter Kasting 2016/10/22 05:04:19 "> 0" doesn't seem right; we want to return false
brettw 2016/10/24 21:45:23 Both done, thanks for catching. I added some unit
369 case url::CanonHostInfo::NEUTRAL:
370 return GetRegistryLengthImpl(host, unknown_filter, private_filter) > 0;
Peter Kasting 2016/10/22 05:04:19 Same question.
371 default:
372 NOTREACHED();
373 return false
Peter Kasting 2016/10/22 05:04:19 Semicolon
374 }
375 }
376
377 size_t GetCanonicalHostRegistryLength(base::StringPiece canon_host,
378 UnknownRegistryFilter unknown_filter,
379 PrivateRegistryFilter private_filter) {
380 #ifndef NDEBUG
381 // Ensure passed-in host name is canonical.
382 url::CanonHostInfo host_info;
383 DCHECK_EQ(net::CanonicalizeHost(canon_host, &host_info), canon_host);
384 #endif
385
258 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter); 386 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter);
259 } 387 }
260 388
389 size_t PermissiveGetHostRegistryLength(base::StringPiece host,
390 UnknownRegistryFilter unknown_filter,
391 PrivateRegistryFilter private_filter) {
392 return DoPermissiveGetHostRegistryLength<std::string>(host, unknown_filter,
393 private_filter);
394 }
395
396 size_t PermissiveGetHostRegistryLength(base::StringPiece16 host,
397 UnknownRegistryFilter unknown_filter,
398 PrivateRegistryFilter private_filter) {
399 return DoPermissiveGetHostRegistryLength<base::string16>(host, unknown_filter,
400 private_filter);
401 }
402
261 void SetFindDomainGraph() { 403 void SetFindDomainGraph() {
262 g_graph = kDafsa; 404 g_graph = kDafsa;
263 g_graph_length = sizeof(kDafsa); 405 g_graph_length = sizeof(kDafsa);
264 } 406 }
265 407
266 void SetFindDomainGraph(const unsigned char* domains, size_t length) { 408 void SetFindDomainGraph(const unsigned char* domains, size_t length) {
267 CHECK(domains); 409 CHECK(domains);
268 CHECK_NE(length, 0u); 410 CHECK_NE(length, 0u);
269 g_graph = domains; 411 g_graph = domains;
270 g_graph_length = length; 412 g_graph_length = length;
271 } 413 }
272 414
273 } // namespace registry_controlled_domains 415 } // namespace registry_controlled_domains
274 } // namespace net 416 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698