Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1055)

Side by Side Diff: net/base/registry_controlled_domains/registry_controlled_domain.cc

Issue 2451353002: Reduce buggy usage of the registry controlled domain service. (Closed)
Patch Set: "Fix" (created 4 years, 1 month ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, 5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene,
6 // later modified by others), but almost entirely rewritten for Chrome. 6 // later modified by others), but almost entirely rewritten for Chrome.
7 // (netwerk/dns/src/nsEffectiveTLDService.cpp) 7 // (netwerk/dns/src/nsEffectiveTLDService.cpp)
8 /* ***** BEGIN LICENSE BLOCK ***** 8 /* ***** BEGIN LICENSE BLOCK *****
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10 * 10 *
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 namespace registry_controlled_domains { 59 namespace registry_controlled_domains {
60 60
61 namespace { 61 namespace {
62 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc" 62 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc"
63 63
64 // See make_dafsa.py for documentation of the generated dafsa byte array. 64 // See make_dafsa.py for documentation of the generated dafsa byte array.
65 65
66 const unsigned char* g_graph = kDafsa; 66 const unsigned char* g_graph = kDafsa;
67 size_t g_graph_length = sizeof(kDafsa); 67 size_t g_graph_length = sizeof(kDafsa);
68 68
69 struct MappedHostComponent {
70 size_t original_begin;
71 size_t original_end;
72
73 size_t canonical_begin;
74 size_t canonical_end;
75 };
76
69 size_t GetRegistryLengthImpl(base::StringPiece host, 77 size_t GetRegistryLengthImpl(base::StringPiece host,
70 UnknownRegistryFilter unknown_filter, 78 UnknownRegistryFilter unknown_filter,
71 PrivateRegistryFilter private_filter) { 79 PrivateRegistryFilter private_filter) {
72 DCHECK(!host.empty()); 80 if (host.empty())
81 return std::string::npos;
73 82
74 // Skip leading dots. 83 // Skip leading dots.
75 const size_t host_check_begin = host.find_first_not_of('.'); 84 const size_t host_check_begin = host.find_first_not_of('.');
76 if (host_check_begin == std::string::npos) 85 if (host_check_begin == std::string::npos)
77 return 0; // Host is only dots. 86 return 0; // Host is only dots.
78 87
79 // A single trailing dot isn't relevant in this determination, but does need 88 // A single trailing dot isn't relevant in this determination, but does need
80 // to be included in the final returned length. 89 // to be included in the final returned length.
81 size_t host_check_len = host.length(); 90 size_t host_check_len = host.length();
82 if (host[host_check_len - 1] == '.') { 91 if (host[host_check_len - 1] == '.') {
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
182 // interface type for all the APIs. 191 // interface type for all the APIs.
183 base::StringPiece GetDomainAndRegistryAsStringPiece( 192 base::StringPiece GetDomainAndRegistryAsStringPiece(
184 const GURL& gurl, 193 const GURL& gurl,
185 PrivateRegistryFilter filter) { 194 PrivateRegistryFilter filter) {
186 base::StringPiece host = gurl.host_piece(); 195 base::StringPiece host = gurl.host_piece();
187 if (host.empty() || gurl.HostIsIPAddress()) 196 if (host.empty() || gurl.HostIsIPAddress())
188 return base::StringPiece(); 197 return base::StringPiece();
189 return GetDomainAndRegistryImpl(host, filter); 198 return GetDomainAndRegistryImpl(host, filter);
190 } 199 }
191 200
201 // These two functions append the given string as-is to the given output,
202 // converting to UTF-8 if necessary.
203 void AppendInvalidString(base::StringPiece str, url::CanonOutput* output) {
204 output->Append(str.data(), static_cast<int>(str.length()));
205 }
206 void AppendInvalidString(base::StringPiece16 str, url::CanonOutput* output) {
207 std::string utf8 = base::UTF16ToUTF8(str);
208 output->Append(utf8.data(), static_cast<int>(utf8.length()));
209 }
210
211 // Backend for PermissiveGetHostRegistryLength that handles both UTF-8 and
212 // UTF-16 input. The template type is the std::string type to use (it makes the
213 // typedefs easier than using the character type).
214 template <typename Str>
215 size_t DoPermissiveGetHostRegistryLength(base::BasicStringPiece<Str> host,
216 UnknownRegistryFilter unknown_filter,
217 PrivateRegistryFilter private_filter) {
218 std::string canonical_host; // Do not modify outside of canon_output.
219 canonical_host.reserve(host.length());
220 url::StdStringCanonOutput canon_output(&canonical_host);
221
222 std::vector<MappedHostComponent> components;
223
224 for (size_t current = 0; current < host.length(); current++) {
225 size_t begin = current;
226
227 // Advance to next "." or end.
228 current = host.find('.', begin);
229 if (current == std::string::npos)
230 current = host.length();
231
232 MappedHostComponent mapping;
233 mapping.original_begin = begin;
234 mapping.original_end = current;
235 mapping.canonical_begin = static_cast<size_t>(canon_output.length());
236
237 // Try to append the canonicalized version of this component.
238 int current_len = static_cast<int>(current - begin);
239 if (!url::CanonicalizeHostSubstring(
240 host.data(), url::Component(static_cast<int>(begin), current_len),
241 &canon_output)) {
242 // Failed to canonicalize this component; append as-is.
243 AppendInvalidString(host.substr(begin, current_len), &canon_output);
244 }
245
246 mapping.canonical_end = static_cast<size_t>(canon_output.length());
247 components.push_back(mapping);
248
249 if (current < host.length())
250 canon_output.push_back('.');
251 }
252 canon_output.Complete();
253
254 size_t canonical_rcd_len =
255 GetRegistryLengthImpl(canonical_host, unknown_filter, private_filter);
256 if (canonical_rcd_len == 0 || canonical_rcd_len == std::string::npos)
257 return canonical_rcd_len; // Error or no registry controlled domain.
258
259 // Find which host component the result started in.
260 size_t canonical_rcd_begin = canonical_host.length() - canonical_rcd_len;
261 for (const auto& mapping : components) {
262 // In the common case, GetRegistryLengthImpl will identify the beginning
263 // of a component and we can just return where that component was in the
264 // original string.
265 if (canonical_rcd_begin == mapping.canonical_begin)
266 return host.length() - mapping.original_begin;
267
268 if (canonical_rcd_begin >= mapping.canonical_end)
269 continue;
270
271 // The registry controlled domain begin was identified as being in the
272 // middle of this dot-separated domain component in the non-canonical
273 // input. This indicates some form of escaped dot, or a non-ASCII
274 // character that was canonicalized to a dot.
275 //
276 // Brute-force search from the end by repeatedly canonicalizing longer
277 // substrings until we get a match for the canonicalized version. This
278 // can't be done with binary search because canonicalization might increase
279 // or decrease the length of the produced string depending on where it's
280 // split. This depends on the canonicalization process not changing the
281 // order of the characters. Punycode can change the order of characters,
282 // but it doesn't work across dots so this is safe.
283
284 // Expected canonical registry controlled domain.
285 base::StringPiece canonical_rcd(&canonical_host[canonical_rcd_begin],
286 canonical_rcd_len);
287
288 for (int current_try = static_cast<int>(mapping.original_end) - 1;
289 current_try >= static_cast<int>(mapping.original_begin);
290 current_try--) {
291 std::string try_string;
292 url::StdStringCanonOutput try_output(&try_string);
293
294 if (!url::CanonicalizeHostSubstring(
295 host.data(),
296 url::Component(
297 current_try,
298 static_cast<int>(mapping.original_end) - current_try),
299 &try_output))
300 continue; // Invalid substring, skip.
301
302 try_output.Complete();
303 if (try_string == canonical_rcd)
304 return host.length() - current_try;
305 }
306 }
307
308 NOTREACHED();
309 return canonical_rcd_len;
310 }
311
192 } // namespace 312 } // namespace
193 313
194 std::string GetDomainAndRegistry(const GURL& gurl, 314 std::string GetDomainAndRegistry(const GURL& gurl,
195 PrivateRegistryFilter filter) { 315 PrivateRegistryFilter filter) {
196 return GetDomainAndRegistryAsStringPiece(gurl, filter).as_string(); 316 return GetDomainAndRegistryAsStringPiece(gurl, filter).as_string();
197 } 317 }
198 318
199 std::string GetDomainAndRegistry(base::StringPiece host, 319 std::string GetDomainAndRegistry(base::StringPiece host,
200 PrivateRegistryFilter filter) { 320 PrivateRegistryFilter filter) {
201 url::CanonHostInfo host_info; 321 url::CanonHostInfo host_info;
(...skipping 29 matching lines...) Expand all
231 bool SameDomainOrHost(const url::Origin& origin1, 351 bool SameDomainOrHost(const url::Origin& origin1,
232 const url::Origin& origin2, 352 const url::Origin& origin2,
233 PrivateRegistryFilter filter) { 353 PrivateRegistryFilter filter) {
234 return SameDomainOrHost(origin1.GetURL(), origin2.GetURL(), filter); 354 return SameDomainOrHost(origin1.GetURL(), origin2.GetURL(), filter);
235 } 355 }
236 356
237 size_t GetRegistryLength( 357 size_t GetRegistryLength(
238 const GURL& gurl, 358 const GURL& gurl,
239 UnknownRegistryFilter unknown_filter, 359 UnknownRegistryFilter unknown_filter,
240 PrivateRegistryFilter private_filter) { 360 PrivateRegistryFilter private_filter) {
241 base::StringPiece host = gurl.host_piece(); 361 return GetRegistryLengthImpl(gurl.host_piece(), unknown_filter,
242 if (host.empty()) 362 private_filter);
243 return std::string::npos;
244 if (gurl.HostIsIPAddress())
245 return 0;
246 return GetRegistryLengthImpl(host, unknown_filter, private_filter);
247 } 363 }
248 364
249 size_t GetRegistryLength(base::StringPiece host, 365 bool HostHasRegistryControlledDomain(base::StringPiece host,
250 UnknownRegistryFilter unknown_filter, 366 UnknownRegistryFilter unknown_filter,
251 PrivateRegistryFilter private_filter) { 367 PrivateRegistryFilter private_filter) {
252 url::CanonHostInfo host_info; 368 url::CanonHostInfo host_info;
253 const std::string canon_host(CanonicalizeHost(host, &host_info)); 369 const std::string canon_host(CanonicalizeHost(host, &host_info));
254 if (canon_host.empty()) 370
255 return std::string::npos; 371 size_t rcd_length;
256 if (host_info.IsIPAddress()) 372 switch (host_info.family) {
257 return 0; 373 case url::CanonHostInfo::IPV4:
374 case url::CanonHostInfo::IPV6:
375 // IP addresses don't have R.C.D.'s.
376 return false;
377 case url::CanonHostInfo::BROKEN:
378 // Host is not canonicalizable. Fall back to the slower "permissive"
379 // version.
380 rcd_length =
381 PermissiveGetHostRegistryLength(host, unknown_filter, private_filter);
382 break;
383 case url::CanonHostInfo::NEUTRAL:
384 rcd_length =
385 GetRegistryLengthImpl(canon_host, unknown_filter, private_filter);
386 break;
387 default:
388 NOTREACHED();
389 return false;
390 }
391 return (rcd_length != 0) && (rcd_length != std::string::npos);
392 }
393
394 size_t GetCanonicalHostRegistryLength(base::StringPiece canon_host,
395 UnknownRegistryFilter unknown_filter,
396 PrivateRegistryFilter private_filter) {
397 #ifndef NDEBUG
398 // Ensure passed-in host name is canonical.
399 url::CanonHostInfo host_info;
400 DCHECK_EQ(net::CanonicalizeHost(canon_host, &host_info), canon_host);
401 #endif
402
258 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter); 403 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter);
259 } 404 }
260 405
406 size_t PermissiveGetHostRegistryLength(base::StringPiece host,
407 UnknownRegistryFilter unknown_filter,
408 PrivateRegistryFilter private_filter) {
409 return DoPermissiveGetHostRegistryLength<std::string>(host, unknown_filter,
410 private_filter);
411 }
412
413 size_t PermissiveGetHostRegistryLength(base::StringPiece16 host,
414 UnknownRegistryFilter unknown_filter,
415 PrivateRegistryFilter private_filter) {
416 return DoPermissiveGetHostRegistryLength<base::string16>(host, unknown_filter,
417 private_filter);
418 }
419
261 void SetFindDomainGraph() { 420 void SetFindDomainGraph() {
262 g_graph = kDafsa; 421 g_graph = kDafsa;
263 g_graph_length = sizeof(kDafsa); 422 g_graph_length = sizeof(kDafsa);
264 } 423 }
265 424
266 void SetFindDomainGraph(const unsigned char* domains, size_t length) { 425 void SetFindDomainGraph(const unsigned char* domains, size_t length) {
267 CHECK(domains); 426 CHECK(domains);
268 CHECK_NE(length, 0u); 427 CHECK_NE(length, 0u);
269 g_graph = domains; 428 g_graph = domains;
270 g_graph_length = length; 429 g_graph_length = length;
271 } 430 }
272 431
273 } // namespace registry_controlled_domains 432 } // namespace registry_controlled_domains
274 } // namespace net 433 } // namespace net
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698