Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(284)

Side by Side Diff: net/base/registry_controlled_domains/registry_controlled_domain.cc

Issue 2454553002: Revert of Reduce buggy usage of the registry controlled domain service. (Closed)
Patch Set: Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, 5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene,
6 // later modified by others), but almost entirely rewritten for Chrome. 6 // later modified by others), but almost entirely rewritten for Chrome.
7 // (netwerk/dns/src/nsEffectiveTLDService.cpp) 7 // (netwerk/dns/src/nsEffectiveTLDService.cpp)
8 /* ***** BEGIN LICENSE BLOCK ***** 8 /* ***** BEGIN LICENSE BLOCK *****
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10 * 10 *
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 namespace registry_controlled_domains { 59 namespace registry_controlled_domains {
60 60
61 namespace { 61 namespace {
62 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc" 62 #include "net/base/registry_controlled_domains/effective_tld_names-inc.cc"
63 63
64 // See make_dafsa.py for documentation of the generated dafsa byte array. 64 // See make_dafsa.py for documentation of the generated dafsa byte array.
65 65
66 const unsigned char* g_graph = kDafsa; 66 const unsigned char* g_graph = kDafsa;
67 size_t g_graph_length = sizeof(kDafsa); 67 size_t g_graph_length = sizeof(kDafsa);
68 68
69 struct MappedHostComponent {
70 size_t original_begin;
71 size_t original_end;
72
73 size_t canonical_begin;
74 size_t canonical_end;
75 };
76
77 size_t GetRegistryLengthImpl(base::StringPiece host, 69 size_t GetRegistryLengthImpl(base::StringPiece host,
78 UnknownRegistryFilter unknown_filter, 70 UnknownRegistryFilter unknown_filter,
79 PrivateRegistryFilter private_filter) { 71 PrivateRegistryFilter private_filter) {
80 if (host.empty()) 72 DCHECK(!host.empty());
81 return std::string::npos;
82 73
83 // Skip leading dots. 74 // Skip leading dots.
84 const size_t host_check_begin = host.find_first_not_of('.'); 75 const size_t host_check_begin = host.find_first_not_of('.');
85 if (host_check_begin == std::string::npos) 76 if (host_check_begin == std::string::npos)
86 return 0; // Host is only dots. 77 return 0; // Host is only dots.
87 78
88 // A single trailing dot isn't relevant in this determination, but does need 79 // A single trailing dot isn't relevant in this determination, but does need
89 // to be included in the final returned length. 80 // to be included in the final returned length.
90 size_t host_check_len = host.length(); 81 size_t host_check_len = host.length();
91 if (host[host_check_len - 1] == '.') { 82 if (host[host_check_len - 1] == '.') {
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
191 // interface type for all the APIs. 182 // interface type for all the APIs.
192 base::StringPiece GetDomainAndRegistryAsStringPiece( 183 base::StringPiece GetDomainAndRegistryAsStringPiece(
193 const GURL& gurl, 184 const GURL& gurl,
194 PrivateRegistryFilter filter) { 185 PrivateRegistryFilter filter) {
195 base::StringPiece host = gurl.host_piece(); 186 base::StringPiece host = gurl.host_piece();
196 if (host.empty() || gurl.HostIsIPAddress()) 187 if (host.empty() || gurl.HostIsIPAddress())
197 return base::StringPiece(); 188 return base::StringPiece();
198 return GetDomainAndRegistryImpl(host, filter); 189 return GetDomainAndRegistryImpl(host, filter);
199 } 190 }
200 191
201 // These two functions append the given string as-is to the given output,
202 // converting to UTF-8 if necessary.
203 void AppendInvalidString(base::StringPiece str, url::CanonOutput* output) {
204 output->Append(str.data(), static_cast<int>(str.length()));
205 }
206 void AppendInvalidString(base::StringPiece16 str, url::CanonOutput* output) {
207 std::string utf8 = base::UTF16ToUTF8(str);
208 output->Append(utf8.data(), static_cast<int>(utf8.length()));
209 }
210
211 // Backend for PermissiveGetHostRegistryLength that handles both UTF-8 and
212 // UTF-16 input. The template type is the std::string type to use (it makes the
213 // typedefs easier than using the character type).
214 template <typename Str>
215 size_t DoPermissiveGetHostRegistryLength(base::BasicStringPiece<Str> host,
216 UnknownRegistryFilter unknown_filter,
217 PrivateRegistryFilter private_filter) {
218 std::string canonical_host; // Do not modify outside of canon_output.
219 canonical_host.reserve(host.length());
220 url::StdStringCanonOutput canon_output(&canonical_host);
221
222 std::vector<MappedHostComponent> components;
223
224 for (size_t current = 0; current < host.length(); current++) {
225 size_t begin = current;
226
227 // Advance to next "." or end.
228 current = host.find('.', begin);
229 if (current == std::string::npos)
230 current = host.length();
231
232 MappedHostComponent mapping;
233 mapping.original_begin = begin;
234 mapping.original_end = current;
235 mapping.canonical_begin = static_cast<size_t>(canon_output.length());
236
237 // Try to append the canonicalized version of this component.
238 int current_len = static_cast<int>(current - begin);
239 if (!url::CanonicalizeHostSubstring(
240 host.data(), url::Component(static_cast<int>(begin), current_len),
241 &canon_output)) {
242 // Failed to canonicalize this component; append as-is.
243 AppendInvalidString(host.substr(begin, current_len), &canon_output);
244 }
245
246 mapping.canonical_end = static_cast<size_t>(canon_output.length());
247 components.push_back(mapping);
248
249 if (current < host.length())
250 canon_output.push_back('.');
251 }
252 canon_output.Complete();
253
254 size_t canonical_rcd_len =
255 GetRegistryLengthImpl(canonical_host, unknown_filter, private_filter);
256 if (canonical_rcd_len == 0 || canonical_rcd_len == std::string::npos)
257 return canonical_rcd_len; // Error or no registry controlled domain.
258
259 // Find which host component the result started in.
260 size_t canonical_rcd_begin = canonical_host.length() - canonical_rcd_len;
261 for (const auto& mapping : components) {
262 // In the common case, GetRegistryLengthImpl will identify the beginning
263 // of a component and we can just return where that component was in the
264 // original string.
265 if (canonical_rcd_begin == mapping.canonical_begin)
266 return host.length() - mapping.original_begin;
267
268 if (canonical_rcd_begin >= mapping.canonical_end)
269 continue;
270
271 // The registry controlled domain begin was identified as being in the
272 // middle of this dot-separated domain component in the non-canonical
273 // input. This indicates some form of escaped dot, or a non-ASCII
274 // character that was canonicalized to a dot.
275 //
276 // Brute-force search from the end by repeatedly canonicalizing longer
277 // substrings until we get a match for the canonicalized version. This
278 // can't be done with binary search because canonicalization might increase
279 // or decrease the length of the produced string depending on where it's
280 // split. This depends on the canonicalization process not changing the
281 // order of the characters. Punycode can change the order of characters,
282 // but it doesn't work across dots so this is safe.
283
284 // Expected canonical registry controlled domain.
285 base::StringPiece canonical_rcd(&canonical_host[canonical_rcd_begin],
286 canonical_rcd_len);
287
288 for (int current_try = static_cast<int>(mapping.original_end) - 1;
289 current_try >= static_cast<int>(mapping.original_begin);
290 current_try--) {
291 std::string try_string;
292 url::StdStringCanonOutput try_output(&try_string);
293
294 if (!url::CanonicalizeHostSubstring(
295 host.data(),
296 url::Component(
297 current_try,
298 static_cast<int>(mapping.original_end) - current_try),
299 &try_output))
300 continue; // Invalid substring, skip.
301
302 try_output.Complete();
303 if (try_string == canonical_rcd)
304 return host.length() - current_try;
305 }
306 }
307
308 NOTREACHED();
309 return canonical_rcd_len;
310 }
311
312 } // namespace 192 } // namespace
313 193
314 std::string GetDomainAndRegistry(const GURL& gurl, 194 std::string GetDomainAndRegistry(const GURL& gurl,
315 PrivateRegistryFilter filter) { 195 PrivateRegistryFilter filter) {
316 return GetDomainAndRegistryAsStringPiece(gurl, filter).as_string(); 196 return GetDomainAndRegistryAsStringPiece(gurl, filter).as_string();
317 } 197 }
318 198
319 std::string GetDomainAndRegistry(base::StringPiece host, 199 std::string GetDomainAndRegistry(base::StringPiece host,
320 PrivateRegistryFilter filter) { 200 PrivateRegistryFilter filter) {
321 url::CanonHostInfo host_info; 201 url::CanonHostInfo host_info;
(...skipping 29 matching lines...) Expand all
351 bool SameDomainOrHost(const url::Origin& origin1, 231 bool SameDomainOrHost(const url::Origin& origin1,
352 const url::Origin& origin2, 232 const url::Origin& origin2,
353 PrivateRegistryFilter filter) { 233 PrivateRegistryFilter filter) {
354 return SameDomainOrHost(origin1.GetURL(), origin2.GetURL(), filter); 234 return SameDomainOrHost(origin1.GetURL(), origin2.GetURL(), filter);
355 } 235 }
356 236
357 size_t GetRegistryLength( 237 size_t GetRegistryLength(
358 const GURL& gurl, 238 const GURL& gurl,
359 UnknownRegistryFilter unknown_filter, 239 UnknownRegistryFilter unknown_filter,
360 PrivateRegistryFilter private_filter) { 240 PrivateRegistryFilter private_filter) {
361 return GetRegistryLengthImpl(gurl.host_piece(), unknown_filter, 241 base::StringPiece host = gurl.host_piece();
362 private_filter); 242 if (host.empty())
243 return std::string::npos;
244 if (gurl.HostIsIPAddress())
245 return 0;
246 return GetRegistryLengthImpl(host, unknown_filter, private_filter);
363 } 247 }
364 248
365 bool HostHasRegistryControlledDomain(base::StringPiece host, 249 size_t GetRegistryLength(base::StringPiece host,
366 UnknownRegistryFilter unknown_filter, 250 UnknownRegistryFilter unknown_filter,
367 PrivateRegistryFilter private_filter) { 251 PrivateRegistryFilter private_filter) {
368 url::CanonHostInfo host_info; 252 url::CanonHostInfo host_info;
369 const std::string canon_host(CanonicalizeHost(host, &host_info)); 253 const std::string canon_host(CanonicalizeHost(host, &host_info));
370 254 if (canon_host.empty())
371 size_t rcd_length; 255 return std::string::npos;
372 switch (host_info.family) { 256 if (host_info.IsIPAddress())
373 case url::CanonHostInfo::IPV4: 257 return 0;
374 case url::CanonHostInfo::IPV6:
375 // IP addresses don't have R.C.D.'s.
376 return false;
377 case url::CanonHostInfo::BROKEN:
378 // Host is not canonicalizable. Fall back to the slower "permissive"
379 // version.
380 rcd_length =
381 PermissiveGetHostRegistryLength(host, unknown_filter, private_filter);
382 break;
383 case url::CanonHostInfo::NEUTRAL:
384 rcd_length =
385 GetRegistryLengthImpl(canon_host, unknown_filter, private_filter);
386 break;
387 default:
388 NOTREACHED();
389 return false;
390 }
391 return (rcd_length != 0) && (rcd_length != std::string::npos);
392 }
393
394 size_t GetCanonicalHostRegistryLength(base::StringPiece canon_host,
395 UnknownRegistryFilter unknown_filter,
396 PrivateRegistryFilter private_filter) {
397 #ifndef NDEBUG
398 // Ensure passed-in host name is canonical.
399 url::CanonHostInfo host_info;
400 DCHECK_EQ(net::CanonicalizeHost(canon_host, &host_info), canon_host);
401 #endif
402
403 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter); 258 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter);
404 } 259 }
405 260
406 size_t PermissiveGetHostRegistryLength(base::StringPiece host,
407 UnknownRegistryFilter unknown_filter,
408 PrivateRegistryFilter private_filter) {
409 return DoPermissiveGetHostRegistryLength<std::string>(host, unknown_filter,
410 private_filter);
411 }
412
413 size_t PermissiveGetHostRegistryLength(base::StringPiece16 host,
414 UnknownRegistryFilter unknown_filter,
415 PrivateRegistryFilter private_filter) {
416 return DoPermissiveGetHostRegistryLength<base::string16>(host, unknown_filter,
417 private_filter);
418 }
419
420 void SetFindDomainGraph() { 261 void SetFindDomainGraph() {
421 g_graph = kDafsa; 262 g_graph = kDafsa;
422 g_graph_length = sizeof(kDafsa); 263 g_graph_length = sizeof(kDafsa);
423 } 264 }
424 265
425 void SetFindDomainGraph(const unsigned char* domains, size_t length) { 266 void SetFindDomainGraph(const unsigned char* domains, size_t length) {
426 CHECK(domains); 267 CHECK(domains);
427 CHECK_NE(length, 0u); 268 CHECK_NE(length, 0u);
428 g_graph = domains; 269 g_graph = domains;
429 g_graph_length = length; 270 g_graph_length = length;
430 } 271 }
431 272
432 } // namespace registry_controlled_domains 273 } // namespace registry_controlled_domains
433 } // namespace net 274 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698