OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/url_util.h" | 5 #include "net/base/url_util.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "base/strings/string_util.h" | 8 #include "base/strings/string_util.h" |
9 #include "base/strings/stringprintf.h" | 9 #include "base/strings/stringprintf.h" |
10 #include "net/base/escape.h" | 10 #include "net/base/escape.h" |
| 11 #include "net/base/ip_address_number.h" |
| 12 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
11 #include "url/gurl.h" | 13 #include "url/gurl.h" |
12 #include "url/third_party/mozilla/url_parse.h" | |
13 #include "url/url_canon.h" | 14 #include "url/url_canon.h" |
14 #include "url/url_canon_ip.h" | 15 #include "url/url_canon_ip.h" |
15 | 16 |
16 namespace net { | 17 namespace net { |
17 | 18 |
18 namespace { | 19 namespace { |
19 | 20 |
20 bool IsHostCharAlphanumeric(char c) { | 21 bool IsHostCharAlphanumeric(char c) { |
21 // We can just check lowercase because uppercase characters have already been | 22 // We can just check lowercase because uppercase characters have already been |
22 // normalized. | 23 // normalized. |
(...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
285 } else if (c == '.') { | 286 } else if (c == '.') { |
286 in_component = false; | 287 in_component = false; |
287 } else if (!IsHostCharAlphanumeric(c) && (c != '-') && (c != '_')) { | 288 } else if (!IsHostCharAlphanumeric(c) && (c != '-') && (c != '_')) { |
288 return false; | 289 return false; |
289 } | 290 } |
290 } | 291 } |
291 | 292 |
292 return most_recent_component_started_alphanumeric; | 293 return most_recent_component_started_alphanumeric; |
293 } | 294 } |
294 | 295 |
// Reports whether |hostname| should be treated as non-unique.
// Returns true when |hostname| canonicalizes to an IPv4 or IPv6 address for
// which IsIPAddressReserved() is true, or when it is not an IP address and
// GetRegistryLength() reports a length of 0 with unknown and private
// registries excluded.
// Returns false when canonicalization yields an empty string, when the IP
// literal cannot be parsed, or when the address family is NEUTRAL or BROKEN.
| 296 bool IsHostnameNonUnique(const std::string& hostname) { |
| 297 // CanonicalizeHost requires surrounding brackets to parse an IPv6 address. |
| 298 const std::string host_or_ip = hostname.find(':') != std::string::npos ? |
| 299 "[" + hostname + "]" : hostname; |
| 300 url::CanonHostInfo host_info; |
| 301 std::string canonical_name = CanonicalizeHost(host_or_ip, &host_info); |
| 302 |
| 303 // If canonicalization fails, then the input is truly malformed. However, |
| 304 // to avoid mis-reporting bad inputs as "non-unique", treat them as unique. |
| 305 if (canonical_name.empty()) |
| 306 return false; |
| 307 |
| 308 // If |hostname| is an IP address, check to see if it's in an IANA-reserved |
| 309 // range. |
| 310 if (host_info.IsIPAddress()) { |
| 311 IPAddressNumber host_addr; |
// NOTE(review): |host_info| was filled in while canonicalizing |host_or_ip|
// (which may carry added brackets), yet the out_host range below is applied
// to the un-bracketed |hostname|. Presumably the offsets still line up for
// the inputs seen in practice; confirm, since a mismatch would make the
// parse fail and this function return false.
| 312 if (!ParseIPLiteralToNumber(hostname.substr(host_info.out_host.begin, |
| 313 host_info.out_host.len), |
| 314 &host_addr)) { |
| 315 return false; |
| 316 } |
| 317 switch (host_info.family) { |
| 318 case url::CanonHostInfo::IPV4: |
| 319 case url::CanonHostInfo::IPV6: |
| 320 return IsIPAddressReserved(host_addr); |
| 321 case url::CanonHostInfo::NEUTRAL: |
| 322 case url::CanonHostInfo::BROKEN: |
| 323 return false; |
| 324 } |
| 325 } |
| 326 |
| 327 // Check for a registry controlled portion of |hostname|, ignoring private |
| 328 // registries, as they already chain to ICANN-administered registries, |
| 329 // and explicitly ignoring unknown registries. |
| 330 // |
| 331 // Note: This means that as new gTLDs are introduced on the Internet, they |
| 332 // will be treated as non-unique until the registry controlled domain list |
| 333 // is updated. However, because gTLDs are expected to provide significant |
| 334 // advance notice to deprecate older versions of this code, this is an |
| 335 // acceptable tradeoff. |
| 336 return 0 == registry_controlled_domains::GetRegistryLength( |
| 337 canonical_name, |
| 338 registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
| 339 registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
| 340 } |
| 341 |
// Returns the result of clearing the username, password and ref components
// of |url| through ReplaceComponents(). |url| must be valid; this is
// DCHECKed.
295 GURL SimplifyUrlForRequest(const GURL& url) { | 342 GURL SimplifyUrlForRequest(const GURL& url) { |
296 DCHECK(url.is_valid()); | 343 DCHECK(url.is_valid()); |
297 GURL::Replacements replacements; | 344 GURL::Replacements replacements; |
298 replacements.ClearUsername(); | 345 replacements.ClearUsername(); |
299 replacements.ClearPassword(); | 346 replacements.ClearPassword(); |
300 replacements.ClearRef(); | 347 replacements.ClearRef(); |
301 return url.ReplaceComponents(replacements); | 348 return url.ReplaceComponents(replacements); |
302 } | 349 } |
303 | 350 |
304 void GetIdentityFromURL(const GURL& url, | 351 void GetIdentityFromURL(const GURL& url, |
(...skipping 25 matching lines...) Expand all Loading... |
330 // Here it's possible to get away with faster case-sensitive comparisons | 377 // Here it's possible to get away with faster case-sensitive comparisons |
331 // because the list above is all lowercase, and a GURL's host name will | 378 // because the list above is all lowercase, and a GURL's host name will |
332 // always be canonicalized to lowercase as well. | 379 // always be canonicalized to lowercase as well. |
333 if (base::EndsWith(host, suffix, base::CompareCase::SENSITIVE)) | 380 if (base::EndsWith(host, suffix, base::CompareCase::SENSITIVE)) |
334 return true; | 381 return true; |
335 } | 382 } |
336 return false; | 383 return false; |
337 } | 384 } |
338 | 385 |
339 } // namespace net | 386 } // namespace net |
OLD | NEW |