Index: url/url_canon.h |
diff --git a/url/url_canon.h b/url/url_canon.h |
index 95d53453f64362efc98d8141c11691573eaae165..c4852e490b05b0286b463ab23eca0c238eeec52a 100644 |
--- a/url/url_canon.h |
+++ b/url/url_canon.h |
@@ -379,6 +379,33 @@ URL_EXPORT void CanonicalizeHostVerbose(const base::char16* spec, |
CanonOutput* output, |
CanonHostInfo* host_info); |
+// Canonicalizes a string according to the host canonicalization rules. Unlike |
+// CanonicalizeHost, this will not check for IP addresses, since doing so can |
+// change the meaning (and canonicalization) of the components. This makes it |
+// safe to call this on subcomponents of a host name without corruption. |
+// |
+// As an example, "01.02.03.04.com" is a canonical hostname. If you call |
+// CanonicalizeHost on the substring "01.02.03.04", it will get "fixed" to |
+// "1.2.3.4", which will produce an invalid host name when reassembled. This |
+// can happen more often than one might think because any number by itself is |
+// considered an IP address; for example, "5" canonicalizes to "0.0.0.5". |
+// |
+// Be careful: Because Punycode works on each dot-separated substring as a |
+// unit, you should only pass this function substrings that represent complete |
+// dot-separated subcomponents of the original host. Even if you have ASCII |
+// input, percent-escaped characters will have different meanings if split in |
+// the middle. |
+// |
+// Returns true if the host was valid. This function will treat a 0-length |
+// host as valid (because it's designed to be used for substrings) while the |
+// full version above will mark empty hosts as broken. |
+URL_EXPORT bool CanonicalizeHostSubstring(const char* spec, |
+ const Component& host, |
+ CanonOutput* output); |
+URL_EXPORT bool CanonicalizeHostSubstring(const base::char16* spec, |
+ const Component& host, |
+ CanonOutput* output); |
+ |
// IP addresses. |
// |
// Tries to interpret the given host name as an IPv4 or IPv6 address. If it is |