Index: net/base/net_util.h |
diff --git a/net/base/net_util.h b/net/base/net_util.h |
index 597e52e2f71efb7ca43d30687c7ee9b2d21f9d55..628abe2095f67d89840f1cd3ae8731e7e51f29ec 100644 |
--- a/net/base/net_util.h |
+++ b/net/base/net_util.h |
@@ -48,10 +48,30 @@ |
// Keep this in sync. |
typedef std::vector<unsigned char> IPAddressNumber; |
+// Used by FormatUrl() to specify handling of certain parts of the URL. |
+typedef uint32_t FormatUrlType; |
+typedef uint32_t FormatUrlTypes; |
+ |
#if defined(OS_WIN) |
// Bluetooth address size. Windows Bluetooth is supported via winsock. |
static const size_t kBluetoothAddressSize = 6; |
#endif |
+ |
+// Nothing is omitted. |
+NET_EXPORT extern const FormatUrlType kFormatUrlOmitNothing; |
+ |
+// If set, any username and password are removed. |
+NET_EXPORT extern const FormatUrlType kFormatUrlOmitUsernamePassword; |
+ |
+// If the scheme is 'http://', it's removed. |
+NET_EXPORT extern const FormatUrlType kFormatUrlOmitHTTP; |
+ |
+// Omits the path if it is just a slash and there is no query or ref. This is |
+// meaningful for non-file "standard" URLs. |
+NET_EXPORT extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname; |
+ |
+// Convenience for omitting all unnecessary types. |
+NET_EXPORT extern const FormatUrlType kFormatUrlOmitAll; |
// Splits an input of the form <host>[":"<port>] into its consitituent parts. |
// Saves the result into |*host| and |*port|. If the input did not have |
@@ -127,6 +147,24 @@ |
// Returns either the host from |url|, or, if the host is empty, the full spec. |
NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url); |
+// Converts the given host name to unicode characters. This can be called for |
+// any host name; if the input is not IDN or is invalid in some way, we'll just |
+// return the ASCII source so it is still usable. |
+// |
+// The input should be the canonicalized ASCII host name from GURL. This |
+// function does NOT accept UTF-8! |
+// |
+// |languages| is a comma separated list of ISO 639 language codes. It |
+// is used to determine whether a hostname is 'comprehensible' to a user |
+// who understands languages listed. |host| will be converted to a |
+// human-readable form (Unicode) ONLY when each component of |host| is |
+// regarded as 'comprehensible'. Script-mixing is not allowed except that |
+// Latin letters in the ASCII range can be mixed with a limited set of |
+// script-language pairs (currently Han, Kana and Hangul for zh, ja and ko). |
+// When |languages| is empty, even that mixing is not allowed. |
+NET_EXPORT base::string16 IDNToUnicode(const std::string& host, |
+ const std::string& languages); |
+ |
// Canonicalizes |host| and returns it. Also fills |host_info| with |
// IP address information. |host_info| must not be NULL. |
NET_EXPORT std::string CanonicalizeHost(const std::string& host, |
@@ -175,6 +213,84 @@ |
// Set socket to non-blocking mode |
NET_EXPORT int SetNonBlocking(int fd); |
+ |
+// Formats the host in |url| and appends it to |output|. |languages| is the same |
+// accept languages component that ElideURL() takes. |
+NET_EXPORT void AppendFormattedHost(const GURL& url, |
+ const std::string& languages, |
+ base::string16* output); |
+ |
+// Creates a string representation of |url|. The IDN host name may be in Unicode |
+// if |languages| accepts the Unicode representation. |format_types| is a |
+// bitmask of FormatUrlType flags. |unescape_rules| defines how to clean the URL |
+// for human readability. You will generally want |UnescapeRule::SPACES| for |
+// display to the user if you can handle spaces, or |UnescapeRule::NORMAL| if |
+// not. If the path part and the query part seem to be encoded in %-encoded |
+// UTF-8, decodes %-encoding and UTF-8. |
+// |
+// The last three parameters may be NULL. |
+// |
+// |new_parsed| will be set to the parsing parameters of the resultant URL. |
+// |
+// |prefix_end| will be the length before the hostname of the resultant URL. |
+// |
+// |offset[s]_for_adjustment| specifies one or more offsets into the original |
+// URL, representing insertion or selection points between characters: if the |
+// input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is |
+// between the scheme and the host, and offset 15 is after the end of the URL. |
+// Valid input offsets range from 0 to the length of the input URL string. On |
+// exit, each offset will have been modified to reflect any changes made to the |
+// output string. For example, if |url| is "http://a:b@c.com/", |format_types| |
+// includes kFormatUrlOmitUsernamePassword, and an offset is 12 (pointing |
+// between 'c' and '.'), then on return the output string will be |
+// "http://c.com/" and the offset will be 8. If an offset cannot be successfully |
+// adjusted (e.g. because it points into the middle of a component that was |
+// entirely removed or into the middle of an encoding sequence), it will be set |
+// to base::string16::npos. For consistency, if an input offset points between |
+// the scheme and the username/password, and both are removed, on output this |
+// offset will be 0 rather than npos; this means that offsets at the starts and |
+// ends of removed components are always transformed the same way regardless of |
+// what other components are adjacent. |
+NET_EXPORT base::string16 FormatUrl(const GURL& url, |
+ const std::string& languages, |
+ FormatUrlTypes format_types, |
+ UnescapeRule::Type unescape_rules, |
+ url::Parsed* new_parsed, |
+ size_t* prefix_end, |
+ size_t* offset_for_adjustment); |
+NET_EXPORT base::string16 FormatUrlWithOffsets( |
+ const GURL& url, |
+ const std::string& languages, |
+ FormatUrlTypes format_types, |
+ UnescapeRule::Type unescape_rules, |
+ url::Parsed* new_parsed, |
+ size_t* prefix_end, |
+ std::vector<size_t>* offsets_for_adjustment); |
+// This function is like FormatUrl() and FormatUrlWithOffsets() except it takes |
+// |adjustments| rather than |offset[s]_for_adjustment|. |adjustments| will be |
+// set to reflect all the transformations that happened to |url| to convert it |
+// into the returned value. |
+NET_EXPORT base::string16 FormatUrlWithAdjustments( |
+ const GURL& url, |
+ const std::string& languages, |
+ FormatUrlTypes format_types, |
+ UnescapeRule::Type unescape_rules, |
+ url::Parsed* new_parsed, |
+ size_t* prefix_end, |
+ base::OffsetAdjuster::Adjustments* adjustments); |
+ |
+// Convenience overload of FormatUrl() with |format_types| = kFormatUrlOmitAll, |
+// |unescape_rules| = UnescapeRule::SPACES and no output parameters. This is the |
+// typical set of flags for "URLs to display to the user". Be cautious about |
+// using this for URLs which will be parsed or sent to other applications. |
+inline base::string16 FormatUrl(const GURL& url, const std::string& languages) { |
+ return FormatUrl(url, languages, kFormatUrlOmitAll, UnescapeRule::SPACES, |
+ NULL, NULL, NULL); |
+} |
+ |
+// Returns whether FormatUrl() would strip a trailing slash from |url|, given |
+// format types that include kFormatUrlOmitTrailingSlashOnBareHostname. |
+NET_EXPORT bool CanStripTrailingSlash(const GURL& url); |
// Strip the portions of |url| that aren't core to the network request. |
// - user name / password |