Index: components/url_formatter/url_formatter.h |
diff --git a/components/url_formatter/url_formatter.h b/components/url_formatter/url_formatter.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..01c8795ce0662e0edc465e1505e515923b26173b |
--- /dev/null |
+++ b/components/url_formatter/url_formatter.h |
@@ -0,0 +1,155 @@ |
+// Copyright 2015 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+// url_formatter contains routines for formatting URLs in a way that can be |
+// safely and securely displayed to users. For example, it is responsible |
+// for determining when to convert an IDN A-Label (e.g. "xn--[something]") |
+// into the IDN U-Label. |
+// |
+// Note that this formatting is only intended for display purposes; it would |
+// be insecure and insufficient to make comparisons solely on formatted URLs |
+// (that is, it should not be used for normalizing URLs for comparison for |
+// security decisions). |
+ |
+#ifndef COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_ |
+#define COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_ |
+ |
+#include <stdint.h> |
+ |
+#include <string> |
+#include <vector> |
+ |
+#include "base/strings/string16.h" |
+#include "base/strings/utf_offset_string_conversions.h" |
+#include "net/base/escape.h" |
+ |
+class GURL; |
+ |
+namespace url { |
+struct Parsed; |
+}  // namespace url |
+ |
+namespace url_formatter { |
+ |
+// Types FormatUrl uses to specify how certain parts of the url are handled. |
+using FormatUrlType = uint32_t; |
+using FormatUrlTypes = uint32_t; |
+ |
+// Nothing is omitted. |
+extern const FormatUrlType kFormatUrlOmitNothing; |
+ |
+// If set, any username and password are removed. |
+extern const FormatUrlType kFormatUrlOmitUsernamePassword; |
+ |
+// If the scheme is 'http://', it's removed. |
+extern const FormatUrlType kFormatUrlOmitHTTP; |
+ |
+// Omits the path if it is just a slash and there is no query or ref. This is |
+// meaningful for non-file "standard" URLs. |
+extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname; |
+ |
+// Convenience for omitting all unnecessary types. |
+extern const FormatUrlType kFormatUrlOmitAll; |
+ |
+// Creates a string representation of |url|. The IDN host name may be in Unicode |
+// if |languages| accepts the Unicode representation. |format_types| is a |
+// bitmask of FormatUrlType flags; see the kFormatUrl* constants for details. |
+// |unescape_rules| defines how to clean the URL for human readability. You will |
+// generally want |UnescapeRule::SPACES| for display to the user if you can |
+// handle spaces, or |UnescapeRule::NORMAL| if not. If the path and query parts |
+// seem to be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8. |
+// |
+// The last three parameters may be NULL. |
+// |
+// |new_parsed| will be set to the parsing parameters of the resultant URL. |
+// |
+// |prefix_end| will be the length before the hostname of the resultant URL. |
+// FormatUrl() takes a single offset; FormatUrlWithOffsets() takes a vector. |
+// |offset[s]_for_adjustment| specifies one or more offsets into the original |
+// URL, representing insertion or selection points between characters: if the |
+// input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is |
+// between the scheme and the host, and offset 15 is after the end of the URL. |
+// Valid input offsets range from 0 to the length of the input URL string. On |
+// exit, each offset will have been modified to reflect any changes made to the |
+// output string. For example, if |url| is "http://a:b@c.com/", |format_types| |
+// includes kFormatUrlOmitUsernamePassword, and an offset is 12 (pointing |
+// between 'c' and '.'), then on return the output string will be |
+// "http://c.com/" and the offset will be 8. If an offset cannot be successfully |
+// adjusted (e.g. because it points into the middle of a component that was |
+// entirely removed or into the middle of an encoding sequence), it will be set |
+// to base::string16::npos. For consistency, if an input offset points between |
+// the scheme and the username/password, and both are removed, on output this |
+// offset will be 0 rather than npos; this means that offsets at the starts and |
+// ends of removed components are always transformed the same way regardless of |
+// what other components are adjacent. |
+base::string16 FormatUrl(const GURL& url, |
+ const std::string& languages, |
+ FormatUrlTypes format_types, |
+ net::UnescapeRule::Type unescape_rules, |
+ url::Parsed* new_parsed, |
+ size_t* prefix_end, |
+ size_t* offset_for_adjustment); |
+ |
+base::string16 FormatUrlWithOffsets( |
+ const GURL& url, |
+ const std::string& languages, |
+ FormatUrlTypes format_types, |
+ net::UnescapeRule::Type unescape_rules, |
+ url::Parsed* new_parsed, |
+ size_t* prefix_end, |
+ std::vector<size_t>* offsets_for_adjustment); |
+ |
+// Like FormatUrl() and FormatUrlWithOffsets() above, except that it takes |
+// |adjustments| rather than |offset[s]_for_adjustment|. On return, |
+// |adjustments| will have been set to reflect all the transformations that |
+// were applied to |url| to convert it into the returned value. |
+base::string16 FormatUrlWithAdjustments( |
+ const GURL& url, |
+ const std::string& languages, |
+ FormatUrlTypes format_types, |
+ net::UnescapeRule::Type unescape_rules, |
+ url::Parsed* new_parsed, |
+ size_t* prefix_end, |
+ base::OffsetAdjuster::Adjustments* adjustments); |
+ |
+// Convenience overload of FormatUrl() using format_types = kFormatUrlOmitAll |
+// and unescape = SPACES, the typical flags for "URLs to display to the user". |
+// Be cautious using this for URLs that will be parsed or sent to other apps. |
+inline base::string16 FormatUrl(const GURL& url, const std::string& languages) { |
+  return FormatUrl(url, languages, kFormatUrlOmitAll, |
+                   net::UnescapeRule::SPACES, /*new_parsed=*/nullptr, |
+                   /*prefix_end=*/nullptr, /*offset_for_adjustment=*/nullptr); |
+} |
+ |
+// Returns whether FormatUrl() would strip a trailing slash from |url| when the |
+// format flags include kFormatUrlOmitTrailingSlashOnBareHostname. |
+bool CanStripTrailingSlash(const GURL& url); |
+ |
+// Formats the host in |url| and appends it to |output|. |languages| is the |
+// same accept languages component that ElideURL() takes. |
+void AppendFormattedHost(const GURL& url, |
+ const std::string& languages, |
+ base::string16* output); |
+ |
+// Converts the given host name to unicode characters. This can be called for |
+// any host name; if the input is not IDN or is invalid in some way, we'll just |
+// return the ASCII source so it is still usable. |
+// |
+// The input should be the canonicalized ASCII host name from GURL. This |
+// function does NOT accept UTF-8! |
+// |
+// |languages| is a comma separated list of ISO 639 language codes. It |
+// is used to determine whether a hostname is 'comprehensible' to a user |
+// who understands the languages listed. |host| will be converted to a |
+// human-readable form (Unicode) ONLY when each component of |host| is |
+// regarded as 'comprehensible'. Script-mixing is not allowed except that |
+// Latin letters in the ASCII range can be mixed with a limited set of |
+// script-language pairs (currently Han, Kana and Hangul for zh, ja and ko). |
+// When |languages| is empty, even that mixing is not allowed. |
+base::string16 IDNToUnicode(const std::string& host, |
+ const std::string& languages); |
+ |
+}  // namespace url_formatter |
+ |
+#endif // COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_ |