| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // url_formatter contains routines for formatting URLs in a way that can be | 5 // url_formatter contains routines for formatting URLs in a way that can be |
| 6 // safely and securely displayed to users. For example, it is responsible | 6 // safely and securely displayed to users. For example, it is responsible |
| 7 // for determining when to convert an IDN A-Label (e.g. "xn--[something]") | 7 // for determining when to convert an IDN A-Label (e.g. "xn--[something]") |
| 8 // into the IDN U-Label. | 8 // into the IDN U-Label. |
| 9 // | 9 // |
| 10 // Note that this formatting is only intended for display purposes; it would | 10 // Note that this formatting is only intended for display purposes; it would |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 47 extern const FormatUrlType kFormatUrlOmitHTTP; | 47 extern const FormatUrlType kFormatUrlOmitHTTP; |
| 48 | 48 |
| 49 // Omits the path if it is just a slash and there is no query or ref. This is | 49 // Omits the path if it is just a slash and there is no query or ref. This is |
| 50 // meaningful for non-file "standard" URLs. | 50 // meaningful for non-file "standard" URLs. |
| 51 extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname; | 51 extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname; |
| 52 | 52 |
| 53 // Convenience for omitting all unnecessary types. | 53 // Convenience for omitting all unnecessary types. |
| 54 extern const FormatUrlType kFormatUrlOmitAll; | 54 extern const FormatUrlType kFormatUrlOmitAll; |
| 55 | 55 |
| 56 // Creates a string representation of |url|. The IDN host name is turned to | 56 // Creates a string representation of |url|. The IDN host name is turned to |
| 57 // Unicode if the Unicode representation is deemed safe. |languages| is not | 57 // Unicode if the Unicode representation is deemed safe. |format_type| is a |
| 58 // used any more and will be removed. |format_type| is a bitmask | 58 // bitmask of FormatUrlTypes, see it for details. |unescape_rules| defines how |
| 59 // of FormatUrlTypes, see it for details. |unescape_rules| defines how to clean | 59 // to clean the URL for human readability. You will generally want |
| 60 // the URL for human readability. You will generally want |UnescapeRule::SPACES| | 60 // |UnescapeRule::SPACES| for display to the user if you can handle spaces, or |
| 61 // for display to the user if you can handle spaces, or |UnescapeRule::NORMAL| | 61 // |UnescapeRule::NORMAL| if not. If the path part and the query part seem to |
| 62 // if not. If the path part and the query part seem to be encoded in %-encoded | 62 // be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8. |
| 63 // UTF-8, decodes %-encoding and UTF-8. | |
| 64 // | 63 // |
| 65 // The last three parameters may be NULL. | 64 // The last three parameters may be NULL. |
| 66 // | 65 // |
| 67 // |new_parsed| will be set to the parsing parameters of the resultant URL. | 66 // |new_parsed| will be set to the parsing parameters of the resultant URL. |
| 68 // | 67 // |
| 69 // |prefix_end| will be the length before the hostname of the resultant URL. | 68 // |prefix_end| will be the length before the hostname of the resultant URL. |
| 70 // | 69 // |
| 71 // |offset[s]_for_adjustment| specifies one or more offsets into the original | 70 // |offset[s]_for_adjustment| specifies one or more offsets into the original |
| 72 // URL, representing insertion or selection points between characters: if the | 71 // URL, representing insertion or selection points between characters: if the |
| 73 // input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is | 72 // input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is |
| 74 // between the scheme and the host, and offset 15 is after the end of the URL. | 73 // between the scheme and the host, and offset 15 is after the end of the URL. |
| 75 // Valid input offsets range from 0 to the length of the input URL string. On | 74 // Valid input offsets range from 0 to the length of the input URL string. On |
| 76 // exit, each offset will have been modified to reflect any changes made to the | 75 // exit, each offset will have been modified to reflect any changes made to the |
| 77 // output string. For example, if |url| is "http://a:b@c.com/", | 76 // output string. For example, if |url| is "http://a:b@c.com/", |
| 78 // the username/password is omitted, and an offset is 12 (pointing between 'c' | 77 // the username/password is omitted, and an offset is 12 (pointing between 'c' |
| 79 // and '.'), then on return the output string will be "http://c.com/" and the | 78 // and '.'), then on return the output string will be "http://c.com/" and the |
| 80 // offset will be 8. If an offset cannot be successfully adjusted (e.g. because | 79 // offset will be 8. If an offset cannot be successfully adjusted (e.g. because |
| 81 // it points into the middle of a component that was entirely removed or into | 80 // it points into the middle of a component that was entirely removed or into |
| 82 // the middle of an encoding sequence), it will be set to base::string16::npos. | 81 // the middle of an encoding sequence), it will be set to base::string16::npos. |
| 83 // For consistency, if an input offset points between the scheme and the | 82 // For consistency, if an input offset points between the scheme and the |
| 84 // username/password, and both are removed, on output this offset will be 0 | 83 // username/password, and both are removed, on output this offset will be 0 |
| 85 // rather than npos; this means that offsets at the starts and ends of removed | 84 // rather than npos; this means that offsets at the starts and ends of removed |
| 86 // components are always transformed the same way regardless of what other | 85 // components are always transformed the same way regardless of what other |
| 87 // components are adjacent. | 86 // components are adjacent. |
| 88 base::string16 FormatUrl(const GURL& url, | 87 base::string16 FormatUrl(const GURL& url, |
| 89 const std::string& languages, | |
| 90 FormatUrlTypes format_types, | 88 FormatUrlTypes format_types, |
| 91 net::UnescapeRule::Type unescape_rules, | 89 net::UnescapeRule::Type unescape_rules, |
| 92 url::Parsed* new_parsed, | 90 url::Parsed* new_parsed, |
| 93 size_t* prefix_end, | 91 size_t* prefix_end, |
| 94 size_t* offset_for_adjustment); | 92 size_t* offset_for_adjustment); |
| 95 | 93 |
| 96 base::string16 FormatUrlWithOffsets( | 94 base::string16 FormatUrlWithOffsets( |
| 97 const GURL& url, | 95 const GURL& url, |
| 98 const std::string& languages, | |
| 99 FormatUrlTypes format_types, | 96 FormatUrlTypes format_types, |
| 100 net::UnescapeRule::Type unescape_rules, | 97 net::UnescapeRule::Type unescape_rules, |
| 101 url::Parsed* new_parsed, | 98 url::Parsed* new_parsed, |
| 102 size_t* prefix_end, | 99 size_t* prefix_end, |
| 103 std::vector<size_t>* offsets_for_adjustment); | 100 std::vector<size_t>* offsets_for_adjustment); |
| 104 | 101 |
| 105 // This function is like those above except it takes |adjustments| rather | 102 // This function is like those above except it takes |adjustments| rather |
| 106 // than |offset[s]_for_adjustment|. |adjustments| will be set to reflect all | 103 // than |offset[s]_for_adjustment|. |adjustments| will be set to reflect all |
| 107 // the transformations that happened to |url| to convert it into the returned | 104 // the transformations that happened to |url| to convert it into the returned |
| 108 // value. | 105 // value. |
| 109 base::string16 FormatUrlWithAdjustments( | 106 base::string16 FormatUrlWithAdjustments( |
| 110 const GURL& url, | 107 const GURL& url, |
| 111 const std::string& languages, | |
| 112 FormatUrlTypes format_types, | 108 FormatUrlTypes format_types, |
| 113 net::UnescapeRule::Type unescape_rules, | 109 net::UnescapeRule::Type unescape_rules, |
| 114 url::Parsed* new_parsed, | 110 url::Parsed* new_parsed, |
| 115 size_t* prefix_end, | 111 size_t* prefix_end, |
| 116 base::OffsetAdjuster::Adjustments* adjustments); | 112 base::OffsetAdjuster::Adjustments* adjustments); |
| 117 | 113 |
| 118 // This is a convenience function for FormatUrl() with | 114 // This is a convenience function for FormatUrl() with |
| 119 // format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical | 115 // format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical |
| 120 // set of flags for "URLs to display to the user". You should be cautious about | 116 // set of flags for "URLs to display to the user". You should be cautious about |
| 121 // using this for URLs which will be parsed or sent to other applications. | 117 // using this for URLs which will be parsed or sent to other applications. |
| 122 inline base::string16 FormatUrl(const GURL& url, const std::string& languages) { | 118 inline base::string16 FormatUrl(const GURL& url) { |
| 123 return FormatUrl(url, languages, kFormatUrlOmitAll, net::UnescapeRule::SPACES, | 119 return FormatUrl(url, kFormatUrlOmitAll, net::UnescapeRule::SPACES, |
| 124 nullptr, nullptr, nullptr); | 120 nullptr, nullptr, nullptr); |
| 125 } | 121 } |
| 126 | 122 |
| 127 // Returns whether FormatUrl() would strip a trailing slash from |url|, given a | 123 // Returns whether FormatUrl() would strip a trailing slash from |url|, given a |
| 128 // format flag including kFormatUrlOmitTrailingSlashOnBareHostname. | 124 // format flag including kFormatUrlOmitTrailingSlashOnBareHostname. |
| 129 bool CanStripTrailingSlash(const GURL& url); | 125 bool CanStripTrailingSlash(const GURL& url); |
| 130 | 126 |
| 131 // Formats the host in |url| and appends it to |output|. The host formatter | 127 // Formats the host in |url| and appends it to |output|. |
| 132 // takes the same accept languages component as ElideURL(), but it does not | 128 void AppendFormattedHost(const GURL& url, base::string16* output); |
| 133 // affect the result. It'll be removed. | |
| 134 void AppendFormattedHost(const GURL& url, | |
| 135 const std::string& languages, | |
| 136 base::string16* output); | |
| 137 | 129 |
| 138 // Converts the given host name to unicode characters. This can be called for | 130 // Converts the given host name to unicode characters. This can be called for |
| 139 // any host name; if the input is not IDN or is invalid in some way, we'll just | 131 // any host name; if the input is not IDN or is invalid in some way, we'll just |
| 140 // return the ASCII source so it is still usable. | 132 // return the ASCII source so it is still usable. |
| 141 // | 133 // |
| 142 // The input should be the canonicalized ASCII host name from GURL. This | 134 // The input should be the canonicalized ASCII host name from GURL. This |
| 143 // function does NOT accept UTF-8! | 135 // function does NOT accept UTF-8! |
| 144 // |languages| is not used any more and will be removed. | 136 base::string16 IDNToUnicode(const std::string& host); |
| 145 base::string16 IDNToUnicode(const std::string& host, | |
| 146 const std::string& languages); | |
| 147 | 137 |
| 148 // If |text| starts with "www." it is removed, otherwise |text| is returned | 138 // If |text| starts with "www." it is removed, otherwise |text| is returned |
| 149 // unmodified. | 139 // unmodified. |
| 150 base::string16 StripWWW(const base::string16& text); | 140 base::string16 StripWWW(const base::string16& text); |
| 151 | 141 |
| 152 // Runs |url|'s host through StripWWW(). |url| must be valid. | 142 // Runs |url|'s host through StripWWW(). |url| must be valid. |
| 153 base::string16 StripWWWFromHost(const GURL& url); | 143 base::string16 StripWWWFromHost(const GURL& url); |
| 154 | 144 |
| 155 } // namespace url_formatter | 145 } // namespace url_formatter |
| 156 | 146 |
| 157 #endif // COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_ | 147 #endif // COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_ |
| OLD | NEW |