OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // url_formatter contains routines for formatting URLs in a way that can be | 5 // url_formatter contains routines for formatting URLs in a way that can be |
6 // safely and securely displayed to users. For example, it is responsible | 6 // safely and securely displayed to users. For example, it is responsible |
7 // for determining when to convert an IDN A-Label (e.g. "xn--[something]") | 7 // for determining when to convert an IDN A-Label (e.g. "xn--[something]") |
8 // into the IDN U-Label. | 8 // into the IDN U-Label. |
9 // | 9 // |
10 // Note that this formatting is only intended for display purposes; it would | 10 // Note that this formatting is only intended for display purposes; it would |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
47 extern const FormatUrlType kFormatUrlOmitHTTP; | 47 extern const FormatUrlType kFormatUrlOmitHTTP; |
48 | 48 |
49 // Omits the path if it is just a slash and there is no query or ref. This is | 49 // Omits the path if it is just a slash and there is no query or ref. This is |
50 // meaningful for non-file "standard" URLs. | 50 // meaningful for non-file "standard" URLs. |
51 extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname; | 51 extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname; |
52 | 52 |
53 // Convenience for omitting all unnecessary types. | 53 // Convenience for omitting all unnecessary types. |
54 extern const FormatUrlType kFormatUrlOmitAll; | 54 extern const FormatUrlType kFormatUrlOmitAll; |
55 | 55 |
56 // Creates a string representation of |url|. The IDN host name is turned to | 56 // Creates a string representation of |url|. The IDN host name is turned to |
57 // Unicode if the Unicode representation is deemed safe. |languages| is not | 57 // Unicode if the Unicode representation is deemed safe. |format_type| is a |
58 // used any more and will be removed. |format_type| is a bitmask | 58 // bitmask of FormatUrlTypes, see it for details. |unescape_rules| defines how |
59 // of FormatUrlTypes, see it for details. |unescape_rules| defines how to clean | 59 // to clean the URL for human readability. You will generally want |
60 // the URL for human readability. You will generally want |UnescapeRule::SPACES| | 60 // |UnescapeRule::SPACES| for display to the user if you can handle spaces, or |
61 // for display to the user if you can handle spaces, or |UnescapeRule::NORMAL| | 61 // |UnescapeRule::NORMAL| if not. If the path part and the query part seem to |
62 // if not. If the path part and the query part seem to be encoded in %-encoded | 62 // be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8. |
63 // UTF-8, decodes %-encoding and UTF-8. | |
64 // | 63 // |
65 // The last three parameters may be NULL. | 64 // The last three parameters may be NULL. |
66 // | 65 // |
67 // |new_parsed| will be set to the parsing parameters of the resultant URL. | 66 // |new_parsed| will be set to the parsing parameters of the resultant URL. |
68 // | 67 // |
69 // |prefix_end| will be the length before the hostname of the resultant URL. | 68 // |prefix_end| will be the length before the hostname of the resultant URL. |
70 // | 69 // |
71 // |offset[s]_for_adjustment| specifies one or more offsets into the original | 70 // |offset[s]_for_adjustment| specifies one or more offsets into the original |
72 // URL, representing insertion or selection points between characters: if the | 71 // URL, representing insertion or selection points between characters: if the |
73 // input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is | 72 // input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is |
74 // between the scheme and the host, and offset 15 is after the end of the URL. | 73 // between the scheme and the host, and offset 15 is after the end of the URL. |
75 // Valid input offsets range from 0 to the length of the input URL string. On | 74 // Valid input offsets range from 0 to the length of the input URL string. On |
76 // exit, each offset will have been modified to reflect any changes made to the | 75 // exit, each offset will have been modified to reflect any changes made to the |
77 // output string. For example, if |url| is "http://a:b@c.com/", | 76 // output string. For example, if |url| is "http://a:b@c.com/", |
78 // |omit_username_password| is true, and an offset is 12 (pointing between 'c' | 77 // |omit_username_password| is true, and an offset is 12 (pointing between 'c' |
79 // and '.'), then on return the output string will be "http://c.com/" and the | 78 // and '.'), then on return the output string will be "http://c.com/" and the |
80 // offset will be 8. If an offset cannot be successfully adjusted (e.g. because | 79 // offset will be 8. If an offset cannot be successfully adjusted (e.g. because |
81 // it points into the middle of a component that was entirely removed or into | 80 // it points into the middle of a component that was entirely removed or into |
82 // the middle of an encoding sequence), it will be set to base::string16::npos. | 81 // the middle of an encoding sequence), it will be set to base::string16::npos. |
83 // For consistency, if an input offset points between the scheme and the | 82 // For consistency, if an input offset points between the scheme and the |
84 // username/password, and both are removed, on output this offset will be 0 | 83 // username/password, and both are removed, on output this offset will be 0 |
85 // rather than npos; this means that offsets at the starts and ends of removed | 84 // rather than npos; this means that offsets at the starts and ends of removed |
86 // components are always transformed the same way regardless of what other | 85 // components are always transformed the same way regardless of what other |
87 // components are adjacent. | 86 // components are adjacent. |
88 base::string16 FormatUrl(const GURL& url, | 87 base::string16 FormatUrl(const GURL& url, |
89 const std::string& languages, | |
90 FormatUrlTypes format_types, | 88 FormatUrlTypes format_types, |
91 net::UnescapeRule::Type unescape_rules, | 89 net::UnescapeRule::Type unescape_rules, |
92 url::Parsed* new_parsed, | 90 url::Parsed* new_parsed, |
93 size_t* prefix_end, | 91 size_t* prefix_end, |
94 size_t* offset_for_adjustment); | 92 size_t* offset_for_adjustment); |
95 | 93 |
96 base::string16 FormatUrlWithOffsets( | 94 base::string16 FormatUrlWithOffsets( |
97 const GURL& url, | 95 const GURL& url, |
98 const std::string& languages, | |
99 FormatUrlTypes format_types, | 96 FormatUrlTypes format_types, |
100 net::UnescapeRule::Type unescape_rules, | 97 net::UnescapeRule::Type unescape_rules, |
101 url::Parsed* new_parsed, | 98 url::Parsed* new_parsed, |
102 size_t* prefix_end, | 99 size_t* prefix_end, |
103 std::vector<size_t>* offsets_for_adjustment); | 100 std::vector<size_t>* offsets_for_adjustment); |
104 | 101 |
105 // This function is like those above except it takes |adjustments| rather | 102 // This function is like those above except it takes |adjustments| rather |
106 // than |offset[s]_for_adjustment|. |adjustments| will be set to reflect all | 103 // than |offset[s]_for_adjustment|. |adjustments| will be set to reflect all |
107 // the transformations that happened to |url| to convert it into the returned | 104 // the transformations that happened to |url| to convert it into the returned |
108 // value. | 105 // value. |
109 base::string16 FormatUrlWithAdjustments( | 106 base::string16 FormatUrlWithAdjustments( |
110 const GURL& url, | 107 const GURL& url, |
111 const std::string& languages, | |
112 FormatUrlTypes format_types, | 108 FormatUrlTypes format_types, |
113 net::UnescapeRule::Type unescape_rules, | 109 net::UnescapeRule::Type unescape_rules, |
114 url::Parsed* new_parsed, | 110 url::Parsed* new_parsed, |
115 size_t* prefix_end, | 111 size_t* prefix_end, |
116 base::OffsetAdjuster::Adjustments* adjustments); | 112 base::OffsetAdjuster::Adjustments* adjustments); |
117 | 113 |
118 // This is a convenience function for FormatUrl() with | 114 // This is a convenience function for FormatUrl() with |
119 // format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical | 115 // format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical |
120 // set of flags for "URLs to display to the user". You should be cautious about | 116 // set of flags for "URLs to display to the user". You should be cautious about |
121 // using this for URLs which will be parsed or sent to other applications. | 117 // using this for URLs which will be parsed or sent to other applications. |
122 inline base::string16 FormatUrl(const GURL& url, const std::string& languages) { | 118 inline base::string16 FormatUrl(const GURL& url) { |
123 return FormatUrl(url, languages, kFormatUrlOmitAll, net::UnescapeRule::SPACES, | 119 return FormatUrl(url, kFormatUrlOmitAll, net::UnescapeRule::SPACES, |
124 nullptr, nullptr, nullptr); | 120 nullptr, nullptr, nullptr); |
125 } | 121 } |
126 | 122 |
127 // Returns whether FormatUrl() would strip a trailing slash from |url|, given a | 123 // Returns whether FormatUrl() would strip a trailing slash from |url|, given a |
128 // format flag including kFormatUrlOmitTrailingSlashOnBareHostname. | 124 // format flag including kFormatUrlOmitTrailingSlashOnBareHostname. |
129 bool CanStripTrailingSlash(const GURL& url); | 125 bool CanStripTrailingSlash(const GURL& url); |
130 | 126 |
131 // Formats the host in |url| and appends it to |output|. The host formatter | 127 // Formats the host in |url| and appends it to |output|. |
132 // takes the same accept languages component as ElideURL(), but it does not | 128 void AppendFormattedHost(const GURL& url, base::string16* output); |
133 // affect the result. It'll be removed. | |
134 void AppendFormattedHost(const GURL& url, | |
135 const std::string& languages, | |
136 base::string16* output); | |
137 | 129 |
138 // Converts the given host name to unicode characters. This can be called for | 130 // Converts the given host name to unicode characters. This can be called for |
139 // any host name; if the input is not IDN or is invalid in some way, we'll just | 131 // any host name; if the input is not IDN or is invalid in some way, we'll just |
140 // return the ASCII source so it is still usable. | 132 // return the ASCII source so it is still usable. |
141 // | 133 // |
142 // The input should be the canonicalized ASCII host name from GURL. This | 134 // The input should be the canonicalized ASCII host name from GURL. This |
143 // function does NOT accept UTF-8! | 135 // function does NOT accept UTF-8! |
144 // |languages| is not used any more and will be removed. | 136 base::string16 IDNToUnicode(const std::string& host); |
145 base::string16 IDNToUnicode(const std::string& host, | |
146 const std::string& languages); | |
147 | 137 |
148 // If |text| starts with "www." it is removed, otherwise |text| is returned | 138 // If |text| starts with "www." it is removed, otherwise |text| is returned |
149 // unmodified. | 139 // unmodified. |
150 base::string16 StripWWW(const base::string16& text); | 140 base::string16 StripWWW(const base::string16& text); |
151 | 141 |
152 // Runs |url|'s host through StripWWW(). |url| must be valid. | 142 // Runs |url|'s host through StripWWW(). |url| must be valid. |
153 base::string16 StripWWWFromHost(const GURL& url); | 143 base::string16 StripWWWFromHost(const GURL& url); |
154 | 144 |
155 } // namespace url_formatter | 145 } // namespace url_formatter |
156 | 146 |
157 #endif // COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_ | 147 #endif // COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_ |
OLD | NEW |