OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <algorithm> | 5 #include <algorithm> |
6 | 6 |
7 #include "net/base/escape.h" | 7 #include "net/base/escape.h" |
8 | 8 |
9 #include "base/i18n/icu_string_conversions.h" | 9 #include "base/i18n/icu_string_conversions.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
11 #include "base/string_util.h" | 11 #include "base/string_util.h" |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
101 // P Q R S T U V W X Y Z [ \ ] ^ _ | 101 // P Q R S T U V W X Y Z [ \ ] ^ _ |
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
103 // ` a b c d e f g h i j k l m n o | 103 // ` a b c d e f g h i j k l m n o |
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
105 // p q r s t u v w x y z { | } ~ <NBSP> | 105 // p q r s t u v w x y z { | } ~ <NBSP> |
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 | 106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 |
107 }; | 107 }; |
108 | 108 |
109 std::string UnescapeURLImpl(const std::string& escaped_text, | 109 std::string UnescapeURLImpl(const std::string& escaped_text, |
110 UnescapeRule::Type rules) { | 110 UnescapeRule::Type rules, |
| 111 size_t* offset_for_adjustment) { |
| 112 size_t offset_temp = std::wstring::npos; |
| 113 if (!offset_for_adjustment) |
| 114 offset_for_adjustment = &offset_temp; |
| 115 else if (*offset_for_adjustment >= escaped_text.length()) |
| 116 *offset_for_adjustment = std::wstring::npos; |
| 117 |
111 // Do not unescape anything, return the |escaped_text| text. | 118 // Do not unescape anything, return the |escaped_text| text. |
112 if (rules == UnescapeRule::NONE) | 119 if (rules == UnescapeRule::NONE) |
113 return escaped_text; | 120 return escaped_text; |
114 | 121 |
115 // The output of the unescaping is always smaller than the input, so we can | 122 // The output of the unescaping is always smaller than the input, so we can |
116 // reserve the input size to make sure we have enough buffer and don't have | 123 // reserve the input size to make sure we have enough buffer and don't have |
117 // to allocate in the loop below. | 124 // to allocate in the loop below. |
118 std::string result; | 125 std::string result; |
119 result.reserve(escaped_text.length()); | 126 result.reserve(escaped_text.length()); |
120 | 127 |
121 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { | 128 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { |
122 if (escaped_text[i] == '%' && i + 2 < max) { | 129 if (escaped_text[i] == '%' && i + 2 < max) { |
123 const std::string::value_type most_sig_digit(escaped_text[i + 1]); | 130 const std::string::value_type most_sig_digit(escaped_text[i + 1]); |
124 const std::string::value_type least_sig_digit(escaped_text[i + 2]); | 131 const std::string::value_type least_sig_digit(escaped_text[i + 2]); |
125 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) { | 132 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) { |
126 unsigned char value = HexToInt(most_sig_digit) * 16 + | 133 unsigned char value = HexToInt(most_sig_digit) * 16 + |
127 HexToInt(least_sig_digit); | 134 HexToInt(least_sig_digit); |
128 if (value >= 0x80 || // Unescape all high-bit characters. | 135 if (value >= 0x80 || // Unescape all high-bit characters. |
129 // For 7-bit characters, the lookup table tells us all valid chars. | 136 // For 7-bit characters, the lookup table tells us all valid chars. |
130 (kUrlUnescape[value] || | 137 (kUrlUnescape[value] || |
131 // ...and we allow some additional unescaping when flags are set. | 138 // ...and we allow some additional unescaping when flags are set. |
132 (value == ' ' && (rules & UnescapeRule::SPACES)) || | 139 (value == ' ' && (rules & UnescapeRule::SPACES)) || |
133 // Allow any of the prohibited but non-control characters when | 140 // Allow any of the prohibited but non-control characters when |
134 // we're doing "special" chars. | 141 // we're doing "special" chars. |
135 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || | 142 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || |
136 // Additionally allow control characters if requested. | 143 // Additionally allow control characters if requested. |
137 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { | 144 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { |
138 // Use the unescaped version of the character. | 145 // Use the unescaped version of the character. |
| 146 size_t length_before_append = result.length(); |
139 result.push_back(value); | 147 result.push_back(value); |
140 i += 2; | 148 i += 2; |
| 149 |
| 150 // Adjust offset to match length change. |
| 151 if (*offset_for_adjustment != std::string::npos) { |
| 152 if (*offset_for_adjustment > (length_before_append + 2)) |
| 153 *offset_for_adjustment -= 2; |
| 154 else if (*offset_for_adjustment > length_before_append) |
| 155 *offset_for_adjustment = std::string::npos; |
| 156 } |
141 } else { | 157 } else { |
142 // Keep escaped. Append a percent and we'll get the following two | 158 // Keep escaped. Append a percent and we'll get the following two |
143 // digits on the next loops through. | 159 // digits on the next loops through. |
144 result.push_back('%'); | 160 result.push_back('%'); |
145 } | 161 } |
146 } else { | 162 } else { |
147 // Invalid escape sequence, just pass the percent through and continue | 163 // Invalid escape sequence, just pass the percent through and continue |
148 // right after it. | 164 // right after it. |
149 result.push_back('%'); | 165 result.push_back('%'); |
150 } | 166 } |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
224 if (!base::WideToCodepage(text, codepage, | 240 if (!base::WideToCodepage(text, codepage, |
225 base::OnStringConversionError::SKIP, &encoded)) | 241 base::OnStringConversionError::SKIP, &encoded)) |
226 return false; | 242 return false; |
227 | 243 |
228 // It's safe to use UTF8ToWide here because Escape should only return | 244 // It's safe to use UTF8ToWide here because Escape should only return |
229 // alphanumerics and !'()*-._~ | 245 // alphanumerics and !'()*-._~ |
230 escaped->assign(UTF8ToWide(Escape(encoded, kQueryCharmap, true))); | 246 escaped->assign(UTF8ToWide(Escape(encoded, kQueryCharmap, true))); |
231 return true; | 247 return true; |
232 } | 248 } |
233 | 249 |
// Diff summary: OLD UnescapeAndDecodeURLComponent(text, codepage, rules) becomes
// NEW UnescapeAndDecodeUTF8URLComponent(text, rules, offset_for_adjustment).
// The |codepage| parameter is dropped and the NEW body passes the literal
// "UTF-8" to the conversion call instead. |offset_for_adjustment| is an
// in/out pointer that the NEW body null-checks before every dereference, so
// callers may pass NULL. It is handed to UnescapeURLImpl and then to
// CodepageToWideAndAdjustOffset so both steps can update it.
234 std::wstring UnescapeAndDecodeURLComponent(const std::string& text, | 250 std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text, |
235 const char* codepage, | 251 UnescapeRule::Type rules, |
236 UnescapeRule::Type rules) { | 252 size_t* offset_for_adjustment) { |
237 std::wstring result; | 253 std::wstring result;  |
// NEW: snapshot the caller's offset (0 when no pointer was supplied) before
// the unescape/convert calls get a chance to modify it, so the failure path
// below can put it back.
238 if (base::CodepageToWide(UnescapeURLImpl(text, rules), codepage, | 254 size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0; |
239 base::OnStringConversionError::FAIL, &result)) | 255 if (base::CodepageToWideAndAdjustOffset( |
| 256 UnescapeURLImpl(text, rules, offset_for_adjustment), |
| 257 "UTF-8", base::OnStringConversionError::FAIL, &result, |
| 258 offset_for_adjustment)) |
240 return result; // Character set looks like it's valid. | 259 return result; // Character set looks like it's valid. |
// Failure path (conversion reported an error under OnStringConversionError::FAIL).
// OLD: return the still-escaped |text| widened with UTF8ToWide.
// NEW: restore the saved offset first, then widen the still-escaped |text|
// with UTF8ToWideAndAdjustOffset so the offset is adjusted against the string
// that is actually returned.
241 return UTF8ToWide(text); // Return the escaped version when it's not. | 260 |
| 261 // Not valid. Return the escaped version. Undo our changes to |
| 262 // |offset_for_adjustment| since we haven't changed the string after all. |
| 263 if (offset_for_adjustment) |
| 264 *offset_for_adjustment = original_offset; |
| 265 return UTF8ToWideAndAdjustOffset(text, offset_for_adjustment); |
242 } | 266 } |
243 | 267 |
// Public wrapper that unescapes |escaped_text| according to |rules| by
// forwarding to UnescapeURLImpl and returning its result unchanged.
// Diff summary: the signature is the same in OLD and NEW; NEW only adds a
// NULL third argument to match UnescapeURLImpl's new |offset_for_adjustment|
// parameter. With NULL, UnescapeURLImpl points that parameter at a local
// temporary, so nothing is reported back to the caller.
244 std::string UnescapeURLComponent(const std::string& escaped_text, | 268 std::string UnescapeURLComponent(const std::string& escaped_text, |
245 UnescapeRule::Type rules) { | 269 UnescapeRule::Type rules) { |
246 return UnescapeURLImpl(escaped_text, rules); | 270 return UnescapeURLImpl(escaped_text, rules, NULL); |
247 } | 271 } |
248 | 272 |
249 template <class str> | 273 template <class str> |
250 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { | 274 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { |
251 static const struct { | 275 static const struct { |
252 char key; | 276 char key; |
253 const char *replacement; | 277 const char *replacement; |
254 } kCharsToEscape[] = { | 278 } kCharsToEscape[] = { |
255 { '<', "<" }, | 279 { '<', "<" }, |
256 { '>', ">" }, | 280 { '>', ">" }, |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
290 return result; | 314 return result; |
291 } | 315 } |
292 | 316 |
// Narrow-string overload: returns EscapeForHTMLImpl(input) as a std::string.
// Identical in OLD and NEW (only the line numbers shift).
// NOTE(review): EscapeForHTMLImpl is defined in the skipped region above and
// is not fully visible here; presumably a template shared with the wide
// overload -- confirm against the full file.
293 std::string EscapeForHTML(const std::string& input) { | 317 std::string EscapeForHTML(const std::string& input) { |
294 return EscapeForHTMLImpl(input); | 318 return EscapeForHTMLImpl(input); |
295 } | 319 } |
296 | 320 |
// Wide-string overload: returns EscapeForHTMLImpl(input) as a std::wstring.
// Identical in OLD and NEW (only the line numbers shift).
// NOTE(review): EscapeForHTMLImpl is defined in the skipped region above and
// is not fully visible here; presumably a template shared with the narrow
// overload -- confirm against the full file.
297 std::wstring EscapeForHTML(const std::wstring& input) { | 321 std::wstring EscapeForHTML(const std::wstring& input) { |
298 return EscapeForHTMLImpl(input); | 322 return EscapeForHTMLImpl(input); |
299 } | 323 } |
OLD | NEW |