OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <algorithm> | 5 #include <algorithm> |
6 | 6 |
7 #include "net/base/escape.h" | 7 #include "net/base/escape.h" |
8 | 8 |
9 #include "base/i18n/icu_string_conversions.h" | 9 #include "base/i18n/icu_string_conversions.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
101 // @ A B C D E F G H I J K L M N O | 101 // @ A B C D E F G H I J K L M N O |
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
103 // P Q R S T U V W X Y Z [ \ ] ^ _ | 103 // P Q R S T U V W X Y Z [ \ ] ^ _ |
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
105 // ` a b c d e f g h i j k l m n o | 105 // ` a b c d e f g h i j k l m n o |
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
107 // p q r s t u v w x y z { | } ~ <NBSP> | 107 // p q r s t u v w x y z { | } ~ <NBSP> |
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 | 108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 |
109 }; | 109 }; |
110 | 110 |
111 std::string UnescapeURLImpl(const std::string& escaped_text, | 111 template<typename STR> |
112 UnescapeRule::Type rules, | 112 STR UnescapeURLImpl(const STR& escaped_text, |
113 size_t* offset_for_adjustment) { | 113 UnescapeRule::Type rules, |
| 114 size_t* offset_for_adjustment) { |
114 size_t offset_temp = string16::npos; | 115 size_t offset_temp = string16::npos; |
115 if (!offset_for_adjustment) | 116 if (!offset_for_adjustment) |
116 offset_for_adjustment = &offset_temp; | 117 offset_for_adjustment = &offset_temp; |
117 else if (*offset_for_adjustment >= escaped_text.length()) | 118 else if (*offset_for_adjustment >= escaped_text.length()) |
118 *offset_for_adjustment = string16::npos; | 119 *offset_for_adjustment = string16::npos; |
119 | 120 |
120 // Do not unescape anything, return the |escaped_text| text. | 121 // Do not unescape anything, return the |escaped_text| text. |
121 if (rules == UnescapeRule::NONE) | 122 if (rules == UnescapeRule::NONE) |
122 return escaped_text; | 123 return escaped_text; |
123 | 124 |
124 // The output of the unescaping is always smaller than the input, so we can | 125 // The output of the unescaping is always smaller than the input, so we can |
125 // reserve the input size to make sure we have enough buffer and don't have | 126 // reserve the input size to make sure we have enough buffer and don't have |
126 // to allocate in the loop below. | 127 // to allocate in the loop below. |
127 std::string result; | 128 STR result; |
128 result.reserve(escaped_text.length()); | 129 result.reserve(escaped_text.length()); |
129 | 130 |
130 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { | 131 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { |
131 if (escaped_text[i] == '%' && i + 2 < max) { | 132 if (static_cast<unsigned char>(escaped_text[i]) >= 128) { |
132 const std::string::value_type most_sig_digit(escaped_text[i + 1]); | 133 // Non ASCII character, append as is. |
133 const std::string::value_type least_sig_digit(escaped_text[i + 2]); | 134 result.push_back(escaped_text[i]); |
| 135 continue; |
| 136 } |
| 137 |
| 138 char current_char = static_cast<char>(escaped_text[i]); |
| 139 if (current_char == '%' && i + 2 < max) { |
| 140 const typename STR::value_type most_sig_digit( |
| 141 static_cast<typename STR::value_type>(escaped_text[i + 1])); |
| 142 const typename STR::value_type least_sig_digit( |
| 143 static_cast<typename STR::value_type>(escaped_text[i + 2])); |
134 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) { | 144 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) { |
135 unsigned char value = HexToInt(most_sig_digit) * 16 + | 145 unsigned char value = HexToInt(most_sig_digit) * 16 + |
136 HexToInt(least_sig_digit); | 146 HexToInt(least_sig_digit); |
137 if (value >= 0x80 || // Unescape all high-bit characters. | 147 if (value >= 0x80 || // Unescape all high-bit characters. |
138 // For 7-bit characters, the lookup table tells us all valid chars. | 148 // For 7-bit characters, the lookup table tells us all valid chars. |
139 (kUrlUnescape[value] || | 149 (kUrlUnescape[value] || |
140 // ...and we allow some additional unescaping when flags are set. | 150 // ...and we allow some additional unescaping when flags are set. |
141 (value == ' ' && (rules & UnescapeRule::SPACES)) || | 151 (value == ' ' && (rules & UnescapeRule::SPACES)) || |
142 // Allow any of the prohibited but non-control characters when | 152 // Allow any of the prohibited but non-control characters when |
143 // we're doing "special" chars. | 153 // we're doing "special" chars. |
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
268 *offset_for_adjustment = original_offset; | 278 *offset_for_adjustment = original_offset; |
269 return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text, | 279 return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text, |
270 offset_for_adjustment)); | 280 offset_for_adjustment)); |
271 } | 281 } |
272 | 282 |
273 std::string UnescapeURLComponent(const std::string& escaped_text, | 283 std::string UnescapeURLComponent(const std::string& escaped_text, |
274 UnescapeRule::Type rules) { | 284 UnescapeRule::Type rules) { |
275 return UnescapeURLImpl(escaped_text, rules, NULL); | 285 return UnescapeURLImpl(escaped_text, rules, NULL); |
276 } | 286 } |
277 | 287 |
| 288 string16 UnescapeURLComponent(const string16& escaped_text, |
| 289 UnescapeRule::Type rules) { |
| 290 return UnescapeURLImpl(escaped_text, rules, NULL); |
| 291 } |
| 292 |
| 293 |
278 template <class str> | 294 template <class str> |
279 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { | 295 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { |
280 static const struct { | 296 static const struct { |
281 char key; | 297 char key; |
282 const char *replacement; | 298 const char* replacement; |
283 } kCharsToEscape[] = { | 299 } kCharsToEscape[] = { |
284 { '<', "&lt;" }, | 300 { '<', "&lt;" }, |
285 { '>', "&gt;" }, | 301 { '>', "&gt;" }, |
286 { '&', "&amp;" }, | 302 { '&', "&amp;" }, |
287 { '"', "&quot;" }, | 303 { '"', "&quot;" }, |
288 { '\'', "&#39;" }, | 304 { '\'', "&#39;" }, |
289 }; | 305 }; |
290 size_t k; | 306 size_t k; |
291 for (k = 0; k < ARRAYSIZE_UNSAFE(kCharsToEscape); ++k) { | 307 for (k = 0; k < ARRAYSIZE_UNSAFE(kCharsToEscape); ++k) { |
292 if (c == kCharsToEscape[k].key) { | 308 if (c == kCharsToEscape[k].key) { |
(...skipping 26 matching lines...) Expand all Loading... |
319 return result; | 335 return result; |
320 } | 336 } |
321 | 337 |
322 std::string EscapeForHTML(const std::string& input) { | 338 std::string EscapeForHTML(const std::string& input) { |
323 return EscapeForHTMLImpl(input); | 339 return EscapeForHTMLImpl(input); |
324 } | 340 } |
325 | 341 |
326 string16 EscapeForHTML(const string16& input) { | 342 string16 EscapeForHTML(const string16& input) { |
327 return EscapeForHTMLImpl(input); | 343 return EscapeForHTMLImpl(input); |
328 } | 344 } |
| 345 |
| 346 string16 UnescapeForHTML(const string16& input) { |
| 347 static const struct { |
| 348 const wchar_t* ampersand_code; |
| 349 const char replacement; |
| 350 } kEscapeToChars[] = { |
| 351 { L"&lt;", '<' }, |
| 352 { L"&gt;", '>' }, |
| 353 { L"&amp;", '&' }, |
| 354 { L"&quot;", '"' }, |
| 355 { L"&#39;", '\''}, |
| 356 }; |
| 357 |
| 358 if (input.find(WideToUTF16(L"&")) == std::string::npos) |
| 359 return input; |
| 360 |
| 361 string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)]; |
| 362 string16 text(input); |
| 363 for (string16::iterator iter = text.begin(); iter != text.end(); ++iter) { |
| 364 if (*iter == '&') { |
| 365 // Potential ampersand encode char. |
| 366 size_t index = iter - text.begin(); |
| 367 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); i++) { |
| 368 if (ampersand_chars[i].empty()) |
| 369 ampersand_chars[i] = WideToUTF16(kEscapeToChars[i].ampersand_code); |
| 370 if (text.find(ampersand_chars[i], index) == index) { |
| 371 text.replace(iter, iter + ampersand_chars[i].length(), |
| 372 1, kEscapeToChars[i].replacement); |
| 373 break; |
| 374 } |
| 375 } |
| 376 } |
| 377 } |
| 378 return text; |
| 379 } |
OLD | NEW |