| OLD | NEW |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <algorithm> | 5 #include <algorithm> |
| 6 | 6 |
| 7 #include "net/base/escape.h" | 7 #include "net/base/escape.h" |
| 8 | 8 |
| 9 #include "base/i18n/icu_string_conversions.h" | 9 #include "base/i18n/icu_string_conversions.h" |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 101 // @ A B C D E F G H I J K L M N O | 101 // @ A B C D E F G H I J K L M N O |
| 102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 103 // P Q R S T U V W X Y Z [ \ ] ^ _ | 103 // P Q R S T U V W X Y Z [ \ ] ^ _ |
| 104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 105 // ` a b c d e f g h i j k l m n o | 105 // ` a b c d e f g h i j k l m n o |
| 106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 107 // p q r s t u v w x y z { | } ~ <NBSP> | 107 // p q r s t u v w x y z { | } ~ <NBSP> |
| 108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 | 108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 |
| 109 }; | 109 }; |
| 110 | 110 |
| 111 std::string UnescapeURLImpl(const std::string& escaped_text, | 111 template<typename STR> |
| 112 UnescapeRule::Type rules, | 112 STR UnescapeURLImpl(const STR& escaped_text, |
| 113 size_t* offset_for_adjustment) { | 113 UnescapeRule::Type rules, |
| 114 size_t* offset_for_adjustment) { |
| 114 size_t offset_temp = string16::npos; | 115 size_t offset_temp = string16::npos; |
| 115 if (!offset_for_adjustment) | 116 if (!offset_for_adjustment) |
| 116 offset_for_adjustment = &offset_temp; | 117 offset_for_adjustment = &offset_temp; |
| 117 else if (*offset_for_adjustment >= escaped_text.length()) | 118 else if (*offset_for_adjustment >= escaped_text.length()) |
| 118 *offset_for_adjustment = string16::npos; | 119 *offset_for_adjustment = string16::npos; |
| 119 | 120 |
| 120 // Do not unescape anything, return the |escaped_text| text. | 121 // Do not unescape anything, return the |escaped_text| text. |
| 121 if (rules == UnescapeRule::NONE) | 122 if (rules == UnescapeRule::NONE) |
| 122 return escaped_text; | 123 return escaped_text; |
| 123 | 124 |
| 124 // The output of the unescaping is always smaller than the input, so we can | 125 // The output of the unescaping is always smaller than the input, so we can |
| 125 // reserve the input size to make sure we have enough buffer and don't have | 126 // reserve the input size to make sure we have enough buffer and don't have |
| 126 // to allocate in the loop below. | 127 // to allocate in the loop below. |
| 127 std::string result; | 128 STR result; |
| 128 result.reserve(escaped_text.length()); | 129 result.reserve(escaped_text.length()); |
| 129 | 130 |
| 130 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { | 131 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { |
| 131 if (escaped_text[i] == '%' && i + 2 < max) { | 132 if (static_cast<unsigned char>(escaped_text[i]) >= 128) { |
| 132 const std::string::value_type most_sig_digit(escaped_text[i + 1]); | 133 // Non ASCII character, append as is. |
| 133 const std::string::value_type least_sig_digit(escaped_text[i + 2]); | 134 result.push_back(escaped_text[i]); |
| 135 continue; |
| 136 } |
| 137 |
| 138 char current_char = static_cast<char>(escaped_text[i]); |
| 139 if (current_char == '%' && i + 2 < max) { |
| 140 const typename STR::value_type most_sig_digit( |
| 141 static_cast<typename STR::value_type>(escaped_text[i + 1])); |
| 142 const typename STR::value_type least_sig_digit( |
| 143 static_cast<typename STR::value_type>(escaped_text[i + 2])); |
| 134 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) { | 144 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) { |
| 135 unsigned char value = HexToInt(most_sig_digit) * 16 + | 145 unsigned char value = HexToInt(most_sig_digit) * 16 + |
| 136 HexToInt(least_sig_digit); | 146 HexToInt(least_sig_digit); |
| 137 if (value >= 0x80 || // Unescape all high-bit characters. | 147 if (value >= 0x80 || // Unescape all high-bit characters. |
| 138 // For 7-bit characters, the lookup table tells us all valid chars. | 148 // For 7-bit characters, the lookup table tells us all valid chars. |
| 139 (kUrlUnescape[value] || | 149 (kUrlUnescape[value] || |
| 140 // ...and we allow some additional unescaping when flags are set. | 150 // ...and we allow some additional unescaping when flags are set. |
| 141 (value == ' ' && (rules & UnescapeRule::SPACES)) || | 151 (value == ' ' && (rules & UnescapeRule::SPACES)) || |
| 142 // Allow any of the prohibited but non-control characters when | 152 // Allow any of the prohibited but non-control characters when |
| 143 // we're doing "special" chars. | 153 // we're doing "special" chars. |
| (...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 268 *offset_for_adjustment = original_offset; | 278 *offset_for_adjustment = original_offset; |
| 269 return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text, | 279 return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text, |
| 270 offset_for_adjustment)); | 280 offset_for_adjustment)); |
| 271 } | 281 } |
| 272 | 282 |
| 273 std::string UnescapeURLComponent(const std::string& escaped_text, | 283 std::string UnescapeURLComponent(const std::string& escaped_text, |
| 274 UnescapeRule::Type rules) { | 284 UnescapeRule::Type rules) { |
| 275 return UnescapeURLImpl(escaped_text, rules, NULL); | 285 return UnescapeURLImpl(escaped_text, rules, NULL); |
| 276 } | 286 } |
| 277 | 287 |
| 288 string16 UnescapeURLComponent(const string16& escaped_text, |
| 289 UnescapeRule::Type rules) { |
| 290 return UnescapeURLImpl(escaped_text, rules, NULL); |
| 291 } |
| 292 |
| 293 |
| 278 template <class str> | 294 template <class str> |
| 279 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { | 295 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { |
| 280 static const struct { | 296 static const struct { |
| 281 char key; | 297 char key; |
| 282 const char *replacement; | 298 const char* replacement; |
| 283 } kCharsToEscape[] = { | 299 } kCharsToEscape[] = { |
| 284 { '<', "&lt;" }, | 300 { '<', "&lt;" }, |
| 285 { '>', "&gt;" }, | 301 { '>', "&gt;" }, |
| 286 { '&', "&amp;" }, | 302 { '&', "&amp;" }, |
| 287 { '"', "&quot;" }, | 303 { '"', "&quot;" }, |
| 288 { '\'', "&#39;" }, | 304 { '\'', "&#39;" }, |
| 289 }; | 305 }; |
| 290 size_t k; | 306 size_t k; |
| 291 for (k = 0; k < ARRAYSIZE_UNSAFE(kCharsToEscape); ++k) { | 307 for (k = 0; k < ARRAYSIZE_UNSAFE(kCharsToEscape); ++k) { |
| 292 if (c == kCharsToEscape[k].key) { | 308 if (c == kCharsToEscape[k].key) { |
| (...skipping 26 matching lines...) Expand all Loading... |
| 319 return result; | 335 return result; |
| 320 } | 336 } |
| 321 | 337 |
| 322 std::string EscapeForHTML(const std::string& input) { | 338 std::string EscapeForHTML(const std::string& input) { |
| 323 return EscapeForHTMLImpl(input); | 339 return EscapeForHTMLImpl(input); |
| 324 } | 340 } |
| 325 | 341 |
| 326 string16 EscapeForHTML(const string16& input) { | 342 string16 EscapeForHTML(const string16& input) { |
| 327 return EscapeForHTMLImpl(input); | 343 return EscapeForHTMLImpl(input); |
| 328 } | 344 } |
| 345 |
| 346 string16 UnescapeForHTML(const string16& input) { |
| 347 static const struct { |
| 348 const wchar_t* ampersand_code; |
| 349 const char replacement; |
| 350 } kEscapeToChars[] = { |
| 351 { L"&lt;", '<' }, |
| 352 { L"&gt;", '>' }, |
| 353 { L"&amp;", '&' }, |
| 354 { L"&quot;", '"' }, |
| 355 { L"&#39;", '\''}, |
| 356 }; |
| 357 |
| 358 if (input.find(WideToUTF16(L"&")) == std::string::npos) |
| 359 return input; |
| 360 |
| 361 string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)]; |
| 362 string16 text(input); |
| 363 for (string16::iterator iter = text.begin(); iter != text.end(); ++iter) { |
| 364 if (*iter == '&') { |
| 365 // Potential ampersand encode char. |
| 366 size_t index = iter - text.begin(); |
| 367 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); i++) { |
| 368 if (ampersand_chars[i].empty()) |
| 369 ampersand_chars[i] = WideToUTF16(kEscapeToChars[i].ampersand_code); |
| 370 if (text.find(ampersand_chars[i], index) == index) { |
| 371 text.replace(iter, iter + ampersand_chars[i].length(), |
| 372 1, kEscapeToChars[i].replacement); |
| 373 break; |
| 374 } |
| 375 } |
| 376 } |
| 377 } |
| 378 return text; |
| 379 } |
| OLD | NEW |