| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <algorithm> | 5 #include <algorithm> |
| 6 | 6 |
| 7 #include "net/base/escape.h" | 7 #include "net/base/escape.h" |
| 8 | 8 |
| 9 #include "base/i18n/icu_string_conversions.h" | 9 #include "base/i18n/icu_string_conversions.h" |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| 11 #include "base/string_util.h" | 11 #include "base/string_util.h" |
| (...skipping 88 matching lines...) | (...skipping 88 matching lines...) |
| 100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 101 // P Q R S T U V W X Y Z [ \ ] ^ _ | 101 // P Q R S T U V W X Y Z [ \ ] ^ _ |
| 102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 103 // ` a b c d e f g h i j k l m n o | 103 // ` a b c d e f g h i j k l m n o |
| 104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 105 // p q r s t u v w x y z { | } ~ <NBSP> | 105 // p q r s t u v w x y z { | } ~ <NBSP> |
| 106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 | 106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 |
| 107 }; | 107 }; |
| 108 | 108 |
| 109 std::string UnescapeURLImpl(const std::string& escaped_text, | 109 std::string UnescapeURLImpl(const std::string& escaped_text, |
| 110 UnescapeRule::Type rules) { | 110 UnescapeRule::Type rules, |
| 111 size_t* offset_for_adjustment) { |
| 112 size_t offset_temp = std::wstring::npos; |
| 113 if (!offset_for_adjustment) |
| 114 offset_for_adjustment = &offset_temp; |
| 115 else if (*offset_for_adjustment >= escaped_text.length()) |
| 116 *offset_for_adjustment = std::wstring::npos; |
| 117 |
| 111 // Do not unescape anything, return the |escaped_text| text. | 118 // Do not unescape anything, return the |escaped_text| text. |
| 112 if (rules == UnescapeRule::NONE) | 119 if (rules == UnescapeRule::NONE) |
| 113 return escaped_text; | 120 return escaped_text; |
| 114 | 121 |
| 115 // The output of the unescaping is always smaller than the input, so we can | 122 // The output of the unescaping is always smaller than the input, so we can |
| 116 // reserve the input size to make sure we have enough buffer and don't have | 123 // reserve the input size to make sure we have enough buffer and don't have |
| 117 // to allocate in the loop below. | 124 // to allocate in the loop below. |
| 118 std::string result; | 125 std::string result; |
| 119 result.reserve(escaped_text.length()); | 126 result.reserve(escaped_text.length()); |
| 120 | 127 |
| 121 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { | 128 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { |
| 122 if (escaped_text[i] == '%' && i + 2 < max) { | 129 if (escaped_text[i] == '%' && i + 2 < max) { |
| 123 const std::string::value_type most_sig_digit(escaped_text[i + 1]); | 130 const std::string::value_type most_sig_digit(escaped_text[i + 1]); |
| 124 const std::string::value_type least_sig_digit(escaped_text[i + 2]); | 131 const std::string::value_type least_sig_digit(escaped_text[i + 2]); |
| 125 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) { | 132 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) { |
| 126 unsigned char value = HexToInt(most_sig_digit) * 16 + | 133 unsigned char value = HexToInt(most_sig_digit) * 16 + |
| 127 HexToInt(least_sig_digit); | 134 HexToInt(least_sig_digit); |
| 128 if (value >= 0x80 || // Unescape all high-bit characters. | 135 if (value >= 0x80 || // Unescape all high-bit characters. |
| 129 // For 7-bit characters, the lookup table tells us all valid chars. | 136 // For 7-bit characters, the lookup table tells us all valid chars. |
| 130 (kUrlUnescape[value] || | 137 (kUrlUnescape[value] || |
| 131 // ...and we allow some additional unescaping when flags are set. | 138 // ...and we allow some additional unescaping when flags are set. |
| 132 (value == ' ' && (rules & UnescapeRule::SPACES)) || | 139 (value == ' ' && (rules & UnescapeRule::SPACES)) || |
| 133 // Allow any of the prohibited but non-control characters when | 140 // Allow any of the prohibited but non-control characters when |
| 134 // we're doing "special" chars. | 141 // we're doing "special" chars. |
| 135 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || | 142 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || |
| 136 // Additionally allow control characters if requested. | 143 // Additionally allow control characters if requested. |
| 137 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { | 144 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { |
| 138 // Use the unescaped version of the character. | 145 // Use the unescaped version of the character. |
| 146 size_t length_before_append = result.length(); |
| 139 result.push_back(value); | 147 result.push_back(value); |
| 140 i += 2; | 148 i += 2; |
| 149 |
| 150 // Adjust offset to match length change. |
| 151 if (*offset_for_adjustment != std::string::npos) { |
| 152 if (*offset_for_adjustment > (length_before_append + 2)) |
| 153 *offset_for_adjustment -= 2; |
| 154 else if (*offset_for_adjustment > length_before_append) |
| 155 *offset_for_adjustment = std::string::npos; |
| 156 } |
| 141 } else { | 157 } else { |
| 142 // Keep escaped. Append a percent and we'll get the following two | 158 // Keep escaped. Append a percent and we'll get the following two |
| 143 // digits on the next loops through. | 159 // digits on the next loops through. |
| 144 result.push_back('%'); | 160 result.push_back('%'); |
| 145 } | 161 } |
| 146 } else { | 162 } else { |
| 147 // Invalid escape sequence, just pass the percent through and continue | 163 // Invalid escape sequence, just pass the percent through and continue |
| 148 // right after it. | 164 // right after it. |
| 149 result.push_back('%'); | 165 result.push_back('%'); |
| 150 } | 166 } |
| (...skipping 73 matching lines...) | (...skipping 73 matching lines...) |
| 224 if (!base::WideToCodepage(text, codepage, | 240 if (!base::WideToCodepage(text, codepage, |
| 225 base::OnStringConversionError::SKIP, &encoded)) | 241 base::OnStringConversionError::SKIP, &encoded)) |
| 226 return false; | 242 return false; |
| 227 | 243 |
| 228 // It's safe to use UTF8ToWide here because Escape should only return | 244 // It's safe to use UTF8ToWide here because Escape should only return |
| 229 // alphanumerics and !'()*-._~ | 245 // alphanumerics and !'()*-._~ |
| 230 escaped->assign(UTF8ToWide(Escape(encoded, kQueryCharmap, true))); | 246 escaped->assign(UTF8ToWide(Escape(encoded, kQueryCharmap, true))); |
| 231 return true; | 247 return true; |
| 232 } | 248 } |
| 233 | 249 |
| 234 std::wstring UnescapeAndDecodeURLComponent(const std::string& text, | 250 std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text, |
| 235 const char* codepage, | 251 UnescapeRule::Type rules, |
| 236 UnescapeRule::Type rules) { | 252 size_t* offset_for_adjustment) { |
| 237 std::wstring result; | 253 std::wstring result; |
| 238 if (base::CodepageToWide(UnescapeURLImpl(text, rules), codepage, | 254 size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0; |
| 239 base::OnStringConversionError::FAIL, &result)) | 255 if (base::CodepageToWideAndAdjustOffset( |
| 256 UnescapeURLImpl(text, rules, offset_for_adjustment), |
| 257 "UTF-8", base::OnStringConversionError::FAIL, &result, |
| 258 offset_for_adjustment)) |
| 240 return result; // Character set looks like it's valid. | 259 return result; // Character set looks like it's valid. |
| 241 return UTF8ToWide(text); // Return the escaped version when it's not. | 260 |
| 261 // Not valid. Return the escaped version. Undo our changes to |
| 262 // |offset_for_adjustment| since we haven't changed the string after all. |
| 263 if (offset_for_adjustment) |
| 264 *offset_for_adjustment = original_offset; |
| 265 return UTF8ToWideAndAdjustOffset(text, offset_for_adjustment); |
| 242 } | 266 } |
| 243 | 267 |
| 244 std::string UnescapeURLComponent(const std::string& escaped_text, | 268 std::string UnescapeURLComponent(const std::string& escaped_text, |
| 245 UnescapeRule::Type rules) { | 269 UnescapeRule::Type rules) { |
| 246 return UnescapeURLImpl(escaped_text, rules); | 270 return UnescapeURLImpl(escaped_text, rules, NULL); |
| 247 } | 271 } |
| 248 | 272 |
| 249 template <class str> | 273 template <class str> |
| 250 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { | 274 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { |
| 251 static const struct { | 275 static const struct { |
| 252 char key; | 276 char key; |
| 253 const char *replacement; | 277 const char *replacement; |
| 254 } kCharsToEscape[] = { | 278 } kCharsToEscape[] = { |
| 255 { '<', "&lt;" }, | 279 { '<', "&lt;" }, |
| 256 { '>', "&gt;" }, | 280 { '>', "&gt;" }, |
| (...skipping 33 matching lines...) | (...skipping 33 matching lines...) |
| 290 return result; | 314 return result; |
| 291 } | 315 } |
| 292 | 316 |
| 293 std::string EscapeForHTML(const std::string& input) { | 317 std::string EscapeForHTML(const std::string& input) { |
| 294 return EscapeForHTMLImpl(input); | 318 return EscapeForHTMLImpl(input); |
| 295 } | 319 } |
| 296 | 320 |
| 297 std::wstring EscapeForHTML(const std::wstring& input) { | 321 std::wstring EscapeForHTML(const std::wstring& input) { |
| 298 return EscapeForHTMLImpl(input); | 322 return EscapeForHTMLImpl(input); |
| 299 } | 323 } |
| OLD | NEW |