| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/base/escape.h" | 5 #include "net/base/escape.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/scoped_ptr.h" | 10 #include "base/scoped_ptr.h" |
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 | 98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 |
| 99 }; | 99 }; |
| 100 | 100 |
| 101 template<typename STR> | 101 template<typename STR> |
| 102 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, | 102 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, |
| 103 UnescapeRule::Type rules, | 103 UnescapeRule::Type rules, |
| 104 std::vector<size_t>* offsets_for_adjustment) { | 104 std::vector<size_t>* offsets_for_adjustment) { |
| 105 if (offsets_for_adjustment) { | 105 if (offsets_for_adjustment) { |
| 106 std::for_each(offsets_for_adjustment->begin(), | 106 std::for_each(offsets_for_adjustment->begin(), |
| 107 offsets_for_adjustment->end(), | 107 offsets_for_adjustment->end(), |
| 108 LimitOffset<std::wstring>(escaped_text.length())); | 108 LimitOffset<STR>(escaped_text.length())); |
| 109 } | 109 } |
| 110 // Do not unescape anything, return the |escaped_text| text. | 110 // Do not unescape anything, return the |escaped_text| text. |
| 111 if (rules == UnescapeRule::NONE) | 111 if (rules == UnescapeRule::NONE) |
| 112 return escaped_text; | 112 return escaped_text; |
| 113 | 113 |
| 114 // The output of the unescaping is always smaller than the input, so we can | 114 // The output of the unescaping is always smaller than the input, so we can |
| 115 // reserve the input size to make sure we have enough buffer and don't have | 115 // reserve the input size to make sure we have enough buffer and don't have |
| 116 // to allocate in the loop below. | 116 // to allocate in the loop below. |
| 117 STR result; | 117 STR result; |
| 118 result.reserve(escaped_text.length()); | 118 result.reserve(escaped_text.length()); |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 170 // Make offset adjustment. | 170 // Make offset adjustment. |
| 171 if (offsets_for_adjustment && !adjustments.empty()) { | 171 if (offsets_for_adjustment && !adjustments.empty()) { |
| 172 std::for_each(offsets_for_adjustment->begin(), | 172 std::for_each(offsets_for_adjustment->begin(), |
| 173 offsets_for_adjustment->end(), | 173 offsets_for_adjustment->end(), |
| 174 AdjustEncodingOffset(adjustments)); | 174 AdjustEncodingOffset(adjustments)); |
| 175 } | 175 } |
| 176 | 176 |
| 177 return result; | 177 return result; |
| 178 } | 178 } |
| 179 | 179 |
| 180 template<typename STR> | |
| 181 STR UnescapeURLImpl(const STR& escaped_text, | |
| 182 UnescapeRule::Type rules, | |
| 183 size_t* offset_for_adjustment) { | |
| 184 std::vector<size_t> offsets; | |
| 185 if (offset_for_adjustment) | |
| 186 offsets.push_back(*offset_for_adjustment); | |
| 187 STR result = UnescapeURLWithOffsetsImpl(escaped_text, rules, &offsets); | |
| 188 if (offset_for_adjustment) | |
| 189 *offset_for_adjustment = offsets[0]; | |
| 190 return result; | |
| 191 } | |
| 192 | |
| 193 } // namespace | 180 } // namespace |
| 194 | 181 |
| 195 // Everything except alphanumerics and !'()*-._~ | 182 // Everything except alphanumerics and !'()*-._~ |
| 196 // See RFC 2396 for the list of reserved characters. | 183 // See RFC 2396 for the list of reserved characters. |
| 197 static const Charmap kQueryCharmap( | 184 static const Charmap kQueryCharmap( |
| 198 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, | 185 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, |
| 199 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); | 186 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); |
| 200 | 187 |
| 201 std::string EscapeQueryParamValue(const std::string& text, bool use_plus) { | 188 std::string EscapeQueryParamValue(const std::string& text, bool use_plus) { |
| 202 return Escape(text, kQueryCharmap, use_plus); | 189 return Escape(text, kQueryCharmap, use_plus); |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 244 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); | 231 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); |
| 245 | 232 |
| 246 std::string EscapeExternalHandlerValue(const std::string& text) { | 233 std::string EscapeExternalHandlerValue(const std::string& text) { |
| 247 return Escape(text, kExternalHandlerCharmap, false); | 234 return Escape(text, kExternalHandlerCharmap, false); |
| 248 } | 235 } |
| 249 | 236 |
| 250 string16 UnescapeAndDecodeUTF8URLComponentWithOffsets( | 237 string16 UnescapeAndDecodeUTF8URLComponentWithOffsets( |
| 251 const std::string& text, | 238 const std::string& text, |
| 252 UnescapeRule::Type rules, | 239 UnescapeRule::Type rules, |
| 253 std::vector<size_t>* offsets_for_adjustment) { | 240 std::vector<size_t>* offsets_for_adjustment) { |
| 254 std::wstring result; | 241 string16 result; |
| 255 std::vector<size_t> original_offsets; | 242 std::vector<size_t> original_offsets; |
| 256 if (offsets_for_adjustment) | 243 if (offsets_for_adjustment) |
| 257 original_offsets = *offsets_for_adjustment; | 244 original_offsets = *offsets_for_adjustment; |
| 258 std::string unescaped_url( | 245 std::string unescaped_url( |
| 259 UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment)); | 246 UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment)); |
| 260 if (UTF8ToWideAndAdjustOffsets(unescaped_url.data(), unescaped_url.length(), | 247 if (UTF8ToUTF16AndAdjustOffsets(unescaped_url.data(), unescaped_url.length(), |
| 261 &result, offsets_for_adjustment)) | 248 &result, offsets_for_adjustment)) |
| 262 return WideToUTF16Hack(result); // Character set looks like it's valid. | 249 return result; // Character set looks like it's valid. |
| 263 | 250 |
| 264 // Not valid. Return the escaped version. Undo our changes to | 251 // Not valid. Return the escaped version. Undo our changes to |
| 265 // |offset_for_adjustment| since we haven't changed the string after all. | 252 // |offset_for_adjustment| since we haven't changed the string after all. |
| 266 if (offsets_for_adjustment) | 253 if (offsets_for_adjustment) |
| 267 *offsets_for_adjustment = original_offsets; | 254 *offsets_for_adjustment = original_offsets; |
| 268 return WideToUTF16Hack(UTF8ToWideAndAdjustOffsets( | 255 return UTF8ToUTF16AndAdjustOffsets(text, offsets_for_adjustment); |
| 269 text, offsets_for_adjustment)); | |
| 270 } | 256 } |
| 271 | 257 |
| 272 string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, | 258 string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, |
| 273 UnescapeRule::Type rules, | 259 UnescapeRule::Type rules, |
| 274 size_t* offset_for_adjustment) { | 260 size_t* offset_for_adjustment) { |
| 275 std::vector<size_t> offsets; | 261 std::vector<size_t> offsets; |
| 276 if (offset_for_adjustment) | 262 if (offset_for_adjustment) |
| 277 offsets.push_back(*offset_for_adjustment); | 263 offsets.push_back(*offset_for_adjustment); |
| 278 string16 result = | 264 string16 result = |
| 279 UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules, &offsets); | 265 UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules, &offsets); |
| 280 if (offset_for_adjustment) | 266 if (offset_for_adjustment) |
| 281 *offset_for_adjustment = offsets[0]; | 267 *offset_for_adjustment = offsets[0]; |
| 282 return result; | 268 return result; |
| 283 } | 269 } |
| 284 | 270 |
| 285 std::string UnescapeURLComponent(const std::string& escaped_text, | 271 std::string UnescapeURLComponent(const std::string& escaped_text, |
| 286 UnescapeRule::Type rules) { | 272 UnescapeRule::Type rules) { |
| 287 return UnescapeURLWithOffsetsImpl<std::string>(escaped_text, rules, NULL); | 273 return UnescapeURLWithOffsetsImpl(escaped_text, rules, NULL); |
| 288 } | 274 } |
| 289 | 275 |
| 290 string16 UnescapeURLComponent(const string16& escaped_text, | 276 string16 UnescapeURLComponent(const string16& escaped_text, |
| 291 UnescapeRule::Type rules) { | 277 UnescapeRule::Type rules) { |
| 292 return UnescapeURLWithOffsetsImpl<string16>(escaped_text, rules, NULL); | 278 return UnescapeURLWithOffsetsImpl(escaped_text, rules, NULL); |
| 293 } | 279 } |
| 294 | 280 |
| 295 | 281 |
| 296 template <class str> | 282 template <class str> |
| 297 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { | 283 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { |
| 298 static const struct { | 284 static const struct { |
| 299 char key; | 285 char key; |
| 300 const char* replacement; | 286 const char* replacement; |
| 301 } kCharsToEscape[] = { | 287 } kCharsToEscape[] = { |
| 302 { '<', "<" }, | 288 { '<', "<" }, |
| (...skipping 12 matching lines...) Expand all Loading... |
| 315 } | 301 } |
| 316 } | 302 } |
| 317 if (k == ARRAYSIZE_UNSAFE(kCharsToEscape)) | 303 if (k == ARRAYSIZE_UNSAFE(kCharsToEscape)) |
| 318 output->push_back(c); | 304 output->push_back(c); |
| 319 } | 305 } |
| 320 | 306 |
| 321 void AppendEscapedCharForHTML(char c, std::string* output) { | 307 void AppendEscapedCharForHTML(char c, std::string* output) { |
| 322 AppendEscapedCharForHTMLImpl(c, output); | 308 AppendEscapedCharForHTMLImpl(c, output); |
| 323 } | 309 } |
| 324 | 310 |
| 325 void AppendEscapedCharForHTML(wchar_t c, string16* output) { | |
| 326 AppendEscapedCharForHTMLImpl(c, output); | |
| 327 } | |
| 328 | |
| 329 template <class str> | 311 template <class str> |
| 330 str EscapeForHTMLImpl(const str& input) { | 312 str EscapeForHTMLImpl(const str& input) { |
| 331 str result; | 313 str result; |
| 332 result.reserve(input.size()); // optimize for no escaping | 314 result.reserve(input.size()); // optimize for no escaping |
| 333 | 315 |
| 334 for (typename str::const_iterator it = input.begin(); it != input.end(); ++it) | 316 for (typename str::const_iterator it = input.begin(); it != input.end(); ++it) |
| 335 AppendEscapedCharForHTMLImpl(*it, &result); | 317 AppendEscapedCharForHTMLImpl(*it, &result); |
| 336 | 318 |
| 337 return result; | 319 return result; |
| 338 } | 320 } |
| 339 | 321 |
| 340 std::string EscapeForHTML(const std::string& input) { | 322 std::string EscapeForHTML(const std::string& input) { |
| 341 return EscapeForHTMLImpl(input); | 323 return EscapeForHTMLImpl(input); |
| 342 } | 324 } |
| 343 | 325 |
| 344 string16 EscapeForHTML(const string16& input) { | 326 string16 EscapeForHTML(const string16& input) { |
| 345 return EscapeForHTMLImpl(input); | 327 return EscapeForHTMLImpl(input); |
| 346 } | 328 } |
| 347 | 329 |
| 348 string16 UnescapeForHTML(const string16& input) { | 330 string16 UnescapeForHTML(const string16& input) { |
| 349 static const struct { | 331 static const struct { |
| 350 const wchar_t* ampersand_code; | 332 const char* ampersand_code; |
| 351 const char replacement; | 333 const char replacement; |
| 352 } kEscapeToChars[] = { | 334 } kEscapeToChars[] = { |
| 353 { L"<", '<' }, | 335 { "<", '<' }, |
| 354 { L">", '>' }, | 336 { ">", '>' }, |
| 355 { L"&", '&' }, | 337 { "&", '&' }, |
| 356 { L""", '"' }, | 338 { """, '"' }, |
| 357 { L"'", '\''}, | 339 { "'", '\''}, |
| 358 }; | 340 }; |
| 359 | 341 |
| 360 if (input.find(WideToUTF16(L"&")) == std::string::npos) | 342 if (input.find(ASCIIToUTF16("&")) == std::string::npos) |
| 361 return input; | 343 return input; |
| 362 | 344 |
| 363 string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)]; | 345 string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)]; |
| 364 string16 text(input); | 346 string16 text(input); |
| 365 for (string16::iterator iter = text.begin(); iter != text.end(); ++iter) { | 347 for (string16::iterator iter = text.begin(); iter != text.end(); ++iter) { |
| 366 if (*iter == '&') { | 348 if (*iter == '&') { |
| 367 // Potential ampersand encode char. | 349 // Potential ampersand encode char. |
| 368 size_t index = iter - text.begin(); | 350 size_t index = iter - text.begin(); |
| 369 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); i++) { | 351 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); i++) { |
| 370 if (ampersand_chars[i].empty()) | 352 if (ampersand_chars[i].empty()) |
| 371 ampersand_chars[i] = WideToUTF16(kEscapeToChars[i].ampersand_code); | 353 ampersand_chars[i] = ASCIIToUTF16(kEscapeToChars[i].ampersand_code); |
| 372 if (text.find(ampersand_chars[i], index) == index) { | 354 if (text.find(ampersand_chars[i], index) == index) { |
| 373 text.replace(iter, iter + ampersand_chars[i].length(), | 355 text.replace(iter, iter + ampersand_chars[i].length(), |
| 374 1, kEscapeToChars[i].replacement); | 356 1, kEscapeToChars[i].replacement); |
| 375 break; | 357 break; |
| 376 } | 358 } |
| 377 } | 359 } |
| 378 } | 360 } |
| 379 } | 361 } |
| 380 return text; | 362 return text; |
| 381 } | 363 } |
| (...skipping 14 matching lines...) Expand all Loading... |
| 396 return; | 378 return; |
| 397 } | 379 } |
| 398 if (offset <= (location + 2)) { | 380 if (offset <= (location + 2)) { |
| 399 offset = string16::npos; | 381 offset = string16::npos; |
| 400 return; | 382 return; |
| 401 } | 383 } |
| 402 adjusted_offset -= 2; | 384 adjusted_offset -= 2; |
| 403 } | 385 } |
| 404 offset = adjusted_offset; | 386 offset = adjusted_offset; |
| 405 } | 387 } |
| OLD | NEW |