OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/escape.h" | 5 #include "net/base/escape.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/scoped_ptr.h" | 10 #include "base/scoped_ptr.h" |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 | 98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 |
99 }; | 99 }; |
100 | 100 |
101 template<typename STR> | 101 template<typename STR> |
102 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, | 102 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, |
103 UnescapeRule::Type rules, | 103 UnescapeRule::Type rules, |
104 std::vector<size_t>* offsets_for_adjustment) { | 104 std::vector<size_t>* offsets_for_adjustment) { |
105 if (offsets_for_adjustment) { | 105 if (offsets_for_adjustment) { |
106 std::for_each(offsets_for_adjustment->begin(), | 106 std::for_each(offsets_for_adjustment->begin(), |
107 offsets_for_adjustment->end(), | 107 offsets_for_adjustment->end(), |
108 LimitOffset<std::wstring>(escaped_text.length())); | 108 LimitOffset<STR>(escaped_text.length())); |
109 } | 109 } |
110 // Do not unescape anything, return the |escaped_text| text. | 110 // Do not unescape anything, return the |escaped_text| text. |
111 if (rules == UnescapeRule::NONE) | 111 if (rules == UnescapeRule::NONE) |
112 return escaped_text; | 112 return escaped_text; |
113 | 113 |
114 // The output of the unescaping is always smaller than the input, so we can | 114 // The output of the unescaping is always smaller than the input, so we can |
115 // reserve the input size to make sure we have enough buffer and don't have | 115 // reserve the input size to make sure we have enough buffer and don't have |
116 // to allocate in the loop below. | 116 // to allocate in the loop below. |
117 STR result; | 117 STR result; |
118 result.reserve(escaped_text.length()); | 118 result.reserve(escaped_text.length()); |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
170 // Make offset adjustment. | 170 // Make offset adjustment. |
171 if (offsets_for_adjustment && !adjustments.empty()) { | 171 if (offsets_for_adjustment && !adjustments.empty()) { |
172 std::for_each(offsets_for_adjustment->begin(), | 172 std::for_each(offsets_for_adjustment->begin(), |
173 offsets_for_adjustment->end(), | 173 offsets_for_adjustment->end(), |
174 AdjustEncodingOffset(adjustments)); | 174 AdjustEncodingOffset(adjustments)); |
175 } | 175 } |
176 | 176 |
177 return result; | 177 return result; |
178 } | 178 } |
179 | 179 |
// UnescapeURLImpl: single-offset convenience wrapper around
// UnescapeURLWithOffsetsImpl(). This helper exists only in the OLD column;
// the NEW column deletes it.
//
// |offset_for_adjustment| may be NULL. When non-NULL, its value is copied into
// a one-element vector, adjusted by the vector-based implementation, and
// written back. When NULL, an empty vector is still passed (never NULL), and
// offsets[0] is not read.
180 template<typename STR> | |
181 STR UnescapeURLImpl(const STR& escaped_text, | |
182 UnescapeRule::Type rules, | |
183 size_t* offset_for_adjustment) { | |
184 std::vector<size_t> offsets; | |
185 if (offset_for_adjustment) | |
186 offsets.push_back(*offset_for_adjustment); | |
187 STR result = UnescapeURLWithOffsetsImpl(escaped_text, rules, &offsets); | |
188 if (offset_for_adjustment) | |
189 *offset_for_adjustment = offsets[0]; | |
190 return result; | |
191 } | |
192 | |
193 } // namespace | 180 } // namespace |
194 | 181 |
195 // Everything except alphanumerics and !'()*-._~ | 182 // Everything except alphanumerics and !'()*-._~ |
196 // See RFC 2396 for the list of reserved characters. | 183 // See RFC 2396 for the list of reserved characters. |
// Character map handed to Escape() by EscapeQueryParamValue(). Identical in
// the OLD and NEW columns.
// NOTE(review): presumably the eight 32-bit words form a 256-entry bitmap, one
// bit per byte value, with a set bit meaning "escape this byte" - confirm
// against the Charmap definition, which is not visible here.
197 static const Charmap kQueryCharmap( | 184 static const Charmap kQueryCharmap( |
198 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, | 185 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, |
199 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); | 186 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); |
200 | 187 |
201 std::string EscapeQueryParamValue(const std::string& text, bool use_plus) { | 188 std::string EscapeQueryParamValue(const std::string& text, bool use_plus) { |
202 return Escape(text, kQueryCharmap, use_plus); | 189 return Escape(text, kQueryCharmap, use_plus); |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
244 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); | 231 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); |
245 | 232 |
// Escapes |text| with kExternalHandlerCharmap and returns the result.
// The third Escape() argument is hard-coded to false; EscapeQueryParamValue()
// passes its |use_plus| flag in that position.
246 std::string EscapeExternalHandlerValue(const std::string& text) { | 233 std::string EscapeExternalHandlerValue(const std::string& text) { |
247 return Escape(text, kExternalHandlerCharmap, false); | 234 return Escape(text, kExternalHandlerCharmap, false); |
248 } | 235 } |
249 | 236 |
// Unescapes |text| according to |rules|, then decodes the unescaped bytes as
// UTF-8 into a string16, keeping |offsets_for_adjustment| (may be NULL) in
// step with both transformations.
//
// If the decoding call reports failure, the unescaping is abandoned: the
// caller's offsets are restored from a saved copy and the original, still
// escaped |text| is converted instead.
//
// Diff summary: OLD decodes into a std::wstring via
// UTF8ToWideAndAdjustOffsets() and converts with WideToUTF16Hack(); NEW decodes
// straight into a string16 via UTF8ToUTF16AndAdjustOffsets().
250 string16 UnescapeAndDecodeUTF8URLComponentWithOffsets( | 237 string16 UnescapeAndDecodeUTF8URLComponentWithOffsets( |
251 const std::string& text, | 238 const std::string& text, |
252 UnescapeRule::Type rules, | 239 UnescapeRule::Type rules, |
253 std::vector<size_t>* offsets_for_adjustment) { | 240 std::vector<size_t>* offsets_for_adjustment) { |
254 std::wstring result; | 241 string16 result; |
// Snapshot the incoming offsets so they can be restored on the fallback path.
255 std::vector<size_t> original_offsets; | 242 std::vector<size_t> original_offsets; |
256 if (offsets_for_adjustment) | 243 if (offsets_for_adjustment) |
257 original_offsets = *offsets_for_adjustment; | 244 original_offsets = *offsets_for_adjustment; |
// Step 1: percent-unescape (adjusts the offsets in place).
258 std::string unescaped_url( | 245 std::string unescaped_url( |
259 UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment)); | 246 UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment)); |
// Step 2: decode the unescaped bytes (adjusts the offsets a second time).
260 if (UTF8ToWideAndAdjustOffsets(unescaped_url.data(), unescaped_url.length(), | 247 if (UTF8ToUTF16AndAdjustOffsets(unescaped_url.data(), unescaped_url.length(), |
261 &result, offsets_for_adjustment)) | 248 &result, offsets_for_adjustment)) |
262 return WideToUTF16Hack(result); // Character set looks like it's valid. | 249 return result; // Character set looks like it's valid. |
263 | 250 |
264 // Not valid. Return the escaped version. Undo our changes to | 251 // Not valid. Return the escaped version. Undo our changes to |
265 // |offset_for_adjustment| since we haven't changed the string after all. | 252 // |offset_for_adjustment| since we haven't changed the string after all. |
266 if (offsets_for_adjustment) | 253 if (offsets_for_adjustment) |
267 *offsets_for_adjustment = original_offsets; | 254 *offsets_for_adjustment = original_offsets; |
// Fallback: convert the untouched input, adjusting the restored offsets.
268 return WideToUTF16Hack(UTF8ToWideAndAdjustOffsets( | 255 return UTF8ToUTF16AndAdjustOffsets(text, offsets_for_adjustment); |
269 text, offsets_for_adjustment)); | |
270 } | 256 } |
271 | 257 |
// Single-offset form of UnescapeAndDecodeUTF8URLComponentWithOffsets().
// |offset_for_adjustment| may be NULL. When non-NULL, its value is wrapped in a
// one-element vector, adjusted by the vector-based function, and written back.
// offsets[0] is read only in the non-NULL case, where the vector holds exactly
// the element pushed below.
272 string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, | 258 string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, |
273 UnescapeRule::Type rules, | 259 UnescapeRule::Type rules, |
274 size_t* offset_for_adjustment) { | 260 size_t* offset_for_adjustment) { |
275 std::vector<size_t> offsets; | 261 std::vector<size_t> offsets; |
276 if (offset_for_adjustment) | 262 if (offset_for_adjustment) |
277 offsets.push_back(*offset_for_adjustment); | 263 offsets.push_back(*offset_for_adjustment); |
278 string16 result = | 264 string16 result = |
279 UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules, &offsets); | 265 UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules, &offsets); |
280 if (offset_for_adjustment) | 266 if (offset_for_adjustment) |
281 *offset_for_adjustment = offsets[0]; | 267 *offset_for_adjustment = offsets[0]; |
282 return result; | 268 return result; |
283 } | 269 } |
284 | 270 |
// Unescapes an 8-bit |escaped_text| according to |rules|. NULL is passed for
// the offsets vector, so no offset adjustment is requested.
// Diff summary: NEW drops the explicit <std::string> template argument and
// lets it be deduced from |escaped_text|.
285 std::string UnescapeURLComponent(const std::string& escaped_text, | 271 std::string UnescapeURLComponent(const std::string& escaped_text, |
286 UnescapeRule::Type rules) { | 272 UnescapeRule::Type rules) { |
287 return UnescapeURLWithOffsetsImpl<std::string>(escaped_text, rules, NULL); | 273 return UnescapeURLWithOffsetsImpl(escaped_text, rules, NULL); |
288 } | 274 } |
289 | 275 |
// string16 overload of UnescapeURLComponent(): unescapes |escaped_text|
// according to |rules|. NULL is passed for the offsets vector, so no offset
// adjustment is requested.
// Diff summary: NEW drops the explicit <string16> template argument and lets
// it be deduced from |escaped_text|.
290 string16 UnescapeURLComponent(const string16& escaped_text, | 276 string16 UnescapeURLComponent(const string16& escaped_text, |
291 UnescapeRule::Type rules) { | 277 UnescapeRule::Type rules) { |
292 return UnescapeURLWithOffsetsImpl<string16>(escaped_text, rules, NULL); | 278 return UnescapeURLWithOffsetsImpl(escaped_text, rules, NULL); |
293 } | 279 } |
294 | 280 |
295 | 281 |
296 template <class str> | 282 template <class str> |
297 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { | 283 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { |
298 static const struct { | 284 static const struct { |
299 char key; | 285 char key; |
300 const char* replacement; | 286 const char* replacement; |
301 } kCharsToEscape[] = { | 287 } kCharsToEscape[] = { |
302 { '<', "<" }, | 288 { '<', "<" }, |
(...skipping 12 matching lines...) Expand all Loading... |
315 } | 301 } |
316 } | 302 } |
317 if (k == ARRAYSIZE_UNSAFE(kCharsToEscape)) | 303 if (k == ARRAYSIZE_UNSAFE(kCharsToEscape)) |
318 output->push_back(c); | 304 output->push_back(c); |
319 } | 305 } |
320 | 306 |
// Appends |c| to |*output| by forwarding to the
// AppendEscapedCharForHTMLImpl() template, instantiated for std::string.
321 void AppendEscapedCharForHTML(char c, std::string* output) { | 307 void AppendEscapedCharForHTML(char c, std::string* output) { |
322 AppendEscapedCharForHTMLImpl(c, output); | 308 AppendEscapedCharForHTMLImpl(c, output); |
323 } | 309 } |
324 | 310 |
// Wide-character overload; present only in the OLD column and deleted in NEW.
// The Impl template takes its character as str::value_type, so with
// str = string16 the wchar_t argument is converted to string16's character
// type at the call.
// NOTE(review): whether that conversion can narrow depends on the platform's
// wchar_t width - not determinable from this file.
325 void AppendEscapedCharForHTML(wchar_t c, string16* output) { | |
326 AppendEscapedCharForHTMLImpl(c, output); | |
327 } | |
328 | |
// Returns a copy of |input| built by passing every character through
// AppendEscapedCharForHTMLImpl(). |str| is the string type (the visible
// callers use std::string and string16). Capacity equal to the input size is
// reserved up front, which is exact only when nothing needs escaping; escaped
// characters can make the result longer.
329 template <class str> | 311 template <class str> |
330 str EscapeForHTMLImpl(const str& input) { | 312 str EscapeForHTMLImpl(const str& input) { |
331 str result; | 313 str result; |
332 result.reserve(input.size()); // optimize for no escaping | 314 result.reserve(input.size()); // optimize for no escaping |
333 | 315 |
334 for (typename str::const_iterator it = input.begin(); it != input.end(); ++it) | 316 for (typename str::const_iterator it = input.begin(); it != input.end(); ++it) |
335 AppendEscapedCharForHTMLImpl(*it, &result); | 317 AppendEscapedCharForHTMLImpl(*it, &result); |
336 | 318 |
337 return result; | 319 return result; |
338 } | 320 } |
339 | 321 |
// Returns an HTML-escaped copy of the 8-bit string |input|; thin wrapper over
// EscapeForHTMLImpl<std::string>.
340 std::string EscapeForHTML(const std::string& input) { | 322 std::string EscapeForHTML(const std::string& input) { |
341 return EscapeForHTMLImpl(input); | 323 return EscapeForHTMLImpl(input); |
342 } | 324 } |
343 | 325 |
// Returns an HTML-escaped copy of the UTF-16 string |input|; thin wrapper over
// EscapeForHTMLImpl<string16>.
344 string16 EscapeForHTML(const string16& input) { | 326 string16 EscapeForHTML(const string16& input) { |
345 return EscapeForHTMLImpl(input); | 327 return EscapeForHTMLImpl(input); |
346 } | 328 } |
347 | 329 |
// Returns a copy of |input| in which each occurrence of the entity strings
// listed in kEscapeToChars is replaced by its single-character equivalent.
// Input without any '&' is returned unchanged.
//
// Diff summary: OLD stores the entity strings as wide literals and converts
// them with WideToUTF16(); NEW stores them as narrow literals and converts
// them with ASCIIToUTF16().
//
// NOTE(review): in this rendering each table row shows the same character on
// both sides (e.g. "<" paired with '<'). The entity text appears to have been
// HTML-decoded by the diff viewer; the real source presumably holds the
// "&...;" spellings - confirm against the repository before relying on the
// table as shown.
348 string16 UnescapeForHTML(const string16& input) { | 330 string16 UnescapeForHTML(const string16& input) { |
349 static const struct { | 331 static const struct { |
350 const wchar_t* ampersand_code; | 332 const char* ampersand_code; |
351 const char replacement; | 333 const char replacement; |
352 } kEscapeToChars[] = { | 334 } kEscapeToChars[] = { |
353 { L"<", '<' }, | 335 { "<", '<' }, |
354 { L">", '>' }, | 336 { ">", '>' }, |
355 { L"&", '&' }, | 337 { "&", '&' }, |
356 { L""", '"' }, | 338 { """, '"' }, |
357 { L"'", '\''}, | 339 { "'", '\''}, |
358 }; | 340 }; |
359 | 341 |
// Fast path: nothing to unescape.
360 if (input.find(WideToUTF16(L"&")) == std::string::npos) | 342 if (input.find(ASCIIToUTF16("&")) == std::string::npos) |
361 return input; | 343 return input; |
362 | 344 |
// Per-call cache of the table's entity strings converted to string16; each
// slot is filled the first time it is needed.
363 string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)]; | 345 string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)]; |
364 string16 text(input); | 346 string16 text(input); |
365 for (string16::iterator iter = text.begin(); iter != text.end(); ++iter) { | 347 for (string16::iterator iter = text.begin(); iter != text.end(); ++iter) { |
366 if (*iter == '&') { | 348 if (*iter == '&') { |
367 // Potential ampersand encode char. | 349 // Potential ampersand encode char. |
368 size_t index = iter - text.begin(); | 350 size_t index = iter - text.begin(); |
369 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); i++) { | 351 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); i++) { |
370 if (ampersand_chars[i].empty()) | 352 if (ampersand_chars[i].empty()) |
371 ampersand_chars[i] = WideToUTF16(kEscapeToChars[i].ampersand_code); | 353 ampersand_chars[i] = ASCIIToUTF16(kEscapeToChars[i].ampersand_code); |
// "Starts with entity i at |index|" test. find() keeps scanning forward when
// the entity is not at |index|, so a miss can walk the rest of |text|.
// NOTE(review): a bounded compare(index, len, entity) == 0 would express the
// same test without the forward scan.
372 if (text.find(ampersand_chars[i], index) == index) { | 354 if (text.find(ampersand_chars[i], index) == index) { |
// Collapse the entity to one character in place. The loop's ++iter then steps
// past that character, so replacement output is not scanned again.
// NOTE(review): this keeps using |iter| after text.replace(); confirm that
// string16's implementation leaves it usable when the string shrinks.
373 text.replace(iter, iter + ampersand_chars[i].length(), | 355 text.replace(iter, iter + ampersand_chars[i].length(), |
374 1, kEscapeToChars[i].replacement); | 356 1, kEscapeToChars[i].replacement); |
375 break; | 357 break; |
376 } | 358 } |
377 } | 359 } |
378 } | 360 } |
379 } | 361 } |
380 return text; | 362 return text; |
381 } | 363 } |
(...skipping 14 matching lines...) Expand all Loading... |
396 return; | 378 return; |
397 } | 379 } |
398 if (offset <= (location + 2)) { | 380 if (offset <= (location + 2)) { |
399 offset = string16::npos; | 381 offset = string16::npos; |
400 return; | 382 return; |
401 } | 383 } |
402 adjusted_offset -= 2; | 384 adjusted_offset -= 2; |
403 } | 385 } |
404 offset = adjusted_offset; | 386 offset = adjusted_offset; |
405 } | 387 } |
OLD | NEW |