OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/escape.h" | 5 #include "net/base/escape.h" |
6 | 6 |
7 #include <algorithm> | |
8 #include <memory> | |
9 | |
10 #include "base/logging.h" | 7 #include "base/logging.h" |
11 #include "base/strings/string_piece.h" | |
12 #include "base/strings/string_util.h" | 8 #include "base/strings/string_util.h" |
13 #include "base/strings/utf_offset_string_conversions.h" | |
14 #include "base/strings/utf_string_conversions.h" | 9 #include "base/strings/utf_string_conversions.h" |
15 | 10 |
16 namespace net { | 11 namespace net { |
17 | 12 |
18 namespace { | 13 namespace { |
19 | 14 |
20 const char kHexString[] = "0123456789ABCDEF"; | 15 const char kHexString[] = "0123456789ABCDEF"; |
21 inline char IntToHex(int i) { | 16 inline char IntToHex(int i) { |
22 DCHECK_GE(i, 0) << i << " not a hex value"; | 17 DCHECK_GE(i, 0) << i << " not a hex value"; |
23 DCHECK_LE(i, 15) << i << " not a hex value"; | 18 DCHECK_LE(i, 15) << i << " not a hex value"; |
(...skipping 10 matching lines...) Expand all Loading... |
34 } | 29 } |
35 | 30 |
36 uint32_t map[8]; | 31 uint32_t map[8]; |
37 }; | 32 }; |
38 | 33 |
39 // Given text to escape and a Charmap defining which values to escape, | 34 // Given text to escape and a Charmap defining which values to escape, |
40 // return an escaped string. If use_plus is true, spaces are converted | 35 // return an escaped string. If use_plus is true, spaces are converted |
41 // to +, otherwise, if spaces are in the charmap, they are converted to | 36 // to +, otherwise, if spaces are in the charmap, they are converted to |
42 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if | 37 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if |
43 // '%' is in the charmap, it is converted to %25. | 38 // '%' is in the charmap, it is converted to %25. |
44 std::string Escape(const std::string& text, | 39 std::string Escape(base::StringPiece text, |
45 const Charmap& charmap, | 40 const Charmap& charmap, |
46 bool use_plus, | 41 bool use_plus, |
47 bool keep_escaped = false) { | 42 bool keep_escaped = false) { |
48 std::string escaped; | 43 std::string escaped; |
49 escaped.reserve(text.length() * 3); | 44 escaped.reserve(text.length() * 3); |
50 for (unsigned int i = 0; i < text.length(); ++i) { | 45 for (unsigned int i = 0; i < text.length(); ++i) { |
51 unsigned char c = static_cast<unsigned char>(text[i]); | 46 unsigned char c = static_cast<unsigned char>(text[i]); |
52 if (use_plus && ' ' == c) { | 47 if (use_plus && ' ' == c) { |
53 escaped.push_back('+'); | 48 escaped.push_back('+'); |
54 } else if (keep_escaped && '%' == c && i + 2 < text.length() && | 49 } else if (keep_escaped && '%' == c && i + 2 < text.length() && |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, | 94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, |
100 // ` a b c d e f g h i j k l m n o | 95 // ` a b c d e f g h i j k l m n o |
101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 96 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
102 // p q r s t u v w x y z { | } ~ <NBSP> | 97 // p q r s t u v w x y z { | } ~ <NBSP> |
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 | 98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 |
104 }; | 99 }; |
105 | 100 |
106 // Attempts to unescape the sequence at |index| within |escaped_text|. If | 101 // Attempts to unescape the sequence at |index| within |escaped_text|. If |
107 // successful, sets |value| to the unescaped value. Returns whether | 102 // successful, sets |value| to the unescaped value. Returns whether |
108 // unescaping succeeded. | 103 // unescaping succeeded. |
109 template<typename STR> | 104 template <typename STR> |
110 bool UnescapeUnsignedCharAtIndex(const STR& escaped_text, | 105 bool UnescapeUnsignedCharAtIndex(STR escaped_text, |
111 size_t index, | 106 size_t index, |
112 unsigned char* value) { | 107 unsigned char* value) { |
113 if ((index + 2) >= escaped_text.size()) | 108 if ((index + 2) >= escaped_text.size()) |
114 return false; | 109 return false; |
115 if (escaped_text[index] != '%') | 110 if (escaped_text[index] != '%') |
116 return false; | 111 return false; |
117 const typename STR::value_type most_sig_digit( | 112 const typename STR::value_type most_sig_digit( |
118 static_cast<typename STR::value_type>(escaped_text[index + 1])); | 113 static_cast<typename STR::value_type>(escaped_text[index + 1])); |
119 const typename STR::value_type least_sig_digit( | 114 const typename STR::value_type least_sig_digit( |
120 static_cast<typename STR::value_type>(escaped_text[index + 2])); | 115 static_cast<typename STR::value_type>(escaped_text[index + 2])); |
121 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) { | 116 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) { |
122 *value = base::HexDigitToInt(most_sig_digit) * 16 + | 117 *value = base::HexDigitToInt(most_sig_digit) * 16 + |
123 base::HexDigitToInt(least_sig_digit); | 118 base::HexDigitToInt(least_sig_digit); |
124 return true; | 119 return true; |
125 } | 120 } |
126 return false; | 121 return false; |
127 } | 122 } |
128 | 123 |
// Reports whether an escaped Arabic Language Mark starts at |index|, i.e.
// whether the byte pair 0xD8 0x9C is encoded there. |first_byte| is the
// already-decoded byte at |index|; the second byte is decoded from the escape
// sequence that begins three characters later.
template <typename STR>
bool HasArabicLanguageMarkAtIndex(STR escaped_text,
                                  unsigned char first_byte,
                                  size_t index) {
  unsigned char next_byte;
  return first_byte == 0xD8 &&
         UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &next_byte) &&
         next_byte == 0x9c;
}
142 | 137 |
// Reports whether an escaped three-byte BiDi control character starts at
// |index|. |first_byte| is the already-decoded byte at |index|; the two
// following bytes are decoded from the escape sequences at |index| + 3 and
// |index| + 6. Accepted byte sequences are:
//   E2 80 8E, E2 80 8F, E2 80 AA..AE, and E2 81 A6..A9.
template <typename STR>
bool HasThreeByteBidiControlCharAtIndex(STR escaped_text,
                                        unsigned char first_byte,
                                        size_t index) {
  if (first_byte != 0xE2)
    return false;

  unsigned char middle_byte;
  if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &middle_byte))
    return false;
  const bool middle_is_80 = middle_byte == 0x80;
  if (!middle_is_80 && middle_byte != 0x81)
    return false;

  unsigned char last_byte;
  if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &last_byte))
    return false;

  // Which trailing bytes are banned depends on the middle byte.
  const bool banned_after_80 = last_byte == 0x8E || last_byte == 0x8F ||
                               (last_byte >= 0xAA && last_byte <= 0xAE);
  const bool banned_after_81 = last_byte >= 0xA6 && last_byte <= 0xA9;
  return middle_is_80 ? banned_after_80 : banned_after_81;
}
166 | 161 |
167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is | 162 // Returns true if there is a four-byte banned char at |index|. |first_byte| is |
168 // the byte at |index|. | 163 // the byte at |index|. |
169 template <typename STR> | 164 template <typename STR> |
170 bool HasFourByteBannedCharAtIndex(const STR& escaped_text, | 165 bool HasFourByteBannedCharAtIndex(STR escaped_text, |
171 unsigned char first_byte, | 166 unsigned char first_byte, |
172 size_t index) { | 167 size_t index) { |
173 // The following characters are blacklisted for spoofability concerns. | 168 // The following characters are blacklisted for spoofability concerns. |
174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) | 169 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) |
175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) | 170 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) |
176 // U+1F512 LOCK (%F0%9F%94%92) | 171 // U+1F512 LOCK (%F0%9F%94%92) |
177 // U+1F513 OPEN LOCK (%F0%9F%94%93) | 172 // U+1F513 OPEN LOCK (%F0%9F%94%93) |
178 if (first_byte != 0xF0) | 173 if (first_byte != 0xF0) |
179 return false; | 174 return false; |
180 | 175 |
(...skipping 13 matching lines...) Expand all Loading... |
194 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) && | 189 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) && |
195 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 || | 190 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 || |
196 fourth_byte == 0x93); | 191 fourth_byte == 0x93); |
197 } | 192 } |
198 | 193 |
199 // Unescapes |escaped_text| according to |rules|, returning the resulting | 194 // Unescapes |escaped_text| according to |rules|, returning the resulting |
200 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects | 195 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects |
201 // the alterations done to the string that are not one-character-to-one- | 196 // the alterations done to the string that are not one-character-to-one- |
202 // character. The resulting |adjustments| will always be sorted by increasing | 197 // character. The resulting |adjustments| will always be sorted by increasing |
203 // offset. | 198 // offset. |
204 template<typename STR> | 199 template <typename STR> |
205 STR UnescapeURLWithAdjustmentsImpl( | 200 STR UnescapeURLWithAdjustmentsImpl( |
206 const STR& escaped_text, | 201 base::BasicStringPiece<STR> escaped_text, |
207 UnescapeRule::Type rules, | 202 UnescapeRule::Type rules, |
208 base::OffsetAdjuster::Adjustments* adjustments) { | 203 base::OffsetAdjuster::Adjustments* adjustments) { |
209 if (adjustments) | 204 if (adjustments) |
210 adjustments->clear(); | 205 adjustments->clear(); |
211 // Do not unescape anything, return the |escaped_text| text. | 206 // Do not unescape anything, return the |escaped_text| text. |
212 if (rules == UnescapeRule::NONE) | 207 if (rules == UnescapeRule::NONE) |
213 return escaped_text; | 208 return escaped_text.as_string(); |
214 | 209 |
215 // The output of the unescaping is always smaller than the input, so we can | 210 // The output of the unescaping is always smaller than the input, so we can |
216 // reserve the input size to make sure we have enough buffer and don't have | 211 // reserve the input size to make sure we have enough buffer and don't have |
217 // to allocate in the loop below. | 212 // to allocate in the loop below. |
218 STR result; | 213 STR result; |
219 result.reserve(escaped_text.length()); | 214 result.reserve(escaped_text.length()); |
220 | 215 |
221 // Locations of adjusted text. | 216 // Locations of adjusted text. |
222 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { | 217 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { |
223 if (static_cast<unsigned char>(escaped_text[i]) >= 128) { | 218 if (static_cast<unsigned char>(escaped_text[i]) >= 128) { |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
258 // U+1F513 OPEN LOCK (%F0%9F%94%93) | 253 // U+1F513 OPEN LOCK (%F0%9F%94%93) |
259 // | 254 // |
260 // However, some schemes such as data: and file: need to parse the exact | 255 // However, some schemes such as data: and file: need to parse the exact |
261 // binary data when loading the URL. For that reason, | 256 // binary data when loading the URL. For that reason, |
262 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters. | 257 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters. |
263 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be | 258 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be |
264 // displayed in the UI. | 259 // displayed in the UI. |
265 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) { | 260 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) { |
266 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { | 261 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { |
267 // Keep Arabic Language Mark escaped. | 262 // Keep Arabic Language Mark escaped. |
268 result.append(escaped_text, i, 6); | 263 escaped_text.substr(i, 6).AppendToString(&result); |
269 i += 5; | 264 i += 5; |
270 continue; | 265 continue; |
271 } | 266 } |
272 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { | 267 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { |
273 // Keep BiDi control char escaped. | 268 // Keep BiDi control char escaped. |
274 result.append(escaped_text, i, 9); | 269 escaped_text.substr(i, 9).AppendToString(&result); |
275 i += 8; | 270 i += 8; |
276 continue; | 271 continue; |
277 } | 272 } |
278 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) { | 273 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) { |
279 // Keep banned char escaped. | 274 // Keep banned char escaped. |
280 result.append(escaped_text, i, 12); | 275 escaped_text.substr(i, 12).AppendToString(&result); |
281 i += 11; | 276 i += 11; |
282 continue; | 277 continue; |
283 } | 278 } |
284 } | 279 } |
285 | 280 |
286 if (first_byte >= 0x80 || // Unescape all high-bit characters. | 281 if (first_byte >= 0x80 || // Unescape all high-bit characters. |
287 // For 7-bit characters, the lookup table tells us all valid chars. | 282 // For 7-bit characters, the lookup table tells us all valid chars. |
288 (kUrlUnescape[first_byte] || | 283 (kUrlUnescape[first_byte] || |
289 // ...and we allow some additional unescaping when flags are set. | 284 // ...and we allow some additional unescaping when flags are set. |
290 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || | 285 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
338 while (*p) | 333 while (*p) |
339 output->push_back(*p++); | 334 output->push_back(*p++); |
340 break; | 335 break; |
341 } | 336 } |
342 } | 337 } |
343 if (k == arraysize(kCharsToEscape)) | 338 if (k == arraysize(kCharsToEscape)) |
344 output->push_back(c); | 339 output->push_back(c); |
345 } | 340 } |
346 | 341 |
347 template <class str> | 342 template <class str> |
348 str EscapeForHTMLImpl(const str& input) { | 343 str EscapeForHTMLImpl(base::BasicStringPiece<str> input) { |
349 str result; | 344 str result; |
350 result.reserve(input.size()); // Optimize for no escaping. | 345 result.reserve(input.size()); // Optimize for no escaping. |
351 | 346 |
352 for (typename str::const_iterator i = input.begin(); i != input.end(); ++i) | 347 for (auto c : input) { |
353 AppendEscapedCharForHTMLImpl(*i, &result); | 348 AppendEscapedCharForHTMLImpl(c, &result); |
| 349 } |
354 | 350 |
355 return result; | 351 return result; |
356 } | 352 } |
357 | 353 |
358 // Everything except alphanumerics and !'()*-._~ | 354 // Everything except alphanumerics and !'()*-._~ |
359 // See RFC 2396 for the list of reserved characters. | 355 // See RFC 2396 for the list of reserved characters. |
360 static const Charmap kQueryCharmap = {{ | 356 static const Charmap kQueryCharmap = {{ |
361 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, | 357 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, |
362 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL | 358 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL |
363 }}; | 359 }}; |
(...skipping 26 matching lines...) Expand all Loading... |
390 | 386 |
391 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and | 387 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and |
392 // !'()*-._~#[] | 388 // !'()*-._~#[] |
393 static const Charmap kExternalHandlerCharmap = {{ | 389 static const Charmap kExternalHandlerCharmap = {{ |
394 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L, | 390 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L, |
395 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL | 391 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL |
396 }}; | 392 }}; |
397 | 393 |
398 } // namespace | 394 } // namespace |
399 | 395 |
400 std::string EscapeQueryParamValue(const std::string& text, bool use_plus) { | 396 std::string EscapeQueryParamValue(base::StringPiece text, bool use_plus) { |
401 return Escape(text, kQueryCharmap, use_plus); | 397 return Escape(text, kQueryCharmap, use_plus); |
402 } | 398 } |
403 | 399 |
404 std::string EscapePath(const std::string& path) { | 400 std::string EscapePath(base::StringPiece path) { |
405 return Escape(path, kPathCharmap, false); | 401 return Escape(path, kPathCharmap, false); |
406 } | 402 } |
407 | 403 |
#if defined(OS_MACOSX)
// Escapes |precursor| with kNSURLCharmap. Spaces are never written as '+',
// and %XX sequences already present in the input are kept as they are.
std::string EscapeNSURLPrecursor(base::StringPiece precursor) {
  return Escape(precursor, kNSURLCharmap, /*use_plus=*/false,
                /*keep_escaped=*/true);
}
#endif  // defined(OS_MACOSX)
413 | 409 |
414 std::string EscapeUrlEncodedData(const std::string& path, bool use_plus) { | 410 std::string EscapeUrlEncodedData(base::StringPiece path, bool use_plus) { |
415 return Escape(path, kUrlEscape, use_plus); | 411 return Escape(path, kUrlEscape, use_plus); |
416 } | 412 } |
417 | 413 |
418 std::string EscapeNonASCII(const std::string& input) { | 414 std::string EscapeNonASCII(base::StringPiece input) { |
419 return Escape(input, kNonASCIICharmap, false); | 415 return Escape(input, kNonASCIICharmap, false); |
420 } | 416 } |
421 | 417 |
422 std::string EscapeExternalHandlerValue(const std::string& text) { | 418 std::string EscapeExternalHandlerValue(base::StringPiece text) { |
423 return Escape(text, kExternalHandlerCharmap, false, true); | 419 return Escape(text, kExternalHandlerCharmap, false, true); |
424 } | 420 } |
425 | 421 |
426 void AppendEscapedCharForHTML(char c, std::string* output) { | 422 void AppendEscapedCharForHTML(char c, std::string* output) { |
427 AppendEscapedCharForHTMLImpl(c, output); | 423 AppendEscapedCharForHTMLImpl(c, output); |
428 } | 424 } |
429 | 425 |
430 std::string EscapeForHTML(const std::string& input) { | 426 std::string EscapeForHTML(base::StringPiece input) { |
431 return EscapeForHTMLImpl(input); | 427 return EscapeForHTMLImpl(input); |
432 } | 428 } |
433 | 429 |
434 base::string16 EscapeForHTML(const base::string16& input) { | 430 base::string16 EscapeForHTML(base::StringPiece16 input) { |
435 return EscapeForHTMLImpl(input); | 431 return EscapeForHTMLImpl(input); |
436 } | 432 } |
437 | 433 |
438 std::string UnescapeURLComponent(const std::string& escaped_text, | 434 std::string UnescapeURLComponent(base::StringPiece escaped_text, |
439 UnescapeRule::Type rules) { | 435 UnescapeRule::Type rules) { |
440 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); | 436 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); |
441 } | 437 } |
442 | 438 |
443 base::string16 UnescapeURLComponent(const base::string16& escaped_text, | 439 base::string16 UnescapeURLComponent(base::StringPiece16 escaped_text, |
444 UnescapeRule::Type rules) { | 440 UnescapeRule::Type rules) { |
445 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); | 441 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); |
446 } | 442 } |
447 | 443 |
448 base::string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, | 444 base::string16 UnescapeAndDecodeUTF8URLComponent(base::StringPiece text, |
449 UnescapeRule::Type rules) { | 445 UnescapeRule::Type rules) { |
450 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL); | 446 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL); |
451 } | 447 } |
452 | 448 |
453 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( | 449 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( |
454 const std::string& text, | 450 base::StringPiece text, |
455 UnescapeRule::Type rules, | 451 UnescapeRule::Type rules, |
456 base::OffsetAdjuster::Adjustments* adjustments) { | 452 base::OffsetAdjuster::Adjustments* adjustments) { |
457 base::string16 result; | 453 base::string16 result; |
458 base::OffsetAdjuster::Adjustments unescape_adjustments; | 454 base::OffsetAdjuster::Adjustments unescape_adjustments; |
459 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl( | 455 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl( |
460 text, rules, &unescape_adjustments)); | 456 text, rules, &unescape_adjustments)); |
461 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(), | 457 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(), |
462 unescaped_url.length(), | 458 unescaped_url.length(), |
463 &result, adjustments)) { | 459 &result, adjustments)) { |
464 // Character set looks like it's valid. | 460 // Character set looks like it's valid. |
465 if (adjustments) { | 461 if (adjustments) { |
466 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments, | 462 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments, |
467 adjustments); | 463 adjustments); |
468 } | 464 } |
469 return result; | 465 return result; |
470 } | 466 } |
471 // Character set is not valid. Return the escaped version. | 467 // Character set is not valid. Return the escaped version. |
472 return base::UTF8ToUTF16WithAdjustments(text, adjustments); | 468 return base::UTF8ToUTF16WithAdjustments(text, adjustments); |
473 } | 469 } |
474 | 470 |
475 base::string16 UnescapeForHTML(const base::string16& input) { | 471 base::string16 UnescapeForHTML(base::StringPiece16 input) { |
476 static const struct { | 472 static const struct { |
477 const char* ampersand_code; | 473 const char* ampersand_code; |
478 const char replacement; | 474 const char replacement; |
479 } kEscapeToChars[] = { | 475 } kEscapeToChars[] = { |
480 { "<", '<' }, | 476 { "<", '<' }, |
481 { ">", '>' }, | 477 { ">", '>' }, |
482 { "&", '&' }, | 478 { "&", '&' }, |
483 { """, '"' }, | 479 { """, '"' }, |
484 { "'", '\''}, | 480 { "'", '\''}, |
485 }; | 481 }; |
486 | 482 |
487 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos) | 483 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos) |
488 return input; | 484 return input.as_string(); |
489 | 485 |
490 base::string16 ampersand_chars[arraysize(kEscapeToChars)]; | 486 base::string16 ampersand_chars[arraysize(kEscapeToChars)]; |
491 base::string16 text(input); | 487 base::string16 text = input.as_string(); |
492 for (base::string16::iterator iter = text.begin(); | 488 for (base::string16::iterator iter = text.begin(); |
493 iter != text.end(); ++iter) { | 489 iter != text.end(); ++iter) { |
494 if (*iter == '&') { | 490 if (*iter == '&') { |
495 // Potential ampersand encode char. | 491 // Potential ampersand encode char. |
496 size_t index = iter - text.begin(); | 492 size_t index = iter - text.begin(); |
497 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) { | 493 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) { |
498 if (ampersand_chars[i].empty()) { | 494 if (ampersand_chars[i].empty()) { |
499 ampersand_chars[i] = | 495 ampersand_chars[i] = |
500 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code); | 496 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code); |
501 } | 497 } |
502 if (text.find(ampersand_chars[i], index) == index) { | 498 if (text.find(ampersand_chars[i], index) == index) { |
503 text.replace(iter, iter + ampersand_chars[i].length(), | 499 text.replace(iter, iter + ampersand_chars[i].length(), |
504 1, kEscapeToChars[i].replacement); | 500 1, kEscapeToChars[i].replacement); |
505 break; | 501 break; |
506 } | 502 } |
507 } | 503 } |
508 } | 504 } |
509 } | 505 } |
510 return text; | 506 return text; |
511 } | 507 } |
512 | 508 |
513 } // namespace net | 509 } // namespace net |
OLD | NEW |