Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(187)

Side by Side Diff: net/base/escape.cc

Issue 2615633007: Change net/base/escape.h to use StringPiece. (Closed)
Patch Set: Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « net/base/escape.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/escape.h" 5 #include "net/base/escape.h"
6 6
7 #include <algorithm>
8 #include <memory>
9
10 #include "base/logging.h" 7 #include "base/logging.h"
11 #include "base/strings/string_piece.h"
12 #include "base/strings/string_util.h" 8 #include "base/strings/string_util.h"
13 #include "base/strings/utf_offset_string_conversions.h"
14 #include "base/strings/utf_string_conversions.h" 9 #include "base/strings/utf_string_conversions.h"
15 10
16 namespace net { 11 namespace net {
17 12
18 namespace { 13 namespace {
19 14
20 const char kHexString[] = "0123456789ABCDEF"; 15 const char kHexString[] = "0123456789ABCDEF";
21 inline char IntToHex(int i) { 16 inline char IntToHex(int i) {
22 DCHECK_GE(i, 0) << i << " not a hex value"; 17 DCHECK_GE(i, 0) << i << " not a hex value";
23 DCHECK_LE(i, 15) << i << " not a hex value"; 18 DCHECK_LE(i, 15) << i << " not a hex value";
(...skipping 10 matching lines...) Expand all
34 } 29 }
35 30
36 uint32_t map[8]; 31 uint32_t map[8];
37 }; 32 };
38 33
39 // Given text to escape and a Charmap defining which values to escape, 34 // Given text to escape and a Charmap defining which values to escape,
40 // return an escaped string. If use_plus is true, spaces are converted 35 // return an escaped string. If use_plus is true, spaces are converted
41 // to +, otherwise, if spaces are in the charmap, they are converted to 36 // to +, otherwise, if spaces are in the charmap, they are converted to
42 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if 37 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if
43 // '%' is in the charmap, it is converted to %25. 38 // '%' is in the charmap, it is converted to %25.
44 std::string Escape(const std::string& text, 39 std::string Escape(base::StringPiece text,
45 const Charmap& charmap, 40 const Charmap& charmap,
46 bool use_plus, 41 bool use_plus,
47 bool keep_escaped = false) { 42 bool keep_escaped = false) {
48 std::string escaped; 43 std::string escaped;
49 escaped.reserve(text.length() * 3); 44 escaped.reserve(text.length() * 3);
50 for (unsigned int i = 0; i < text.length(); ++i) { 45 for (unsigned int i = 0; i < text.length(); ++i) {
51 unsigned char c = static_cast<unsigned char>(text[i]); 46 unsigned char c = static_cast<unsigned char>(text[i]);
52 if (use_plus && ' ' == c) { 47 if (use_plus && ' ' == c) {
53 escaped.push_back('+'); 48 escaped.push_back('+');
54 } else if (keep_escaped && '%' == c && i + 2 < text.length() && 49 } else if (keep_escaped && '%' == c && i + 2 < text.length() &&
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
100 // ` a b c d e f g h i j k l m n o 95 // ` a b c d e f g h i j k l m n o
101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 96 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 // p q r s t u v w x y z { | } ~ <NBSP> 97 // p q r s t u v w x y z { | } ~ <NBSP>
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0
104 }; 99 };
105 100
106 // Attempts to unescape the sequence at |index| within |escaped_text|. If 101 // Attempts to unescape the sequence at |index| within |escaped_text|. If
107 // successful, sets |value| to the unescaped value. Returns whether 102 // successful, sets |value| to the unescaped value. Returns whether
108 // unescaping succeeded. 103 // unescaping succeeded.
109 template<typename STR> 104 template <typename STR>
110 bool UnescapeUnsignedCharAtIndex(const STR& escaped_text, 105 bool UnescapeUnsignedCharAtIndex(STR escaped_text,
111 size_t index, 106 size_t index,
112 unsigned char* value) { 107 unsigned char* value) {
113 if ((index + 2) >= escaped_text.size()) 108 if ((index + 2) >= escaped_text.size())
114 return false; 109 return false;
115 if (escaped_text[index] != '%') 110 if (escaped_text[index] != '%')
116 return false; 111 return false;
117 const typename STR::value_type most_sig_digit( 112 const typename STR::value_type most_sig_digit(
118 static_cast<typename STR::value_type>(escaped_text[index + 1])); 113 static_cast<typename STR::value_type>(escaped_text[index + 1]));
119 const typename STR::value_type least_sig_digit( 114 const typename STR::value_type least_sig_digit(
120 static_cast<typename STR::value_type>(escaped_text[index + 2])); 115 static_cast<typename STR::value_type>(escaped_text[index + 2]));
121 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) { 116 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) {
122 *value = base::HexDigitToInt(most_sig_digit) * 16 + 117 *value = base::HexDigitToInt(most_sig_digit) * 16 +
123 base::HexDigitToInt(least_sig_digit); 118 base::HexDigitToInt(least_sig_digit);
124 return true; 119 return true;
125 } 120 }
126 return false; 121 return false;
127 } 122 }
128 123
129 // Returns true if there is an Arabic Language Mark at |index|. |first_byte| 124 // Returns true if there is an Arabic Language Mark at |index|. |first_byte|
130 // is the byte at |index|. 125 // is the byte at |index|.
131 template<typename STR> 126 template <typename STR>
132 bool HasArabicLanguageMarkAtIndex(const STR& escaped_text, 127 bool HasArabicLanguageMarkAtIndex(STR escaped_text,
133 unsigned char first_byte, 128 unsigned char first_byte,
134 size_t index) { 129 size_t index) {
135 if (first_byte != 0xD8) 130 if (first_byte != 0xD8)
136 return false; 131 return false;
137 unsigned char second_byte; 132 unsigned char second_byte;
138 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) 133 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))
139 return false; 134 return false;
140 return second_byte == 0x9c; 135 return second_byte == 0x9c;
141 } 136 }
142 137
143 // Returns true if there is a BiDi control char at |index|. |first_byte| is the 138 // Returns true if there is a BiDi control char at |index|. |first_byte| is the
144 // byte at |index|. 139 // byte at |index|.
145 template<typename STR> 140 template <typename STR>
146 bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text, 141 bool HasThreeByteBidiControlCharAtIndex(STR escaped_text,
147 unsigned char first_byte, 142 unsigned char first_byte,
148 size_t index) { 143 size_t index) {
149 if (first_byte != 0xE2) 144 if (first_byte != 0xE2)
150 return false; 145 return false;
151 unsigned char second_byte; 146 unsigned char second_byte;
152 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) 147 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))
153 return false; 148 return false;
154 if (second_byte != 0x80 && second_byte != 0x81) 149 if (second_byte != 0x80 && second_byte != 0x81)
155 return false; 150 return false;
156 unsigned char third_byte; 151 unsigned char third_byte;
157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) 152 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))
158 return false; 153 return false;
159 if (second_byte == 0x80) { 154 if (second_byte == 0x80) {
160 return third_byte == 0x8E || 155 return third_byte == 0x8E ||
161 third_byte == 0x8F || 156 third_byte == 0x8F ||
162 (third_byte >= 0xAA && third_byte <= 0xAE); 157 (third_byte >= 0xAA && third_byte <= 0xAE);
163 } 158 }
164 return third_byte >= 0xA6 && third_byte <= 0xA9; 159 return third_byte >= 0xA6 && third_byte <= 0xA9;
165 } 160 }
166 161
167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is 162 // Returns true if there is a four-byte banned char at |index|. |first_byte| is
168 // the byte at |index|. 163 // the byte at |index|.
169 template <typename STR> 164 template <typename STR>
170 bool HasFourByteBannedCharAtIndex(const STR& escaped_text, 165 bool HasFourByteBannedCharAtIndex(STR escaped_text,
171 unsigned char first_byte, 166 unsigned char first_byte,
172 size_t index) { 167 size_t index) {
173 // The following characters are blacklisted for spoofability concerns. 168 // The following characters are blacklisted for spoofability concerns.
174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) 169 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)
175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) 170 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)
176 // U+1F512 LOCK (%F0%9F%94%92) 171 // U+1F512 LOCK (%F0%9F%94%92)
177 // U+1F513 OPEN LOCK (%F0%9F%94%93) 172 // U+1F513 OPEN LOCK (%F0%9F%94%93)
178 if (first_byte != 0xF0) 173 if (first_byte != 0xF0)
179 return false; 174 return false;
180 175
(...skipping 13 matching lines...) Expand all
194 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) && 189 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) &&
195 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 || 190 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 ||
196 fourth_byte == 0x93); 191 fourth_byte == 0x93);
197 } 192 }
198 193
199 // Unescapes |escaped_text| according to |rules|, returning the resulting 194 // Unescapes |escaped_text| according to |rules|, returning the resulting
200 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects 195 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects
201 // the alterations done to the string that are not one-character-to-one- 196 // the alterations done to the string that are not one-character-to-one-
202 // character. The resulting |adjustments| will always be sorted by increasing 197 // character. The resulting |adjustments| will always be sorted by increasing
203 // offset. 198 // offset.
204 template<typename STR> 199 template <typename STR>
205 STR UnescapeURLWithAdjustmentsImpl( 200 STR UnescapeURLWithAdjustmentsImpl(
206 const STR& escaped_text, 201 base::BasicStringPiece<STR> escaped_text,
207 UnescapeRule::Type rules, 202 UnescapeRule::Type rules,
208 base::OffsetAdjuster::Adjustments* adjustments) { 203 base::OffsetAdjuster::Adjustments* adjustments) {
209 if (adjustments) 204 if (adjustments)
210 adjustments->clear(); 205 adjustments->clear();
211 // Do not unescape anything, return the |escaped_text| text. 206 // Do not unescape anything, return the |escaped_text| text.
212 if (rules == UnescapeRule::NONE) 207 if (rules == UnescapeRule::NONE)
213 return escaped_text; 208 return escaped_text.as_string();
214 209
215 // The output of the unescaping is always smaller than the input, so we can 210 // The output of the unescaping is always smaller than the input, so we can
216 // reserve the input size to make sure we have enough buffer and don't have 211 // reserve the input size to make sure we have enough buffer and don't have
217 // to allocate in the loop below. 212 // to allocate in the loop below.
218 STR result; 213 STR result;
219 result.reserve(escaped_text.length()); 214 result.reserve(escaped_text.length());
220 215
221 // Locations of adjusted text. 216 // Locations of adjusted text.
222 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { 217 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
223 if (static_cast<unsigned char>(escaped_text[i]) >= 128) { 218 if (static_cast<unsigned char>(escaped_text[i]) >= 128) {
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
258 // U+1F513 OPEN LOCK (%F0%9F%94%93) 253 // U+1F513 OPEN LOCK (%F0%9F%94%93)
259 // 254 //
260 // However, some schemes such as data: and file: need to parse the exact 255 // However, some schemes such as data: and file: need to parse the exact
261 // binary data when loading the URL. For that reason, 256 // binary data when loading the URL. For that reason,
262 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters. 257 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters.
263 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be 258 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be
264 // displayed in the UI. 259 // displayed in the UI.
265 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) { 260 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) {
266 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { 261 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {
267 // Keep Arabic Language Mark escaped. 262 // Keep Arabic Language Mark escaped.
268 result.append(escaped_text, i, 6); 263 escaped_text.substr(i, 6).AppendToString(&result);
269 i += 5; 264 i += 5;
270 continue; 265 continue;
271 } 266 }
272 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { 267 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {
273 // Keep BiDi control char escaped. 268 // Keep BiDi control char escaped.
274 result.append(escaped_text, i, 9); 269 escaped_text.substr(i, 9).AppendToString(&result);
275 i += 8; 270 i += 8;
276 continue; 271 continue;
277 } 272 }
278 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) { 273 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {
279 // Keep banned char escaped. 274 // Keep banned char escaped.
280 result.append(escaped_text, i, 12); 275 escaped_text.substr(i, 12).AppendToString(&result);
281 i += 11; 276 i += 11;
282 continue; 277 continue;
283 } 278 }
284 } 279 }
285 280
286 if (first_byte >= 0x80 || // Unescape all high-bit characters. 281 if (first_byte >= 0x80 || // Unescape all high-bit characters.
287 // For 7-bit characters, the lookup table tells us all valid chars. 282 // For 7-bit characters, the lookup table tells us all valid chars.
288 (kUrlUnescape[first_byte] || 283 (kUrlUnescape[first_byte] ||
289 // ...and we allow some additional unescaping when flags are set. 284 // ...and we allow some additional unescaping when flags are set.
290 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || 285 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
338 while (*p) 333 while (*p)
339 output->push_back(*p++); 334 output->push_back(*p++);
340 break; 335 break;
341 } 336 }
342 } 337 }
343 if (k == arraysize(kCharsToEscape)) 338 if (k == arraysize(kCharsToEscape))
344 output->push_back(c); 339 output->push_back(c);
345 } 340 }
346 341
347 template <class str> 342 template <class str>
348 str EscapeForHTMLImpl(const str& input) { 343 str EscapeForHTMLImpl(base::BasicStringPiece<str> input) {
349 str result; 344 str result;
350 result.reserve(input.size()); // Optimize for no escaping. 345 result.reserve(input.size()); // Optimize for no escaping.
351 346
352 for (typename str::const_iterator i = input.begin(); i != input.end(); ++i) 347 for (auto c : input) {
353 AppendEscapedCharForHTMLImpl(*i, &result); 348 AppendEscapedCharForHTMLImpl(c, &result);
349 }
354 350
355 return result; 351 return result;
356 } 352 }
357 353
358 // Everything except alphanumerics and !'()*-._~ 354 // Everything except alphanumerics and !'()*-._~
359 // See RFC 2396 for the list of reserved characters. 355 // See RFC 2396 for the list of reserved characters.
360 static const Charmap kQueryCharmap = {{ 356 static const Charmap kQueryCharmap = {{
361 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, 357 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L,
362 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL 358 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL
363 }}; 359 }};
(...skipping 26 matching lines...) Expand all
390 386
391 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and 387 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and
392 // !'()*-._~#[] 388 // !'()*-._~#[]
393 static const Charmap kExternalHandlerCharmap = {{ 389 static const Charmap kExternalHandlerCharmap = {{
394 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L, 390 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L,
395 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL 391 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL
396 }}; 392 }};
397 393
398 } // namespace 394 } // namespace
399 395
400 std::string EscapeQueryParamValue(const std::string& text, bool use_plus) { 396 std::string EscapeQueryParamValue(base::StringPiece text, bool use_plus) {
401 return Escape(text, kQueryCharmap, use_plus); 397 return Escape(text, kQueryCharmap, use_plus);
402 } 398 }
403 399
404 std::string EscapePath(const std::string& path) { 400 std::string EscapePath(base::StringPiece path) {
405 return Escape(path, kPathCharmap, false); 401 return Escape(path, kPathCharmap, false);
406 } 402 }
407 403
408 #if defined(OS_MACOSX) 404 #if defined(OS_MACOSX)
409 std::string EscapeNSURLPrecursor(const std::string& precursor) { 405 std::string EscapeNSURLPrecursor(base::StringPiece precursor) {
410 return Escape(precursor, kNSURLCharmap, false, true); 406 return Escape(precursor, kNSURLCharmap, false, true);
411 } 407 }
412 #endif // defined(OS_MACOSX) 408 #endif // defined(OS_MACOSX)
413 409
414 std::string EscapeUrlEncodedData(const std::string& path, bool use_plus) { 410 std::string EscapeUrlEncodedData(base::StringPiece path, bool use_plus) {
415 return Escape(path, kUrlEscape, use_plus); 411 return Escape(path, kUrlEscape, use_plus);
416 } 412 }
417 413
418 std::string EscapeNonASCII(const std::string& input) { 414 std::string EscapeNonASCII(base::StringPiece input) {
419 return Escape(input, kNonASCIICharmap, false); 415 return Escape(input, kNonASCIICharmap, false);
420 } 416 }
421 417
422 std::string EscapeExternalHandlerValue(const std::string& text) { 418 std::string EscapeExternalHandlerValue(base::StringPiece text) {
423 return Escape(text, kExternalHandlerCharmap, false, true); 419 return Escape(text, kExternalHandlerCharmap, false, true);
424 } 420 }
425 421
426 void AppendEscapedCharForHTML(char c, std::string* output) { 422 void AppendEscapedCharForHTML(char c, std::string* output) {
427 AppendEscapedCharForHTMLImpl(c, output); 423 AppendEscapedCharForHTMLImpl(c, output);
428 } 424 }
429 425
430 std::string EscapeForHTML(const std::string& input) { 426 std::string EscapeForHTML(base::StringPiece input) {
431 return EscapeForHTMLImpl(input); 427 return EscapeForHTMLImpl(input);
432 } 428 }
433 429
434 base::string16 EscapeForHTML(const base::string16& input) { 430 base::string16 EscapeForHTML(base::StringPiece16 input) {
435 return EscapeForHTMLImpl(input); 431 return EscapeForHTMLImpl(input);
436 } 432 }
437 433
438 std::string UnescapeURLComponent(const std::string& escaped_text, 434 std::string UnescapeURLComponent(base::StringPiece escaped_text,
439 UnescapeRule::Type rules) { 435 UnescapeRule::Type rules) {
440 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); 436 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);
441 } 437 }
442 438
443 base::string16 UnescapeURLComponent(const base::string16& escaped_text, 439 base::string16 UnescapeURLComponent(base::StringPiece16 escaped_text,
444 UnescapeRule::Type rules) { 440 UnescapeRule::Type rules) {
445 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); 441 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);
446 } 442 }
447 443
448 base::string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, 444 base::string16 UnescapeAndDecodeUTF8URLComponent(base::StringPiece text,
449 UnescapeRule::Type rules) { 445 UnescapeRule::Type rules) {
450 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL); 446 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL);
451 } 447 }
452 448
453 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( 449 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(
454 const std::string& text, 450 base::StringPiece text,
455 UnescapeRule::Type rules, 451 UnescapeRule::Type rules,
456 base::OffsetAdjuster::Adjustments* adjustments) { 452 base::OffsetAdjuster::Adjustments* adjustments) {
457 base::string16 result; 453 base::string16 result;
458 base::OffsetAdjuster::Adjustments unescape_adjustments; 454 base::OffsetAdjuster::Adjustments unescape_adjustments;
459 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl( 455 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl(
460 text, rules, &unescape_adjustments)); 456 text, rules, &unescape_adjustments));
461 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(), 457 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(),
462 unescaped_url.length(), 458 unescaped_url.length(),
463 &result, adjustments)) { 459 &result, adjustments)) {
464 // Character set looks like it's valid. 460 // Character set looks like it's valid.
465 if (adjustments) { 461 if (adjustments) {
466 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments, 462 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments,
467 adjustments); 463 adjustments);
468 } 464 }
469 return result; 465 return result;
470 } 466 }
471 // Character set is not valid. Return the escaped version. 467 // Character set is not valid. Return the escaped version.
472 return base::UTF8ToUTF16WithAdjustments(text, adjustments); 468 return base::UTF8ToUTF16WithAdjustments(text, adjustments);
473 } 469 }
474 470
475 base::string16 UnescapeForHTML(const base::string16& input) { 471 base::string16 UnescapeForHTML(base::StringPiece16 input) {
476 static const struct { 472 static const struct {
477 const char* ampersand_code; 473 const char* ampersand_code;
478 const char replacement; 474 const char replacement;
479 } kEscapeToChars[] = { 475 } kEscapeToChars[] = {
480 { "&lt;", '<' }, 476 { "&lt;", '<' },
481 { "&gt;", '>' }, 477 { "&gt;", '>' },
482 { "&amp;", '&' }, 478 { "&amp;", '&' },
483 { "&quot;", '"' }, 479 { "&quot;", '"' },
484 { "&#39;", '\''}, 480 { "&#39;", '\''},
485 }; 481 };
486 482
487 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos) 483 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos)
488 return input; 484 return input.as_string();
489 485
490 base::string16 ampersand_chars[arraysize(kEscapeToChars)]; 486 base::string16 ampersand_chars[arraysize(kEscapeToChars)];
491 base::string16 text(input); 487 base::string16 text = input.as_string();
492 for (base::string16::iterator iter = text.begin(); 488 for (base::string16::iterator iter = text.begin();
493 iter != text.end(); ++iter) { 489 iter != text.end(); ++iter) {
494 if (*iter == '&') { 490 if (*iter == '&') {
495 // Potential ampersand encode char. 491 // Potential ampersand encode char.
496 size_t index = iter - text.begin(); 492 size_t index = iter - text.begin();
497 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) { 493 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) {
498 if (ampersand_chars[i].empty()) { 494 if (ampersand_chars[i].empty()) {
499 ampersand_chars[i] = 495 ampersand_chars[i] =
500 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code); 496 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code);
501 } 497 }
502 if (text.find(ampersand_chars[i], index) == index) { 498 if (text.find(ampersand_chars[i], index) == index) {
503 text.replace(iter, iter + ampersand_chars[i].length(), 499 text.replace(iter, iter + ampersand_chars[i].length(),
504 1, kEscapeToChars[i].replacement); 500 1, kEscapeToChars[i].replacement);
505 break; 501 break;
506 } 502 }
507 } 503 }
508 } 504 }
509 } 505 }
510 return text; 506 return text;
511 } 507 }
512 508
513 } // namespace net 509 } // namespace net
OLD | NEW
« no previous file with comments | « net/base/escape.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698