Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(530)

Side by Side Diff: net/base/escape.cc

Issue 2615633007: Change net/base/escape.h to use StringPiece. (Closed)
Patch Set: Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « net/base/escape.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/escape.h" 5 #include "net/base/escape.h"
6 6
7 #include <algorithm> 7 #include <utility>
8 #include <memory>
9 8
10 #include "base/logging.h" 9 #include "base/logging.h"
11 #include "base/strings/string_piece.h"
12 #include "base/strings/string_util.h" 10 #include "base/strings/string_util.h"
13 #include "base/strings/utf_offset_string_conversions.h"
mmenke 2017/01/05 16:19:54 This is needed for UTF8ToUTF16WithAdjustments, no?
mmenke 2017/01/05 16:21:05 Oops, ignore this. I just assumed this was a meth
14 #include "base/strings/utf_string_conversions.h" 11 #include "base/strings/utf_string_conversions.h"
15 12
16 namespace net { 13 namespace net {
17 14
18 namespace { 15 namespace {
19 16
17 template <typename STR>
18 using StringTypeForStringPiece = decltype(std::declval<STR>().as_string());
mmenke 2017/01/05 16:19:54 I think this is sufficiently obscure that it needs
Sam McNally 2017/01/06 00:20:51 I found an alternative that avoids this.
19
20 const char kHexString[] = "0123456789ABCDEF"; 20 const char kHexString[] = "0123456789ABCDEF";
21 inline char IntToHex(int i) { 21 inline char IntToHex(int i) {
22 DCHECK_GE(i, 0) << i << " not a hex value"; 22 DCHECK_GE(i, 0) << i << " not a hex value";
23 DCHECK_LE(i, 15) << i << " not a hex value"; 23 DCHECK_LE(i, 15) << i << " not a hex value";
24 return kHexString[i]; 24 return kHexString[i];
25 } 25 }
26 26
27 // A fast bit-vector map for ascii characters. 27 // A fast bit-vector map for ascii characters.
28 // 28 //
29 // Internally stores 256 bits in an array of 8 ints. 29 // Internally stores 256 bits in an array of 8 ints.
30 // Does quick bit-flicking to lookup needed characters. 30 // Does quick bit-flicking to lookup needed characters.
31 struct Charmap { 31 struct Charmap {
32 bool Contains(unsigned char c) const { 32 bool Contains(unsigned char c) const {
33 return ((map[c >> 5] & (1 << (c & 31))) != 0); 33 return ((map[c >> 5] & (1 << (c & 31))) != 0);
34 } 34 }
35 35
36 uint32_t map[8]; 36 uint32_t map[8];
37 }; 37 };
38 38
39 // Given text to escape and a Charmap defining which values to escape, 39 // Given text to escape and a Charmap defining which values to escape,
40 // return an escaped string. If use_plus is true, spaces are converted 40 // return an escaped string. If use_plus is true, spaces are converted
41 // to +, otherwise, if spaces are in the charmap, they are converted to 41 // to +, otherwise, if spaces are in the charmap, they are converted to
42 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if 42 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if
43 // '%' is in the charmap, it is converted to %25. 43 // '%' is in the charmap, it is converted to %25.
44 std::string Escape(const std::string& text, 44 std::string Escape(base::StringPiece text,
45 const Charmap& charmap, 45 const Charmap& charmap,
46 bool use_plus, 46 bool use_plus,
47 bool keep_escaped = false) { 47 bool keep_escaped = false) {
48 std::string escaped; 48 std::string escaped;
49 escaped.reserve(text.length() * 3); 49 escaped.reserve(text.length() * 3);
50 for (unsigned int i = 0; i < text.length(); ++i) { 50 for (unsigned int i = 0; i < text.length(); ++i) {
51 unsigned char c = static_cast<unsigned char>(text[i]); 51 unsigned char c = static_cast<unsigned char>(text[i]);
52 if (use_plus && ' ' == c) { 52 if (use_plus && ' ' == c) {
53 escaped.push_back('+'); 53 escaped.push_back('+');
54 } else if (keep_escaped && '%' == c && i + 2 < text.length() && 54 } else if (keep_escaped && '%' == c && i + 2 < text.length() &&
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
100 // ` a b c d e f g h i j k l m n o 100 // ` a b c d e f g h i j k l m n o
101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 // p q r s t u v w x y z { | } ~ <NBSP> 102 // p q r s t u v w x y z { | } ~ <NBSP>
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0
104 }; 104 };
105 105
106 // Attempts to unescape the sequence at |index| within |escaped_text|. If 106 // Attempts to unescape the sequence at |index| within |escaped_text|. If
107 // successful, sets |value| to the unescaped value. Returns whether 107 // successful, sets |value| to the unescaped value. Returns whether
108 // unescaping succeeded. 108 // unescaping succeeded.
109 template<typename STR> 109 template <typename STR>
110 bool UnescapeUnsignedCharAtIndex(const STR& escaped_text, 110 bool UnescapeUnsignedCharAtIndex(STR escaped_text,
111 size_t index, 111 size_t index,
112 unsigned char* value) { 112 unsigned char* value) {
113 if ((index + 2) >= escaped_text.size()) 113 if ((index + 2) >= escaped_text.size())
114 return false; 114 return false;
115 if (escaped_text[index] != '%') 115 if (escaped_text[index] != '%')
116 return false; 116 return false;
117 const typename STR::value_type most_sig_digit( 117 const typename STR::value_type most_sig_digit(
118 static_cast<typename STR::value_type>(escaped_text[index + 1])); 118 static_cast<typename STR::value_type>(escaped_text[index + 1]));
119 const typename STR::value_type least_sig_digit( 119 const typename STR::value_type least_sig_digit(
120 static_cast<typename STR::value_type>(escaped_text[index + 2])); 120 static_cast<typename STR::value_type>(escaped_text[index + 2]));
121 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) { 121 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) {
122 *value = base::HexDigitToInt(most_sig_digit) * 16 + 122 *value = base::HexDigitToInt(most_sig_digit) * 16 +
123 base::HexDigitToInt(least_sig_digit); 123 base::HexDigitToInt(least_sig_digit);
124 return true; 124 return true;
125 } 125 }
126 return false; 126 return false;
127 } 127 }
128 128
129 // Returns true if there is an Arabic Language Mark at |index|. |first_byte| 129 // Returns true if there is an Arabic Language Mark at |index|. |first_byte|
130 // is the byte at |index|. 130 // is the byte at |index|.
131 template<typename STR> 131 template <typename STR>
132 bool HasArabicLanguageMarkAtIndex(const STR& escaped_text, 132 bool HasArabicLanguageMarkAtIndex(STR escaped_text,
133 unsigned char first_byte, 133 unsigned char first_byte,
134 size_t index) { 134 size_t index) {
135 if (first_byte != 0xD8) 135 if (first_byte != 0xD8)
136 return false; 136 return false;
137 unsigned char second_byte; 137 unsigned char second_byte;
138 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) 138 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))
139 return false; 139 return false;
140 return second_byte == 0x9c; 140 return second_byte == 0x9c;
141 } 141 }
142 142
143 // Returns true if there is a BiDi control char at |index|. |first_byte| is the 143 // Returns true if there is a BiDi control char at |index|. |first_byte| is the
144 // byte at |index|. 144 // byte at |index|.
145 template<typename STR> 145 template <typename STR>
146 bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text, 146 bool HasThreeByteBidiControlCharAtIndex(STR escaped_text,
147 unsigned char first_byte, 147 unsigned char first_byte,
148 size_t index) { 148 size_t index) {
149 if (first_byte != 0xE2) 149 if (first_byte != 0xE2)
150 return false; 150 return false;
151 unsigned char second_byte; 151 unsigned char second_byte;
152 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) 152 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))
153 return false; 153 return false;
154 if (second_byte != 0x80 && second_byte != 0x81) 154 if (second_byte != 0x80 && second_byte != 0x81)
155 return false; 155 return false;
156 unsigned char third_byte; 156 unsigned char third_byte;
157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) 157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))
158 return false; 158 return false;
159 if (second_byte == 0x80) { 159 if (second_byte == 0x80) {
160 return third_byte == 0x8E || 160 return third_byte == 0x8E ||
161 third_byte == 0x8F || 161 third_byte == 0x8F ||
162 (third_byte >= 0xAA && third_byte <= 0xAE); 162 (third_byte >= 0xAA && third_byte <= 0xAE);
163 } 163 }
164 return third_byte >= 0xA6 && third_byte <= 0xA9; 164 return third_byte >= 0xA6 && third_byte <= 0xA9;
165 } 165 }
166 166
167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is 167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is
168 // the byte at |index|. 168 // the byte at |index|.
169 template <typename STR> 169 template <typename STR>
170 bool HasFourByteBannedCharAtIndex(const STR& escaped_text, 170 bool HasFourByteBannedCharAtIndex(STR escaped_text,
171 unsigned char first_byte, 171 unsigned char first_byte,
172 size_t index) { 172 size_t index) {
173 // The following characters are blacklisted for spoofability concerns. 173 // The following characters are blacklisted for spoofability concerns.
174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) 174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)
175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) 175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)
176 // U+1F512 LOCK (%F0%9F%94%92) 176 // U+1F512 LOCK (%F0%9F%94%92)
177 // U+1F513 OPEN LOCK (%F0%9F%94%93) 177 // U+1F513 OPEN LOCK (%F0%9F%94%93)
178 if (first_byte != 0xF0) 178 if (first_byte != 0xF0)
179 return false; 179 return false;
180 180
(...skipping 13 matching lines...) Expand all
194 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) && 194 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) &&
195 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 || 195 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 ||
196 fourth_byte == 0x93); 196 fourth_byte == 0x93);
197 } 197 }
198 198
199 // Unescapes |escaped_text| according to |rules|, returning the resulting 199 // Unescapes |escaped_text| according to |rules|, returning the resulting
200 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects 200 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects
201 // the alterations done to the string that are not one-character-to-one- 201 // the alterations done to the string that are not one-character-to-one-
202 // character. The resulting |adjustments| will always be sorted by increasing 202 // character. The resulting |adjustments| will always be sorted by increasing
203 // offset. 203 // offset.
204 template<typename STR> 204 template <typename STR>
205 STR UnescapeURLWithAdjustmentsImpl( 205 StringTypeForStringPiece<STR> UnescapeURLWithAdjustmentsImpl(
206 const STR& escaped_text, 206 STR escaped_text,
207 UnescapeRule::Type rules, 207 UnescapeRule::Type rules,
208 base::OffsetAdjuster::Adjustments* adjustments) { 208 base::OffsetAdjuster::Adjustments* adjustments) {
209 if (adjustments) 209 if (adjustments)
210 adjustments->clear(); 210 adjustments->clear();
211 // Do not unescape anything, return the |escaped_text| text. 211 // Do not unescape anything, return the |escaped_text| text.
212 if (rules == UnescapeRule::NONE) 212 if (rules == UnescapeRule::NONE)
213 return escaped_text; 213 return escaped_text.as_string();
214 214
215 // The output of the unescaping is always smaller than the input, so we can 215 // The output of the unescaping is always smaller than the input, so we can
216 // reserve the input size to make sure we have enough buffer and don't have 216 // reserve the input size to make sure we have enough buffer and don't have
217 // to allocate in the loop below. 217 // to allocate in the loop below.
218 STR result; 218 StringTypeForStringPiece<STR> result;
219 result.reserve(escaped_text.length()); 219 result.reserve(escaped_text.length());
220 220
221 // Locations of adjusted text. 221 // Locations of adjusted text.
222 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { 222 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
223 if (static_cast<unsigned char>(escaped_text[i]) >= 128) { 223 if (static_cast<unsigned char>(escaped_text[i]) >= 128) {
224 // Non ASCII character, append as is. 224 // Non ASCII character, append as is.
225 result.push_back(escaped_text[i]); 225 result.push_back(escaped_text[i]);
226 continue; 226 continue;
227 } 227 }
228 228
(...skipping 29 matching lines...) Expand all
258 // U+1F513 OPEN LOCK (%F0%9F%94%93) 258 // U+1F513 OPEN LOCK (%F0%9F%94%93)
259 // 259 //
260 // However, some schemes such as data: and file: need to parse the exact 260 // However, some schemes such as data: and file: need to parse the exact
261 // binary data when loading the URL. For that reason, 261 // binary data when loading the URL. For that reason,
262 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters. 262 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters.
263 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be 263 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be
264 // displayed in the UI. 264 // displayed in the UI.
265 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) { 265 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) {
266 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { 266 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {
267 // Keep Arabic Language Mark escaped. 267 // Keep Arabic Language Mark escaped.
268 result.append(escaped_text, i, 6); 268 escaped_text.substr(i, 6).AppendToString(&result);
269 i += 5; 269 i += 5;
270 continue; 270 continue;
271 } 271 }
272 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { 272 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {
273 // Keep BiDi control char escaped. 273 // Keep BiDi control char escaped.
274 result.append(escaped_text, i, 9); 274 escaped_text.substr(i, 9).AppendToString(&result);
275 i += 8; 275 i += 8;
276 continue; 276 continue;
277 } 277 }
278 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) { 278 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {
279 // Keep banned char escaped. 279 // Keep banned char escaped.
280 result.append(escaped_text, i, 12); 280 escaped_text.substr(i, 12).AppendToString(&result);
281 i += 11; 281 i += 11;
282 continue; 282 continue;
283 } 283 }
284 } 284 }
285 285
286 if (first_byte >= 0x80 || // Unescape all high-bit characters. 286 if (first_byte >= 0x80 || // Unescape all high-bit characters.
287 // For 7-bit characters, the lookup table tells us all valid chars. 287 // For 7-bit characters, the lookup table tells us all valid chars.
288 (kUrlUnescape[first_byte] || 288 (kUrlUnescape[first_byte] ||
289 // ...and we allow some additional unescaping when flags are set. 289 // ...and we allow some additional unescaping when flags are set.
290 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || 290 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
338 while (*p) 338 while (*p)
339 output->push_back(*p++); 339 output->push_back(*p++);
340 break; 340 break;
341 } 341 }
342 } 342 }
343 if (k == arraysize(kCharsToEscape)) 343 if (k == arraysize(kCharsToEscape))
344 output->push_back(c); 344 output->push_back(c);
345 } 345 }
346 346
347 template <class str> 347 template <class str>
348 str EscapeForHTMLImpl(const str& input) { 348 StringTypeForStringPiece<str> EscapeForHTMLImpl(str input) {
349 str result; 349 StringTypeForStringPiece<str> result;
350 result.reserve(input.size()); // Optimize for no escaping. 350 result.reserve(input.size()); // Optimize for no escaping.
351 351
352 for (typename str::const_iterator i = input.begin(); i != input.end(); ++i) 352 for (auto c : input) {
353 AppendEscapedCharForHTMLImpl(*i, &result); 353 AppendEscapedCharForHTMLImpl(c, &result);
354 }
354 355
355 return result; 356 return result;
356 } 357 }
357 358
358 // Everything except alphanumerics and !'()*-._~ 359 // Everything except alphanumerics and !'()*-._~
359 // See RFC 2396 for the list of reserved characters. 360 // See RFC 2396 for the list of reserved characters.
360 static const Charmap kQueryCharmap = {{ 361 static const Charmap kQueryCharmap = {{
361 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, 362 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L,
362 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL 363 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL
363 }}; 364 }};
(...skipping 26 matching lines...) Expand all
390 391
391 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and 392 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and
392 // !'()*-._~#[] 393 // !'()*-._~#[]
393 static const Charmap kExternalHandlerCharmap = {{ 394 static const Charmap kExternalHandlerCharmap = {{
394 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L, 395 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L,
395 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL 396 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL
396 }}; 397 }};
397 398
398 } // namespace 399 } // namespace
399 400
400 std::string EscapeQueryParamValue(const std::string& text, bool use_plus) { 401 std::string EscapeQueryParamValue(base::StringPiece text, bool use_plus) {
401 return Escape(text, kQueryCharmap, use_plus); 402 return Escape(text, kQueryCharmap, use_plus);
402 } 403 }
403 404
404 std::string EscapePath(const std::string& path) { 405 std::string EscapePath(base::StringPiece path) {
405 return Escape(path, kPathCharmap, false); 406 return Escape(path, kPathCharmap, false);
406 } 407 }
407 408
408 #if defined(OS_MACOSX) 409 #if defined(OS_MACOSX)
409 std::string EscapeNSURLPrecursor(const std::string& precursor) { 410 std::string EscapeNSURLPrecursor(base::StringPiece precursor) {
410 return Escape(precursor, kNSURLCharmap, false, true); 411 return Escape(precursor, kNSURLCharmap, false, true);
411 } 412 }
412 #endif // defined(OS_MACOSX) 413 #endif // defined(OS_MACOSX)
413 414
414 std::string EscapeUrlEncodedData(const std::string& path, bool use_plus) { 415 std::string EscapeUrlEncodedData(base::StringPiece path, bool use_plus) {
415 return Escape(path, kUrlEscape, use_plus); 416 return Escape(path, kUrlEscape, use_plus);
416 } 417 }
417 418
418 std::string EscapeNonASCII(const std::string& input) { 419 std::string EscapeNonASCII(base::StringPiece input) {
419 return Escape(input, kNonASCIICharmap, false); 420 return Escape(input, kNonASCIICharmap, false);
420 } 421 }
421 422
422 std::string EscapeExternalHandlerValue(const std::string& text) { 423 std::string EscapeExternalHandlerValue(base::StringPiece text) {
423 return Escape(text, kExternalHandlerCharmap, false, true); 424 return Escape(text, kExternalHandlerCharmap, false, true);
424 } 425 }
425 426
426 void AppendEscapedCharForHTML(char c, std::string* output) { 427 void AppendEscapedCharForHTML(char c, std::string* output) {
427 AppendEscapedCharForHTMLImpl(c, output); 428 AppendEscapedCharForHTMLImpl(c, output);
428 } 429 }
429 430
430 std::string EscapeForHTML(const std::string& input) { 431 std::string EscapeForHTML(base::StringPiece input) {
431 return EscapeForHTMLImpl(input); 432 return EscapeForHTMLImpl(input);
432 } 433 }
433 434
434 base::string16 EscapeForHTML(const base::string16& input) { 435 base::string16 EscapeForHTML(base::StringPiece16 input) {
435 return EscapeForHTMLImpl(input); 436 return EscapeForHTMLImpl(input);
436 } 437 }
437 438
438 std::string UnescapeURLComponent(const std::string& escaped_text, 439 std::string UnescapeURLComponent(base::StringPiece escaped_text,
439 UnescapeRule::Type rules) { 440 UnescapeRule::Type rules) {
440 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); 441 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);
441 } 442 }
442 443
443 base::string16 UnescapeURLComponent(const base::string16& escaped_text, 444 base::string16 UnescapeURLComponent(base::StringPiece16 escaped_text,
444 UnescapeRule::Type rules) { 445 UnescapeRule::Type rules) {
445 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); 446 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);
446 } 447 }
447 448
448 base::string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, 449 base::string16 UnescapeAndDecodeUTF8URLComponent(base::StringPiece text,
449 UnescapeRule::Type rules) { 450 UnescapeRule::Type rules) {
450 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL); 451 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL);
451 } 452 }
452 453
453 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( 454 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(
454 const std::string& text, 455 base::StringPiece text,
455 UnescapeRule::Type rules, 456 UnescapeRule::Type rules,
456 base::OffsetAdjuster::Adjustments* adjustments) { 457 base::OffsetAdjuster::Adjustments* adjustments) {
457 base::string16 result; 458 base::string16 result;
458 base::OffsetAdjuster::Adjustments unescape_adjustments; 459 base::OffsetAdjuster::Adjustments unescape_adjustments;
459 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl( 460 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl(
460 text, rules, &unescape_adjustments)); 461 text, rules, &unescape_adjustments));
461 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(), 462 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(),
462 unescaped_url.length(), 463 unescaped_url.length(),
463 &result, adjustments)) { 464 &result, adjustments)) {
464 // Character set looks like it's valid. 465 // Character set looks like it's valid.
465 if (adjustments) { 466 if (adjustments) {
466 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments, 467 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments,
467 adjustments); 468 adjustments);
468 } 469 }
469 return result; 470 return result;
470 } 471 }
471 // Character set is not valid. Return the escaped version. 472 // Character set is not valid. Return the escaped version.
472 return base::UTF8ToUTF16WithAdjustments(text, adjustments); 473 return base::UTF8ToUTF16WithAdjustments(text, adjustments);
473 } 474 }
474 475
475 base::string16 UnescapeForHTML(const base::string16& input) { 476 base::string16 UnescapeForHTML(base::StringPiece16 input) {
476 static const struct { 477 static const struct {
477 const char* ampersand_code; 478 const char* ampersand_code;
478 const char replacement; 479 const char replacement;
479 } kEscapeToChars[] = { 480 } kEscapeToChars[] = {
480 { "&lt;", '<' }, 481 { "&lt;", '<' },
481 { "&gt;", '>' }, 482 { "&gt;", '>' },
482 { "&amp;", '&' }, 483 { "&amp;", '&' },
483 { "&quot;", '"' }, 484 { "&quot;", '"' },
484 { "&#39;", '\''}, 485 { "&#39;", '\''},
485 }; 486 };
486 487
487 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos) 488 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos)
488 return input; 489 return input.as_string();
489 490
490 base::string16 ampersand_chars[arraysize(kEscapeToChars)]; 491 base::string16 ampersand_chars[arraysize(kEscapeToChars)];
491 base::string16 text(input); 492 base::string16 text = input.as_string();
492 for (base::string16::iterator iter = text.begin(); 493 for (base::string16::iterator iter = text.begin();
493 iter != text.end(); ++iter) { 494 iter != text.end(); ++iter) {
494 if (*iter == '&') { 495 if (*iter == '&') {
495 // Potential ampersand encode char. 496 // Potential ampersand encode char.
496 size_t index = iter - text.begin(); 497 size_t index = iter - text.begin();
497 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) { 498 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) {
498 if (ampersand_chars[i].empty()) { 499 if (ampersand_chars[i].empty()) {
499 ampersand_chars[i] = 500 ampersand_chars[i] =
500 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code); 501 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code);
501 } 502 }
502 if (text.find(ampersand_chars[i], index) == index) { 503 if (text.find(ampersand_chars[i], index) == index) {
503 text.replace(iter, iter + ampersand_chars[i].length(), 504 text.replace(iter, iter + ampersand_chars[i].length(),
504 1, kEscapeToChars[i].replacement); 505 1, kEscapeToChars[i].replacement);
505 break; 506 break;
506 } 507 }
507 } 508 }
508 } 509 }
509 } 510 }
510 return text; 511 return text;
511 } 512 }
512 513
513 } // namespace net 514 } // namespace net
OLDNEW
« no previous file with comments | « net/base/escape.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698