net/base/escape.cc - Issue 2615633007: Change net/base/escape.h to use StringPiece.

Side by Side Diff: net/base/escape.cc

Issue 2615633007: Change net/base/escape.h to use StringPiece. (Closed)

Patch Set: Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/escape.h"	5 #include "net/base/escape.h"

6	6

7 #include <algorithm>

8 #include <memory>

9

10 #include "base/logging.h"	7 #include "base/logging.h"

11 #include "base/strings/string_piece.h"

12 #include "base/strings/string_util.h"	8 #include "base/strings/string_util.h"

13 #include "base/strings/utf_offset_string_conversions.h"

14 #include "base/strings/utf_string_conversions.h"	9 #include "base/strings/utf_string_conversions.h"

15	10

16 namespace net {	11 namespace net {

17	12

18 namespace {	13 namespace {

19	14

20 const char kHexString[] = "0123456789ABCDEF";	15 const char kHexString[] = "0123456789ABCDEF";

21 inline char IntToHex(int i) {	16 inline char IntToHex(int i) {

22 DCHECK_GE(i, 0) << i << " not a hex value";	17 DCHECK_GE(i, 0) << i << " not a hex value";

23 DCHECK_LE(i, 15) << i << " not a hex value";	18 DCHECK_LE(i, 15) << i << " not a hex value";

(...skipping 10 matching lines...) Expand all Loading...
34 }	29 }

35	30

36 uint32_t map[8];	31 uint32_t map[8];

37 };	32 };

38	33

39 // Given text to escape and a Charmap defining which values to escape,	34 // Given text to escape and a Charmap defining which values to escape,

40 // return an escaped string. If use_plus is true, spaces are converted	35 // return an escaped string. If use_plus is true, spaces are converted

41 // to +, otherwise, if spaces are in the charmap, they are converted to	36 // to +, otherwise, if spaces are in the charmap, they are converted to

42 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if	37 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if

43 // '%' is in the charmap, it is converted to %25.	38 // '%' is in the charmap, it is converted to %25.

44 std::string Escape(const std::string& text,	39 std::string Escape(base::StringPiece text,

45 const Charmap& charmap,	40 const Charmap& charmap,

46 bool use_plus,	41 bool use_plus,

47 bool keep_escaped = false) {	42 bool keep_escaped = false) {

48 std::string escaped;	43 std::string escaped;

49 escaped.reserve(text.length() * 3);	44 escaped.reserve(text.length() * 3);

50 for (unsigned int i = 0; i < text.length(); ++i) {	45 for (unsigned int i = 0; i < text.length(); ++i) {

51 unsigned char c = static_cast<unsigned char>(text[i]);	46 unsigned char c = static_cast<unsigned char>(text[i]);

52 if (use_plus && ' ' == c) {	47 if (use_plus && ' ' == c) {

53 escaped.push_back('+');	48 escaped.push_back('+');

54 } else if (keep_escaped && '%' == c && i + 2 < text.length() &&	49 } else if (keep_escaped && '%' == c && i + 2 < text.length() &&

(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,	94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,

100 // ` a b c d e f g h i j k l m n o	95 // ` a b c d e f g h i j k l m n o

101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	96 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

102 // p q r s t u v w x y z { | } ~ <NBSP>	97 // p q r s t u v w x y z { | } ~ <NBSP>

103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0	98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0

104 };	99 };

105	100

106 // Attempts to unescape the sequence at |index| within |escaped_text|. If	101 // Attempts to unescape the sequence at |index| within |escaped_text|. If

107 // successful, sets |value| to the unescaped value. Returns whether	102 // successful, sets |value| to the unescaped value. Returns whether

108 // unescaping succeeded.	103 // unescaping succeeded.

109 template<typename STR>	104 template <typename STR>

110 bool UnescapeUnsignedCharAtIndex(const STR& escaped_text,	105 bool UnescapeUnsignedCharAtIndex(STR escaped_text,

111 size_t index,	106 size_t index,

112 unsigned char* value) {	107 unsigned char* value) {

113 if ((index + 2) >= escaped_text.size())	108 if ((index + 2) >= escaped_text.size())

114 return false;	109 return false;

115 if (escaped_text[index] != '%')	110 if (escaped_text[index] != '%')

116 return false;	111 return false;

117 const typename STR::value_type most_sig_digit(	112 const typename STR::value_type most_sig_digit(

118 static_cast<typename STR::value_type>(escaped_text[index + 1]));	113 static_cast<typename STR::value_type>(escaped_text[index + 1]));

119 const typename STR::value_type least_sig_digit(	114 const typename STR::value_type least_sig_digit(

120 static_cast<typename STR::value_type>(escaped_text[index + 2]));	115 static_cast<typename STR::value_type>(escaped_text[index + 2]));

121 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) {	116 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) {

122 *value = base::HexDigitToInt(most_sig_digit) * 16 +	117 *value = base::HexDigitToInt(most_sig_digit) * 16 +

123 base::HexDigitToInt(least_sig_digit);	118 base::HexDigitToInt(least_sig_digit);

124 return true;	119 return true;

125 }	120 }

126 return false;	121 return false;

127 }	122 }

128	123

129 // Returns true if there is an Arabic Language Mark at |index|. |first_byte|	124 // Returns true if there is an Arabic Language Mark at |index|. |first_byte|

130 // is the byte at |index|.	125 // is the byte at |index|.

131 template<typename STR>	126 template <typename STR>

132 bool HasArabicLanguageMarkAtIndex(const STR& escaped_text,	127 bool HasArabicLanguageMarkAtIndex(STR escaped_text,

133 unsigned char first_byte,	128 unsigned char first_byte,

134 size_t index) {	129 size_t index) {

135 if (first_byte != 0xD8)	130 if (first_byte != 0xD8)

136 return false;	131 return false;

137 unsigned char second_byte;	132 unsigned char second_byte;

138 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))	133 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))

139 return false;	134 return false;

140 return second_byte == 0x9c;	135 return second_byte == 0x9c;

141 }	136 }

142	137

143 // Returns true if there is a BiDi control char at |index|. |first_byte| is the	138 // Returns true if there is a BiDi control char at |index|. |first_byte| is the

144 // byte at |index|.	139 // byte at |index|.

145 template<typename STR>	140 template <typename STR>

146 bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text,	141 bool HasThreeByteBidiControlCharAtIndex(STR escaped_text,

147 unsigned char first_byte,	142 unsigned char first_byte,

148 size_t index) {	143 size_t index) {

149 if (first_byte != 0xE2)	144 if (first_byte != 0xE2)

150 return false;	145 return false;

151 unsigned char second_byte;	146 unsigned char second_byte;

152 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))	147 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))

153 return false;	148 return false;

154 if (second_byte != 0x80 && second_byte != 0x81)	149 if (second_byte != 0x80 && second_byte != 0x81)

155 return false;	150 return false;

156 unsigned char third_byte;	151 unsigned char third_byte;

157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))	152 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))

158 return false;	153 return false;

159 if (second_byte == 0x80) {	154 if (second_byte == 0x80) {

160 return third_byte == 0x8E ||	155 return third_byte == 0x8E ||

161 third_byte == 0x8F ||	156 third_byte == 0x8F ||

162 (third_byte >= 0xAA && third_byte <= 0xAE);	157 (third_byte >= 0xAA && third_byte <= 0xAE);

163 }	158 }

164 return third_byte >= 0xA6 && third_byte <= 0xA9;	159 return third_byte >= 0xA6 && third_byte <= 0xA9;

165 }	160 }

166	161

167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is	162 // Returns true if there is a four-byte banned char at |index|. |first_byte| is

168 // the byte at |index|.	163 // the byte at |index|.

169 template <typename STR>	164 template <typename STR>

170 bool HasFourByteBannedCharAtIndex(const STR& escaped_text,	165 bool HasFourByteBannedCharAtIndex(STR escaped_text,

171 unsigned char first_byte,	166 unsigned char first_byte,

172 size_t index) {	167 size_t index) {

173 // The following characters are blacklisted for spoofability concerns.	168 // The following characters are blacklisted for spoofability concerns.

174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)	169 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)

175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)	170 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)

176 // U+1F512 LOCK (%F0%9F%94%92)	171 // U+1F512 LOCK (%F0%9F%94%92)

177 // U+1F513 OPEN LOCK (%F0%9F%94%93)	172 // U+1F513 OPEN LOCK (%F0%9F%94%93)

178 if (first_byte != 0xF0)	173 if (first_byte != 0xF0)

179 return false;	174 return false;

180	175

(...skipping 13 matching lines...) Expand all Loading...
194 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) &&	189 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) &&

195 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 ||	190 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 ||

196 fourth_byte == 0x93);	191 fourth_byte == 0x93);

197 }	192 }

198	193

199 // Unescapes |escaped_text| according to |rules|, returning the resulting	194 // Unescapes |escaped_text| according to |rules|, returning the resulting

200 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects	195 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects

201 // the alterations done to the string that are not one-character-to-one-	196 // the alterations done to the string that are not one-character-to-one-

202 // character. The resulting |adjustments| will always be sorted by increasing	197 // character. The resulting |adjustments| will always be sorted by increasing

203 // offset.	198 // offset.

204 template<typename STR>	199 template <typename STR>

205 STR UnescapeURLWithAdjustmentsImpl(	200 STR UnescapeURLWithAdjustmentsImpl(

206 const STR& escaped_text,	201 base::BasicStringPiece<STR> escaped_text,

207 UnescapeRule::Type rules,	202 UnescapeRule::Type rules,

208 base::OffsetAdjuster::Adjustments* adjustments) {	203 base::OffsetAdjuster::Adjustments* adjustments) {

209 if (adjustments)	204 if (adjustments)

210 adjustments->clear();	205 adjustments->clear();

211 // Do not unescape anything, return the |escaped_text| text.	206 // Do not unescape anything, return the |escaped_text| text.

212 if (rules == UnescapeRule::NONE)	207 if (rules == UnescapeRule::NONE)

213 return escaped_text;	208 return escaped_text.as_string();

214	209

215 // The output of the unescaping is always smaller than the input, so we can	210 // The output of the unescaping is always smaller than the input, so we can

216 // reserve the input size to make sure we have enough buffer and don't have	211 // reserve the input size to make sure we have enough buffer and don't have

217 // to allocate in the loop below.	212 // to allocate in the loop below.

218 STR result;	213 STR result;

219 result.reserve(escaped_text.length());	214 result.reserve(escaped_text.length());

220	215

221 // Locations of adjusted text.	216 // Locations of adjusted text.

222 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {	217 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {

223 if (static_cast<unsigned char>(escaped_text[i]) >= 128) {	218 if (static_cast<unsigned char>(escaped_text[i]) >= 128) {

(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
258 // U+1F513 OPEN LOCK (%F0%9F%94%93)	253 // U+1F513 OPEN LOCK (%F0%9F%94%93)

259 //	254 //

260 // However, some schemes such as data: and file: need to parse the exact	255 // However, some schemes such as data: and file: need to parse the exact

261 // binary data when loading the URL. For that reason,	256 // binary data when loading the URL. For that reason,

262 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters.	257 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters.

263 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be	258 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be

264 // displayed in the UI.	259 // displayed in the UI.

265 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) {	260 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) {

266 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {	261 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {

267 // Keep Arabic Language Mark escaped.	262 // Keep Arabic Language Mark escaped.

268 result.append(escaped_text, i, 6);	263 escaped_text.substr(i, 6).AppendToString(&result);

269 i += 5;	264 i += 5;

270 continue;	265 continue;

271 }	266 }

272 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {	267 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {

273 // Keep BiDi control char escaped.	268 // Keep BiDi control char escaped.

274 result.append(escaped_text, i, 9);	269 escaped_text.substr(i, 9).AppendToString(&result);

275 i += 8;	270 i += 8;

276 continue;	271 continue;

277 }	272 }

278 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {	273 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {

279 // Keep banned char escaped.	274 // Keep banned char escaped.

280 result.append(escaped_text, i, 12);	275 escaped_text.substr(i, 12).AppendToString(&result);

281 i += 11;	276 i += 11;

282 continue;	277 continue;

283 }	278 }

284 }	279 }

285	280

286 if (first_byte >= 0x80 || // Unescape all high-bit characters.	281 if (first_byte >= 0x80 || // Unescape all high-bit characters.

287 // For 7-bit characters, the lookup table tells us all valid chars.	282 // For 7-bit characters, the lookup table tells us all valid chars.

288 (kUrlUnescape[first_byte] ||	283 (kUrlUnescape[first_byte] ||

289 // ...and we allow some additional unescaping when flags are set.	284 // ...and we allow some additional unescaping when flags are set.

290 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||	285 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||

(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
338 while (*p)	333 while (*p)

339 output->push_back(*p++);	334 output->push_back(*p++);

340 break;	335 break;

341 }	336 }

342 }	337 }

343 if (k == arraysize(kCharsToEscape))	338 if (k == arraysize(kCharsToEscape))

344 output->push_back(c);	339 output->push_back(c);

345 }	340 }

346	341

347 template <class str>	342 template <class str>

348 str EscapeForHTMLImpl(const str& input) {	343 str EscapeForHTMLImpl(base::BasicStringPiece<str> input) {

349 str result;	344 str result;

350 result.reserve(input.size()); // Optimize for no escaping.	345 result.reserve(input.size()); // Optimize for no escaping.

351	346

352 for (typename str::const_iterator i = input.begin(); i != input.end(); ++i)	347 for (auto c : input) {

353 AppendEscapedCharForHTMLImpl(*i, &result);	348 AppendEscapedCharForHTMLImpl(c, &result);

	349 }

354	350

355 return result;	351 return result;

356 }	352 }

357	353

358 // Everything except alphanumerics and !'()*-._~	354 // Everything except alphanumerics and !'()*-._~

359 // See RFC 2396 for the list of reserved characters.	355 // See RFC 2396 for the list of reserved characters.

360 static const Charmap kQueryCharmap = {{	356 static const Charmap kQueryCharmap = {{

361 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L,	357 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L,

362 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL	358 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL

363 }};	359 }};

(...skipping 26 matching lines...) Expand all Loading...
390	386

391 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and	387 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and

392 // !'()*-._~#[]	388 // !'()*-._~#[]

393 static const Charmap kExternalHandlerCharmap = {{	389 static const Charmap kExternalHandlerCharmap = {{

394 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L,	390 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L,

395 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL	391 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL

396 }};	392 }};

397	393

398 } // namespace	394 } // namespace

399	395

400 std::string EscapeQueryParamValue(const std::string& text, bool use_plus) {	396 std::string EscapeQueryParamValue(base::StringPiece text, bool use_plus) {

401 return Escape(text, kQueryCharmap, use_plus);	397 return Escape(text, kQueryCharmap, use_plus);

402 }	398 }

403	399

404 std::string EscapePath(const std::string& path) {	400 std::string EscapePath(base::StringPiece path) {

405 return Escape(path, kPathCharmap, false);	401 return Escape(path, kPathCharmap, false);

406 }	402 }

407	403

408 #if defined(OS_MACOSX)	404 #if defined(OS_MACOSX)

409 std::string EscapeNSURLPrecursor(const std::string& precursor) {	405 std::string EscapeNSURLPrecursor(base::StringPiece precursor) {

410 return Escape(precursor, kNSURLCharmap, false, true);	406 return Escape(precursor, kNSURLCharmap, false, true);

411 }	407 }

412 #endif // defined(OS_MACOSX)	408 #endif // defined(OS_MACOSX)

413	409

414 std::string EscapeUrlEncodedData(const std::string& path, bool use_plus) {	410 std::string EscapeUrlEncodedData(base::StringPiece path, bool use_plus) {

415 return Escape(path, kUrlEscape, use_plus);	411 return Escape(path, kUrlEscape, use_plus);

416 }	412 }

417	413

418 std::string EscapeNonASCII(const std::string& input) {	414 std::string EscapeNonASCII(base::StringPiece input) {

419 return Escape(input, kNonASCIICharmap, false);	415 return Escape(input, kNonASCIICharmap, false);

420 }	416 }

421	417

422 std::string EscapeExternalHandlerValue(const std::string& text) {	418 std::string EscapeExternalHandlerValue(base::StringPiece text) {

423 return Escape(text, kExternalHandlerCharmap, false, true);	419 return Escape(text, kExternalHandlerCharmap, false, true);

424 }	420 }

425	421

426 void AppendEscapedCharForHTML(char c, std::string* output) {	422 void AppendEscapedCharForHTML(char c, std::string* output) {

427 AppendEscapedCharForHTMLImpl(c, output);	423 AppendEscapedCharForHTMLImpl(c, output);

428 }	424 }

429	425

430 std::string EscapeForHTML(const std::string& input) {	426 std::string EscapeForHTML(base::StringPiece input) {

431 return EscapeForHTMLImpl(input);	427 return EscapeForHTMLImpl(input);

432 }	428 }

433	429

434 base::string16 EscapeForHTML(const base::string16& input) {	430 base::string16 EscapeForHTML(base::StringPiece16 input) {

435 return EscapeForHTMLImpl(input);	431 return EscapeForHTMLImpl(input);

436 }	432 }

437	433

438 std::string UnescapeURLComponent(const std::string& escaped_text,	434 std::string UnescapeURLComponent(base::StringPiece escaped_text,

439 UnescapeRule::Type rules) {	435 UnescapeRule::Type rules) {

440 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);	436 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);

441 }	437 }

442	438

443 base::string16 UnescapeURLComponent(const base::string16& escaped_text,	439 base::string16 UnescapeURLComponent(base::StringPiece16 escaped_text,

444 UnescapeRule::Type rules) {	440 UnescapeRule::Type rules) {

445 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);	441 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);

446 }	442 }

447	443

448 base::string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,	444 base::string16 UnescapeAndDecodeUTF8URLComponent(base::StringPiece text,

449 UnescapeRule::Type rules) {	445 UnescapeRule::Type rules) {

450 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL);	446 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL);

451 }	447 }

452	448

453 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(	449 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(

454 const std::string& text,	450 base::StringPiece text,

455 UnescapeRule::Type rules,	451 UnescapeRule::Type rules,

456 base::OffsetAdjuster::Adjustments* adjustments) {	452 base::OffsetAdjuster::Adjustments* adjustments) {

457 base::string16 result;	453 base::string16 result;

458 base::OffsetAdjuster::Adjustments unescape_adjustments;	454 base::OffsetAdjuster::Adjustments unescape_adjustments;

459 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl(	455 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl(

460 text, rules, &unescape_adjustments));	456 text, rules, &unescape_adjustments));

461 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(),	457 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(),

462 unescaped_url.length(),	458 unescaped_url.length(),

463 &result, adjustments)) {	459 &result, adjustments)) {

464 // Character set looks like it's valid.	460 // Character set looks like it's valid.

465 if (adjustments) {	461 if (adjustments) {

466 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments,	462 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments,

467 adjustments);	463 adjustments);

468 }	464 }

469 return result;	465 return result;

470 }	466 }

471 // Character set is not valid. Return the escaped version.	467 // Character set is not valid. Return the escaped version.

472 return base::UTF8ToUTF16WithAdjustments(text, adjustments);	468 return base::UTF8ToUTF16WithAdjustments(text, adjustments);

473 }	469 }

474	470

475 base::string16 UnescapeForHTML(const base::string16& input) {	471 base::string16 UnescapeForHTML(base::StringPiece16 input) {

476 static const struct {	472 static const struct {

477 const char* ampersand_code;	473 const char* ampersand_code;

478 const char replacement;	474 const char replacement;

479 } kEscapeToChars[] = {	475 } kEscapeToChars[] = {

480 { "&lt;", '<' },	476 { "&lt;", '<' },

481 { "&gt;", '>' },	477 { "&gt;", '>' },

482 { "&amp;", '&' },	478 { "&amp;", '&' },

483 { "&quot;", '"' },	479 { "&quot;", '"' },

484 { "&#39;", '\''},	480 { "&#39;", '\''},

485 };	481 };

486	482

487 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos)	483 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos)

488 return input;	484 return input.as_string();

489	485

490 base::string16 ampersand_chars[arraysize(kEscapeToChars)];	486 base::string16 ampersand_chars[arraysize(kEscapeToChars)];

491 base::string16 text(input);	487 base::string16 text = input.as_string();

492 for (base::string16::iterator iter = text.begin();	488 for (base::string16::iterator iter = text.begin();

493 iter != text.end(); ++iter) {	489 iter != text.end(); ++iter) {

494 if (*iter == '&') {	490 if (*iter == '&') {

495 // Potential ampersand encode char.	491 // Potential ampersand encode char.

496 size_t index = iter - text.begin();	492 size_t index = iter - text.begin();

497 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) {	493 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) {

498 if (ampersand_chars[i].empty()) {	494 if (ampersand_chars[i].empty()) {

499 ampersand_chars[i] =	495 ampersand_chars[i] =

500 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code);	496 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code);

501 }	497 }

502 if (text.find(ampersand_chars[i], index) == index) {	498 if (text.find(ampersand_chars[i], index) == index) {

503 text.replace(iter, iter + ampersand_chars[i].length(),	499 text.replace(iter, iter + ampersand_chars[i].length(),

504 1, kEscapeToChars[i].replacement);	500 1, kEscapeToChars[i].replacement);

505 break;	501 break;

506 }	502 }

507 }	503 }

508 }	504 }

509 }	505 }

510 return text;	506 return text;

511 }	507 }

512	508

513 } // namespace net	509 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/escape.h ('k') | no next file » | no next file with comments »