net/base/escape.cc - Issue 2615633007: Change net/base/escape.h to use StringPiece.

Side by Side Diff: net/base/escape.cc

Issue 2615633007: Change net/base/escape.h to use StringPiece. (Closed)

Patch Set: Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/escape.h"	5 #include "net/base/escape.h"

6	6

7 #include <algorithm>	7 #include <utility>

8 #include <memory>

9	8

10 #include "base/logging.h"	9 #include "base/logging.h"

11 #include "base/strings/string_piece.h"

12 #include "base/strings/string_util.h"	10 #include "base/strings/string_util.h"

13 #include "base/strings/utf_offset_string_conversions.h"
mmenke 2017/01/05 16:19:54: This is needed for UTF8ToUTF16WithAdjustments, no?
mmenke 2017/01/05 16:21:05: On 2017/01/05 16:19:54, mmenke wrote: > This is needed for UTF8ToUTF16WithAdjustments, no? Oops, ignore this. I just assumed this was a method-only file, so naturally wouldn't be included in the header file (And I was wrong).
14 #include "base/strings/utf_string_conversions.h"	11 #include "base/strings/utf_string_conversions.h"

15	12

16 namespace net {	13 namespace net {

17	14

18 namespace {	15 namespace {

19	16

	17 template <typename STR>

	18 using StringTypeForStringPiece = decltype(std::declval<STR>().as_string());
	mmenke 2017/01/05 16:19:54: I think this is sufficiently obscure that it needs a comment. (It's C++0x11 that is unlikely to ever see common enough use for people to intuitively know what it actually does, and it's rather verbose as well)
	Sam McNally 2017/01/06 00:20:51: On 2017/01/05 16:19:54, mmenke wrote: > I think this is sufficiently obscure that it needs a comment. (It's C++0x11 > that is unlikely to ever see common enough use for people to intuitively know > what it actually does, and it's rather verbose as well) I found an alternative that avoids this.
	19

20 const char kHexString[] = "0123456789ABCDEF";	20 const char kHexString[] = "0123456789ABCDEF";

21 inline char IntToHex(int i) {	21 inline char IntToHex(int i) {

22 DCHECK_GE(i, 0) << i << " not a hex value";	22 DCHECK_GE(i, 0) << i << " not a hex value";

23 DCHECK_LE(i, 15) << i << " not a hex value";	23 DCHECK_LE(i, 15) << i << " not a hex value";

24 return kHexString[i];	24 return kHexString[i];

25 }	25 }

26	26

27 // A fast bit-vector map for ascii characters.	27 // A fast bit-vector map for ascii characters.

28 //	28 //

29 // Internally stores 256 bits in an array of 8 ints.	29 // Internally stores 256 bits in an array of 8 ints.

30 // Does quick bit-flicking to lookup needed characters.	30 // Does quick bit-flicking to lookup needed characters.

31 struct Charmap {	31 struct Charmap {

32 bool Contains(unsigned char c) const {	32 bool Contains(unsigned char c) const {

33 return ((map[c >> 5] & (1 << (c & 31))) != 0);	33 return ((map[c >> 5] & (1 << (c & 31))) != 0);

34 }	34 }

35	35

36 uint32_t map[8];	36 uint32_t map[8];

37 };	37 };

38	38

39 // Given text to escape and a Charmap defining which values to escape,	39 // Given text to escape and a Charmap defining which values to escape,

40 // return an escaped string. If use_plus is true, spaces are converted	40 // return an escaped string. If use_plus is true, spaces are converted

41 // to +, otherwise, if spaces are in the charmap, they are converted to	41 // to +, otherwise, if spaces are in the charmap, they are converted to

42 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if	42 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if

43 // '%' is in the charmap, it is converted to %25.	43 // '%' is in the charmap, it is converted to %25.

44 std::string Escape(const std::string& text,	44 std::string Escape(base::StringPiece text,

45 const Charmap& charmap,	45 const Charmap& charmap,

46 bool use_plus,	46 bool use_plus,

47 bool keep_escaped = false) {	47 bool keep_escaped = false) {

48 std::string escaped;	48 std::string escaped;

49 escaped.reserve(text.length() * 3);	49 escaped.reserve(text.length() * 3);

50 for (unsigned int i = 0; i < text.length(); ++i) {	50 for (unsigned int i = 0; i < text.length(); ++i) {

51 unsigned char c = static_cast<unsigned char>(text[i]);	51 unsigned char c = static_cast<unsigned char>(text[i]);

52 if (use_plus && ' ' == c) {	52 if (use_plus && ' ' == c) {

53 escaped.push_back('+');	53 escaped.push_back('+');

54 } else if (keep_escaped && '%' == c && i + 2 < text.length() &&	54 } else if (keep_escaped && '%' == c && i + 2 < text.length() &&

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,	99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,

100 // ` a b c d e f g h i j k l m n o	100 // ` a b c d e f g h i j k l m n o

101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

102 // p q r s t u v w x y z { | } ~ <NBSP>	102 // p q r s t u v w x y z { | } ~ <NBSP>

103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0	103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0

104 };	104 };

105	105

106 // Attempts to unescape the sequence at |index| within |escaped_text|. If	106 // Attempts to unescape the sequence at |index| within |escaped_text|. If

107 // successful, sets |value| to the unescaped value. Returns whether	107 // successful, sets |value| to the unescaped value. Returns whether

108 // unescaping succeeded.	108 // unescaping succeeded.

109 template<typename STR>	109 template <typename STR>

110 bool UnescapeUnsignedCharAtIndex(const STR& escaped_text,	110 bool UnescapeUnsignedCharAtIndex(STR escaped_text,

111 size_t index,	111 size_t index,

112 unsigned char* value) {	112 unsigned char* value) {

113 if ((index + 2) >= escaped_text.size())	113 if ((index + 2) >= escaped_text.size())

114 return false;	114 return false;

115 if (escaped_text[index] != '%')	115 if (escaped_text[index] != '%')

116 return false;	116 return false;

117 const typename STR::value_type most_sig_digit(	117 const typename STR::value_type most_sig_digit(

118 static_cast<typename STR::value_type>(escaped_text[index + 1]));	118 static_cast<typename STR::value_type>(escaped_text[index + 1]));

119 const typename STR::value_type least_sig_digit(	119 const typename STR::value_type least_sig_digit(

120 static_cast<typename STR::value_type>(escaped_text[index + 2]));	120 static_cast<typename STR::value_type>(escaped_text[index + 2]));

121 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) {	121 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) {

122 *value = base::HexDigitToInt(most_sig_digit) * 16 +	122 *value = base::HexDigitToInt(most_sig_digit) * 16 +

123 base::HexDigitToInt(least_sig_digit);	123 base::HexDigitToInt(least_sig_digit);

124 return true;	124 return true;

125 }	125 }

126 return false;	126 return false;

127 }	127 }

128	128

129 // Returns true if there is an Arabic Language Mark at |index|. |first_byte|	129 // Returns true if there is an Arabic Language Mark at |index|. |first_byte|

130 // is the byte at |index|.	130 // is the byte at |index|.

131 template<typename STR>	131 template <typename STR>

132 bool HasArabicLanguageMarkAtIndex(const STR& escaped_text,	132 bool HasArabicLanguageMarkAtIndex(STR escaped_text,

133 unsigned char first_byte,	133 unsigned char first_byte,

134 size_t index) {	134 size_t index) {

135 if (first_byte != 0xD8)	135 if (first_byte != 0xD8)

136 return false;	136 return false;

137 unsigned char second_byte;	137 unsigned char second_byte;

138 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))	138 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))

139 return false;	139 return false;

140 return second_byte == 0x9c;	140 return second_byte == 0x9c;

141 }	141 }

142	142

143 // Returns true if there is a BiDi control char at |index|. |first_byte| is the	143 // Returns true if there is a BiDi control char at |index|. |first_byte| is the

144 // byte at |index|.	144 // byte at |index|.

145 template<typename STR>	145 template <typename STR>

146 bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text,	146 bool HasThreeByteBidiControlCharAtIndex(STR escaped_text,

147 unsigned char first_byte,	147 unsigned char first_byte,

148 size_t index) {	148 size_t index) {

149 if (first_byte != 0xE2)	149 if (first_byte != 0xE2)

150 return false;	150 return false;

151 unsigned char second_byte;	151 unsigned char second_byte;

152 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))	152 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))

153 return false;	153 return false;

154 if (second_byte != 0x80 && second_byte != 0x81)	154 if (second_byte != 0x80 && second_byte != 0x81)

155 return false;	155 return false;

156 unsigned char third_byte;	156 unsigned char third_byte;

157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))	157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))

158 return false;	158 return false;

159 if (second_byte == 0x80) {	159 if (second_byte == 0x80) {

160 return third_byte == 0x8E ||	160 return third_byte == 0x8E ||

161 third_byte == 0x8F ||	161 third_byte == 0x8F ||

162 (third_byte >= 0xAA && third_byte <= 0xAE);	162 (third_byte >= 0xAA && third_byte <= 0xAE);

163 }	163 }

164 return third_byte >= 0xA6 && third_byte <= 0xA9;	164 return third_byte >= 0xA6 && third_byte <= 0xA9;

165 }	165 }

166	166

167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is	167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is

168 // the byte at |index|.	168 // the byte at |index|.

169 template <typename STR>	169 template <typename STR>

170 bool HasFourByteBannedCharAtIndex(const STR& escaped_text,	170 bool HasFourByteBannedCharAtIndex(STR escaped_text,

171 unsigned char first_byte,	171 unsigned char first_byte,

172 size_t index) {	172 size_t index) {

173 // The following characters are blacklisted for spoofability concerns.	173 // The following characters are blacklisted for spoofability concerns.

174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)	174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)

175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)	175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)

176 // U+1F512 LOCK (%F0%9F%94%92)	176 // U+1F512 LOCK (%F0%9F%94%92)

177 // U+1F513 OPEN LOCK (%F0%9F%94%93)	177 // U+1F513 OPEN LOCK (%F0%9F%94%93)

178 if (first_byte != 0xF0)	178 if (first_byte != 0xF0)

179 return false;	179 return false;

180	180

(...skipping 13 matching lines...) Expand all Loading...
194 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) &&	194 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) &&

195 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 ||	195 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 ||

196 fourth_byte == 0x93);	196 fourth_byte == 0x93);

197 }	197 }

198	198

199 // Unescapes |escaped_text| according to |rules|, returning the resulting	199 // Unescapes |escaped_text| according to |rules|, returning the resulting

200 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects	200 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects

201 // the alterations done to the string that are not one-character-to-one-	201 // the alterations done to the string that are not one-character-to-one-

202 // character. The resulting |adjustments| will always be sorted by increasing	202 // character. The resulting |adjustments| will always be sorted by increasing

203 // offset.	203 // offset.

204 template<typename STR>	204 template <typename STR>

205 STR UnescapeURLWithAdjustmentsImpl(	205 StringTypeForStringPiece<STR> UnescapeURLWithAdjustmentsImpl(

206 const STR& escaped_text,	206 STR escaped_text,

207 UnescapeRule::Type rules,	207 UnescapeRule::Type rules,

208 base::OffsetAdjuster::Adjustments* adjustments) {	208 base::OffsetAdjuster::Adjustments* adjustments) {

209 if (adjustments)	209 if (adjustments)

210 adjustments->clear();	210 adjustments->clear();

211 // Do not unescape anything, return the |escaped_text| text.	211 // Do not unescape anything, return the |escaped_text| text.

212 if (rules == UnescapeRule::NONE)	212 if (rules == UnescapeRule::NONE)

213 return escaped_text;	213 return escaped_text.as_string();

214	214

215 // The output of the unescaping is always smaller than the input, so we can	215 // The output of the unescaping is always smaller than the input, so we can

216 // reserve the input size to make sure we have enough buffer and don't have	216 // reserve the input size to make sure we have enough buffer and don't have

217 // to allocate in the loop below.	217 // to allocate in the loop below.

218 STR result;	218 StringTypeForStringPiece<STR> result;

219 result.reserve(escaped_text.length());	219 result.reserve(escaped_text.length());

220	220

221 // Locations of adjusted text.	221 // Locations of adjusted text.

222 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {	222 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {

223 if (static_cast<unsigned char>(escaped_text[i]) >= 128) {	223 if (static_cast<unsigned char>(escaped_text[i]) >= 128) {

224 // Non ASCII character, append as is.	224 // Non ASCII character, append as is.

225 result.push_back(escaped_text[i]);	225 result.push_back(escaped_text[i]);

226 continue;	226 continue;

227 }	227 }

228	228

(...skipping 29 matching lines...) Expand all Loading...
258 // U+1F513 OPEN LOCK (%F0%9F%94%93)	258 // U+1F513 OPEN LOCK (%F0%9F%94%93)

259 //	259 //

260 // However, some schemes such as data: and file: need to parse the exact	260 // However, some schemes such as data: and file: need to parse the exact

261 // binary data when loading the URL. For that reason,	261 // binary data when loading the URL. For that reason,

262 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters.	262 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters.

263 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be	263 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be

264 // displayed in the UI.	264 // displayed in the UI.

265 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) {	265 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) {

266 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {	266 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {

267 // Keep Arabic Language Mark escaped.	267 // Keep Arabic Language Mark escaped.

268 result.append(escaped_text, i, 6);	268 escaped_text.substr(i, 6).AppendToString(&result);

269 i += 5;	269 i += 5;

270 continue;	270 continue;

271 }	271 }

272 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {	272 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {

273 // Keep BiDi control char escaped.	273 // Keep BiDi control char escaped.

274 result.append(escaped_text, i, 9);	274 escaped_text.substr(i, 9).AppendToString(&result);

275 i += 8;	275 i += 8;

276 continue;	276 continue;

277 }	277 }

278 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {	278 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {

279 // Keep banned char escaped.	279 // Keep banned char escaped.

280 result.append(escaped_text, i, 12);	280 escaped_text.substr(i, 12).AppendToString(&result);

281 i += 11;	281 i += 11;

282 continue;	282 continue;

283 }	283 }

284 }	284 }

285	285

286 if (first_byte >= 0x80 || // Unescape all high-bit characters.	286 if (first_byte >= 0x80 || // Unescape all high-bit characters.

287 // For 7-bit characters, the lookup table tells us all valid chars.	287 // For 7-bit characters, the lookup table tells us all valid chars.

288 (kUrlUnescape[first_byte] ||	288 (kUrlUnescape[first_byte] ||

289 // ...and we allow some additional unescaping when flags are set.	289 // ...and we allow some additional unescaping when flags are set.

290 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||	290 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
338 while (*p)	338 while (*p)

339 output->push_back(*p++);	339 output->push_back(*p++);

340 break;	340 break;

341 }	341 }

342 }	342 }

343 if (k == arraysize(kCharsToEscape))	343 if (k == arraysize(kCharsToEscape))

344 output->push_back(c);	344 output->push_back(c);

345 }	345 }

346	346

347 template <class str>	347 template <class str>

348 str EscapeForHTMLImpl(const str& input) {	348 StringTypeForStringPiece<str> EscapeForHTMLImpl(str input) {

349 str result;	349 StringTypeForStringPiece<str> result;

350 result.reserve(input.size()); // Optimize for no escaping.	350 result.reserve(input.size()); // Optimize for no escaping.

351	351

352 for (typename str::const_iterator i = input.begin(); i != input.end(); ++i)	352 for (auto c : input) {

353 AppendEscapedCharForHTMLImpl(*i, &result);	353 AppendEscapedCharForHTMLImpl(c, &result);

	354 }

354	355

355 return result;	356 return result;

356 }	357 }

357	358

358 // Everything except alphanumerics and !'()*-._~	359 // Everything except alphanumerics and !'()*-._~

359 // See RFC 2396 for the list of reserved characters.	360 // See RFC 2396 for the list of reserved characters.

360 static const Charmap kQueryCharmap = {{	361 static const Charmap kQueryCharmap = {{

361 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L,	362 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L,

362 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL	363 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL

363 }};	364 }};

(...skipping 26 matching lines...) Expand all Loading...
390	391

391 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and	392 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and

392 // !'()*-._~#[]	393 // !'()*-._~#[]

393 static const Charmap kExternalHandlerCharmap = {{	394 static const Charmap kExternalHandlerCharmap = {{

394 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L,	395 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L,

395 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL	396 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL

396 }};	397 }};

397	398

398 } // namespace	399 } // namespace

399	400

400 std::string EscapeQueryParamValue(const std::string& text, bool use_plus) {	401 std::string EscapeQueryParamValue(base::StringPiece text, bool use_plus) {

401 return Escape(text, kQueryCharmap, use_plus);	402 return Escape(text, kQueryCharmap, use_plus);

402 }	403 }

403	404

404 std::string EscapePath(const std::string& path) {	405 std::string EscapePath(base::StringPiece path) {

405 return Escape(path, kPathCharmap, false);	406 return Escape(path, kPathCharmap, false);

406 }	407 }

407	408

408 #if defined(OS_MACOSX)	409 #if defined(OS_MACOSX)

409 std::string EscapeNSURLPrecursor(const std::string& precursor) {	410 std::string EscapeNSURLPrecursor(base::StringPiece precursor) {

410 return Escape(precursor, kNSURLCharmap, false, true);	411 return Escape(precursor, kNSURLCharmap, false, true);

411 }	412 }

412 #endif // defined(OS_MACOSX)	413 #endif // defined(OS_MACOSX)

413	414

414 std::string EscapeUrlEncodedData(const std::string& path, bool use_plus) {	415 std::string EscapeUrlEncodedData(base::StringPiece path, bool use_plus) {

415 return Escape(path, kUrlEscape, use_plus);	416 return Escape(path, kUrlEscape, use_plus);

416 }	417 }

417	418

418 std::string EscapeNonASCII(const std::string& input) {	419 std::string EscapeNonASCII(base::StringPiece input) {

419 return Escape(input, kNonASCIICharmap, false);	420 return Escape(input, kNonASCIICharmap, false);

420 }	421 }

421	422

422 std::string EscapeExternalHandlerValue(const std::string& text) {	423 std::string EscapeExternalHandlerValue(base::StringPiece text) {

423 return Escape(text, kExternalHandlerCharmap, false, true);	424 return Escape(text, kExternalHandlerCharmap, false, true);

424 }	425 }

425	426

426 void AppendEscapedCharForHTML(char c, std::string* output) {	427 void AppendEscapedCharForHTML(char c, std::string* output) {

427 AppendEscapedCharForHTMLImpl(c, output);	428 AppendEscapedCharForHTMLImpl(c, output);

428 }	429 }

429	430

430 std::string EscapeForHTML(const std::string& input) {	431 std::string EscapeForHTML(base::StringPiece input) {

431 return EscapeForHTMLImpl(input);	432 return EscapeForHTMLImpl(input);

432 }	433 }

433	434

434 base::string16 EscapeForHTML(const base::string16& input) {	435 base::string16 EscapeForHTML(base::StringPiece16 input) {

435 return EscapeForHTMLImpl(input);	436 return EscapeForHTMLImpl(input);

436 }	437 }

437	438

438 std::string UnescapeURLComponent(const std::string& escaped_text,	439 std::string UnescapeURLComponent(base::StringPiece escaped_text,

439 UnescapeRule::Type rules) {	440 UnescapeRule::Type rules) {

440 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);	441 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);

441 }	442 }

442	443

443 base::string16 UnescapeURLComponent(const base::string16& escaped_text,	444 base::string16 UnescapeURLComponent(base::StringPiece16 escaped_text,

444 UnescapeRule::Type rules) {	445 UnescapeRule::Type rules) {

445 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);	446 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL);

446 }	447 }

447	448

448 base::string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,	449 base::string16 UnescapeAndDecodeUTF8URLComponent(base::StringPiece text,

449 UnescapeRule::Type rules) {	450 UnescapeRule::Type rules) {

450 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL);	451 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL);

451 }	452 }

452	453

453 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(	454 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(

454 const std::string& text,	455 base::StringPiece text,

455 UnescapeRule::Type rules,	456 UnescapeRule::Type rules,

456 base::OffsetAdjuster::Adjustments* adjustments) {	457 base::OffsetAdjuster::Adjustments* adjustments) {

457 base::string16 result;	458 base::string16 result;

458 base::OffsetAdjuster::Adjustments unescape_adjustments;	459 base::OffsetAdjuster::Adjustments unescape_adjustments;

459 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl(	460 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl(

460 text, rules, &unescape_adjustments));	461 text, rules, &unescape_adjustments));

461 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(),	462 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(),

462 unescaped_url.length(),	463 unescaped_url.length(),

463 &result, adjustments)) {	464 &result, adjustments)) {

464 // Character set looks like it's valid.	465 // Character set looks like it's valid.

465 if (adjustments) {	466 if (adjustments) {

466 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments,	467 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments,

467 adjustments);	468 adjustments);

468 }	469 }

469 return result;	470 return result;

470 }	471 }

471 // Character set is not valid. Return the escaped version.	472 // Character set is not valid. Return the escaped version.

472 return base::UTF8ToUTF16WithAdjustments(text, adjustments);	473 return base::UTF8ToUTF16WithAdjustments(text, adjustments);

473 }	474 }

474	475

475 base::string16 UnescapeForHTML(const base::string16& input) {	476 base::string16 UnescapeForHTML(base::StringPiece16 input) {

476 static const struct {	477 static const struct {

477 const char* ampersand_code;	478 const char* ampersand_code;

478 const char replacement;	479 const char replacement;

479 } kEscapeToChars[] = {	480 } kEscapeToChars[] = {

480 { "&lt;", '<' },	481 { "&lt;", '<' },

481 { "&gt;", '>' },	482 { "&gt;", '>' },

482 { "&amp;", '&' },	483 { "&amp;", '&' },

483 { "&quot;", '"' },	484 { "&quot;", '"' },

484 { "&#39;", '\''},	485 { "&#39;", '\''},

485 };	486 };

486	487

487 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos)	488 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos)

488 return input;	489 return input.as_string();

489	490

490 base::string16 ampersand_chars[arraysize(kEscapeToChars)];	491 base::string16 ampersand_chars[arraysize(kEscapeToChars)];

491 base::string16 text(input);	492 base::string16 text = input.as_string();

492 for (base::string16::iterator iter = text.begin();	493 for (base::string16::iterator iter = text.begin();

493 iter != text.end(); ++iter) {	494 iter != text.end(); ++iter) {

494 if (*iter == '&') {	495 if (*iter == '&') {

495 // Potential ampersand encode char.	496 // Potential ampersand encode char.

496 size_t index = iter - text.begin();	497 size_t index = iter - text.begin();

497 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) {	498 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) {

498 if (ampersand_chars[i].empty()) {	499 if (ampersand_chars[i].empty()) {

499 ampersand_chars[i] =	500 ampersand_chars[i] =

500 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code);	501 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code);

501 }	502 }

502 if (text.find(ampersand_chars[i], index) == index) {	503 if (text.find(ampersand_chars[i], index) == index) {

503 text.replace(iter, iter + ampersand_chars[i].length(),	504 text.replace(iter, iter + ampersand_chars[i].length(),

504 1, kEscapeToChars[i].replacement);	505 1, kEscapeToChars[i].replacement);

505 break;	506 break;

506 }	507 }

507 }	508 }

508 }	509 }

509 }	510 }

510 return text;	511 return text;

511 }	512 }

512	513

513 } // namespace net	514 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/escape.h ('k') | no next file » | no next file with comments »