net/base/escape.cc - Issue 548088: Adding some more escaping method....

Side by Side Diff: net/base/escape.cc

Issue 548088: Adding some more escaping method.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 10 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <algorithm>	5 #include <algorithm>

6	6

7 #include "net/base/escape.h"	7 #include "net/base/escape.h"

8	8

9 #include "base/i18n/icu_string_conversions.h"	9 #include "base/i18n/icu_string_conversions.h"

10 #include "base/logging.h"	10 #include "base/logging.h"

(...skipping 90 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
101 // @ A B C D E F G H I J K L M N O	101 // @ A B C D E F G H I J K L M N O

102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

103 // P Q R S T U V W X Y Z [ \ ] ^ _	103 // P Q R S T U V W X Y Z [ \ ] ^ _

104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

105 // ` a b c d e f g h i j k l m n o	105 // ` a b c d e f g h i j k l m n o

106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

107 // p q r s t u v w x y z { | } ~ <NBSP>	107 // p q r s t u v w x y z { | } ~ <NBSP>

108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0	108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0

109 };	109 };

110	110

111 std::string UnescapeURLImpl(const std::string& escaped_text,	111 template<typename STR>

112 UnescapeRule::Type rules,	112 STR UnescapeURLImpl(const STR& escaped_text,

113 size_t* offset_for_adjustment) {	113 UnescapeRule::Type rules,

	114 size_t* offset_for_adjustment) {

114 size_t offset_temp = string16::npos;	115 size_t offset_temp = string16::npos;

115 if (!offset_for_adjustment)	116 if (!offset_for_adjustment)

116 offset_for_adjustment = &offset_temp;	117 offset_for_adjustment = &offset_temp;

117 else if (*offset_for_adjustment >= escaped_text.length())	118 else if (*offset_for_adjustment >= escaped_text.length())

118 *offset_for_adjustment = string16::npos;	119 *offset_for_adjustment = string16::npos;

119	120

120 // Do not unescape anything, return the |escaped_text| text.	121 // Do not unescape anything, return the |escaped_text| text.

121 if (rules == UnescapeRule::NONE)	122 if (rules == UnescapeRule::NONE)

122 return escaped_text;	123 return escaped_text;

123	124

124 // The output of the unescaping is always smaller than the input, so we can	125 // The output of the unescaping is always smaller than the input, so we can

125 // reserve the input size to make sure we have enough buffer and don't have	126 // reserve the input size to make sure we have enough buffer and don't have

126 // to allocate in the loop below.	127 // to allocate in the loop below.

127 std::string result;	128 STR result;

128 result.reserve(escaped_text.length());	129 result.reserve(escaped_text.length());

129	130

130 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {	131 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {

131 if (escaped_text[i] == '%' && i + 2 < max) {	132 if (static_cast<unsigned char>(escaped_text[i]) >= 128) {

132 const std::string::value_type most_sig_digit(escaped_text[i + 1]);	133 // Non ASCII character, append as is.

133 const std::string::value_type least_sig_digit(escaped_text[i + 2]);	134 result.push_back(escaped_text[i]);

	135 continue;

	136 }

	137

	138 char current_char = static_cast<char>(escaped_text[i]);

	139 if (current_char == '%' && i + 2 < max) {

	140 const typename STR::value_type most_sig_digit(

	141 static_cast<typename STR::value_type>(escaped_text[i + 1]));

	142 const typename STR::value_type least_sig_digit(

	143 static_cast<typename STR::value_type>(escaped_text[i + 2]));

134 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) {	144 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) {

135 unsigned char value = HexToInt(most_sig_digit) * 16 +	145 unsigned char value = HexToInt(most_sig_digit) * 16 +

136 HexToInt(least_sig_digit);	146 HexToInt(least_sig_digit);

137 if (value >= 0x80 || // Unescape all high-bit characters.	147 if (value >= 0x80 || // Unescape all high-bit characters.

138 // For 7-bit characters, the lookup table tells us all valid chars.	148 // For 7-bit characters, the lookup table tells us all valid chars.

139 (kUrlUnescape[value] ||	149 (kUrlUnescape[value] ||

140 // ...and we allow some additional unescaping when flags are set.	150 // ...and we allow some additional unescaping when flags are set.

141 (value == ' ' && (rules & UnescapeRule::SPACES)) ||	151 (value == ' ' && (rules & UnescapeRule::SPACES)) ||

142 // Allow any of the prohibited but non-control characters when	152 // Allow any of the prohibited but non-control characters when

143 // we're doing "special" chars.	153 // we're doing "special" chars.

(...skipping 124 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
268 *offset_for_adjustment = original_offset;	278 *offset_for_adjustment = original_offset;

269 return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text,	279 return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text,

270 offset_for_adjustment));	280 offset_for_adjustment));

271 }	281 }

272	282

273 std::string UnescapeURLComponent(const std::string& escaped_text,	283 std::string UnescapeURLComponent(const std::string& escaped_text,

274 UnescapeRule::Type rules) {	284 UnescapeRule::Type rules) {

275 return UnescapeURLImpl(escaped_text, rules, NULL);	285 return UnescapeURLImpl(escaped_text, rules, NULL);

276 }	286 }

277	287

	288 string16 UnescapeURLComponent(const string16& escaped_text,

	289 UnescapeRule::Type rules) {

	290 return UnescapeURLImpl(escaped_text, rules, NULL);

	291 }

	292

	293

278 template <class str>	294 template <class str>

279 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) {	295 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) {

280 static const struct {	296 static const struct {

281 char key;	297 char key;

282 const char *replacement;	298 const char* replacement;

283 } kCharsToEscape[] = {	299 } kCharsToEscape[] = {

284 { '<', "&lt;" },	300 { '<', "&lt;" },

285 { '>', "&gt;" },	301 { '>', "&gt;" },

286 { '&', "&amp;" },	302 { '&', "&amp;" },

287 { '"', "&quot;" },	303 { '"', "&quot;" },

288 { '\'', "&#39;" },	304 { '\'', "&#39;" },

289 };	305 };

290 size_t k;	306 size_t k;

291 for (k = 0; k < ARRAYSIZE_UNSAFE(kCharsToEscape); ++k) {	307 for (k = 0; k < ARRAYSIZE_UNSAFE(kCharsToEscape); ++k) {

292 if (c == kCharsToEscape[k].key) {	308 if (c == kCharsToEscape[k].key) {

(...skipping 26 matching lines...) Expand all Loading...
319 return result;	335 return result;

320 }	336 }

321	337

322 std::string EscapeForHTML(const std::string& input) {	338 std::string EscapeForHTML(const std::string& input) {

323 return EscapeForHTMLImpl(input);	339 return EscapeForHTMLImpl(input);

324 }	340 }

325	341

326 string16 EscapeForHTML(const string16& input) {	342 string16 EscapeForHTML(const string16& input) {

327 return EscapeForHTMLImpl(input);	343 return EscapeForHTMLImpl(input);

328 }	344 }

	345

	346 string16 UnescapeForHTML(const string16& input) {

	347 static const struct {

	348 const wchar_t* ampersand_code;

	349 const char replacement;

	350 } kEscapeToChars[] = {

	351 { L"&lt;", '<' },

	352 { L"&gt;", '>' },

	353 { L"&amp;", '&' },

	354 { L"&quot;", '"' },

	355 { L"&#39;", '\''},

	356 };

	357

	358 if (input.find(WideToUTF16(L"&")) == std::string::npos)

	359 return input;

	360

	361 string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)];

	362 string16 text(input);

	363 for (string16::iterator iter = text.begin(); iter != text.end(); ++iter) {

	364 if (*iter == '&') {

	365 // Potential ampersand encode char.

	366 size_t index = iter - text.begin();

	367 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); i++) {

	368 if (ampersand_chars[i].empty())

	369 ampersand_chars[i] = WideToUTF16(kEscapeToChars[i].ampersand_code);

	370 if (text.find(ampersand_chars[i], index) == index) {

	371 text.replace(iter, iter + ampersand_chars[i].length(),

	372 1, kEscapeToChars[i].replacement);

	373 break;

	374 }

	375 }

	376 }

	377 }

	378 return text;

	379 }

OLD	NEW

« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »