net/base/escape.cc - Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur...

Side by Side Diff: net/base/escape.cc

Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <algorithm>	5 #include <algorithm>

6	6

7 #include "net/base/escape.h"	7 #include "net/base/escape.h"

8	8

9 #include "base/i18n/icu_string_conversions.h"	9 #include "base/i18n/icu_string_conversions.h"

10 #include "base/logging.h"	10 #include "base/logging.h"

11 #include "base/string_util.h"	11 #include "base/string_util.h"

(...skipping 88 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

101 // P Q R S T U V W X Y Z [ \ ] ^ _	101 // P Q R S T U V W X Y Z [ \ ] ^ _

102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

103 // ` a b c d e f g h i j k l m n o	103 // ` a b c d e f g h i j k l m n o

104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

105 // p q r s t u v w x y z { | } ~ <NBSP>	105 // p q r s t u v w x y z { | } ~ <NBSP>

106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0	106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0

107 };	107 };

108	108

109 std::string UnescapeURLImpl(const std::string& escaped_text,	109 std::string UnescapeURLImpl(const std::string& escaped_text,

110 UnescapeRule::Type rules) {	110 UnescapeRule::Type rules,

	111 size_t* offset_for_adjustment) {

	112 size_t offset_temp = std::wstring::npos;

	113 if (!offset_for_adjustment)

	114 offset_for_adjustment = &offset_temp;

	115 else if (*offset_for_adjustment >= escaped_text.length())

	116 *offset_for_adjustment = std::wstring::npos;

	117

111 // Do not unescape anything, return the |escaped_text| text.	118 // Do not unescape anything, return the |escaped_text| text.

112 if (rules == UnescapeRule::NONE)	119 if (rules == UnescapeRule::NONE)

113 return escaped_text;	120 return escaped_text;

114	121

115 // The output of the unescaping is always smaller than the input, so we can	122 // The output of the unescaping is always smaller than the input, so we can

116 // reserve the input size to make sure we have enough buffer and don't have	123 // reserve the input size to make sure we have enough buffer and don't have

117 // to allocate in the loop below.	124 // to allocate in the loop below.

118 std::string result;	125 std::string result;

119 result.reserve(escaped_text.length());	126 result.reserve(escaped_text.length());

120	127

121 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {	128 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {

122 if (escaped_text[i] == '%' && i + 2 < max) {	129 if (escaped_text[i] == '%' && i + 2 < max) {

123 const std::string::value_type most_sig_digit(escaped_text[i + 1]);	130 const std::string::value_type most_sig_digit(escaped_text[i + 1]);

124 const std::string::value_type least_sig_digit(escaped_text[i + 2]);	131 const std::string::value_type least_sig_digit(escaped_text[i + 2]);

125 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) {	132 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) {

126 unsigned char value = HexToInt(most_sig_digit) * 16 +	133 unsigned char value = HexToInt(most_sig_digit) * 16 +

127 HexToInt(least_sig_digit);	134 HexToInt(least_sig_digit);

128 if (value >= 0x80 || // Unescape all high-bit characters.	135 if (value >= 0x80 || // Unescape all high-bit characters.

129 // For 7-bit characters, the lookup table tells us all valid chars.	136 // For 7-bit characters, the lookup table tells us all valid chars.

130 (kUrlUnescape[value] ||	137 (kUrlUnescape[value] ||

131 // ...and we allow some additional unescaping when flags are set.	138 // ...and we allow some additional unescaping when flags are set.

132 (value == ' ' && (rules & UnescapeRule::SPACES)) ||	139 (value == ' ' && (rules & UnescapeRule::SPACES)) ||

133 // Allow any of the prohibited but non-control characters when	140 // Allow any of the prohibited but non-control characters when

134 // we're doing "special" chars.	141 // we're doing "special" chars.

135 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||	142 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||

136 // Additionally allow control characters if requested.	143 // Additionally allow control characters if requested.

137 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {	144 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {

138 // Use the unescaped version of the character.	145 // Use the unescaped version of the character.

	146 size_t length_before_append = result.length();

139 result.push_back(value);	147 result.push_back(value);

140 i += 2;	148 i += 2;

	149

	150 // Adjust offset to match length change.

	151 if (*offset_for_adjustment != std::string::npos) {

	152 if (*offset_for_adjustment > (length_before_append + 2))

	153 *offset_for_adjustment -= 2;

	154 else if (*offset_for_adjustment > length_before_append)

	155 *offset_for_adjustment = std::string::npos;

	156 }

141 } else {	157 } else {

142 // Keep escaped. Append a percent and we'll get the following two	158 // Keep escaped. Append a percent and we'll get the following two

143 // digits on the next loops through.	159 // digits on the next loops through.

144 result.push_back('%');	160 result.push_back('%');

145 }	161 }

146 } else {	162 } else {

147 // Invalid escape sequence, just pass the percent through and continue	163 // Invalid escape sequence, just pass the percent through and continue

148 // right after it.	164 // right after it.

149 result.push_back('%');	165 result.push_back('%');

150 }	166 }

(...skipping 73 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
224 if (!base::WideToCodepage(text, codepage,	240 if (!base::WideToCodepage(text, codepage,

225 base::OnStringConversionError::SKIP, &encoded))	241 base::OnStringConversionError::SKIP, &encoded))

226 return false;	242 return false;

227	243

228 // It's safe to use UTF8ToWide here because Escape should only return	244 // It's safe to use UTF8ToWide here because Escape should only return

229 // alphanumerics and !'()*-._~	245 // alphanumerics and !'()*-._~

230 escaped->assign(UTF8ToWide(Escape(encoded, kQueryCharmap, true)));	246 escaped->assign(UTF8ToWide(Escape(encoded, kQueryCharmap, true)));

231 return true;	247 return true;

232 }	248 }

233	249

234 std::wstring UnescapeAndDecodeURLComponent(const std::string& text,	250 std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text,

235 const char* codepage,	251 UnescapeRule::Type rules,

236 UnescapeRule::Type rules) {	252 size_t* offset_for_adjustment) {

237 std::wstring result;	253 std::wstring result;

238 if (base::CodepageToWide(UnescapeURLImpl(text, rules), codepage,	254 size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0;

239 base::OnStringConversionError::FAIL, &result))	255 if (base::CodepageToWideAndAdjustOffset(

	256 UnescapeURLImpl(text, rules, offset_for_adjustment),

	257 "UTF-8", base::OnStringConversionError::FAIL, &result,

	258 offset_for_adjustment))

240 return result; // Character set looks like it's valid.	259 return result; // Character set looks like it's valid.

241 return UTF8ToWide(text); // Return the escaped version when it's not.	260

	261 // Not valid. Return the escaped version. Undo our changes to

	262 // |offset_for_adjustment| since we haven't changed the string after all.

	263 if (offset_for_adjustment)

	264 *offset_for_adjustment = original_offset;

	265 return UTF8ToWideAndAdjustOffset(text, offset_for_adjustment);

242 }	266 }

243	267

244 std::string UnescapeURLComponent(const std::string& escaped_text,	268 std::string UnescapeURLComponent(const std::string& escaped_text,

245 UnescapeRule::Type rules) {	269 UnescapeRule::Type rules) {

246 return UnescapeURLImpl(escaped_text, rules);	270 return UnescapeURLImpl(escaped_text, rules, NULL);

247 }	271 }

248	272

249 template <class str>	273 template <class str>

250 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) {	274 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) {

251 static const struct {	275 static const struct {

252 char key;	276 char key;

253 const char *replacement;	277 const char *replacement;

254 } kCharsToEscape[] = {	278 } kCharsToEscape[] = {

255 { '<', "&lt;" },	279 { '<', "&lt;" },

256 { '>', "&gt;" },	280 { '>', "&gt;" },

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
290 return result;	314 return result;

291 }	315 }

292	316

293 std::string EscapeForHTML(const std::string& input) {	317 std::string EscapeForHTML(const std::string& input) {

294 return EscapeForHTMLImpl(input);	318 return EscapeForHTMLImpl(input);

295 }	319 }

296	320

297 std::wstring EscapeForHTML(const std::wstring& input) {	321 std::wstring EscapeForHTML(const std::wstring& input) {

298 return EscapeForHTMLImpl(input);	322 return EscapeForHTMLImpl(input);

299 }	323 }

OLD	NEW

« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »