Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(72)

Side by Side Diff: net/base/escape.cc

Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <algorithm> 5 #include <algorithm>
6 6
7 #include "net/base/escape.h" 7 #include "net/base/escape.h"
8 8
9 #include "base/i18n/icu_string_conversions.h" 9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
11 #include "base/string_util.h" 11 #include "base/string_util.h"
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 // P Q R S T U V W X Y Z [ \ ] ^ _ 101 // P Q R S T U V W X Y Z [ \ ] ^ _
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 // ` a b c d e f g h i j k l m n o 103 // ` a b c d e f g h i j k l m n o
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 // p q r s t u v w x y z { | } ~ <NBSP> 105 // p q r s t u v w x y z { | } ~ <NBSP>
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
107 }; 107 };
108 108
109 std::string UnescapeURLImpl(const std::string& escaped_text, 109 std::string UnescapeURLImpl(const std::string& escaped_text,
110 UnescapeRule::Type rules) { 110 UnescapeRule::Type rules,
111 size_t* offset_for_adjustment) {
112 size_t offset_temp = std::wstring::npos;
113 if (!offset_for_adjustment)
114 offset_for_adjustment = &offset_temp;
115 else if (*offset_for_adjustment >= escaped_text.length())
116 *offset_for_adjustment = std::wstring::npos;
117
111 // Do not unescape anything, return the |escaped_text| text. 118 // Do not unescape anything, return the |escaped_text| text.
112 if (rules == UnescapeRule::NONE) 119 if (rules == UnescapeRule::NONE)
113 return escaped_text; 120 return escaped_text;
114 121
115 // The output of the unescaping is always smaller than the input, so we can 122 // The output of the unescaping is always smaller than the input, so we can
116 // reserve the input size to make sure we have enough buffer and don't have 123 // reserve the input size to make sure we have enough buffer and don't have
117 // to allocate in the loop below. 124 // to allocate in the loop below.
118 std::string result; 125 std::string result;
119 result.reserve(escaped_text.length()); 126 result.reserve(escaped_text.length());
120 127
121 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { 128 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
122 if (escaped_text[i] == '%' && i + 2 < max) { 129 if (escaped_text[i] == '%' && i + 2 < max) {
123 const std::string::value_type most_sig_digit(escaped_text[i + 1]); 130 const std::string::value_type most_sig_digit(escaped_text[i + 1]);
124 const std::string::value_type least_sig_digit(escaped_text[i + 2]); 131 const std::string::value_type least_sig_digit(escaped_text[i + 2]);
125 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) { 132 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) {
126 unsigned char value = HexToInt(most_sig_digit) * 16 + 133 unsigned char value = HexToInt(most_sig_digit) * 16 +
127 HexToInt(least_sig_digit); 134 HexToInt(least_sig_digit);
128 if (value >= 0x80 || // Unescape all high-bit characters. 135 if (value >= 0x80 || // Unescape all high-bit characters.
129 // For 7-bit characters, the lookup table tells us all valid chars. 136 // For 7-bit characters, the lookup table tells us all valid chars.
130 (kUrlUnescape[value] || 137 (kUrlUnescape[value] ||
131 // ...and we allow some additional unescaping when flags are set. 138 // ...and we allow some additional unescaping when flags are set.
132 (value == ' ' && (rules & UnescapeRule::SPACES)) || 139 (value == ' ' && (rules & UnescapeRule::SPACES)) ||
133 // Allow any of the prohibited but non-control characters when 140 // Allow any of the prohibited but non-control characters when
134 // we're doing "special" chars. 141 // we're doing "special" chars.
135 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || 142 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||
136 // Additionally allow control characters if requested. 143 // Additionally allow control characters if requested.
137 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { 144 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
138 // Use the unescaped version of the character. 145 // Use the unescaped version of the character.
146 size_t length_before_append = result.length();
139 result.push_back(value); 147 result.push_back(value);
140 i += 2; 148 i += 2;
149
150 // Adjust offset to match length change.
151 if (*offset_for_adjustment != std::string::npos) {
152 if (*offset_for_adjustment > (length_before_append + 2))
153 *offset_for_adjustment -= 2;
154 else if (*offset_for_adjustment > length_before_append)
155 *offset_for_adjustment = std::string::npos;
156 }
141 } else { 157 } else {
142 // Keep escaped. Append a percent and we'll get the following two 158 // Keep escaped. Append a percent and we'll get the following two
143 // digits on the next loops through. 159 // digits on the next loops through.
144 result.push_back('%'); 160 result.push_back('%');
145 } 161 }
146 } else { 162 } else {
147 // Invalid escape sequence, just pass the percent through and continue 163 // Invalid escape sequence, just pass the percent through and continue
148 // right after it. 164 // right after it.
149 result.push_back('%'); 165 result.push_back('%');
150 } 166 }
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
224 if (!base::WideToCodepage(text, codepage, 240 if (!base::WideToCodepage(text, codepage,
225 base::OnStringConversionError::SKIP, &encoded)) 241 base::OnStringConversionError::SKIP, &encoded))
226 return false; 242 return false;
227 243
228 // It's safe to use UTF8ToWide here because Escape should only return 244 // It's safe to use UTF8ToWide here because Escape should only return
229 // alphanumerics and !'()*-._~ 245 // alphanumerics and !'()*-._~
230 escaped->assign(UTF8ToWide(Escape(encoded, kQueryCharmap, true))); 246 escaped->assign(UTF8ToWide(Escape(encoded, kQueryCharmap, true)));
231 return true; 247 return true;
232 } 248 }
233 249
234 std::wstring UnescapeAndDecodeURLComponent(const std::string& text, 250 std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text,
235 const char* codepage, 251 UnescapeRule::Type rules,
236 UnescapeRule::Type rules) { 252 size_t* offset_for_adjustment) {
237 std::wstring result; 253 std::wstring result;
238 if (base::CodepageToWide(UnescapeURLImpl(text, rules), codepage, 254 size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0;
239 base::OnStringConversionError::FAIL, &result)) 255 if (base::CodepageToWideAndAdjustOffset(
256 UnescapeURLImpl(text, rules, offset_for_adjustment),
257 "UTF-8", base::OnStringConversionError::FAIL, &result,
258 offset_for_adjustment))
240 return result; // Character set looks like it's valid. 259 return result; // Character set looks like it's valid.
241 return UTF8ToWide(text); // Return the escaped version when it's not. 260
261 // Not valid. Return the escaped version. Undo our changes to
262 // |offset_for_adjustment| since we haven't changed the string after all.
263 if (offset_for_adjustment)
264 *offset_for_adjustment = original_offset;
265 return UTF8ToWideAndAdjustOffset(text, offset_for_adjustment);
242 } 266 }
243 267
244 std::string UnescapeURLComponent(const std::string& escaped_text, 268 std::string UnescapeURLComponent(const std::string& escaped_text,
245 UnescapeRule::Type rules) { 269 UnescapeRule::Type rules) {
246 return UnescapeURLImpl(escaped_text, rules); 270 return UnescapeURLImpl(escaped_text, rules, NULL);
247 } 271 }
248 272
249 template <class str> 273 template <class str>
250 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { 274 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) {
251 static const struct { 275 static const struct {
252 char key; 276 char key;
253 const char *replacement; 277 const char *replacement;
254 } kCharsToEscape[] = { 278 } kCharsToEscape[] = {
255 { '<', "&lt;" }, 279 { '<', "&lt;" },
256 { '>', "&gt;" }, 280 { '>', "&gt;" },
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
290 return result; 314 return result;
291 } 315 }
292 316
293 std::string EscapeForHTML(const std::string& input) { 317 std::string EscapeForHTML(const std::string& input) {
294 return EscapeForHTMLImpl(input); 318 return EscapeForHTMLImpl(input);
295 } 319 }
296 320
297 std::wstring EscapeForHTML(const std::wstring& input) { 321 std::wstring EscapeForHTML(const std::wstring& input) {
298 return EscapeForHTMLImpl(input); 322 return EscapeForHTMLImpl(input);
299 } 323 }
OLD | NEW
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698