Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(44)

Side by Side Diff: net/base/escape.cc

Issue 548088: Adding some more escaping method.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 10 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <algorithm> 5 #include <algorithm>
6 6
7 #include "net/base/escape.h" 7 #include "net/base/escape.h"
8 8
9 #include "base/i18n/icu_string_conversions.h" 9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
101 // @ A B C D E F G H I J K L M N O 101 // @ A B C D E F G H I J K L M N O
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 // P Q R S T U V W X Y Z [ \ ] ^ _ 103 // P Q R S T U V W X Y Z [ \ ] ^ _
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 // ` a b c d e f g h i j k l m n o 105 // ` a b c d e f g h i j k l m n o
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 // p q r s t u v w x y z { | } ~ <NBSP> 107 // p q r s t u v w x y z { | } ~ <NBSP>
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
109 }; 109 };
110 110
111 std::string UnescapeURLImpl(const std::string& escaped_text, 111 template<typename STR>
112 UnescapeRule::Type rules, 112 STR UnescapeURLImpl(const STR& escaped_text,
113 size_t* offset_for_adjustment) { 113 UnescapeRule::Type rules,
114 size_t* offset_for_adjustment) {
114 size_t offset_temp = string16::npos; 115 size_t offset_temp = string16::npos;
115 if (!offset_for_adjustment) 116 if (!offset_for_adjustment)
116 offset_for_adjustment = &offset_temp; 117 offset_for_adjustment = &offset_temp;
117 else if (*offset_for_adjustment >= escaped_text.length()) 118 else if (*offset_for_adjustment >= escaped_text.length())
118 *offset_for_adjustment = string16::npos; 119 *offset_for_adjustment = string16::npos;
119 120
120 // Do not unescape anything, return the |escaped_text| text. 121 // Do not unescape anything, return the |escaped_text| text.
121 if (rules == UnescapeRule::NONE) 122 if (rules == UnescapeRule::NONE)
122 return escaped_text; 123 return escaped_text;
123 124
124 // The output of the unescaping is always smaller than the input, so we can 125 // The output of the unescaping is always smaller than the input, so we can
125 // reserve the input size to make sure we have enough buffer and don't have 126 // reserve the input size to make sure we have enough buffer and don't have
126 // to allocate in the loop below. 127 // to allocate in the loop below.
127 std::string result; 128 STR result;
128 result.reserve(escaped_text.length()); 129 result.reserve(escaped_text.length());
129 130
130 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { 131 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
131 if (escaped_text[i] == '%' && i + 2 < max) { 132 if (static_cast<unsigned char>(escaped_text[i]) >= 128) {
132 const std::string::value_type most_sig_digit(escaped_text[i + 1]); 133 // Non ASCII character, append as is.
133 const std::string::value_type least_sig_digit(escaped_text[i + 2]); 134 result.push_back(escaped_text[i]);
135 continue;
136 }
137
138 char current_char = static_cast<char>(escaped_text[i]);
139 if (current_char == '%' && i + 2 < max) {
140 const typename STR::value_type most_sig_digit(
141 static_cast<typename STR::value_type>(escaped_text[i + 1]));
142 const typename STR::value_type least_sig_digit(
143 static_cast<typename STR::value_type>(escaped_text[i + 2]));
134 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) { 144 if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) {
135 unsigned char value = HexToInt(most_sig_digit) * 16 + 145 unsigned char value = HexToInt(most_sig_digit) * 16 +
136 HexToInt(least_sig_digit); 146 HexToInt(least_sig_digit);
137 if (value >= 0x80 || // Unescape all high-bit characters. 147 if (value >= 0x80 || // Unescape all high-bit characters.
138 // For 7-bit characters, the lookup table tells us all valid chars. 148 // For 7-bit characters, the lookup table tells us all valid chars.
139 (kUrlUnescape[value] || 149 (kUrlUnescape[value] ||
140 // ...and we allow some additional unescaping when flags are set. 150 // ...and we allow some additional unescaping when flags are set.
141 (value == ' ' && (rules & UnescapeRule::SPACES)) || 151 (value == ' ' && (rules & UnescapeRule::SPACES)) ||
142 // Allow any of the prohibited but non-control characters when 152 // Allow any of the prohibited but non-control characters when
143 // we're doing "special" chars. 153 // we're doing "special" chars.
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
268 *offset_for_adjustment = original_offset; 278 *offset_for_adjustment = original_offset;
269 return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text, 279 return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text,
270 offset_for_adjustment)); 280 offset_for_adjustment));
271 } 281 }
272 282
273 std::string UnescapeURLComponent(const std::string& escaped_text, 283 std::string UnescapeURLComponent(const std::string& escaped_text,
274 UnescapeRule::Type rules) { 284 UnescapeRule::Type rules) {
275 return UnescapeURLImpl(escaped_text, rules, NULL); 285 return UnescapeURLImpl(escaped_text, rules, NULL);
276 } 286 }
277 287
// string16 overload of UnescapeURLComponent. Forwards |escaped_text| and
// |rules| to UnescapeURLImpl with NULL as the offset-adjustment argument (no
// caller offset is tracked) and returns that result unchanged.
288 string16 UnescapeURLComponent(const string16& escaped_text,
289 UnescapeRule::Type rules) {
290 return UnescapeURLImpl(escaped_text, rules, NULL);
291 }
292
293
278 template <class str> 294 template <class str>
279 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { 295 void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) {
280 static const struct { 296 static const struct {
281 char key; 297 char key;
282 const char *replacement; 298 const char* replacement;
283 } kCharsToEscape[] = { 299 } kCharsToEscape[] = {
284 { '<', "&lt;" }, 300 { '<', "&lt;" },
285 { '>', "&gt;" }, 301 { '>', "&gt;" },
286 { '&', "&amp;" }, 302 { '&', "&amp;" },
287 { '"', "&quot;" }, 303 { '"', "&quot;" },
288 { '\'', "&#39;" }, 304 { '\'', "&#39;" },
289 }; 305 };
290 size_t k; 306 size_t k;
291 for (k = 0; k < ARRAYSIZE_UNSAFE(kCharsToEscape); ++k) { 307 for (k = 0; k < ARRAYSIZE_UNSAFE(kCharsToEscape); ++k) {
292 if (c == kCharsToEscape[k].key) { 308 if (c == kCharsToEscape[k].key) {
(...skipping 26 matching lines...) Expand all
319 return result; 335 return result;
320 } 336 }
321 337
322 std::string EscapeForHTML(const std::string& input) { 338 std::string EscapeForHTML(const std::string& input) {
323 return EscapeForHTMLImpl(input); 339 return EscapeForHTMLImpl(input);
324 } 340 }
325 341
326 string16 EscapeForHTML(const string16& input) { 342 string16 EscapeForHTML(const string16& input) {
327 return EscapeForHTMLImpl(input); 343 return EscapeForHTMLImpl(input);
328 } 344 }
345
// Reverses the five HTML entity escapes listed in kEscapeToChars
// (&lt; &gt; &amp; &quot; &#39;) in |input| and returns the resulting string.
// If |input| contains no ampersand it is returned as is. Otherwise a copy is
// scanned once from left to right; at each ampersand the table entries are
// tried in order and the first entity that starts exactly at that position is
// replaced by its single character. An ampersand that starts none of the
// listed entities is left untouched. Each entity string is converted with
// WideToUTF16 only the first time its table index is reached and is then
// cached in |ampersand_chars| for the rest of the call.
// NOTE(review): |iter| keeps being used (++iter) after text.replace() has
// modified |text|; string modifications are allowed to invalidate iterators,
// so please confirm this is safe for string16 or switch to index-based
// scanning.
// NOTE(review): text.find(code, index) == index searches the whole remainder
// of |text| when the entity is not at |index|; something like compare() at
// |index| would avoid the extra scanning.
// NOTE(review): the early-out compares against std::string::npos although
// |input| is a string16; presumably the two npos values are equal - confirm.
346 string16 UnescapeForHTML(const string16& input) {
347 static const struct {
348 const wchar_t* ampersand_code;
349 const char replacement;
350 } kEscapeToChars[] = {
351 { L"&lt;", '<' },
352 { L"&gt;", '>' },
353 { L"&amp;", '&' },
354 { L"&quot;", '"' },
355 { L"&#39;", '\''},
356 };
357
358 if (input.find(WideToUTF16(L"&")) == std::string::npos)
359 return input;
360
361 string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)];
362 string16 text(input);
363 for (string16::iterator iter = text.begin(); iter != text.end(); ++iter) {
364 if (*iter == '&') {
365 // Possible start of an ampersand-encoded entity; |index| is its offset in |text|.
366 size_t index = iter - text.begin();
367 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); i++) {
368 if (ampersand_chars[i].empty())
369 ampersand_chars[i] = WideToUTF16(kEscapeToChars[i].ampersand_code);
370 if (text.find(ampersand_chars[i], index) == index) {
371 text.replace(iter, iter + ampersand_chars[i].length(),
372 1, kEscapeToChars[i].replacement);
373 break;
374 }
375 }
376 }
377 }
378 return text;
379 }
OLDNEW
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698