Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/base/escape.h" | 5 #include "net/base/escape.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <utility> |
| 8 #include <memory> | |
| 9 | 8 |
| 10 #include "base/logging.h" | 9 #include "base/logging.h" |
| 11 #include "base/strings/string_piece.h" | |
| 12 #include "base/strings/string_util.h" | 10 #include "base/strings/string_util.h" |
| 13 #include "base/strings/utf_offset_string_conversions.h" | |
|
mmenke
2017/01/05 16:19:54
This is needed for UTF8ToUTF16WithAdjustments, no?
mmenke
2017/01/05 16:21:05
Oops, ignore this. I just assumed this was a meth
| |
| 14 #include "base/strings/utf_string_conversions.h" | 11 #include "base/strings/utf_string_conversions.h" |
| 15 | 12 |
| 16 namespace net { | 13 namespace net { |
| 17 | 14 |
| 18 namespace { | 15 namespace { |
| 19 | 16 |
| 17 template <typename STR> | |
| 18 using StringTypeForStringPiece = decltype(std::declval<STR>().as_string()); | |
|
mmenke
2017/01/05 16:19:54
I think this is sufficiently obscure that it needs
Sam McNally
2017/01/06 00:20:51
I found an alternative that avoids this.
| |
| 19 | |
| 20 const char kHexString[] = "0123456789ABCDEF"; | 20 const char kHexString[] = "0123456789ABCDEF"; |
| 21 inline char IntToHex(int i) { | 21 inline char IntToHex(int i) { |
| 22 DCHECK_GE(i, 0) << i << " not a hex value"; | 22 DCHECK_GE(i, 0) << i << " not a hex value"; |
| 23 DCHECK_LE(i, 15) << i << " not a hex value"; | 23 DCHECK_LE(i, 15) << i << " not a hex value"; |
| 24 return kHexString[i]; | 24 return kHexString[i]; |
| 25 } | 25 } |
| 26 | 26 |
| 27 // A fast bit-vector map for ascii characters. | 27 // A fast bit-vector map for ascii characters. |
| 28 // | 28 // |
| 29 // Internally stores 256 bits in an array of 8 ints. | 29 // Internally stores 256 bits in an array of 8 ints. |
| 30 // Does quick bit-flicking to lookup needed characters. | 30 // Does quick bit-flicking to lookup needed characters. |
| 31 struct Charmap { | 31 struct Charmap { |
| 32 bool Contains(unsigned char c) const { | 32 bool Contains(unsigned char c) const { |
| 33 return ((map[c >> 5] & (1 << (c & 31))) != 0); | 33 return ((map[c >> 5] & (1 << (c & 31))) != 0); |
| 34 } | 34 } |
| 35 | 35 |
| 36 uint32_t map[8]; | 36 uint32_t map[8]; |
| 37 }; | 37 }; |
| 38 | 38 |
| 39 // Given text to escape and a Charmap defining which values to escape, | 39 // Given text to escape and a Charmap defining which values to escape, |
| 40 // return an escaped string. If use_plus is true, spaces are converted | 40 // return an escaped string. If use_plus is true, spaces are converted |
| 41 // to +, otherwise, if spaces are in the charmap, they are converted to | 41 // to +, otherwise, if spaces are in the charmap, they are converted to |
| 42 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if | 42 // %20. And if keep_escaped is true, %XX will be kept as it is, otherwise, if |
| 43 // '%' is in the charmap, it is converted to %25. | 43 // '%' is in the charmap, it is converted to %25. |
| 44 std::string Escape(const std::string& text, | 44 std::string Escape(base::StringPiece text, |
| 45 const Charmap& charmap, | 45 const Charmap& charmap, |
| 46 bool use_plus, | 46 bool use_plus, |
| 47 bool keep_escaped = false) { | 47 bool keep_escaped = false) { |
| 48 std::string escaped; | 48 std::string escaped; |
| 49 escaped.reserve(text.length() * 3); | 49 escaped.reserve(text.length() * 3); |
| 50 for (unsigned int i = 0; i < text.length(); ++i) { | 50 for (unsigned int i = 0; i < text.length(); ++i) { |
| 51 unsigned char c = static_cast<unsigned char>(text[i]); | 51 unsigned char c = static_cast<unsigned char>(text[i]); |
| 52 if (use_plus && ' ' == c) { | 52 if (use_plus && ' ' == c) { |
| 53 escaped.push_back('+'); | 53 escaped.push_back('+'); |
| 54 } else if (keep_escaped && '%' == c && i + 2 < text.length() && | 54 } else if (keep_escaped && '%' == c && i + 2 < text.length() && |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, | 99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, |
| 100 // ` a b c d e f g h i j k l m n o | 100 // ` a b c d e f g h i j k l m n o |
| 101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 102 // p q r s t u v w x y z { | } ~ <NBSP> | 102 // p q r s t u v w x y z { | } ~ <NBSP> |
| 103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 | 103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 |
| 104 }; | 104 }; |
| 105 | 105 |
| 106 // Attempts to unescape the sequence at |index| within |escaped_text|. If | 106 // Attempts to unescape the sequence at |index| within |escaped_text|. If |
| 107 // successful, sets |value| to the unescaped value. Returns whether | 107 // successful, sets |value| to the unescaped value. Returns whether |
| 108 // unescaping succeeded. | 108 // unescaping succeeded. |
| 109 template<typename STR> | 109 template <typename STR> |
| 110 bool UnescapeUnsignedCharAtIndex(const STR& escaped_text, | 110 bool UnescapeUnsignedCharAtIndex(STR escaped_text, |
| 111 size_t index, | 111 size_t index, |
| 112 unsigned char* value) { | 112 unsigned char* value) { |
| 113 if ((index + 2) >= escaped_text.size()) | 113 if ((index + 2) >= escaped_text.size()) |
| 114 return false; | 114 return false; |
| 115 if (escaped_text[index] != '%') | 115 if (escaped_text[index] != '%') |
| 116 return false; | 116 return false; |
| 117 const typename STR::value_type most_sig_digit( | 117 const typename STR::value_type most_sig_digit( |
| 118 static_cast<typename STR::value_type>(escaped_text[index + 1])); | 118 static_cast<typename STR::value_type>(escaped_text[index + 1])); |
| 119 const typename STR::value_type least_sig_digit( | 119 const typename STR::value_type least_sig_digit( |
| 120 static_cast<typename STR::value_type>(escaped_text[index + 2])); | 120 static_cast<typename STR::value_type>(escaped_text[index + 2])); |
| 121 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) { | 121 if (base::IsHexDigit(most_sig_digit) && base::IsHexDigit(least_sig_digit)) { |
| 122 *value = base::HexDigitToInt(most_sig_digit) * 16 + | 122 *value = base::HexDigitToInt(most_sig_digit) * 16 + |
| 123 base::HexDigitToInt(least_sig_digit); | 123 base::HexDigitToInt(least_sig_digit); |
| 124 return true; | 124 return true; |
| 125 } | 125 } |
| 126 return false; | 126 return false; |
| 127 } | 127 } |
| 128 | 128 |
| 129 // Returns true if there is an Arabic Language Mark at |index|. |first_byte| | 129 // Returns true if there is an Arabic Language Mark at |index|. |first_byte| |
| 130 // is the byte at |index|. | 130 // is the byte at |index|. |
| 131 template<typename STR> | 131 template <typename STR> |
| 132 bool HasArabicLanguageMarkAtIndex(const STR& escaped_text, | 132 bool HasArabicLanguageMarkAtIndex(STR escaped_text, |
| 133 unsigned char first_byte, | 133 unsigned char first_byte, |
| 134 size_t index) { | 134 size_t index) { |
| 135 if (first_byte != 0xD8) | 135 if (first_byte != 0xD8) |
| 136 return false; | 136 return false; |
| 137 unsigned char second_byte; | 137 unsigned char second_byte; |
| 138 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) | 138 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) |
| 139 return false; | 139 return false; |
| 140 return second_byte == 0x9c; | 140 return second_byte == 0x9c; |
| 141 } | 141 } |
| 142 | 142 |
| 143 // Returns true if there is a BiDi control char at |index|. |first_byte| is the | 143 // Returns true if there is a BiDi control char at |index|. |first_byte| is the |
| 144 // byte at |index|. | 144 // byte at |index|. |
| 145 template<typename STR> | 145 template <typename STR> |
| 146 bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text, | 146 bool HasThreeByteBidiControlCharAtIndex(STR escaped_text, |
| 147 unsigned char first_byte, | 147 unsigned char first_byte, |
| 148 size_t index) { | 148 size_t index) { |
| 149 if (first_byte != 0xE2) | 149 if (first_byte != 0xE2) |
| 150 return false; | 150 return false; |
| 151 unsigned char second_byte; | 151 unsigned char second_byte; |
| 152 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) | 152 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) |
| 153 return false; | 153 return false; |
| 154 if (second_byte != 0x80 && second_byte != 0x81) | 154 if (second_byte != 0x80 && second_byte != 0x81) |
| 155 return false; | 155 return false; |
| 156 unsigned char third_byte; | 156 unsigned char third_byte; |
| 157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) | 157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) |
| 158 return false; | 158 return false; |
| 159 if (second_byte == 0x80) { | 159 if (second_byte == 0x80) { |
| 160 return third_byte == 0x8E || | 160 return third_byte == 0x8E || |
| 161 third_byte == 0x8F || | 161 third_byte == 0x8F || |
| 162 (third_byte >= 0xAA && third_byte <= 0xAE); | 162 (third_byte >= 0xAA && third_byte <= 0xAE); |
| 163 } | 163 } |
| 164 return third_byte >= 0xA6 && third_byte <= 0xA9; | 164 return third_byte >= 0xA6 && third_byte <= 0xA9; |
| 165 } | 165 } |
| 166 | 166 |
| 167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is | 167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is |
| 168 // the byte at |index|. | 168 // the byte at |index|. |
| 169 template <typename STR> | 169 template <typename STR> |
| 170 bool HasFourByteBannedCharAtIndex(const STR& escaped_text, | 170 bool HasFourByteBannedCharAtIndex(STR escaped_text, |
| 171 unsigned char first_byte, | 171 unsigned char first_byte, |
| 172 size_t index) { | 172 size_t index) { |
| 173 // The following characters are blacklisted for spoofability concerns. | 173 // The following characters are blacklisted for spoofability concerns. |
| 174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) | 174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) |
| 175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) | 175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) |
| 176 // U+1F512 LOCK (%F0%9F%94%92) | 176 // U+1F512 LOCK (%F0%9F%94%92) |
| 177 // U+1F513 OPEN LOCK (%F0%9F%94%93) | 177 // U+1F513 OPEN LOCK (%F0%9F%94%93) |
| 178 if (first_byte != 0xF0) | 178 if (first_byte != 0xF0) |
| 179 return false; | 179 return false; |
| 180 | 180 |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 194 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) && | 194 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) && |
| 195 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 || | 195 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 || |
| 196 fourth_byte == 0x93); | 196 fourth_byte == 0x93); |
| 197 } | 197 } |
| 198 | 198 |
| 199 // Unescapes |escaped_text| according to |rules|, returning the resulting | 199 // Unescapes |escaped_text| according to |rules|, returning the resulting |
| 200 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects | 200 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects |
| 201 // the alterations done to the string that are not one-character-to-one- | 201 // the alterations done to the string that are not one-character-to-one- |
| 202 // character. The resulting |adjustments| will always be sorted by increasing | 202 // character. The resulting |adjustments| will always be sorted by increasing |
| 203 // offset. | 203 // offset. |
| 204 template<typename STR> | 204 template <typename STR> |
| 205 STR UnescapeURLWithAdjustmentsImpl( | 205 StringTypeForStringPiece<STR> UnescapeURLWithAdjustmentsImpl( |
| 206 const STR& escaped_text, | 206 STR escaped_text, |
| 207 UnescapeRule::Type rules, | 207 UnescapeRule::Type rules, |
| 208 base::OffsetAdjuster::Adjustments* adjustments) { | 208 base::OffsetAdjuster::Adjustments* adjustments) { |
| 209 if (adjustments) | 209 if (adjustments) |
| 210 adjustments->clear(); | 210 adjustments->clear(); |
| 211 // Do not unescape anything, return the |escaped_text| text. | 211 // Do not unescape anything, return the |escaped_text| text. |
| 212 if (rules == UnescapeRule::NONE) | 212 if (rules == UnescapeRule::NONE) |
| 213 return escaped_text; | 213 return escaped_text.as_string(); |
| 214 | 214 |
| 215 // The output of the unescaping is always smaller than the input, so we can | 215 // The output of the unescaping is always smaller than the input, so we can |
| 216 // reserve the input size to make sure we have enough buffer and don't have | 216 // reserve the input size to make sure we have enough buffer and don't have |
| 217 // to allocate in the loop below. | 217 // to allocate in the loop below. |
| 218 STR result; | 218 StringTypeForStringPiece<STR> result; |
| 219 result.reserve(escaped_text.length()); | 219 result.reserve(escaped_text.length()); |
| 220 | 220 |
| 221 // Locations of adjusted text. | 221 // Locations of adjusted text. |
| 222 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { | 222 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { |
| 223 if (static_cast<unsigned char>(escaped_text[i]) >= 128) { | 223 if (static_cast<unsigned char>(escaped_text[i]) >= 128) { |
| 224 // Non ASCII character, append as is. | 224 // Non ASCII character, append as is. |
| 225 result.push_back(escaped_text[i]); | 225 result.push_back(escaped_text[i]); |
| 226 continue; | 226 continue; |
| 227 } | 227 } |
| 228 | 228 |
| (...skipping 29 matching lines...) Expand all Loading... | |
| 258 // U+1F513 OPEN LOCK (%F0%9F%94%93) | 258 // U+1F513 OPEN LOCK (%F0%9F%94%93) |
| 259 // | 259 // |
| 260 // However, some schemes such as data: and file: need to parse the exact | 260 // However, some schemes such as data: and file: need to parse the exact |
| 261 // binary data when loading the URL. For that reason, | 261 // binary data when loading the URL. For that reason, |
| 262 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters. | 262 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters. |
| 263 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be | 263 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be |
| 264 // displayed in the UI. | 264 // displayed in the UI. |
| 265 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) { | 265 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) { |
| 266 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { | 266 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { |
| 267 // Keep Arabic Language Mark escaped. | 267 // Keep Arabic Language Mark escaped. |
| 268 result.append(escaped_text, i, 6); | 268 escaped_text.substr(i, 6).AppendToString(&result); |
| 269 i += 5; | 269 i += 5; |
| 270 continue; | 270 continue; |
| 271 } | 271 } |
| 272 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { | 272 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { |
| 273 // Keep BiDi control char escaped. | 273 // Keep BiDi control char escaped. |
| 274 result.append(escaped_text, i, 9); | 274 escaped_text.substr(i, 9).AppendToString(&result); |
| 275 i += 8; | 275 i += 8; |
| 276 continue; | 276 continue; |
| 277 } | 277 } |
| 278 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) { | 278 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) { |
| 279 // Keep banned char escaped. | 279 // Keep banned char escaped. |
| 280 result.append(escaped_text, i, 12); | 280 escaped_text.substr(i, 12).AppendToString(&result); |
| 281 i += 11; | 281 i += 11; |
| 282 continue; | 282 continue; |
| 283 } | 283 } |
| 284 } | 284 } |
| 285 | 285 |
| 286 if (first_byte >= 0x80 || // Unescape all high-bit characters. | 286 if (first_byte >= 0x80 || // Unescape all high-bit characters. |
| 287 // For 7-bit characters, the lookup table tells us all valid chars. | 287 // For 7-bit characters, the lookup table tells us all valid chars. |
| 288 (kUrlUnescape[first_byte] || | 288 (kUrlUnescape[first_byte] || |
| 289 // ...and we allow some additional unescaping when flags are set. | 289 // ...and we allow some additional unescaping when flags are set. |
| 290 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || | 290 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 338 while (*p) | 338 while (*p) |
| 339 output->push_back(*p++); | 339 output->push_back(*p++); |
| 340 break; | 340 break; |
| 341 } | 341 } |
| 342 } | 342 } |
| 343 if (k == arraysize(kCharsToEscape)) | 343 if (k == arraysize(kCharsToEscape)) |
| 344 output->push_back(c); | 344 output->push_back(c); |
| 345 } | 345 } |
| 346 | 346 |
| 347 template <class str> | 347 template <class str> |
| 348 str EscapeForHTMLImpl(const str& input) { | 348 StringTypeForStringPiece<str> EscapeForHTMLImpl(str input) { |
| 349 str result; | 349 StringTypeForStringPiece<str> result; |
| 350 result.reserve(input.size()); // Optimize for no escaping. | 350 result.reserve(input.size()); // Optimize for no escaping. |
| 351 | 351 |
| 352 for (typename str::const_iterator i = input.begin(); i != input.end(); ++i) | 352 for (auto c : input) { |
| 353 AppendEscapedCharForHTMLImpl(*i, &result); | 353 AppendEscapedCharForHTMLImpl(c, &result); |
| 354 } | |
| 354 | 355 |
| 355 return result; | 356 return result; |
| 356 } | 357 } |
| 357 | 358 |
| 358 // Everything except alphanumerics and !'()*-._~ | 359 // Everything except alphanumerics and !'()*-._~ |
| 359 // See RFC 2396 for the list of reserved characters. | 360 // See RFC 2396 for the list of reserved characters. |
| 360 static const Charmap kQueryCharmap = {{ | 361 static const Charmap kQueryCharmap = {{ |
| 361 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, | 362 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, |
| 362 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL | 363 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL |
| 363 }}; | 364 }}; |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 390 | 391 |
| 391 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and | 392 // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and |
| 392 // !'()*-._~#[] | 393 // !'()*-._~#[] |
| 393 static const Charmap kExternalHandlerCharmap = {{ | 394 static const Charmap kExternalHandlerCharmap = {{ |
| 394 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L, | 395 0xffffffffL, 0x50000025L, 0x50000000L, 0xb8000001L, |
| 395 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL | 396 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL |
| 396 }}; | 397 }}; |
| 397 | 398 |
| 398 } // namespace | 399 } // namespace |
| 399 | 400 |
| 400 std::string EscapeQueryParamValue(const std::string& text, bool use_plus) { | 401 std::string EscapeQueryParamValue(base::StringPiece text, bool use_plus) { |
| 401 return Escape(text, kQueryCharmap, use_plus); | 402 return Escape(text, kQueryCharmap, use_plus); |
| 402 } | 403 } |
| 403 | 404 |
| 404 std::string EscapePath(const std::string& path) { | 405 std::string EscapePath(base::StringPiece path) { |
| 405 return Escape(path, kPathCharmap, false); | 406 return Escape(path, kPathCharmap, false); |
| 406 } | 407 } |
| 407 | 408 |
| 408 #if defined(OS_MACOSX) | 409 #if defined(OS_MACOSX) |
| 409 std::string EscapeNSURLPrecursor(const std::string& precursor) { | 410 std::string EscapeNSURLPrecursor(base::StringPiece precursor) { |
| 410 return Escape(precursor, kNSURLCharmap, false, true); | 411 return Escape(precursor, kNSURLCharmap, false, true); |
| 411 } | 412 } |
| 412 #endif // defined(OS_MACOSX) | 413 #endif // defined(OS_MACOSX) |
| 413 | 414 |
| 414 std::string EscapeUrlEncodedData(const std::string& path, bool use_plus) { | 415 std::string EscapeUrlEncodedData(base::StringPiece path, bool use_plus) { |
| 415 return Escape(path, kUrlEscape, use_plus); | 416 return Escape(path, kUrlEscape, use_plus); |
| 416 } | 417 } |
| 417 | 418 |
| 418 std::string EscapeNonASCII(const std::string& input) { | 419 std::string EscapeNonASCII(base::StringPiece input) { |
| 419 return Escape(input, kNonASCIICharmap, false); | 420 return Escape(input, kNonASCIICharmap, false); |
| 420 } | 421 } |
| 421 | 422 |
| 422 std::string EscapeExternalHandlerValue(const std::string& text) { | 423 std::string EscapeExternalHandlerValue(base::StringPiece text) { |
| 423 return Escape(text, kExternalHandlerCharmap, false, true); | 424 return Escape(text, kExternalHandlerCharmap, false, true); |
| 424 } | 425 } |
| 425 | 426 |
| 426 void AppendEscapedCharForHTML(char c, std::string* output) { | 427 void AppendEscapedCharForHTML(char c, std::string* output) { |
| 427 AppendEscapedCharForHTMLImpl(c, output); | 428 AppendEscapedCharForHTMLImpl(c, output); |
| 428 } | 429 } |
| 429 | 430 |
| 430 std::string EscapeForHTML(const std::string& input) { | 431 std::string EscapeForHTML(base::StringPiece input) { |
| 431 return EscapeForHTMLImpl(input); | 432 return EscapeForHTMLImpl(input); |
| 432 } | 433 } |
| 433 | 434 |
| 434 base::string16 EscapeForHTML(const base::string16& input) { | 435 base::string16 EscapeForHTML(base::StringPiece16 input) { |
| 435 return EscapeForHTMLImpl(input); | 436 return EscapeForHTMLImpl(input); |
| 436 } | 437 } |
| 437 | 438 |
| 438 std::string UnescapeURLComponent(const std::string& escaped_text, | 439 std::string UnescapeURLComponent(base::StringPiece escaped_text, |
| 439 UnescapeRule::Type rules) { | 440 UnescapeRule::Type rules) { |
| 440 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); | 441 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); |
| 441 } | 442 } |
| 442 | 443 |
| 443 base::string16 UnescapeURLComponent(const base::string16& escaped_text, | 444 base::string16 UnescapeURLComponent(base::StringPiece16 escaped_text, |
| 444 UnescapeRule::Type rules) { | 445 UnescapeRule::Type rules) { |
| 445 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); | 446 return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, NULL); |
| 446 } | 447 } |
| 447 | 448 |
| 448 base::string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, | 449 base::string16 UnescapeAndDecodeUTF8URLComponent(base::StringPiece text, |
| 449 UnescapeRule::Type rules) { | 450 UnescapeRule::Type rules) { |
| 450 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL); | 451 return UnescapeAndDecodeUTF8URLComponentWithAdjustments(text, rules, NULL); |
| 451 } | 452 } |
| 452 | 453 |
| 453 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( | 454 base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( |
| 454 const std::string& text, | 455 base::StringPiece text, |
| 455 UnescapeRule::Type rules, | 456 UnescapeRule::Type rules, |
| 456 base::OffsetAdjuster::Adjustments* adjustments) { | 457 base::OffsetAdjuster::Adjustments* adjustments) { |
| 457 base::string16 result; | 458 base::string16 result; |
| 458 base::OffsetAdjuster::Adjustments unescape_adjustments; | 459 base::OffsetAdjuster::Adjustments unescape_adjustments; |
| 459 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl( | 460 std::string unescaped_url(UnescapeURLWithAdjustmentsImpl( |
| 460 text, rules, &unescape_adjustments)); | 461 text, rules, &unescape_adjustments)); |
| 461 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(), | 462 if (base::UTF8ToUTF16WithAdjustments(unescaped_url.data(), |
| 462 unescaped_url.length(), | 463 unescaped_url.length(), |
| 463 &result, adjustments)) { | 464 &result, adjustments)) { |
| 464 // Character set looks like it's valid. | 465 // Character set looks like it's valid. |
| 465 if (adjustments) { | 466 if (adjustments) { |
| 466 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments, | 467 base::OffsetAdjuster::MergeSequentialAdjustments(unescape_adjustments, |
| 467 adjustments); | 468 adjustments); |
| 468 } | 469 } |
| 469 return result; | 470 return result; |
| 470 } | 471 } |
| 471 // Character set is not valid. Return the escaped version. | 472 // Character set is not valid. Return the escaped version. |
| 472 return base::UTF8ToUTF16WithAdjustments(text, adjustments); | 473 return base::UTF8ToUTF16WithAdjustments(text, adjustments); |
| 473 } | 474 } |
| 474 | 475 |
| 475 base::string16 UnescapeForHTML(const base::string16& input) { | 476 base::string16 UnescapeForHTML(base::StringPiece16 input) { |
| 476 static const struct { | 477 static const struct { |
| 477 const char* ampersand_code; | 478 const char* ampersand_code; |
| 478 const char replacement; | 479 const char replacement; |
| 479 } kEscapeToChars[] = { | 480 } kEscapeToChars[] = { |
| 480 { "&lt;", '<' }, | 481 { "&lt;", '<' }, |
| 481 { "&gt;", '>' }, | 482 { "&gt;", '>' }, |
| 482 { "&amp;", '&' }, | 483 { "&amp;", '&' }, |
| 483 { "&quot;", '"' }, | 484 { "&quot;", '"' }, |
| 484 { "&#39;", '\''}, | 485 { "&#39;", '\''}, |
| 485 }; | 486 }; |
| 486 | 487 |
| 487 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos) | 488 if (input.find(base::ASCIIToUTF16("&")) == std::string::npos) |
| 488 return input; | 489 return input.as_string(); |
| 489 | 490 |
| 490 base::string16 ampersand_chars[arraysize(kEscapeToChars)]; | 491 base::string16 ampersand_chars[arraysize(kEscapeToChars)]; |
| 491 base::string16 text(input); | 492 base::string16 text = input.as_string(); |
| 492 for (base::string16::iterator iter = text.begin(); | 493 for (base::string16::iterator iter = text.begin(); |
| 493 iter != text.end(); ++iter) { | 494 iter != text.end(); ++iter) { |
| 494 if (*iter == '&') { | 495 if (*iter == '&') { |
| 495 // Potential ampersand encode char. | 496 // Potential ampersand encode char. |
| 496 size_t index = iter - text.begin(); | 497 size_t index = iter - text.begin(); |
| 497 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) { | 498 for (size_t i = 0; i < arraysize(kEscapeToChars); i++) { |
| 498 if (ampersand_chars[i].empty()) { | 499 if (ampersand_chars[i].empty()) { |
| 499 ampersand_chars[i] = | 500 ampersand_chars[i] = |
| 500 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code); | 501 base::ASCIIToUTF16(kEscapeToChars[i].ampersand_code); |
| 501 } | 502 } |
| 502 if (text.find(ampersand_chars[i], index) == index) { | 503 if (text.find(ampersand_chars[i], index) == index) { |
| 503 text.replace(iter, iter + ampersand_chars[i].length(), | 504 text.replace(iter, iter + ampersand_chars[i].length(), |
| 504 1, kEscapeToChars[i].replacement); | 505 1, kEscapeToChars[i].replacement); |
| 505 break; | 506 break; |
| 506 } | 507 } |
| 507 } | 508 } |
| 508 } | 509 } |
| 509 } | 510 } |
| 510 return text; | 511 return text; |
| 511 } | 512 } |
| 512 | 513 |
| 513 } // namespace net | 514 } // namespace net |
| OLD | NEW |