Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/http/http_content_disposition.h" | 5 #include "net/http/http_content_disposition.h" |
| 6 | 6 |
| 7 #include "base/base64.h" | 7 #include "base/base64.h" |
| 8 #include "base/i18n/icu_string_conversions.h" | |
| 9 #include "base/logging.h" | 8 #include "base/logging.h" |
| 10 #include "base/strings/string_tokenizer.h" | 9 #include "base/strings/string_tokenizer.h" |
| 11 #include "base/strings/string_util.h" | 10 #include "base/strings/string_util.h" |
| 12 #include "base/strings/sys_string_conversions.h" | 11 #include "base/strings/sys_string_conversions.h" |
| 13 #include "base/strings/utf_string_conversions.h" | 12 #include "base/strings/utf_string_conversions.h" |
| | 13 #include "net/base/net_string_util.h" |
| 14 #include "net/base/net_util.h" | 14 #include "net/base/net_util.h" |
| 15 #include "net/http/http_util.h" | 15 #include "net/http/http_util.h" |
| 16 #include "third_party/icu/source/common/unicode/ucnv.h" | |
| 17 | 16 |
| 18 namespace { | 17 namespace { |
| 19 | 18 |
| 20 enum RFC2047EncodingType { | 19 enum RFC2047EncodingType { |
| 21 Q_ENCODING, | 20 Q_ENCODING, |
| 22 B_ENCODING | 21 B_ENCODING |
| 23 }; | 22 }; |
| 24 | 23 |
| 25 // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to | 24 // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to |
| 26 // decoding a quoted-printable string. Returns true if the input was valid. | 25 // decoding a quoted-printable string. Returns true if the input was valid. |
| (...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 58 } | 57 } |
| 59 | 58 |
| 60 // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding | 59 // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding |
| 61 // type is specified in |enc_type|. | 60 // type is specified in |enc_type|. |
| 62 bool DecodeBQEncoding(const std::string& part, | 61 bool DecodeBQEncoding(const std::string& part, |
| 63 RFC2047EncodingType enc_type, | 62 RFC2047EncodingType enc_type, |
| 64 const std::string& charset, | 63 const std::string& charset, |
| 65 std::string* output) { | 64 std::string* output) { |
| 66 std::string decoded; | 65 std::string decoded; |
| 67 if (!((enc_type == B_ENCODING) ? | 66 if (!((enc_type == B_ENCODING) ? |
| 68 base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) | 67 base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) { |
| 69 return false; | 68 return false; |
| 70 | |
| 71 if (decoded.empty()) { | |
| Review comments on this line: **mef** (2014/04/28 19:19:51): why is this not needed? **mmenke** (2014/04/28 19:29:18): An empty string should just end up being converted [comment truncated] **mmenke** (2014/04/28 19:31:38): Oh...I guess there's the invalid charset case, wit [comment truncated] | |
| 72 output->clear(); | |
| 73 return true; | |
| 74 } | 69 } |
| 75 | 70 |
| 76 UErrorCode err = U_ZERO_ERROR; | 71 return net::ConvertToUtf8(decoded, charset.c_str(), output); |
| 77 UConverter* converter(ucnv_open(charset.c_str(), &err)); | |
| 78 if (U_FAILURE(err)) | |
| 79 return false; | |
| 80 | |
| 81 // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8. | |
| 82 // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes | |
| 83 // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a | |
| 84 // trailing '\0'. | |
| 85 size_t output_length = decoded.length() * 3 + 1; | |
| 86 char* buf = WriteInto(output, output_length); | |
| 87 output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length, | |
| 88 decoded.data(), decoded.length(), &err); | |
| 89 ucnv_close(converter); | |
| 90 if (U_FAILURE(err)) | |
| 91 return false; | |
| 92 output->resize(output_length); | |
| 93 return true; | |
| 94 } | 72 } |
| 95 | 73 |
| 96 bool DecodeWord(const std::string& encoded_word, | 74 bool DecodeWord(const std::string& encoded_word, |
| 97 const std::string& referrer_charset, | 75 const std::string& referrer_charset, |
| 98 bool* is_rfc2047, | 76 bool* is_rfc2047, |
| 99 std::string* output, | 77 std::string* output, |
| 100 int* parse_result_flags) { | 78 int* parse_result_flags) { |
| 101 *is_rfc2047 = false; | 79 *is_rfc2047 = false; |
| 102 output->clear(); | 80 output->clear(); |
| 103 if (encoded_word.empty()) | 81 if (encoded_word.empty()) |
| 104 return true; | 82 return true; |
| 105 | 83 |
| 106 if (!IsStringASCII(encoded_word)) { | 84 if (!IsStringASCII(encoded_word)) { |
| 107 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 85 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
| 108 if (IsStringUTF8(encoded_word)) { | 86 if (IsStringUTF8(encoded_word)) { |
| 109 *output = encoded_word; | 87 *output = encoded_word; |
| 110 } else { | 88 } else { |
| 111 base::string16 utf16_output; | 89 base::string16 utf16_output; |
| 112 if (!referrer_charset.empty() && | 90 if (!referrer_charset.empty() && |
| 113 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), | 91 net::ConvertToUTF16(encoded_word, referrer_charset.c_str(), |
| 114 base::OnStringConversionError::FAIL, | 92 &utf16_output)) { |
| 115 &utf16_output)) { | |
| 116 *output = base::UTF16ToUTF8(utf16_output); | 93 *output = base::UTF16ToUTF8(utf16_output); |
| 117 } else { | 94 } else { |
| 118 *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 95 *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
| 119 } | 96 } |
| 120 } | 97 } |
| 121 | 98 |
| 122 *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; | 99 *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; |
| 123 return true; | 100 return true; |
| 124 } | 101 } |
| 125 | 102 |
| (...skipping 210 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 336 | 313 |
| 337 // RFC 5987 value should be ASCII-only. | 314 // RFC 5987 value should be ASCII-only. |
| 338 if (!IsStringASCII(value)) { | 315 if (!IsStringASCII(value)) { |
| 339 decoded->clear(); | 316 decoded->clear(); |
| 340 return true; | 317 return true; |
| 341 } | 318 } |
| 342 | 319 |
| 343 std::string unescaped = net::UnescapeURLComponent( | 320 std::string unescaped = net::UnescapeURLComponent( |
| 344 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); | 321 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); |
| 345 | 322 |
| 346 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); | 323 return net::ConvertToUtf8AndNormalize(unescaped, charset.c_str(), decoded); |
| 347 } | 324 } |
| 348 | 325 |
| 349 } // namespace | 326 } // namespace |
| 350 | 327 |
| 351 namespace net { | 328 namespace net { |
| 352 | 329 |
| 353 HttpContentDisposition::HttpContentDisposition( | 330 HttpContentDisposition::HttpContentDisposition( |
| 354 const std::string& header, const std::string& referrer_charset) | 331 const std::string& header, const std::string& referrer_charset) |
| 355 : type_(INLINE), | 332 : type_(INLINE), |
| 356 parse_result_flags_(INVALID) { | 333 parse_result_flags_(INVALID) { |
| (...skipping 90 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 447 | 424 |
| 448 if (!ext_filename.empty()) | 425 if (!ext_filename.empty()) |
| 449 filename_ = ext_filename; | 426 filename_ = ext_filename; |
| 450 else if (!filename.empty()) | 427 else if (!filename.empty()) |
| 451 filename_ = filename; | 428 filename_ = filename; |
| 452 else | 429 else |
| 453 filename_ = name; | 430 filename_ = name; |
| 454 } | 431 } |
| 455 | 432 |
| 456 } // namespace net | 433 } // namespace net |
| OLD | NEW |