Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/http/http_content_disposition.h" | 5 #include "net/http/http_content_disposition.h" |
| 6 | 6 |
| 7 #include "base/base64.h" | 7 #include "base/base64.h" |
| 8 #include "base/i18n/icu_string_conversions.h" | |
| 9 #include "base/logging.h" | 8 #include "base/logging.h" |
| 10 #include "base/strings/string_tokenizer.h" | 9 #include "base/strings/string_tokenizer.h" |
| 11 #include "base/strings/string_util.h" | 10 #include "base/strings/string_util.h" |
| 12 #include "base/strings/sys_string_conversions.h" | 11 #include "base/strings/sys_string_conversions.h" |
| 13 #include "base/strings/utf_string_conversions.h" | 12 #include "base/strings/utf_string_conversions.h" |
| | 13 #include "net/base/net_string_util.h" |
| 14 #include "net/base/net_util.h" | 14 #include "net/base/net_util.h" |
| 15 #include "net/http/http_util.h" | 15 #include "net/http/http_util.h" |
| 16 #include "third_party/icu/source/common/unicode/ucnv.h" | |
| 17 | 16 |
| 18 namespace { | 17 namespace { |
| 19 | 18 |
| 20 enum RFC2047EncodingType { | 19 enum RFC2047EncodingType { |
| 21 Q_ENCODING, | 20 Q_ENCODING, |
| 22 B_ENCODING | 21 B_ENCODING |
| 23 }; | 22 }; |
| 24 | 23 |
| 25 // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to | 24 // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to |
| 26 // decoding a quoted-printable string. Returns true if the input was valid. | 25 // decoding a quoted-printable string. Returns true if the input was valid. |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 58 } | 57 } |
| 59 | 58 |
| 60 // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding | 59 // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding |
| 61 // type is specified in |enc_type|. | 60 // type is specified in |enc_type|. |
| 62 bool DecodeBQEncoding(const std::string& part, | 61 bool DecodeBQEncoding(const std::string& part, |
| 63 RFC2047EncodingType enc_type, | 62 RFC2047EncodingType enc_type, |
| 64 const std::string& charset, | 63 const std::string& charset, |
| 65 std::string* output) { | 64 std::string* output) { |
| 66 std::string decoded; | 65 std::string decoded; |
| 67 if (!((enc_type == B_ENCODING) ? | 66 if (!((enc_type == B_ENCODING) ? |
| 68 base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) | 67 base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) { |
| 69 return false; | 68 return false; |
| | 69 } |
|
mef
2014/04/28 20:27:42
Is it better with braces?
mmenke
2014/04/28 20:45:03
My understanding (from reading a long thread on ch… [comment preview truncated]
| |
| 70 | 70 |
| 71 if (decoded.empty()) { | 71 if (decoded.empty()) { |
| 72 output->clear(); | 72 output->clear(); |
| 73 return true; | 73 return true; |
| 74 } | 74 } |
| 75 | 75 |
| 76 UErrorCode err = U_ZERO_ERROR; | 76 return net::ConvertToUtf8(decoded, charset.c_str(), output); |
| 77 UConverter* converter(ucnv_open(charset.c_str(), &err)); | |
| 78 if (U_FAILURE(err)) | |
| 79 return false; | |
| 80 | |
| 81 // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8. | |
| 82 // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes | |
| 83 // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a | |
| 84 // trailing '\0'. | |
| 85 size_t output_length = decoded.length() * 3 + 1; | |
| 86 char* buf = WriteInto(output, output_length); | |
| 87 output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length, | |
| 88 decoded.data(), decoded.length(), &err); | |
| 89 ucnv_close(converter); | |
| 90 if (U_FAILURE(err)) | |
| 91 return false; | |
| 92 output->resize(output_length); | |
| 93 return true; | |
| 94 } | 77 } |
| 95 | 78 |
| 96 bool DecodeWord(const std::string& encoded_word, | 79 bool DecodeWord(const std::string& encoded_word, |
| 97 const std::string& referrer_charset, | 80 const std::string& referrer_charset, |
| 98 bool* is_rfc2047, | 81 bool* is_rfc2047, |
| 99 std::string* output, | 82 std::string* output, |
| 100 int* parse_result_flags) { | 83 int* parse_result_flags) { |
| 101 *is_rfc2047 = false; | 84 *is_rfc2047 = false; |
| 102 output->clear(); | 85 output->clear(); |
| 103 if (encoded_word.empty()) | 86 if (encoded_word.empty()) |
| 104 return true; | 87 return true; |
| 105 | 88 |
| 106 if (!IsStringASCII(encoded_word)) { | 89 if (!IsStringASCII(encoded_word)) { |
| 107 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 90 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
| 108 if (IsStringUTF8(encoded_word)) { | 91 if (IsStringUTF8(encoded_word)) { |
| 109 *output = encoded_word; | 92 *output = encoded_word; |
| 110 } else { | 93 } else { |
| 111 base::string16 utf16_output; | 94 base::string16 utf16_output; |
| 112 if (!referrer_charset.empty() && | 95 if (!referrer_charset.empty() && |
| 113 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), | 96 net::ConvertToUTF16(encoded_word, referrer_charset.c_str(), |
| 114 base::OnStringConversionError::FAIL, | 97 &utf16_output)) { |
| 115 &utf16_output)) { | |
| 116 *output = base::UTF16ToUTF8(utf16_output); | 98 *output = base::UTF16ToUTF8(utf16_output); |
| 117 } else { | 99 } else { |
| 118 *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 100 *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
| 119 } | 101 } |
| 120 } | 102 } |
| 121 | 103 |
| 122 *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; | 104 *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; |
| 123 return true; | 105 return true; |
| 124 } | 106 } |
| 125 | 107 |
| (...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 336 | 318 |
| 337 // RFC 5987 value should be ASCII-only. | 319 // RFC 5987 value should be ASCII-only. |
| 338 if (!IsStringASCII(value)) { | 320 if (!IsStringASCII(value)) { |
| 339 decoded->clear(); | 321 decoded->clear(); |
| 340 return true; | 322 return true; |
| 341 } | 323 } |
| 342 | 324 |
| 343 std::string unescaped = net::UnescapeURLComponent( | 325 std::string unescaped = net::UnescapeURLComponent( |
| 344 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); | 326 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); |
| 345 | 327 |
| 346 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); | 328 return net::ConvertToUtf8AndNormalize(unescaped, charset.c_str(), decoded); |
| 347 } | 329 } |
| 348 | 330 |
| 349 } // namespace | 331 } // namespace |
| 350 | 332 |
| 351 namespace net { | 333 namespace net { |
| 352 | 334 |
| 353 HttpContentDisposition::HttpContentDisposition( | 335 HttpContentDisposition::HttpContentDisposition( |
| 354 const std::string& header, const std::string& referrer_charset) | 336 const std::string& header, const std::string& referrer_charset) |
| 355 : type_(INLINE), | 337 : type_(INLINE), |
| 356 parse_result_flags_(INVALID) { | 338 parse_result_flags_(INVALID) { |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 447 | 429 |
| 448 if (!ext_filename.empty()) | 430 if (!ext_filename.empty()) |
| 449 filename_ = ext_filename; | 431 filename_ = ext_filename; |
| 450 else if (!filename.empty()) | 432 else if (!filename.empty()) |
| 451 filename_ = filename; | 433 filename_ = filename; |
| 452 else | 434 else |
| 453 filename_ = name; | 435 filename_ = name; |
| 454 } | 436 } |
| 455 | 437 |
| 456 } // namespace net | 438 } // namespace net |
| OLD | NEW |