OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/http/http_content_disposition.h" | 5 #include "net/http/http_content_disposition.h" |
6 | 6 |
7 #include "base/base64.h" | 7 #include "base/base64.h" |
8 #include "base/i18n/icu_string_conversions.h" | |
9 #include "base/logging.h" | 8 #include "base/logging.h" |
10 #include "base/strings/string_tokenizer.h" | 9 #include "base/strings/string_tokenizer.h" |
11 #include "base/strings/string_util.h" | 10 #include "base/strings/string_util.h" |
12 #include "base/strings/sys_string_conversions.h" | 11 #include "base/strings/sys_string_conversions.h" |
13 #include "base/strings/utf_string_conversions.h" | 12 #include "base/strings/utf_string_conversions.h" |
| 13 #include "net/base/net_string_util.h" |
14 #include "net/base/net_util.h" | 14 #include "net/base/net_util.h" |
15 #include "net/http/http_util.h" | 15 #include "net/http/http_util.h" |
16 #include "third_party/icu/source/common/unicode/ucnv.h" | |
17 | 16 |
18 namespace { | 17 namespace { |
19 | 18 |
20 enum RFC2047EncodingType { | 19 enum RFC2047EncodingType { |
21 Q_ENCODING, | 20 Q_ENCODING, |
22 B_ENCODING | 21 B_ENCODING |
23 }; | 22 }; |
24 | 23 |
25 // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to | 24 // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to |
26 // decoding a quoted-printable string. Returns true if the input was valid. | 25 // decoding a quoted-printable string. Returns true if the input was valid. |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
58 } | 57 } |
59 | 58 |
60 // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding | 59 // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding |
61 // type is specified in |enc_type|. | 60 // type is specified in |enc_type|. |
62 bool DecodeBQEncoding(const std::string& part, | 61 bool DecodeBQEncoding(const std::string& part, |
63 RFC2047EncodingType enc_type, | 62 RFC2047EncodingType enc_type, |
64 const std::string& charset, | 63 const std::string& charset, |
65 std::string* output) { | 64 std::string* output) { |
66 std::string decoded; | 65 std::string decoded; |
67 if (!((enc_type == B_ENCODING) ? | 66 if (!((enc_type == B_ENCODING) ? |
68 base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) | 67 base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) { |
69 return false; | 68 return false; |
70 | |
71 if (decoded.empty()) { | |
mef
2014/04/28 19:19:51
Why is this not needed?
mmenke
2014/04/28 19:29:18
An empty string should just end up being converted [...]
mmenke
2014/04/28 19:31:38
Oh... I guess there's the invalid charset case, with [...]
| |
72 output->clear(); | |
73 return true; | |
74 } | 69 } |
75 | 70 |
76 UErrorCode err = U_ZERO_ERROR; | 71 return net::ConvertToUtf8(decoded, charset.c_str(), output); |
77 UConverter* converter(ucnv_open(charset.c_str(), &err)); | |
78 if (U_FAILURE(err)) | |
79 return false; | |
80 | |
81 // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8. | |
82 // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes | |
83 // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a | |
84 // trailing '\0'. | |
85 size_t output_length = decoded.length() * 3 + 1; | |
86 char* buf = WriteInto(output, output_length); | |
87 output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length, | |
88 decoded.data(), decoded.length(), &err); | |
89 ucnv_close(converter); | |
90 if (U_FAILURE(err)) | |
91 return false; | |
92 output->resize(output_length); | |
93 return true; | |
94 } | 72 } |
95 | 73 |
96 bool DecodeWord(const std::string& encoded_word, | 74 bool DecodeWord(const std::string& encoded_word, |
97 const std::string& referrer_charset, | 75 const std::string& referrer_charset, |
98 bool* is_rfc2047, | 76 bool* is_rfc2047, |
99 std::string* output, | 77 std::string* output, |
100 int* parse_result_flags) { | 78 int* parse_result_flags) { |
101 *is_rfc2047 = false; | 79 *is_rfc2047 = false; |
102 output->clear(); | 80 output->clear(); |
103 if (encoded_word.empty()) | 81 if (encoded_word.empty()) |
104 return true; | 82 return true; |
105 | 83 |
106 if (!IsStringASCII(encoded_word)) { | 84 if (!IsStringASCII(encoded_word)) { |
107 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 85 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
108 if (IsStringUTF8(encoded_word)) { | 86 if (IsStringUTF8(encoded_word)) { |
109 *output = encoded_word; | 87 *output = encoded_word; |
110 } else { | 88 } else { |
111 base::string16 utf16_output; | 89 base::string16 utf16_output; |
112 if (!referrer_charset.empty() && | 90 if (!referrer_charset.empty() && |
113 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), | 91 net::ConvertToUTF16(encoded_word, referrer_charset.c_str(), |
114 base::OnStringConversionError::FAIL, | 92 &utf16_output)) { |
115 &utf16_output)) { | |
116 *output = base::UTF16ToUTF8(utf16_output); | 93 *output = base::UTF16ToUTF8(utf16_output); |
117 } else { | 94 } else { |
118 *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 95 *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
119 } | 96 } |
120 } | 97 } |
121 | 98 |
122 *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; | 99 *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; |
123 return true; | 100 return true; |
124 } | 101 } |
125 | 102 |
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
336 | 313 |
337 // RFC 5987 value should be ASCII-only. | 314 // RFC 5987 value should be ASCII-only. |
338 if (!IsStringASCII(value)) { | 315 if (!IsStringASCII(value)) { |
339 decoded->clear(); | 316 decoded->clear(); |
340 return true; | 317 return true; |
341 } | 318 } |
342 | 319 |
343 std::string unescaped = net::UnescapeURLComponent( | 320 std::string unescaped = net::UnescapeURLComponent( |
344 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); | 321 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); |
345 | 322 |
346 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); | 323 return net::ConvertToUtf8AndNormalize(unescaped, charset.c_str(), decoded); |
347 } | 324 } |
348 | 325 |
349 } // namespace | 326 } // namespace |
350 | 327 |
351 namespace net { | 328 namespace net { |
352 | 329 |
353 HttpContentDisposition::HttpContentDisposition( | 330 HttpContentDisposition::HttpContentDisposition( |
354 const std::string& header, const std::string& referrer_charset) | 331 const std::string& header, const std::string& referrer_charset) |
355 : type_(INLINE), | 332 : type_(INLINE), |
356 parse_result_flags_(INVALID) { | 333 parse_result_flags_(INVALID) { |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
447 | 424 |
448 if (!ext_filename.empty()) | 425 if (!ext_filename.empty()) |
449 filename_ = ext_filename; | 426 filename_ = ext_filename; |
450 else if (!filename.empty()) | 427 else if (!filename.empty()) |
451 filename_ = filename; | 428 filename_ = filename; |
452 else | 429 else |
453 filename_ = name; | 430 filename_ = name; |
454 } | 431 } |
455 | 432 |
456 } // namespace net | 433 } // namespace net |
OLD | NEW |