| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/http/http_content_disposition.h" | 5 #include "net/http/http_content_disposition.h" |
| 6 | 6 |
| 7 #include "base/base64.h" | 7 #include "base/base64.h" |
| 8 #include "base/logging.h" | 8 #include "base/logging.h" |
| 9 #include "base/strings/string_tokenizer.h" | 9 #include "base/strings/string_tokenizer.h" |
| 10 #include "base/strings/string_util.h" | 10 #include "base/strings/string_util.h" |
| 11 #include "base/strings/sys_string_conversions.h" | 11 #include "base/strings/sys_string_conversions.h" |
| 12 #include "base/strings/utf_string_conversions.h" | 12 #include "base/strings/utf_string_conversions.h" |
| 13 #include "net/base/net_string_util.h" | 13 #include "net/base/net_string_util.h" |
| 14 #include "net/base/net_util.h" | 14 #include "net/base/net_util.h" |
| 15 #include "net/http/http_util.h" | 15 #include "net/http/http_util.h" |
| 16 | 16 |
| 17 namespace { | 17 namespace { |
| 18 | 18 |
| 19 enum RFC2047EncodingType { | 19 enum RFC2047EncodingType { Q_ENCODING, B_ENCODING }; |
| 20 Q_ENCODING, | |
| 21 B_ENCODING | |
| 22 }; | |
| 23 | 20 |
| 24 // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to | 21 // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to |
| 25 // decoding a quoted-printable string. Returns true if the input was valid. | 22 // decoding a quoted-printable string. Returns true if the input was valid. |
| 26 bool DecodeQEncoding(const std::string& input, std::string* output) { | 23 bool DecodeQEncoding(const std::string& input, std::string* output) { |
| 27 std::string temp; | 24 std::string temp; |
| 28 temp.reserve(input.size()); | 25 temp.reserve(input.size()); |
| 29 for (std::string::const_iterator it = input.begin(); it != input.end(); | 26 for (std::string::const_iterator it = input.begin(); it != input.end(); |
| 30 ++it) { | 27 ++it) { |
| 31 if (*it == '_') { | 28 if (*it == '_') { |
| 32 temp.push_back(' '); | 29 temp.push_back(' '); |
| 33 } else if (*it == '=') { | 30 } else if (*it == '=') { |
| 34 if ((input.end() - it < 3) || | 31 if ((input.end() - it < 3) || |
| 35 !IsHexDigit(static_cast<unsigned char>(*(it + 1))) || | 32 !IsHexDigit(static_cast<unsigned char>(*(it + 1))) || |
| 36 !IsHexDigit(static_cast<unsigned char>(*(it + 2)))) | 33 !IsHexDigit(static_cast<unsigned char>(*(it + 2)))) |
| 37 return false; | 34 return false; |
| 38 unsigned char ch = HexDigitToInt(*(it + 1)) * 16 + | 35 unsigned char ch = |
| 39 HexDigitToInt(*(it + 2)); | 36 HexDigitToInt(*(it + 1)) * 16 + HexDigitToInt(*(it + 2)); |
| 40 temp.push_back(static_cast<char>(ch)); | 37 temp.push_back(static_cast<char>(ch)); |
| 41 ++it; | 38 ++it; |
| 42 ++it; | 39 ++it; |
| 43 } else if (0x20 < *it && *it < 0x7F && *it != '?') { | 40 } else if (0x20 < *it && *it < 0x7F && *it != '?') { |
| 44 // In a Q-encoded word, only printable ASCII characters | 41 // In a Q-encoded word, only printable ASCII characters |
| 45 // represent themselves. Besides, space, '=', '_' and '?' are | 42 // represent themselves. Besides, space, '=', '_' and '?' are |
| 46 // not allowed, but they're already filtered out. | 43 // not allowed, but they're already filtered out. |
| 47 DCHECK_NE('=', *it); | 44 DCHECK_NE('=', *it); |
| 48 DCHECK_NE('?', *it); | 45 DCHECK_NE('?', *it); |
| 49 DCHECK_NE('_', *it); | 46 DCHECK_NE('_', *it); |
| 50 temp.push_back(*it); | 47 temp.push_back(*it); |
| 51 } else { | 48 } else { |
| 52 return false; | 49 return false; |
| 53 } | 50 } |
| 54 } | 51 } |
| 55 output->swap(temp); | 52 output->swap(temp); |
| 56 return true; | 53 return true; |
| 57 } | 54 } |
| 58 | 55 |
| 59 // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding | 56 // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding |
| 60 // type is specified in |enc_type|. | 57 // type is specified in |enc_type|. |
| 61 bool DecodeBQEncoding(const std::string& part, | 58 bool DecodeBQEncoding(const std::string& part, |
| 62 RFC2047EncodingType enc_type, | 59 RFC2047EncodingType enc_type, |
| 63 const std::string& charset, | 60 const std::string& charset, |
| 64 std::string* output) { | 61 std::string* output) { |
| 65 std::string decoded; | 62 std::string decoded; |
| 66 if (!((enc_type == B_ENCODING) ? | 63 if (!((enc_type == B_ENCODING) ? base::Base64Decode(part, &decoded) |
| 67 base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) { | 64 : DecodeQEncoding(part, &decoded))) { |
| 68 return false; | 65 return false; |
| 69 } | 66 } |
| 70 | 67 |
| 71 if (decoded.empty()) { | 68 if (decoded.empty()) { |
| 72 output->clear(); | 69 output->clear(); |
| 73 return true; | 70 return true; |
| 74 } | 71 } |
| 75 | 72 |
| 76 return net::ConvertToUtf8(decoded, charset.c_str(), output); | 73 return net::ConvertToUtf8(decoded, charset.c_str(), output); |
| 77 } | 74 } |
| 78 | 75 |
| 79 bool DecodeWord(const std::string& encoded_word, | 76 bool DecodeWord(const std::string& encoded_word, |
| 80 const std::string& referrer_charset, | 77 const std::string& referrer_charset, |
| 81 bool* is_rfc2047, | 78 bool* is_rfc2047, |
| 82 std::string* output, | 79 std::string* output, |
| 83 int* parse_result_flags) { | 80 int* parse_result_flags) { |
| 84 *is_rfc2047 = false; | 81 *is_rfc2047 = false; |
| 85 output->clear(); | 82 output->clear(); |
| 86 if (encoded_word.empty()) | 83 if (encoded_word.empty()) |
| 87 return true; | 84 return true; |
| 88 | 85 |
| 89 if (!IsStringASCII(encoded_word)) { | 86 if (!IsStringASCII(encoded_word)) { |
| 90 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 87 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
| 91 if (IsStringUTF8(encoded_word)) { | 88 if (IsStringUTF8(encoded_word)) { |
| 92 *output = encoded_word; | 89 *output = encoded_word; |
| 93 } else { | 90 } else { |
| 94 base::string16 utf16_output; | 91 base::string16 utf16_output; |
| 95 if (!referrer_charset.empty() && | 92 if (!referrer_charset.empty() && |
| 96 net::ConvertToUTF16(encoded_word, referrer_charset.c_str(), | 93 net::ConvertToUTF16( |
| 97 &utf16_output)) { | 94 encoded_word, referrer_charset.c_str(), &utf16_output)) { |
| 98 *output = base::UTF16ToUTF8(utf16_output); | 95 *output = base::UTF16ToUTF8(utf16_output); |
| 99 } else { | 96 } else { |
| 100 *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 97 *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
| 101 } | 98 } |
| 102 } | 99 } |
| 103 | 100 |
| 104 *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; | 101 *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; |
| 105 return true; | 102 return true; |
| 106 } | 103 } |
| 107 | 104 |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 179 // encoded_word ending prematurely with '?' or extra '?' | 176 // encoded_word ending prematurely with '?' or extra '?' |
| 180 *is_rfc2047 = false; | 177 *is_rfc2047 = false; |
| 181 return false; | 178 return false; |
| 182 } | 179 } |
| 183 | 180 |
| 184 // We're not handling 'especial' characters quoted with '\', but | 181 // We're not handling 'especial' characters quoted with '\', but |
| 185 // it should be Ok because we're not an email client but a | 182 // it should be Ok because we're not an email client but a |
| 186 // web browser. | 183 // web browser. |
| 187 | 184 |
| 188 // What IE6/7 does: %-escaped UTF-8. | 185 // What IE6/7 does: %-escaped UTF-8. |
| 189 decoded_word = net::UnescapeURLComponent(encoded_word, | 186 decoded_word = |
| 190 net::UnescapeRule::SPACES); | 187 net::UnescapeURLComponent(encoded_word, net::UnescapeRule::SPACES); |
| 191 if (decoded_word != encoded_word) | 188 if (decoded_word != encoded_word) |
| 192 *parse_result_flags |= | 189 *parse_result_flags |= |
| 193 net::HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS; | 190 net::HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS; |
| 194 if (IsStringUTF8(decoded_word)) { | 191 if (IsStringUTF8(decoded_word)) { |
| 195 output->swap(decoded_word); | 192 output->swap(decoded_word); |
| 196 return true; | 193 return true; |
| 197 // We can try either the OS default charset or 'origin charset' here, | 194 // We can try either the OS default charset or 'origin charset' here, |
| 198 // As far as I can tell, IE does not support it. However, I've seen | 195 // As far as I can tell, IE does not support it. However, I've seen |
| 199 // web servers emit %-escaped string in a legacy encoding (usually | 196 // web servers emit %-escaped string in a legacy encoding (usually |
| 200 // origin charset). | 197 // origin charset). |
| (...skipping 27 matching lines...) Expand all Loading... |
| 228 if (!is_previous_token_rfc2047) | 225 if (!is_previous_token_rfc2047) |
| 229 decoded_value.push_back(' '); | 226 decoded_value.push_back(' '); |
| 230 continue; | 227 continue; |
| 231 } | 228 } |
| 232 // We don't support a single multibyte character split into | 229 // We don't support a single multibyte character split into |
| 233 // adjacent encoded words. Some broken mail clients emit headers | 230 // adjacent encoded words. Some broken mail clients emit headers |
| 234 // with that problem, but most web servers usually encode a filename | 231 // with that problem, but most web servers usually encode a filename |
| 235 // in a single encoded-word. Firefox/Thunderbird do not support | 232 // in a single encoded-word. Firefox/Thunderbird do not support |
| 236 // it, either. | 233 // it, either. |
| 237 std::string decoded; | 234 std::string decoded; |
| 238 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, | 235 if (!DecodeWord(t.token(), |
| 239 &decoded, &current_parse_result_flags)) | 236 referrer_charset, |
| 237 &is_previous_token_rfc2047, |
| 238 &decoded, |
| 239 &current_parse_result_flags)) |
| 240 return false; | 240 return false; |
| 241 decoded_value.append(decoded); | 241 decoded_value.append(decoded); |
| 242 } | 242 } |
| 243 output->swap(decoded_value); | 243 output->swap(decoded_value); |
| 244 if (parse_result_flags && !output->empty()) | 244 if (parse_result_flags && !output->empty()) |
| 245 *parse_result_flags |= current_parse_result_flags; | 245 *parse_result_flags |= current_parse_result_flags; |
| 246 return true; | 246 return true; |
| 247 } | 247 } |
| 248 | 248 |
| 249 // Parses the charset and value-chars out of an ext-value string. | 249 // Parses the charset and value-chars out of an ext-value string. |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 321 decoded->clear(); | 321 decoded->clear(); |
| 322 return true; | 322 return true; |
| 323 } | 323 } |
| 324 | 324 |
| 325 std::string unescaped = net::UnescapeURLComponent( | 325 std::string unescaped = net::UnescapeURLComponent( |
| 326 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); | 326 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); |
| 327 | 327 |
| 328 return net::ConvertToUtf8AndNormalize(unescaped, charset.c_str(), decoded); | 328 return net::ConvertToUtf8AndNormalize(unescaped, charset.c_str(), decoded); |
| 329 } | 329 } |
| 330 | 330 |
| 331 } // namespace | 331 } // namespace |
| 332 | 332 |
| 333 namespace net { | 333 namespace net { |
| 334 | 334 |
| 335 HttpContentDisposition::HttpContentDisposition( | 335 HttpContentDisposition::HttpContentDisposition( |
| 336 const std::string& header, const std::string& referrer_charset) | 336 const std::string& header, |
| 337 : type_(INLINE), | 337 const std::string& referrer_charset) |
| 338 parse_result_flags_(INVALID) { | 338 : type_(INLINE), parse_result_flags_(INVALID) { |
| 339 Parse(header, referrer_charset); | 339 Parse(header, referrer_charset); |
| 340 } | 340 } |
| 341 | 341 |
| 342 HttpContentDisposition::~HttpContentDisposition() { | 342 HttpContentDisposition::~HttpContentDisposition() { |
| 343 } | 343 } |
| 344 | 344 |
| 345 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( | 345 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( |
| 346 std::string::const_iterator begin, std::string::const_iterator end) { | 346 std::string::const_iterator begin, |
| 347 std::string::const_iterator end) { |
| 347 DCHECK(type_ == INLINE); | 348 DCHECK(type_ == INLINE); |
| 348 std::string::const_iterator delimiter = std::find(begin, end, ';'); | 349 std::string::const_iterator delimiter = std::find(begin, end, ';'); |
| 349 | 350 |
| 350 std::string::const_iterator type_begin = begin; | 351 std::string::const_iterator type_begin = begin; |
| 351 std::string::const_iterator type_end = delimiter; | 352 std::string::const_iterator type_end = delimiter; |
| 352 HttpUtil::TrimLWS(&type_begin, &type_end); | 353 HttpUtil::TrimLWS(&type_begin, &type_end); |
| 353 | 354 |
| 354 // If the disposition-type isn't a valid token then the | 355 // If the disposition-type isn't a valid token then the |
| 355 // Content-Disposition header is malformed, and we treat the first bytes as | 356 // Content-Disposition header is malformed, and we treat the first bytes as |
| 356 // a parameter rather than a disposition-type. | 357 // a parameter rather than a disposition-type. |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 398 std::string::const_iterator pos = header.begin(); | 399 std::string::const_iterator pos = header.begin(); |
| 399 std::string::const_iterator end = header.end(); | 400 std::string::const_iterator end = header.end(); |
| 400 pos = ConsumeDispositionType(pos, end); | 401 pos = ConsumeDispositionType(pos, end); |
| 401 | 402 |
| 402 std::string name; | 403 std::string name; |
| 403 std::string filename; | 404 std::string filename; |
| 404 std::string ext_filename; | 405 std::string ext_filename; |
| 405 | 406 |
| 406 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); | 407 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); |
| 407 while (iter.GetNext()) { | 408 while (iter.GetNext()) { |
| 408 if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 409 if (filename.empty() && |
| 409 iter.name_end(), | 410 LowerCaseEqualsASCII(iter.name_begin(), iter.name_end(), "filename")) { |
| 410 "filename")) { | 411 DecodeFilenameValue( |
| 411 DecodeFilenameValue(iter.value(), referrer_charset, &filename, | 412 iter.value(), referrer_charset, &filename, &parse_result_flags_); |
| 412 &parse_result_flags_); | |
| 413 if (!filename.empty()) | 413 if (!filename.empty()) |
| 414 parse_result_flags_ |= HAS_FILENAME; | 414 parse_result_flags_ |= HAS_FILENAME; |
| 415 } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 415 } else if (name.empty() && |
| 416 iter.name_end(), | 416 LowerCaseEqualsASCII( |
| 417 "name")) { | 417 iter.name_begin(), iter.name_end(), "name")) { |
| 418 DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL); | 418 DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL); |
| 419 if (!name.empty()) | 419 if (!name.empty()) |
| 420 parse_result_flags_ |= HAS_NAME; | 420 parse_result_flags_ |= HAS_NAME; |
| 421 } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 421 } else if (ext_filename.empty() && |
| 422 iter.name_end(), | 422 LowerCaseEqualsASCII( |
| 423 "filename*")) { | 423 iter.name_begin(), iter.name_end(), "filename*")) { |
| 424 DecodeExtValue(iter.raw_value(), &ext_filename); | 424 DecodeExtValue(iter.raw_value(), &ext_filename); |
| 425 if (!ext_filename.empty()) | 425 if (!ext_filename.empty()) |
| 426 parse_result_flags_ |= HAS_EXT_FILENAME; | 426 parse_result_flags_ |= HAS_EXT_FILENAME; |
| 427 } | 427 } |
| 428 } | 428 } |
| 429 | 429 |
| 430 if (!ext_filename.empty()) | 430 if (!ext_filename.empty()) |
| 431 filename_ = ext_filename; | 431 filename_ = ext_filename; |
| 432 else if (!filename.empty()) | 432 else if (!filename.empty()) |
| 433 filename_ = filename; | 433 filename_ = filename; |
| 434 else | 434 else |
| 435 filename_ = name; | 435 filename_ = name; |
| 436 } | 436 } |
| 437 | 437 |
| 438 } // namespace net | 438 } // namespace net |
| OLD | NEW |