| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/http/http_content_disposition.h" | 5 #include "net/http/http_content_disposition.h" |
| 6 | 6 |
| 7 #include "base/base64.h" | 7 #include "base/base64.h" |
| 8 #include "base/i18n/icu_string_conversions.h" | 8 #include "base/i18n/icu_string_conversions.h" |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/string_util.h" | 10 #include "base/string_util.h" |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 88 ucnv_close(converter); | 88 ucnv_close(converter); |
| 89 if (U_FAILURE(err)) | 89 if (U_FAILURE(err)) |
| 90 return false; | 90 return false; |
| 91 output->resize(output_length); | 91 output->resize(output_length); |
| 92 return true; | 92 return true; |
| 93 } | 93 } |
| 94 | 94 |
| 95 bool DecodeWord(const std::string& encoded_word, | 95 bool DecodeWord(const std::string& encoded_word, |
| 96 const std::string& referrer_charset, | 96 const std::string& referrer_charset, |
| 97 bool* is_rfc2047, | 97 bool* is_rfc2047, |
| 98 std::string* output) { | 98 std::string* output, |
| 99 net::HttpContentDisposition::ParseResult* parse_result) { |
| 99 *is_rfc2047 = false; | 100 *is_rfc2047 = false; |
| 100 output->clear(); | 101 output->clear(); |
| 101 if (encoded_word.empty()) | 102 if (encoded_word.empty()) |
| 102 return true; | 103 return true; |
| 103 | 104 |
| 104 if (!IsStringASCII(encoded_word)) { | 105 if (!IsStringASCII(encoded_word)) { |
| 105 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 106 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
| 106 if (IsStringUTF8(encoded_word)) { | 107 if (IsStringUTF8(encoded_word)) { |
| 107 *output = encoded_word; | 108 *output = encoded_word; |
| 108 } else { | 109 } else { |
| 109 string16 utf16_output; | 110 string16 utf16_output; |
| 110 if (!referrer_charset.empty() && | 111 if (!referrer_charset.empty() && |
| 111 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), | 112 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), |
| 112 base::OnStringConversionError::FAIL, | 113 base::OnStringConversionError::FAIL, |
| 113 &utf16_output)) { | 114 &utf16_output)) { |
| 114 *output = UTF16ToUTF8(utf16_output); | 115 *output = UTF16ToUTF8(utf16_output); |
| 115 } else { | 116 } else { |
| 116 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 117 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
| 117 } | 118 } |
| 118 } | 119 } |
| 119 | 120 |
| 121 parse_result->has_non_ascii_strings = true; |
| 120 return true; | 122 return true; |
| 121 } | 123 } |
| 122 | 124 |
| 123 // RFC 2047 : one of encoding methods supported by Firefox and relatively | 125 // RFC 2047 : one of encoding methods supported by Firefox and relatively |
| 124 // widely used by web servers. | 126 // widely used by web servers. |
| 125 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. | 127 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. |
| 126 // We don't care about the length restriction (72 bytes) because | 128 // We don't care about the length restriction (72 bytes) because |
| 127 // many web servers generate encoded words longer than the limit. | 129 // many web servers generate encoded words longer than the limit. |
| 128 std::string tmp; | 130 std::string decoded_word; |
| 129 *is_rfc2047 = true; | 131 *is_rfc2047 = true; |
| 130 int part_index = 0; | 132 int part_index = 0; |
| 131 std::string charset; | 133 std::string charset; |
| 132 StringTokenizer t(encoded_word, "?"); | 134 StringTokenizer t(encoded_word, "?"); |
| 133 RFC2047EncodingType enc_type = Q_ENCODING; | 135 RFC2047EncodingType enc_type = Q_ENCODING; |
| 134 while (*is_rfc2047 && t.GetNext()) { | 136 while (*is_rfc2047 && t.GetNext()) { |
| 135 std::string part = t.token(); | 137 std::string part = t.token(); |
| 136 switch (part_index) { | 138 switch (part_index) { |
| 137 case 0: | 139 case 0: |
| 138 if (part != "=") { | 140 if (part != "=") { |
| (...skipping 12 matching lines...) Expand all Loading... |
| 151 part.find_first_of("bBqQ") == std::string::npos) { | 153 part.find_first_of("bBqQ") == std::string::npos) { |
| 152 *is_rfc2047 = false; | 154 *is_rfc2047 = false; |
| 153 break; | 155 break; |
| 154 } | 156 } |
| 155 if (part[0] == 'b' || part[0] == 'B') { | 157 if (part[0] == 'b' || part[0] == 'B') { |
| 156 enc_type = B_ENCODING; | 158 enc_type = B_ENCODING; |
| 157 } | 159 } |
| 158 ++part_index; | 160 ++part_index; |
| 159 break; | 161 break; |
| 160 case 3: | 162 case 3: |
| 161 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp); | 163 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word); |
| 162 if (!*is_rfc2047) { | 164 if (!*is_rfc2047) { |
| 163 // Last minute failure. Invalid B/Q encoding. Rather than | 165 // Last minute failure. Invalid B/Q encoding. Rather than |
| 164 // passing it through, return now. | 166 // passing it through, return now. |
| 165 return false; | 167 return false; |
| 166 } | 168 } |
| 167 ++part_index; | 169 ++part_index; |
| 168 break; | 170 break; |
| 169 case 4: | 171 case 4: |
| 170 if (part != "=") { | 172 if (part != "=") { |
| 171 // Another last minute failure ! | 173 // Another last minute failure ! |
| 172 // Likely to be a case of two encoded-words in a row or | 174 // Likely to be a case of two encoded-words in a row or |
| 173 // an encoded word followed by a non-encoded word. We can be | 175 // an encoded word followed by a non-encoded word. We can be |
| 174 // generous, but it does not help much in terms of compatibility, | 176 // generous, but it does not help much in terms of compatibility, |
| 175 // I believe. Return immediately. | 177 // I believe. Return immediately. |
| 176 *is_rfc2047 = false; | 178 *is_rfc2047 = false; |
| 177 return false; | 179 return false; |
| 178 } | 180 } |
| 179 ++part_index; | 181 ++part_index; |
| 180 break; | 182 break; |
| 181 default: | 183 default: |
| 182 *is_rfc2047 = false; | 184 *is_rfc2047 = false; |
| 183 return false; | 185 return false; |
| 184 } | 186 } |
| 185 } | 187 } |
| 186 | 188 |
| 187 if (*is_rfc2047) { | 189 if (*is_rfc2047) { |
| 188 if (*(encoded_word.end() - 1) == '=') { | 190 if (*(encoded_word.end() - 1) == '=') { |
| 189 output->swap(tmp); | 191 output->swap(decoded_word); |
| 192 parse_result->has_rfc2047_encoded_strings = true; |
| 190 return true; | 193 return true; |
| 191 } | 194 } |
| 192 // encoded_word ending prematurely with '?' or extra '?' | 195 // encoded_word ending prematurely with '?' or extra '?' |
| 193 *is_rfc2047 = false; | 196 *is_rfc2047 = false; |
| 194 return false; | 197 return false; |
| 195 } | 198 } |
| 196 | 199 |
| 197 // We're not handling 'especial' characters quoted with '\', but | 200 // We're not handling 'especial' characters quoted with '\', but |
| 198 // it should be Ok because we're not an email client but a | 201 // it should be Ok because we're not an email client but a |
| 199 // web browser. | 202 // web browser. |
| 200 | 203 |
| 201 // What IE6/7 does: %-escaped UTF-8. | 204 // What IE6/7 does: %-escaped UTF-8. |
| 202 tmp = net::UnescapeURLComponent(encoded_word, net::UnescapeRule::SPACES); | 205 decoded_word = net::UnescapeURLComponent(encoded_word, |
| 203 if (IsStringUTF8(tmp)) { | 206 net::UnescapeRule::SPACES); |
| 204 output->swap(tmp); | 207 if (decoded_word != encoded_word) |
| 208 parse_result->has_percent_encoded_strings = true; |
| 209 if (IsStringUTF8(decoded_word)) { |
| 210 output->swap(decoded_word); |
| 205 return true; | 211 return true; |
| 206 // We can try either the OS default charset or 'origin charset' here, | 212 // We can try either the OS default charset or 'origin charset' here, |
| 207 // As far as I can tell, IE does not support it. However, I've seen | 213 // As far as I can tell, IE does not support it. However, I've seen |
| 208 // web servers emit %-escaped string in a legacy encoding (usually | 214 // web servers emit %-escaped string in a legacy encoding (usually |
| 209 // origin charset). | 215 // origin charset). |
| 210 // TODO(jungshik) : Test IE further and consider adding a fallback here. | 216 // TODO(jungshik) : Test IE further and consider adding a fallback here. |
| 211 } | 217 } |
| 212 return false; | 218 return false; |
| 213 } | 219 } |
| 214 | 220 |
| 215 // Decodes the value of a 'filename' or 'name' parameter given as |input|. The | 221 // Decodes the value of a 'filename' or 'name' parameter given as |input|. The |
| 216 // value is supposed to be of the form: | 222 // value is supposed to be of the form: |
| 217 // | 223 // |
| 218 // value = token | quoted-string | 224 // value = token | quoted-string |
| 219 // | 225 // |
| 220 // However we currently also allow RFC 2047 encoding and non-ASCII | 226 // However we currently also allow RFC 2047 encoding and non-ASCII |
| 221 // strings. Non-ASCII strings are interpreted based on |referrer_charset|. | 227 // strings. Non-ASCII strings are interpreted based on |referrer_charset|. |
| 222 bool DecodeFilenameValue(const std::string& input, | 228 bool DecodeFilenameValue( |
| 223 const std::string& referrer_charset, | 229 const std::string& input, |
| 224 std::string* output) { | 230 const std::string& referrer_charset, |
| 225 std::string tmp; | 231 std::string* output, |
| 232 net::HttpContentDisposition::ParseResult* parse_result) { |
| 233 net::HttpContentDisposition::ParseResult current_parse_result; |
| 234 std::string decoded_value; |
| 235 bool is_previous_token_rfc2047 = true; |
| 236 |
| 226 // Tokenize with whitespace characters. | 237 // Tokenize with whitespace characters. |
| 227 StringTokenizer t(input, " \t\n\r"); | 238 StringTokenizer t(input, " \t\n\r"); |
| 228 t.set_options(StringTokenizer::RETURN_DELIMS); | 239 t.set_options(StringTokenizer::RETURN_DELIMS); |
| 229 bool is_previous_token_rfc2047 = true; | |
| 230 while (t.GetNext()) { | 240 while (t.GetNext()) { |
| 231 if (t.token_is_delim()) { | 241 if (t.token_is_delim()) { |
| 232 // If the previous non-delimiter token is not RFC2047-encoded, | 242 // If the previous non-delimiter token is not RFC2047-encoded, |
| 233 // put in a space in its place. Otherwise, skip over it. | 243 // put in a space in its place. Otherwise, skip over it. |
| 234 if (!is_previous_token_rfc2047) { | 244 if (!is_previous_token_rfc2047) |
| 235 tmp.push_back(' '); | 245 decoded_value.push_back(' '); |
| 236 } | |
| 237 continue; | 246 continue; |
| 238 } | 247 } |
| 239 // We don't support a single multibyte character split into | 248 // We don't support a single multibyte character split into |
| 240 // adjacent encoded words. Some broken mail clients emit headers | 249 // adjacent encoded words. Some broken mail clients emit headers |
| 241 // with that problem, but most web servers usually encode a filename | 250 // with that problem, but most web servers usually encode a filename |
| 242 // in a single encoded-word. Firefox/Thunderbird do not support | 251 // in a single encoded-word. Firefox/Thunderbird do not support |
| 243 // it, either. | 252 // it, either. |
| 244 std::string decoded; | 253 std::string decoded; |
| 245 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, | 254 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, |
| 246 &decoded)) | 255 &decoded, &current_parse_result)) |
| 247 return false; | 256 return false; |
| 248 tmp.append(decoded); | 257 decoded_value.append(decoded); |
| 249 } | 258 } |
| 250 output->swap(tmp); | 259 output->swap(decoded_value); |
| 260 if (parse_result && !output->empty()) { |
| 261 parse_result->has_non_ascii_strings = |
| 262 current_parse_result.has_non_ascii_strings; |
| 263 parse_result->has_percent_encoded_strings = |
| 264 current_parse_result.has_percent_encoded_strings; |
| 265 parse_result->has_rfc2047_encoded_strings = |
| 266 current_parse_result.has_rfc2047_encoded_strings; |
| 267 } |
| 251 return true; | 268 return true; |
| 252 } | 269 } |
| 253 | 270 |
| 254 // Parses the charset and value-chars out of an ext-value string. | 271 // Parses the charset and value-chars out of an ext-value string. |
| 255 // | 272 // |
| 256 // ext-value = charset "'" [ language ] "'" value-chars | 273 // ext-value = charset "'" [ language ] "'" value-chars |
| 257 bool ParseExtValueComponents(const std::string& input, | 274 bool ParseExtValueComponents(const std::string& input, |
| 258 std::string* charset, | 275 std::string* charset, |
| 259 std::string* value_chars) { | 276 std::string* value_chars) { |
| 260 StringTokenizer t(input, "'"); | 277 StringTokenizer t(input, "'"); |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 330 std::string unescaped = net::UnescapeURLComponent( | 347 std::string unescaped = net::UnescapeURLComponent( |
| 331 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); | 348 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); |
| 332 | 349 |
| 333 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); | 350 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); |
| 334 } | 351 } |
| 335 | 352 |
| 336 } // namespace | 353 } // namespace |
| 337 | 354 |
| 338 namespace net { | 355 namespace net { |
| 339 | 356 |
| 357 HttpContentDisposition::ParseResult::ParseResult() |
| 358 : has_disposition_type(false), |
| 359 has_unknown_disposition_type(false), |
| 360 has_name(false), |
| 361 has_filename(false), |
| 362 has_ext_filename(false), |
| 363 has_non_ascii_strings(false), |
| 364 has_percent_encoded_strings(false), |
| 365 has_rfc2047_encoded_strings(false) { |
| 366 } |
| 367 |
| 340 HttpContentDisposition::HttpContentDisposition( | 368 HttpContentDisposition::HttpContentDisposition( |
| 341 const std::string& header, const std::string& referrer_charset) | 369 const std::string& header, const std::string& referrer_charset) |
| 342 : type_(INLINE) { | 370 : type_(INLINE) { |
| 343 Parse(header, referrer_charset); | 371 Parse(header, referrer_charset); |
| 344 } | 372 } |
| 345 | 373 |
| 346 HttpContentDisposition::~HttpContentDisposition() { | 374 HttpContentDisposition::~HttpContentDisposition() { |
| 347 } | 375 } |
| 348 | 376 |
| 349 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( | 377 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( |
| 350 std::string::const_iterator begin, std::string::const_iterator end) { | 378 std::string::const_iterator begin, std::string::const_iterator end) { |
| 351 DCHECK(type_ == INLINE); | 379 DCHECK(type_ == INLINE); |
| 352 std::string::const_iterator delimiter = std::find(begin, end, ';'); | 380 std::string::const_iterator delimiter = std::find(begin, end, ';'); |
| 353 | 381 |
| 354 std::string::const_iterator type_begin = begin; | 382 std::string::const_iterator type_begin = begin; |
| 355 std::string::const_iterator type_end = delimiter; | 383 std::string::const_iterator type_end = delimiter; |
| 356 HttpUtil::TrimLWS(&type_begin, &type_end); | 384 HttpUtil::TrimLWS(&type_begin, &type_end); |
| 357 | 385 |
| 358 // If the disposition-type isn't a valid token then the | 386 // If the disposition-type isn't a valid token then the |
| 359 // Content-Disposition header is malformed, and we treat the first bytes as | 387 // Content-Disposition header is malformed, and we treat the first bytes as |
| 360 // a parameter rather than a disposition-type. | 388 // a parameter rather than a disposition-type. |
| 361 if (!HttpUtil::IsToken(type_begin, type_end)) | 389 if (!HttpUtil::IsToken(type_begin, type_end)) |
| 362 return begin; | 390 return begin; |
| 363 | 391 |
| 392 parse_result_.has_disposition_type = true; |
| 393 |
| 364 DCHECK(std::find(type_begin, type_end, '=') == type_end); | 394 DCHECK(std::find(type_begin, type_end, '=') == type_end); |
| 365 | 395 |
| 366 if (!LowerCaseEqualsASCII(type_begin, type_end, "inline")) | 396 if (LowerCaseEqualsASCII(type_begin, type_end, "inline")) { |
| 397 type_ = INLINE; |
| 398 } else if (LowerCaseEqualsASCII(type_begin, type_end, "attachment")) { |
| 367 type_ = ATTACHMENT; | 399 type_ = ATTACHMENT; |
| 400 } else { |
| 401 parse_result_.has_unknown_disposition_type = true; |
| 402 type_ = ATTACHMENT; |
| 403 } |
| 368 return delimiter; | 404 return delimiter; |
| 369 } | 405 } |
| 370 | 406 |
| 371 // http://tools.ietf.org/html/rfc6266 | 407 // http://tools.ietf.org/html/rfc6266 |
| 372 // | 408 // |
| 373 // content-disposition = "Content-Disposition" ":" | 409 // content-disposition = "Content-Disposition" ":" |
| 374 // disposition-type *( ";" disposition-parm ) | 410 // disposition-type *( ";" disposition-parm ) |
| 375 // | 411 // |
| 376 // disposition-type = "inline" | "attachment" | disp-ext-type | 412 // disposition-type = "inline" | "attachment" | disp-ext-type |
| 377 // ; case-insensitive | 413 // ; case-insensitive |
| (...skipping 19 matching lines...) Expand all Loading... |
| 397 | 433 |
| 398 std::string name; | 434 std::string name; |
| 399 std::string filename; | 435 std::string filename; |
| 400 std::string ext_filename; | 436 std::string ext_filename; |
| 401 | 437 |
| 402 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); | 438 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); |
| 403 while (iter.GetNext()) { | 439 while (iter.GetNext()) { |
| 404 if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 440 if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
| 405 iter.name_end(), | 441 iter.name_end(), |
| 406 "filename")) { | 442 "filename")) { |
| 407 DecodeFilenameValue(iter.value(), referrer_charset, &filename); | 443 DecodeFilenameValue(iter.value(), referrer_charset, &filename, |
| 444 &parse_result_); |
| 445 parse_result_.has_filename = !filename.empty(); |
| 408 } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 446 } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
| 409 iter.name_end(), | 447 iter.name_end(), |
| 410 "name")) { | 448 "name")) { |
| 411 DecodeFilenameValue(iter.value(), referrer_charset, &name); | 449 DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL); |
| 450 parse_result_.has_name = !name.empty(); |
| 412 } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 451 } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
| 413 iter.name_end(), | 452 iter.name_end(), |
| 414 "filename*")) { | 453 "filename*")) { |
| 415 DecodeExtValue(iter.raw_value(), &ext_filename); | 454 DecodeExtValue(iter.raw_value(), &ext_filename); |
| 455 parse_result_.has_ext_filename = !ext_filename.empty(); |
| 416 } | 456 } |
| 417 } | 457 } |
| 418 | 458 |
| 419 if (!ext_filename.empty()) | 459 if (!ext_filename.empty()) |
| 420 filename_ = ext_filename; | 460 filename_ = ext_filename; |
| 421 else if (!filename.empty()) | 461 else if (!filename.empty()) |
| 422 filename_ = filename; | 462 filename_ = filename; |
| 423 else | 463 else |
| 424 filename_ = name; | 464 filename_ = name; |
| 425 } | 465 } |
| 426 | 466 |
| 427 } // namespace net | 467 } // namespace net |
| OLD | NEW |