| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/http/http_content_disposition.h" | 5 #include "net/http/http_content_disposition.h" |
| 6 | 6 |
| 7 #include "base/base64.h" | 7 #include "base/base64.h" |
| 8 #include "base/i18n/icu_string_conversions.h" | 8 #include "base/i18n/icu_string_conversions.h" |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/string_util.h" | 10 #include "base/string_util.h" |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 90 ucnv_close(converter); | 90 ucnv_close(converter); |
| 91 if (U_FAILURE(err)) | 91 if (U_FAILURE(err)) |
| 92 return false; | 92 return false; |
| 93 output->resize(output_length); | 93 output->resize(output_length); |
| 94 return true; | 94 return true; |
| 95 } | 95 } |
| 96 | 96 |
| 97 bool DecodeWord(const std::string& encoded_word, | 97 bool DecodeWord(const std::string& encoded_word, |
| 98 const std::string& referrer_charset, | 98 const std::string& referrer_charset, |
| 99 bool* is_rfc2047, | 99 bool* is_rfc2047, |
| 100 std::string* output) { | 100 std::string* output, |
| 101 HttpContentDisposition::ParseResult* parse_result) { |
| 101 *is_rfc2047 = false; | 102 *is_rfc2047 = false; |
| 102 output->clear(); | 103 output->clear(); |
| 103 if (encoded_word.empty()) | 104 if (encoded_word.empty()) |
| 104 return true; | 105 return true; |
| 105 | 106 |
| 106 if (!IsStringASCII(encoded_word)) { | 107 if (!IsStringASCII(encoded_word)) { |
| 107 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 108 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
| 108 if (IsStringUTF8(encoded_word)) { | 109 if (IsStringUTF8(encoded_word)) { |
| 109 *output = encoded_word; | 110 *output = encoded_word; |
| 110 } else { | 111 } else { |
| 111 string16 utf16_output; | 112 string16 utf16_output; |
| 112 if (!referrer_charset.empty() && | 113 if (!referrer_charset.empty() && |
| 113 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), | 114 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), |
| 114 base::OnStringConversionError::FAIL, | 115 base::OnStringConversionError::FAIL, |
| 115 &utf16_output)) { | 116 &utf16_output)) { |
| 116 *output = UTF16ToUTF8(utf16_output); | 117 *output = UTF16ToUTF8(utf16_output); |
| 117 } else { | 118 } else { |
| 118 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 119 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
| 119 } | 120 } |
| 120 } | 121 } |
| 121 | 122 |
| 123 parse_result->has_non_ascii_strings = true; |
| 122 return true; | 124 return true; |
| 123 } | 125 } |
| 124 | 126 |
| 125 // RFC 2047 : one of encoding methods supported by Firefox and relatively | 127 // RFC 2047 : one of encoding methods supported by Firefox and relatively |
| 126 // widely used by web servers. | 128 // widely used by web servers. |
| 127 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. | 129 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. |
| 128 // We don't care about the length restriction (72 bytes) because | 130 // We don't care about the length restriction (72 bytes) because |
| 129 // many web servers generate encoded words longer than the limit. | 131 // many web servers generate encoded words longer than the limit. |
| 130 std::string tmp; | 132 std::string decoded_word; |
| 131 *is_rfc2047 = true; | 133 *is_rfc2047 = true; |
| 132 int part_index = 0; | 134 int part_index = 0; |
| 133 std::string charset; | 135 std::string charset; |
| 134 StringTokenizer t(encoded_word, "?"); | 136 StringTokenizer t(encoded_word, "?"); |
| 135 RFC2047EncodingType enc_type = Q_ENCODING; | 137 RFC2047EncodingType enc_type = Q_ENCODING; |
| 136 while (*is_rfc2047 && t.GetNext()) { | 138 while (*is_rfc2047 && t.GetNext()) { |
| 137 std::string part = t.token(); | 139 std::string part = t.token(); |
| 138 switch (part_index) { | 140 switch (part_index) { |
| 139 case 0: | 141 case 0: |
| 140 if (part != "=") { | 142 if (part != "=") { |
| (...skipping 12 matching lines...) Expand all Loading... |
| 153 part.find_first_of("bBqQ") == std::string::npos) { | 155 part.find_first_of("bBqQ") == std::string::npos) { |
| 154 *is_rfc2047 = false; | 156 *is_rfc2047 = false; |
| 155 break; | 157 break; |
| 156 } | 158 } |
| 157 if (part[0] == 'b' || part[0] == 'B') { | 159 if (part[0] == 'b' || part[0] == 'B') { |
| 158 enc_type = B_ENCODING; | 160 enc_type = B_ENCODING; |
| 159 } | 161 } |
| 160 ++part_index; | 162 ++part_index; |
| 161 break; | 163 break; |
| 162 case 3: | 164 case 3: |
| 163 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp); | 165 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word); |
| 164 if (!*is_rfc2047) { | 166 if (!*is_rfc2047) { |
| 165 // Last minute failure. Invalid B/Q encoding. Rather than | 167 // Last minute failure. Invalid B/Q encoding. Rather than |
| 166 // passing it through, return now. | 168 // passing it through, return now. |
| 167 return false; | 169 return false; |
| 168 } | 170 } |
| 169 ++part_index; | 171 ++part_index; |
| 170 break; | 172 break; |
| 171 case 4: | 173 case 4: |
| 172 if (part != "=") { | 174 if (part != "=") { |
| 173 // Another last minute failure ! | 175 // Another last minute failure ! |
| 174 // Likely to be a case of two encoded-words in a row or | 176 // Likely to be a case of two encoded-words in a row or |
| 175 // an encoded word followed by a non-encoded word. We can be | 177 // an encoded word followed by a non-encoded word. We can be |
| 176 // generous, but it does not help much in terms of compatibility, | 178 // generous, but it does not help much in terms of compatibility, |
| 177 // I believe. Return immediately. | 179 // I believe. Return immediately. |
| 178 *is_rfc2047 = false; | 180 *is_rfc2047 = false; |
| 179 return false; | 181 return false; |
| 180 } | 182 } |
| 181 ++part_index; | 183 ++part_index; |
| 182 break; | 184 break; |
| 183 default: | 185 default: |
| 184 *is_rfc2047 = false; | 186 *is_rfc2047 = false; |
| 185 return false; | 187 return false; |
| 186 } | 188 } |
| 187 } | 189 } |
| 188 | 190 |
| 189 if (*is_rfc2047) { | 191 if (*is_rfc2047) { |
| 190 if (*(encoded_word.end() - 1) == '=') { | 192 if (*(encoded_word.end() - 1) == '=') { |
| 191 output->swap(tmp); | 193 output->swap(decoded_word); |
| 194 parse_result->has_rfc2047_encoded_strings = true; |
| 192 return true; | 195 return true; |
| 193 } | 196 } |
| 194 // encoded_word ending prematurely with '?' or extra '?' | 197 // encoded_word ending prematurely with '?' or extra '?' |
| 195 *is_rfc2047 = false; | 198 *is_rfc2047 = false; |
| 196 return false; | 199 return false; |
| 197 } | 200 } |
| 198 | 201 |
| 199 // We're not handling 'especial' characters quoted with '\', but | 202 // We're not handling 'especial' characters quoted with '\', but |
| 200 // it should be Ok because we're not an email client but a | 203 // it should be Ok because we're not an email client but a |
| 201 // web browser. | 204 // web browser. |
| 202 | 205 |
| 203 // What IE6/7 does: %-escaped UTF-8. | 206 // What IE6/7 does: %-escaped UTF-8. |
| 204 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES); | 207 decoded_word = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES); |
| 205 if (IsStringUTF8(tmp)) { | 208 if (decoded_word != encoded_word) |
| 206 output->swap(tmp); | 209 parse_result->has_percent_encoded_strings = true; |
| 210 if (IsStringUTF8(decoded_word)) { |
| 211 output->swap(decoded_word); |
| 207 return true; | 212 return true; |
| 208 // We can try either the OS default charset or 'origin charset' here, | 213 // We can try either the OS default charset or 'origin charset' here, |
| 209 // As far as I can tell, IE does not support it. However, I've seen | 214 // As far as I can tell, IE does not support it. However, I've seen |
| 210 // web servers emit %-escaped string in a legacy encoding (usually | 215 // web servers emit %-escaped string in a legacy encoding (usually |
| 211 // origin charset). | 216 // origin charset). |
| 212 // TODO(jungshik) : Test IE further and consider adding a fallback here. | 217 // TODO(jungshik) : Test IE further and consider adding a fallback here. |
| 213 } | 218 } |
| 214 return false; | 219 return false; |
| 215 } | 220 } |
| 216 | 221 |
| 217 // Decodes the value of a 'filename' or 'name' parameter given as |input|. The | 222 // Decodes the value of a 'filename' or 'name' parameter given as |input|. The |
| 218 // value is supposed to be of the form: | 223 // value is supposed to be of the form: |
| 219 // | 224 // |
| 220 // value = token | quoted-string | 225 // value = token | quoted-string |
| 221 // | 226 // |
| 222 // However we currently also allow RFC 2047 encoding and non-ASCII | 227 // However we currently also allow RFC 2047 encoding and non-ASCII |
| 223 // strings. Non-ASCII strings are interpreted based on |referrer_charset|. | 228 // strings. Non-ASCII strings are interpreted based on |referrer_charset|. |
| 224 bool DecodeFilenameValue(const std::string& input, | 229 bool DecodeFilenameValue(const std::string& input, |
| 225 const std::string& referrer_charset, | 230 const std::string& referrer_charset, |
| 226 std::string* output) { | 231 std::string* output, |
| 227 std::string tmp; | 232 HttpContentDisposition::ParseResult* parse_result) { |
| 233 HttpContentDisposition::ParseResult current_parse_result; |
| 234 std::string decoded_value; |
| 235 bool is_previous_token_rfc2047 = true; |
| 236 |
| 228 // Tokenize with whitespace characters. | 237 // Tokenize with whitespace characters. |
| 229 StringTokenizer t(input, " \t\n\r"); | 238 StringTokenizer t(input, " \t\n\r"); |
| 230 t.set_options(StringTokenizer::RETURN_DELIMS); | 239 t.set_options(StringTokenizer::RETURN_DELIMS); |
| 231 bool is_previous_token_rfc2047 = true; | |
| 232 while (t.GetNext()) { | 240 while (t.GetNext()) { |
| 233 if (t.token_is_delim()) { | 241 if (t.token_is_delim()) { |
| 234 // If the previous non-delimiter token is not RFC2047-encoded, | 242 // If the previous non-delimiter token is not RFC2047-encoded, |
| 235 // put in a space in its place. Otherwise, skip over it. | 243 // put in a space in its place. Otherwise, skip over it. |
| 236 if (!is_previous_token_rfc2047) { | 244 if (!is_previous_token_rfc2047) |
| 237 tmp.push_back(' '); | 245 decoded_value.push_back(' '); |
| 238 } | |
| 239 continue; | 246 continue; |
| 240 } | 247 } |
| 241 // We don't support a single multibyte character split into | 248 // We don't support a single multibyte character split into |
| 242 // adjacent encoded words. Some broken mail clients emit headers | 249 // adjacent encoded words. Some broken mail clients emit headers |
| 243 // with that problem, but most web servers usually encode a filename | 250 // with that problem, but most web servers usually encode a filename |
| 244 // in a single encoded-word. Firefox/Thunderbird do not support | 251 // in a single encoded-word. Firefox/Thunderbird do not support |
| 245 // it, either. | 252 // it, either. |
| 246 std::string decoded; | 253 std::string decoded; |
| 247 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, | 254 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, |
| 248 &decoded)) | 255 &decoded, &current_parse_result)) |
| 249 return false; | 256 return false; |
| 250 tmp.append(decoded); | 257 decoded_value.append(decoded); |
| 251 } | 258 } |
| 252 output->swap(tmp); | 259 output->swap(decoded_value); |
| 260 if (parse_result) { |
| 261 parse_result->has_non_ascii_strings = |
| 262 current_parse_result.has_non_ascii_strings; |
| 263 parse_result->has_percent_encoded_strings = |
| 264 current_parse_result.has_percent_encoded_strings; |
| 265 parse_result->has_rfc2047_encoded_strings = |
| 266 current_parse_result.has_rfc2047_encoded_strings; |
| 267 } |
| 253 return true; | 268 return true; |
| 254 } | 269 } |
| 255 | 270 |
| 256 // Parses the charset and value-chars out of an ext-value string. | 271 // Parses the charset and value-chars out of an ext-value string. |
| 257 // | 272 // |
| 258 // ext-value = charset "'" [ language ] "'" value-chars | 273 // ext-value = charset "'" [ language ] "'" value-chars |
| 259 bool ParseExtValueComponents(const std::string& input, | 274 bool ParseExtValueComponents(const std::string& input, |
| 260 std::string* charset, | 275 std::string* charset, |
| 261 std::string* value_chars) { | 276 std::string* value_chars) { |
| 262 StringTokenizer t(input, "'"); | 277 StringTokenizer t(input, "'"); |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 330 } | 345 } |
| 331 | 346 |
| 332 std::string unescaped = UnescapeURLComponent(value, | 347 std::string unescaped = UnescapeURLComponent(value, |
| 333 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); | 348 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); |
| 334 | 349 |
| 335 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); | 350 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); |
| 336 } | 351 } |
| 337 | 352 |
| 338 } // namespace | 353 } // namespace |
| 339 | 354 |
| 355 HttpContentDisposition::ParseResult::ParseResult() |
| 356 : has_disposition_type(false), |
| 357 has_unknown_disposition_type(false), |
| 358 has_name(false), |
| 359 has_filename(false), |
| 360 has_ext_filename(false), |
| 361 has_non_ascii_strings(false), |
| 362 has_percent_encoded_strings(false), |
| 363 has_rfc2047_encoded_strings(false) { |
| 364 } |
| 365 |
| 340 HttpContentDisposition::HttpContentDisposition( | 366 HttpContentDisposition::HttpContentDisposition( |
| 341 const std::string& header, const std::string& referrer_charset) | 367 const std::string& header, const std::string& referrer_charset) |
| 342 : type_(INLINE) { | 368 : type_(INLINE) { |
| 343 Parse(header, referrer_charset); | 369 Parse(header, referrer_charset); |
| 344 } | 370 } |
| 345 | 371 |
| 346 HttpContentDisposition::~HttpContentDisposition() { | 372 HttpContentDisposition::~HttpContentDisposition() { |
| 347 } | 373 } |
| 348 | 374 |
| 349 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( | 375 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( |
| 350 std::string::const_iterator begin, std::string::const_iterator end) { | 376 std::string::const_iterator begin, std::string::const_iterator end) { |
| 351 DCHECK(type_ == INLINE); | 377 DCHECK(type_ == INLINE); |
| 352 std::string::const_iterator delimiter = std::find(begin, end, ';'); | 378 std::string::const_iterator delimiter = std::find(begin, end, ';'); |
| 353 | 379 |
| 354 std::string::const_iterator type_begin = begin; | 380 std::string::const_iterator type_begin = begin; |
| 355 std::string::const_iterator type_end = delimiter; | 381 std::string::const_iterator type_end = delimiter; |
| 356 HttpUtil::TrimLWS(&type_begin, &type_end); | 382 HttpUtil::TrimLWS(&type_begin, &type_end); |
| 357 | 383 |
| 358 // If the disposition-type isn't a valid token then the | 384 // If the disposition-type isn't a valid token then the |
| 359 // Content-Disposition header is malformed, and we treat the first bytes as | 385 // Content-Disposition header is malformed, and we treat the first bytes as |
| 360 // a parameter rather than a disposition-type. | 386 // a parameter rather than a disposition-type. |
| 361 if (!HttpUtil::IsToken(type_begin, type_end)) | 387 if (!HttpUtil::IsToken(type_begin, type_end)) |
| 362 return begin; | 388 return begin; |
| 363 | 389 |
| 390 parse_result_.has_disposition_type = true; |
| 391 |
| 364 DCHECK(std::find(type_begin, type_end, '=') == type_end); | 392 DCHECK(std::find(type_begin, type_end, '=') == type_end); |
| 365 | 393 |
| 366 if (!LowerCaseEqualsASCII(type_begin, type_end, "inline")) | 394 if (LowerCaseEqualsASCII(type_begin, type_end, "inline")) { |
| 395 type_ = INLINE; |
| 396 } else if (LowerCaseEqualsASCII(type_begin, type_end, "attachment")) { |
| 367 type_ = ATTACHMENT; | 397 type_ = ATTACHMENT; |
| 398 } else { |
| 399 parse_result_.has_unknown_disposition_type = true; |
| 400 type_ = ATTACHMENT; |
| 401 } |
| 368 return delimiter; | 402 return delimiter; |
| 369 } | 403 } |
| 370 | 404 |
| 371 // http://tools.ietf.org/html/rfc6266 | 405 // http://tools.ietf.org/html/rfc6266 |
| 372 // | 406 // |
| 373 // content-disposition = "Content-Disposition" ":" | 407 // content-disposition = "Content-Disposition" ":" |
| 374 // disposition-type *( ";" disposition-parm ) | 408 // disposition-type *( ";" disposition-parm ) |
| 375 // | 409 // |
| 376 // disposition-type = "inline" | "attachment" | disp-ext-type | 410 // disposition-type = "inline" | "attachment" | disp-ext-type |
| 377 // ; case-insensitive | 411 // ; case-insensitive |
| (...skipping 19 matching lines...) Expand all Loading... |
| 397 | 431 |
| 398 std::string name; | 432 std::string name; |
| 399 std::string filename; | 433 std::string filename; |
| 400 std::string ext_filename; | 434 std::string ext_filename; |
| 401 | 435 |
| 402 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); | 436 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); |
| 403 while (iter.GetNext()) { | 437 while (iter.GetNext()) { |
| 404 if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 438 if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
| 405 iter.name_end(), | 439 iter.name_end(), |
| 406 "filename")) { | 440 "filename")) { |
| 407 DecodeFilenameValue(iter.value(), referrer_charset, &filename); | 441 parse_result_.has_filename = |
| 442 DecodeFilenameValue(iter.value(), referrer_charset, &filename, |
| 443 &parse_result_); |
| 408 } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 444 } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
| 409 iter.name_end(), | 445 iter.name_end(), |
| 410 "name")) { | 446 "name")) { |
| 411 DecodeFilenameValue(iter.value(), referrer_charset, &name); | 447 parse_result_.has_name = |
| 448 DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL); |
| 412 } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 449 } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
| 413 iter.name_end(), | 450 iter.name_end(), |
| 414 "filename*")) { | 451 "filename*")) { |
| 415 DecodeExtValue(iter.raw_value(), &ext_filename); | 452 parse_result_.has_ext_filename = |
| 453 DecodeExtValue(iter.raw_value(), &ext_filename); |
| 416 } | 454 } |
| 417 } | 455 } |
| 418 | 456 |
| 419 if (!ext_filename.empty()) | 457 if (!ext_filename.empty()) |
| 420 filename_ = ext_filename; | 458 filename_ = ext_filename; |
| 421 else if (!filename.empty()) | 459 else if (!filename.empty()) |
| 422 filename_ = filename; | 460 filename_ = filename; |
| 423 else | 461 else |
| 424 filename_ = name; | 462 filename_ = name; |
| 425 } | 463 } |
| 426 | 464 |
| 427 } // namespace net | 465 } // namespace net |
| OLD | NEW |