OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/http/http_content_disposition.h" | 5 #include "net/http/http_content_disposition.h" |
6 | 6 |
7 #include "base/base64.h" | 7 #include "base/base64.h" |
8 #include "base/logging.h" | 8 #include "base/logging.h" |
9 #include "base/strings/string_tokenizer.h" | 9 #include "base/strings/string_tokenizer.h" |
10 #include "base/strings/string_util.h" | 10 #include "base/strings/string_util.h" |
11 #include "base/strings/sys_string_conversions.h" | 11 #include "base/strings/sys_string_conversions.h" |
12 #include "base/strings/utf_string_conversions.h" | 12 #include "base/strings/utf_string_conversions.h" |
13 #include "net/base/net_string_util.h" | 13 #include "net/base/net_string_util.h" |
14 #include "net/base/net_util.h" | 14 #include "net/base/net_util.h" |
15 #include "net/http/http_util.h" | 15 #include "net/http/http_util.h" |
16 | 16 |
17 namespace { | 17 namespace { |
18 | 18 |
19 enum RFC2047EncodingType { | 19 enum RFC2047EncodingType { Q_ENCODING, B_ENCODING }; |
20 Q_ENCODING, | |
21 B_ENCODING | |
22 }; | |
23 | 20 |
24 // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to | 21 // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to |
25 // decoding a quoted-printable string. Returns true if the input was valid. | 22 // decoding a quoted-printable string. Returns true if the input was valid. |
26 bool DecodeQEncoding(const std::string& input, std::string* output) { | 23 bool DecodeQEncoding(const std::string& input, std::string* output) { |
27 std::string temp; | 24 std::string temp; |
28 temp.reserve(input.size()); | 25 temp.reserve(input.size()); |
29 for (std::string::const_iterator it = input.begin(); it != input.end(); | 26 for (std::string::const_iterator it = input.begin(); it != input.end(); |
30 ++it) { | 27 ++it) { |
31 if (*it == '_') { | 28 if (*it == '_') { |
32 temp.push_back(' '); | 29 temp.push_back(' '); |
33 } else if (*it == '=') { | 30 } else if (*it == '=') { |
34 if ((input.end() - it < 3) || | 31 if ((input.end() - it < 3) || |
35 !IsHexDigit(static_cast<unsigned char>(*(it + 1))) || | 32 !IsHexDigit(static_cast<unsigned char>(*(it + 1))) || |
36 !IsHexDigit(static_cast<unsigned char>(*(it + 2)))) | 33 !IsHexDigit(static_cast<unsigned char>(*(it + 2)))) |
37 return false; | 34 return false; |
38 unsigned char ch = HexDigitToInt(*(it + 1)) * 16 + | 35 unsigned char ch = |
39 HexDigitToInt(*(it + 2)); | 36 HexDigitToInt(*(it + 1)) * 16 + HexDigitToInt(*(it + 2)); |
40 temp.push_back(static_cast<char>(ch)); | 37 temp.push_back(static_cast<char>(ch)); |
41 ++it; | 38 ++it; |
42 ++it; | 39 ++it; |
43 } else if (0x20 < *it && *it < 0x7F && *it != '?') { | 40 } else if (0x20 < *it && *it < 0x7F && *it != '?') { |
44 // In a Q-encoded word, only printable ASCII characters | 41 // In a Q-encoded word, only printable ASCII characters |
45 // represent themselves. Besides, space, '=', '_' and '?' are | 42 // represent themselves. Besides, space, '=', '_' and '?' are |
46 // not allowed, but they're already filtered out. | 43 // not allowed, but they're already filtered out. |
47 DCHECK_NE('=', *it); | 44 DCHECK_NE('=', *it); |
48 DCHECK_NE('?', *it); | 45 DCHECK_NE('?', *it); |
49 DCHECK_NE('_', *it); | 46 DCHECK_NE('_', *it); |
50 temp.push_back(*it); | 47 temp.push_back(*it); |
51 } else { | 48 } else { |
52 return false; | 49 return false; |
53 } | 50 } |
54 } | 51 } |
55 output->swap(temp); | 52 output->swap(temp); |
56 return true; | 53 return true; |
57 } | 54 } |
58 | 55 |
59 // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding | 56 // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding |
60 // type is specified in |enc_type|. | 57 // type is specified in |enc_type|. |
61 bool DecodeBQEncoding(const std::string& part, | 58 bool DecodeBQEncoding(const std::string& part, |
62 RFC2047EncodingType enc_type, | 59 RFC2047EncodingType enc_type, |
63 const std::string& charset, | 60 const std::string& charset, |
64 std::string* output) { | 61 std::string* output) { |
65 std::string decoded; | 62 std::string decoded; |
66 if (!((enc_type == B_ENCODING) ? | 63 if (!((enc_type == B_ENCODING) ? base::Base64Decode(part, &decoded) |
67 base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) { | 64 : DecodeQEncoding(part, &decoded))) { |
68 return false; | 65 return false; |
69 } | 66 } |
70 | 67 |
71 if (decoded.empty()) { | 68 if (decoded.empty()) { |
72 output->clear(); | 69 output->clear(); |
73 return true; | 70 return true; |
74 } | 71 } |
75 | 72 |
76 return net::ConvertToUtf8(decoded, charset.c_str(), output); | 73 return net::ConvertToUtf8(decoded, charset.c_str(), output); |
77 } | 74 } |
78 | 75 |
79 bool DecodeWord(const std::string& encoded_word, | 76 bool DecodeWord(const std::string& encoded_word, |
80 const std::string& referrer_charset, | 77 const std::string& referrer_charset, |
81 bool* is_rfc2047, | 78 bool* is_rfc2047, |
82 std::string* output, | 79 std::string* output, |
83 int* parse_result_flags) { | 80 int* parse_result_flags) { |
84 *is_rfc2047 = false; | 81 *is_rfc2047 = false; |
85 output->clear(); | 82 output->clear(); |
86 if (encoded_word.empty()) | 83 if (encoded_word.empty()) |
87 return true; | 84 return true; |
88 | 85 |
89 if (!IsStringASCII(encoded_word)) { | 86 if (!IsStringASCII(encoded_word)) { |
90 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 87 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
91 if (IsStringUTF8(encoded_word)) { | 88 if (IsStringUTF8(encoded_word)) { |
92 *output = encoded_word; | 89 *output = encoded_word; |
93 } else { | 90 } else { |
94 base::string16 utf16_output; | 91 base::string16 utf16_output; |
95 if (!referrer_charset.empty() && | 92 if (!referrer_charset.empty() && |
96 net::ConvertToUTF16(encoded_word, referrer_charset.c_str(), | 93 net::ConvertToUTF16( |
97 &utf16_output)) { | 94 encoded_word, referrer_charset.c_str(), &utf16_output)) { |
98 *output = base::UTF16ToUTF8(utf16_output); | 95 *output = base::UTF16ToUTF8(utf16_output); |
99 } else { | 96 } else { |
100 *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 97 *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
101 } | 98 } |
102 } | 99 } |
103 | 100 |
104 *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; | 101 *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; |
105 return true; | 102 return true; |
106 } | 103 } |
107 | 104 |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
179 // encoded_word ending prematurely with '?' or extra '?' | 176 // encoded_word ending prematurely with '?' or extra '?' |
180 *is_rfc2047 = false; | 177 *is_rfc2047 = false; |
181 return false; | 178 return false; |
182 } | 179 } |
183 | 180 |
184 // We're not handling 'especial' characters quoted with '\', but | 181 // We're not handling 'especial' characters quoted with '\', but |
185 // it should be Ok because we're not an email client but a | 182 // it should be Ok because we're not an email client but a |
186 // web browser. | 183 // web browser. |
187 | 184 |
188 // What IE6/7 does: %-escaped UTF-8. | 185 // What IE6/7 does: %-escaped UTF-8. |
189 decoded_word = net::UnescapeURLComponent(encoded_word, | 186 decoded_word = |
190 net::UnescapeRule::SPACES); | 187 net::UnescapeURLComponent(encoded_word, net::UnescapeRule::SPACES); |
191 if (decoded_word != encoded_word) | 188 if (decoded_word != encoded_word) |
192 *parse_result_flags |= | 189 *parse_result_flags |= |
193 net::HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS; | 190 net::HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS; |
194 if (IsStringUTF8(decoded_word)) { | 191 if (IsStringUTF8(decoded_word)) { |
195 output->swap(decoded_word); | 192 output->swap(decoded_word); |
196 return true; | 193 return true; |
197 // We can try either the OS default charset or 'origin charset' here, | 194 // We can try either the OS default charset or 'origin charset' here, |
198 // As far as I can tell, IE does not support it. However, I've seen | 195 // As far as I can tell, IE does not support it. However, I've seen |
199 // web servers emit %-escaped string in a legacy encoding (usually | 196 // web servers emit %-escaped string in a legacy encoding (usually |
200 // origin charset). | 197 // origin charset). |
(...skipping 27 matching lines...) Expand all Loading... |
228 if (!is_previous_token_rfc2047) | 225 if (!is_previous_token_rfc2047) |
229 decoded_value.push_back(' '); | 226 decoded_value.push_back(' '); |
230 continue; | 227 continue; |
231 } | 228 } |
232 // We don't support a single multibyte character split into | 229 // We don't support a single multibyte character split into |
233 // adjacent encoded words. Some broken mail clients emit headers | 230 // adjacent encoded words. Some broken mail clients emit headers |
234 // with that problem, but most web servers usually encode a filename | 231 // with that problem, but most web servers usually encode a filename |
235 // in a single encoded-word. Firefox/Thunderbird do not support | 232 // in a single encoded-word. Firefox/Thunderbird do not support |
236 // it, either. | 233 // it, either. |
237 std::string decoded; | 234 std::string decoded; |
238 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, | 235 if (!DecodeWord(t.token(), |
239 &decoded, ¤t_parse_result_flags)) | 236 referrer_charset, |
| 237 &is_previous_token_rfc2047, |
| 238 &decoded, |
| 239 ¤t_parse_result_flags)) |
240 return false; | 240 return false; |
241 decoded_value.append(decoded); | 241 decoded_value.append(decoded); |
242 } | 242 } |
243 output->swap(decoded_value); | 243 output->swap(decoded_value); |
244 if (parse_result_flags && !output->empty()) | 244 if (parse_result_flags && !output->empty()) |
245 *parse_result_flags |= current_parse_result_flags; | 245 *parse_result_flags |= current_parse_result_flags; |
246 return true; | 246 return true; |
247 } | 247 } |
248 | 248 |
249 // Parses the charset and value-chars out of an ext-value string. | 249 // Parses the charset and value-chars out of an ext-value string. |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
321 decoded->clear(); | 321 decoded->clear(); |
322 return true; | 322 return true; |
323 } | 323 } |
324 | 324 |
325 std::string unescaped = net::UnescapeURLComponent( | 325 std::string unescaped = net::UnescapeURLComponent( |
326 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); | 326 value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); |
327 | 327 |
328 return net::ConvertToUtf8AndNormalize(unescaped, charset.c_str(), decoded); | 328 return net::ConvertToUtf8AndNormalize(unescaped, charset.c_str(), decoded); |
329 } | 329 } |
330 | 330 |
331 } // namespace | 331 } // namespace |
332 | 332 |
333 namespace net { | 333 namespace net { |
334 | 334 |
335 HttpContentDisposition::HttpContentDisposition( | 335 HttpContentDisposition::HttpContentDisposition( |
336 const std::string& header, const std::string& referrer_charset) | 336 const std::string& header, |
337 : type_(INLINE), | 337 const std::string& referrer_charset) |
338 parse_result_flags_(INVALID) { | 338 : type_(INLINE), parse_result_flags_(INVALID) { |
339 Parse(header, referrer_charset); | 339 Parse(header, referrer_charset); |
340 } | 340 } |
341 | 341 |
342 HttpContentDisposition::~HttpContentDisposition() { | 342 HttpContentDisposition::~HttpContentDisposition() { |
343 } | 343 } |
344 | 344 |
345 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( | 345 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( |
346 std::string::const_iterator begin, std::string::const_iterator end) { | 346 std::string::const_iterator begin, |
| 347 std::string::const_iterator end) { |
347 DCHECK(type_ == INLINE); | 348 DCHECK(type_ == INLINE); |
348 std::string::const_iterator delimiter = std::find(begin, end, ';'); | 349 std::string::const_iterator delimiter = std::find(begin, end, ';'); |
349 | 350 |
350 std::string::const_iterator type_begin = begin; | 351 std::string::const_iterator type_begin = begin; |
351 std::string::const_iterator type_end = delimiter; | 352 std::string::const_iterator type_end = delimiter; |
352 HttpUtil::TrimLWS(&type_begin, &type_end); | 353 HttpUtil::TrimLWS(&type_begin, &type_end); |
353 | 354 |
354 // If the disposition-type isn't a valid token then the | 355 // If the disposition-type isn't a valid token then the |
355 // Content-Disposition header is malformed, and we treat the first bytes as | 356 // Content-Disposition header is malformed, and we treat the first bytes as |
356 // a parameter rather than a disposition-type. | 357 // a parameter rather than a disposition-type. |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
398 std::string::const_iterator pos = header.begin(); | 399 std::string::const_iterator pos = header.begin(); |
399 std::string::const_iterator end = header.end(); | 400 std::string::const_iterator end = header.end(); |
400 pos = ConsumeDispositionType(pos, end); | 401 pos = ConsumeDispositionType(pos, end); |
401 | 402 |
402 std::string name; | 403 std::string name; |
403 std::string filename; | 404 std::string filename; |
404 std::string ext_filename; | 405 std::string ext_filename; |
405 | 406 |
406 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); | 407 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); |
407 while (iter.GetNext()) { | 408 while (iter.GetNext()) { |
408 if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 409 if (filename.empty() && |
409 iter.name_end(), | 410 LowerCaseEqualsASCII(iter.name_begin(), iter.name_end(), "filename")) { |
410 "filename")) { | 411 DecodeFilenameValue( |
411 DecodeFilenameValue(iter.value(), referrer_charset, &filename, | 412 iter.value(), referrer_charset, &filename, &parse_result_flags_); |
412 &parse_result_flags_); | |
413 if (!filename.empty()) | 413 if (!filename.empty()) |
414 parse_result_flags_ |= HAS_FILENAME; | 414 parse_result_flags_ |= HAS_FILENAME; |
415 } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 415 } else if (name.empty() && |
416 iter.name_end(), | 416 LowerCaseEqualsASCII( |
417 "name")) { | 417 iter.name_begin(), iter.name_end(), "name")) { |
418 DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL); | 418 DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL); |
419 if (!name.empty()) | 419 if (!name.empty()) |
420 parse_result_flags_ |= HAS_NAME; | 420 parse_result_flags_ |= HAS_NAME; |
421 } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 421 } else if (ext_filename.empty() && |
422 iter.name_end(), | 422 LowerCaseEqualsASCII( |
423 "filename*")) { | 423 iter.name_begin(), iter.name_end(), "filename*")) { |
424 DecodeExtValue(iter.raw_value(), &ext_filename); | 424 DecodeExtValue(iter.raw_value(), &ext_filename); |
425 if (!ext_filename.empty()) | 425 if (!ext_filename.empty()) |
426 parse_result_flags_ |= HAS_EXT_FILENAME; | 426 parse_result_flags_ |= HAS_EXT_FILENAME; |
427 } | 427 } |
428 } | 428 } |
429 | 429 |
430 if (!ext_filename.empty()) | 430 if (!ext_filename.empty()) |
431 filename_ = ext_filename; | 431 filename_ = ext_filename; |
432 else if (!filename.empty()) | 432 else if (!filename.empty()) |
433 filename_ = filename; | 433 filename_ = filename; |
434 else | 434 else |
435 filename_ = name; | 435 filename_ = name; |
436 } | 436 } |
437 | 437 |
438 } // namespace net | 438 } // namespace net |
OLD | NEW |