OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/http/http_content_disposition.h" | 5 #include "net/http/http_content_disposition.h" |
6 | 6 |
7 #include "base/base64.h" | 7 #include "base/base64.h" |
8 #include "base/i18n/icu_string_conversions.h" | 8 #include "base/i18n/icu_string_conversions.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/string_util.h" | 10 #include "base/string_util.h" |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
90 ucnv_close(converter); | 90 ucnv_close(converter); |
91 if (U_FAILURE(err)) | 91 if (U_FAILURE(err)) |
92 return false; | 92 return false; |
93 output->resize(output_length); | 93 output->resize(output_length); |
94 return true; | 94 return true; |
95 } | 95 } |
96 | 96 |
97 bool DecodeWord(const std::string& encoded_word, | 97 bool DecodeWord(const std::string& encoded_word, |
98 const std::string& referrer_charset, | 98 const std::string& referrer_charset, |
99 bool* is_rfc2047, | 99 bool* is_rfc2047, |
100 std::string* output) { | 100 std::string* output, |
| 101 HttpContentDisposition::ParseResult* parse_result) { |
101 *is_rfc2047 = false; | 102 *is_rfc2047 = false; |
102 output->clear(); | 103 output->clear(); |
103 if (encoded_word.empty()) | 104 if (encoded_word.empty()) |
104 return true; | 105 return true; |
105 | 106 |
106 if (!IsStringASCII(encoded_word)) { | 107 if (!IsStringASCII(encoded_word)) { |
107 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 108 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
108 if (IsStringUTF8(encoded_word)) { | 109 if (IsStringUTF8(encoded_word)) { |
109 *output = encoded_word; | 110 *output = encoded_word; |
110 } else { | 111 } else { |
111 string16 utf16_output; | 112 string16 utf16_output; |
112 if (!referrer_charset.empty() && | 113 if (!referrer_charset.empty() && |
113 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), | 114 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), |
114 base::OnStringConversionError::FAIL, | 115 base::OnStringConversionError::FAIL, |
115 &utf16_output)) { | 116 &utf16_output)) { |
116 *output = UTF16ToUTF8(utf16_output); | 117 *output = UTF16ToUTF8(utf16_output); |
117 } else { | 118 } else { |
118 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 119 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
119 } | 120 } |
120 } | 121 } |
121 | 122 |
| 123 parse_result->has_non_ascii_strings = true; |
122 return true; | 124 return true; |
123 } | 125 } |
124 | 126 |
125 // RFC 2047 : one of encoding methods supported by Firefox and relatively | 127 // RFC 2047 : one of encoding methods supported by Firefox and relatively |
126 // widely used by web servers. | 128 // widely used by web servers. |
127 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. | 129 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. |
128 // We don't care about the length restriction (72 bytes) because | 130 // We don't care about the length restriction (72 bytes) because |
129 // many web servers generate encoded words longer than the limit. | 131 // many web servers generate encoded words longer than the limit. |
130 std::string tmp; | 132 std::string decoded_word; |
131 *is_rfc2047 = true; | 133 *is_rfc2047 = true; |
132 int part_index = 0; | 134 int part_index = 0; |
133 std::string charset; | 135 std::string charset; |
134 StringTokenizer t(encoded_word, "?"); | 136 StringTokenizer t(encoded_word, "?"); |
135 RFC2047EncodingType enc_type = Q_ENCODING; | 137 RFC2047EncodingType enc_type = Q_ENCODING; |
136 while (*is_rfc2047 && t.GetNext()) { | 138 while (*is_rfc2047 && t.GetNext()) { |
137 std::string part = t.token(); | 139 std::string part = t.token(); |
138 switch (part_index) { | 140 switch (part_index) { |
139 case 0: | 141 case 0: |
140 if (part != "=") { | 142 if (part != "=") { |
(...skipping 12 matching lines...) Expand all Loading... |
153 part.find_first_of("bBqQ") == std::string::npos) { | 155 part.find_first_of("bBqQ") == std::string::npos) { |
154 *is_rfc2047 = false; | 156 *is_rfc2047 = false; |
155 break; | 157 break; |
156 } | 158 } |
157 if (part[0] == 'b' || part[0] == 'B') { | 159 if (part[0] == 'b' || part[0] == 'B') { |
158 enc_type = B_ENCODING; | 160 enc_type = B_ENCODING; |
159 } | 161 } |
160 ++part_index; | 162 ++part_index; |
161 break; | 163 break; |
162 case 3: | 164 case 3: |
163 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp); | 165 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word); |
164 if (!*is_rfc2047) { | 166 if (!*is_rfc2047) { |
165 // Last minute failure. Invalid B/Q encoding. Rather than | 167 // Last minute failure. Invalid B/Q encoding. Rather than |
166 // passing it through, return now. | 168 // passing it through, return now. |
167 return false; | 169 return false; |
168 } | 170 } |
169 ++part_index; | 171 ++part_index; |
170 break; | 172 break; |
171 case 4: | 173 case 4: |
172 if (part != "=") { | 174 if (part != "=") { |
173 // Another last minute failure ! | 175 // Another last minute failure ! |
174 // Likely to be a case of two encoded-words in a row or | 176 // Likely to be a case of two encoded-words in a row or |
175 // an encoded word followed by a non-encoded word. We can be | 177 // an encoded word followed by a non-encoded word. We can be |
176 // generous, but it does not help much in terms of compatibility, | 178 // generous, but it does not help much in terms of compatibility, |
177 // I believe. Return immediately. | 179 // I believe. Return immediately. |
178 *is_rfc2047 = false; | 180 *is_rfc2047 = false; |
179 return false; | 181 return false; |
180 } | 182 } |
181 ++part_index; | 183 ++part_index; |
182 break; | 184 break; |
183 default: | 185 default: |
184 *is_rfc2047 = false; | 186 *is_rfc2047 = false; |
185 return false; | 187 return false; |
186 } | 188 } |
187 } | 189 } |
188 | 190 |
189 if (*is_rfc2047) { | 191 if (*is_rfc2047) { |
190 if (*(encoded_word.end() - 1) == '=') { | 192 if (*(encoded_word.end() - 1) == '=') { |
191 output->swap(tmp); | 193 output->swap(decoded_word); |
| 194 parse_result->has_rfc2047_encoded_strings = true; |
192 return true; | 195 return true; |
193 } | 196 } |
194 // encoded_word ending prematurely with '?' or extra '?' | 197 // encoded_word ending prematurely with '?' or extra '?' |
195 *is_rfc2047 = false; | 198 *is_rfc2047 = false; |
196 return false; | 199 return false; |
197 } | 200 } |
198 | 201 |
199 // We're not handling 'especial' characters quoted with '\', but | 202 // We're not handling 'especial' characters quoted with '\', but |
200 // it should be Ok because we're not an email client but a | 203 // it should be Ok because we're not an email client but a |
201 // web browser. | 204 // web browser. |
202 | 205 |
203 // What IE6/7 does: %-escaped UTF-8. | 206 // What IE6/7 does: %-escaped UTF-8. |
204 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES); | 207 decoded_word = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES); |
205 if (IsStringUTF8(tmp)) { | 208 if (decoded_word != encoded_word) |
206 output->swap(tmp); | 209 parse_result->has_percent_encoded_strings = true; |
| 210 if (IsStringUTF8(decoded_word)) { |
| 211 output->swap(decoded_word); |
207 return true; | 212 return true; |
208 // We can try either the OS default charset or 'origin charset' here, | 213 // We can try either the OS default charset or 'origin charset' here, |
209 // As far as I can tell, IE does not support it. However, I've seen | 214 // As far as I can tell, IE does not support it. However, I've seen |
210 // web servers emit %-escaped string in a legacy encoding (usually | 215 // web servers emit %-escaped string in a legacy encoding (usually |
211 // origin charset). | 216 // origin charset). |
212 // TODO(jungshik) : Test IE further and consider adding a fallback here. | 217 // TODO(jungshik) : Test IE further and consider adding a fallback here. |
213 } | 218 } |
214 return false; | 219 return false; |
215 } | 220 } |
216 | 221 |
217 // Decodes the value of a 'filename' or 'name' parameter given as |input|. The | 222 // Decodes the value of a 'filename' or 'name' parameter given as |input|. The |
218 // value is supposed to be of the form: | 223 // value is supposed to be of the form: |
219 // | 224 // |
220 // value = token | quoted-string | 225 // value = token | quoted-string |
221 // | 226 // |
222 // However we currently also allow RFC 2047 encoding and non-ASCII | 227 // However we currently also allow RFC 2047 encoding and non-ASCII |
223 // strings. Non-ASCII strings are interpreted based on |referrer_charset|. | 228 // strings. Non-ASCII strings are interpreted based on |referrer_charset|. |
//
// Returns true and stores the decoded value in |output| when every
// whitespace-separated word decodes successfully; returns false as soon as
// DecodeWord() fails, leaving |output| untouched.
//
// NEW side only: |parse_result| is optional (may be NULL). The encoding flags
// are gathered in a local ParseResult and copied into |parse_result| only on
// success, so a failed decode never modifies the caller's flags. Only
// has_non_ascii_strings, has_percent_encoded_strings and
// has_rfc2047_encoded_strings are written (by assignment, not OR-ed).
224 bool DecodeFilenameValue(const std::string& input, | 229 bool DecodeFilenameValue(const std::string& input, |
225 const std::string& referrer_charset, | 230 const std::string& referrer_charset, |
226 std::string* output) { | 231 std::string* output, |
227 std::string tmp; | 232 HttpContentDisposition::ParseResult* parse_result) { |
| 233 HttpContentDisposition::ParseResult current_parse_result; |
| 234 std::string decoded_value; |
| 235 bool is_previous_token_rfc2047 = true; |
| 236 |
228 // Tokenize with whitespace characters. | 237 // Tokenize with whitespace characters. |
229 StringTokenizer t(input, " \t\n\r"); | 238 StringTokenizer t(input, " \t\n\r"); |
230 t.set_options(StringTokenizer::RETURN_DELIMS); | 239 t.set_options(StringTokenizer::RETURN_DELIMS); |
231 bool is_previous_token_rfc2047 = true; | |
232 while (t.GetNext()) { | 240 while (t.GetNext()) { |
233 if (t.token_is_delim()) { | 241 if (t.token_is_delim()) { |
234 // If the previous non-delimiter token is not RFC2047-encoded, | 242 // If the previous non-delimiter token is not RFC2047-encoded, |
235 // put in a space in its place. Otherwise, skip over it. | 243 // put in a space in its place. Otherwise, skip over it. |
236 if (!is_previous_token_rfc2047) { | 244 if (!is_previous_token_rfc2047) |
237 tmp.push_back(' '); | 245 decoded_value.push_back(' '); |
238 } | |
239 continue; | 246 continue; |
240 } | 247 } |
241 // We don't support a single multibyte character split into | 248 // We don't support a single multibyte character split into |
242 // adjacent encoded words. Some broken mail clients emit headers | 249 // adjacent encoded words. Some broken mail clients emit headers |
243 // with that problem, but most web servers usually encode a filename | 250 // with that problem, but most web servers usually encode a filename |
244 // in a single encoded-word. Firefox/Thunderbird do not support | 251 // in a single encoded-word. Firefox/Thunderbird do not support |
245 // it, either. | 252 // it, either. |
246 std::string decoded; | 253 std::string decoded; |
247 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, | 254 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, |
248 &decoded)) | 255 &decoded, &current_parse_result)) |
249 return false; | 256 return false; |
250 tmp.append(decoded); | 257 decoded_value.append(decoded); |
251 } | 258 } |
252 output->swap(tmp); | 259 output->swap(decoded_value); |
// Publish the flags only now that the whole value decoded successfully.
| 260 if (parse_result) { |
| 261 parse_result->has_non_ascii_strings = |
| 262 current_parse_result.has_non_ascii_strings; |
| 263 parse_result->has_percent_encoded_strings = |
| 264 current_parse_result.has_percent_encoded_strings; |
| 265 parse_result->has_rfc2047_encoded_strings = |
| 266 current_parse_result.has_rfc2047_encoded_strings; |
| 267 } |
253 return true; | 268 return true; |
254 } | 269 } |
255 | 270 |
256 // Parses the charset and value-chars out of an ext-value string. | 271 // Parses the charset and value-chars out of an ext-value string. |
257 // | 272 // |
258 // ext-value = charset "'" [ language ] "'" value-chars | 273 // ext-value = charset "'" [ language ] "'" value-chars |
259 bool ParseExtValueComponents(const std::string& input, | 274 bool ParseExtValueComponents(const std::string& input, |
260 std::string* charset, | 275 std::string* charset, |
261 std::string* value_chars) { | 276 std::string* value_chars) { |
262 StringTokenizer t(input, "'"); | 277 StringTokenizer t(input, "'"); |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
330 } | 345 } |
331 | 346 |
332 std::string unescaped = UnescapeURLComponent(value, | 347 std::string unescaped = UnescapeURLComponent(value, |
333 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); | 348 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); |
334 | 349 |
335 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); | 350 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); |
336 } | 351 } |
337 | 352 |
338 } // namespace | 353 } // namespace |
339 | 354 |
// NEW side only: default-constructs a ParseResult with every flag cleared
// (false). Individual flags are switched on elsewhere in this file as the
// corresponding header features are encountered.
| 355 HttpContentDisposition::ParseResult::ParseResult() |
| 356 : has_disposition_type(false), |
| 357 has_unknown_disposition_type(false), |
| 358 has_name(false), |
| 359 has_filename(false), |
| 360 has_ext_filename(false), |
| 361 has_non_ascii_strings(false), |
| 362 has_percent_encoded_strings(false), |
| 363 has_rfc2047_encoded_strings(false) { |
| 364 } |
| 365 |
// Constructs the object by parsing |header| right away. |referrer_charset| is
// forwarded unchanged to Parse(). type_ starts out as INLINE before Parse()
// runs.
340 HttpContentDisposition::HttpContentDisposition( | 366 HttpContentDisposition::HttpContentDisposition( |
341 const std::string& header, const std::string& referrer_charset) | 367 const std::string& header, const std::string& referrer_charset) |
342 : type_(INLINE) { | 368 : type_(INLINE) { |
343 Parse(header, referrer_charset); | 369 Parse(header, referrer_charset); |
344 } | 370 } |
345 | 371 |
// Empty destructor body: no explicit cleanup is performed here.
346 HttpContentDisposition::~HttpContentDisposition() { | 372 HttpContentDisposition::~HttpContentDisposition() { |
347 } | 373 } |
348 | 374 |
// Reads the disposition-type from [begin, end), i.e. the text up to the first
// ';' (or up to |end| when there is no ';'), with surrounding LWS trimmed.
//
// Returns |begin| without touching any state when that text is not a valid
// token; otherwise returns the position of the delimiter (|end| if no ';' was
// found).
//
// NEW side: a valid token sets parse_result_.has_disposition_type. "inline"
// and "attachment" (case-insensitive) select INLINE and ATTACHMENT. Any other
// token also yields ATTACHMENT but additionally sets
// parse_result_.has_unknown_disposition_type. The OLD side maps every token
// other than "inline" to ATTACHMENT and records no flags.
349 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( | 375 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( |
350 std::string::const_iterator begin, std::string::const_iterator end) { | 376 std::string::const_iterator begin, std::string::const_iterator end) { |
351 DCHECK(type_ == INLINE); | 377 DCHECK(type_ == INLINE); |
352 std::string::const_iterator delimiter = std::find(begin, end, ';'); | 378 std::string::const_iterator delimiter = std::find(begin, end, ';'); |
353 | 379 |
354 std::string::const_iterator type_begin = begin; | 380 std::string::const_iterator type_begin = begin; |
355 std::string::const_iterator type_end = delimiter; | 381 std::string::const_iterator type_end = delimiter; |
356 HttpUtil::TrimLWS(&type_begin, &type_end); | 382 HttpUtil::TrimLWS(&type_begin, &type_end); |
357 | 383 |
358 // If the disposition-type isn't a valid token then the | 384 // If the disposition-type isn't a valid token then the |
359 // Content-Disposition header is malformed, and we treat the first bytes as | 385 // Content-Disposition header is malformed, and we treat the first bytes as |
360 // a parameter rather than a disposition-type. | 386 // a parameter rather than a disposition-type. |
361 if (!HttpUtil::IsToken(type_begin, type_end)) | 387 if (!HttpUtil::IsToken(type_begin, type_end)) |
362 return begin; | 388 return begin; |
363 | 389 |
| 390 parse_result_.has_disposition_type = true; |
| 391 |
// NOTE(review): presumably IsToken() already rejects '=', so a "name=value"
// parameter can never be mistaken for a disposition-type here - confirm.
364 DCHECK(std::find(type_begin, type_end, '=') == type_end); | 392 DCHECK(std::find(type_begin, type_end, '=') == type_end); |
365 | 393 |
366 if (!LowerCaseEqualsASCII(type_begin, type_end, "inline")) | 394 if (LowerCaseEqualsASCII(type_begin, type_end, "inline")) { |
| 395 type_ = INLINE; |
| 396 } else if (LowerCaseEqualsASCII(type_begin, type_end, "attachment")) { |
367 type_ = ATTACHMENT; | 397 type_ = ATTACHMENT; |
| 398 } else { |
| 399 parse_result_.has_unknown_disposition_type = true; |
| 400 type_ = ATTACHMENT; |
| 401 } |
368 return delimiter; | 402 return delimiter; |
369 } | 403 } |
370 | 404 |
371 // http://tools.ietf.org/html/rfc6266 | 405 // http://tools.ietf.org/html/rfc6266 |
372 // | 406 // |
373 // content-disposition = "Content-Disposition" ":" | 407 // content-disposition = "Content-Disposition" ":" |
374 // disposition-type *( ";" disposition-parm ) | 408 // disposition-type *( ";" disposition-parm ) |
375 // | 409 // |
376 // disposition-type = "inline" | "attachment" | disp-ext-type | 410 // disposition-type = "inline" | "attachment" | disp-ext-type |
377 // ; case-insensitive | 411 // ; case-insensitive |
(...skipping 19 matching lines...) Expand all Loading... |
397 | 431 |
398 std::string name; | 432 std::string name; |
399 std::string filename; | 433 std::string filename; |
400 std::string ext_filename; | 434 std::string ext_filename; |
401 | 435 |
402 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); | 436 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); |
403 while (iter.GetNext()) { | 437 while (iter.GetNext()) { |
404 if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 438 if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
405 iter.name_end(), | 439 iter.name_end(), |
406 "filename")) { | 440 "filename")) { |
407 DecodeFilenameValue(iter.value(), referrer_charset, &filename); | 441 parse_result_.has_filename = |
| 442 DecodeFilenameValue(iter.value(), referrer_charset, &filename, |
| 443 &parse_result_); |
408 } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 444 } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
409 iter.name_end(), | 445 iter.name_end(), |
410 "name")) { | 446 "name")) { |
411 DecodeFilenameValue(iter.value(), referrer_charset, &name); | 447 parse_result_.has_name = |
| 448 DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL); |
412 } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 449 } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
413 iter.name_end(), | 450 iter.name_end(), |
414 "filename*")) { | 451 "filename*")) { |
415 DecodeExtValue(iter.raw_value(), &ext_filename); | 452 parse_result_.has_ext_filename = |
| 453 DecodeExtValue(iter.raw_value(), &ext_filename); |
416 } | 454 } |
417 } | 455 } |
418 | 456 |
419 if (!ext_filename.empty()) | 457 if (!ext_filename.empty()) |
420 filename_ = ext_filename; | 458 filename_ = ext_filename; |
421 else if (!filename.empty()) | 459 else if (!filename.empty()) |
422 filename_ = filename; | 460 filename_ = filename; |
423 else | 461 else |
424 filename_ = name; | 462 filename_ = name; |
425 } | 463 } |
426 | 464 |
427 } // namespace net | 465 } // namespace net |
OLD | NEW |