Index: net/http/http_content_disposition.cc |
diff --git a/net/http/http_content_disposition.cc b/net/http/http_content_disposition.cc |
deleted file mode 100644 |
index 3a1dedeb788077aa7ec46f87c57dd0f1ca6c60d8..0000000000000000000000000000000000000000 |
--- a/net/http/http_content_disposition.cc |
+++ /dev/null |
@@ -1,438 +0,0 @@ |
-// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#include "net/http/http_content_disposition.h" |
- |
-#include "base/base64.h" |
-#include "base/logging.h" |
-#include "base/strings/string_tokenizer.h" |
-#include "base/strings/string_util.h" |
-#include "base/strings/sys_string_conversions.h" |
-#include "base/strings/utf_string_conversions.h" |
-#include "net/base/net_string_util.h" |
-#include "net/base/net_util.h" |
-#include "net/http/http_util.h" |
- |
-namespace { |
- |
-enum RFC2047EncodingType { |
- Q_ENCODING, |
- B_ENCODING |
-}; |
- |
-// Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to |
-// decoding a quoted-printable string. Returns true if the input was valid. |
-bool DecodeQEncoding(const std::string& input, std::string* output) { |
- std::string temp; |
- temp.reserve(input.size()); |
- for (std::string::const_iterator it = input.begin(); it != input.end(); |
- ++it) { |
- if (*it == '_') { |
- temp.push_back(' '); |
- } else if (*it == '=') { |
- if ((input.end() - it < 3) || |
- !IsHexDigit(static_cast<unsigned char>(*(it + 1))) || |
- !IsHexDigit(static_cast<unsigned char>(*(it + 2)))) |
- return false; |
- unsigned char ch = HexDigitToInt(*(it + 1)) * 16 + |
- HexDigitToInt(*(it + 2)); |
- temp.push_back(static_cast<char>(ch)); |
- ++it; |
- ++it; |
- } else if (0x20 < *it && *it < 0x7F && *it != '?') { |
- // In a Q-encoded word, only printable ASCII characters |
- // represent themselves. Besides, space, '=', '_' and '?' are |
- // not allowed, but they're already filtered out. |
- DCHECK_NE('=', *it); |
- DCHECK_NE('?', *it); |
- DCHECK_NE('_', *it); |
- temp.push_back(*it); |
- } else { |
- return false; |
- } |
- } |
- output->swap(temp); |
- return true; |
-} |
- |
-// Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding |
-// type is specified in |enc_type|. |
-bool DecodeBQEncoding(const std::string& part, |
- RFC2047EncodingType enc_type, |
- const std::string& charset, |
- std::string* output) { |
- std::string decoded; |
- if (!((enc_type == B_ENCODING) ? |
- base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) { |
- return false; |
- } |
- |
- if (decoded.empty()) { |
- output->clear(); |
- return true; |
- } |
- |
- return net::ConvertToUtf8(decoded, charset.c_str(), output); |
-} |
- |
-bool DecodeWord(const std::string& encoded_word, |
- const std::string& referrer_charset, |
- bool* is_rfc2047, |
- std::string* output, |
- int* parse_result_flags) { |
- *is_rfc2047 = false; |
- output->clear(); |
- if (encoded_word.empty()) |
- return true; |
- |
- if (!base::IsStringASCII(encoded_word)) { |
- // Try UTF-8, referrer_charset and the native OS default charset in turn. |
- if (base::IsStringUTF8(encoded_word)) { |
- *output = encoded_word; |
- } else { |
- base::string16 utf16_output; |
- if (!referrer_charset.empty() && |
- net::ConvertToUTF16(encoded_word, referrer_charset.c_str(), |
- &utf16_output)) { |
- *output = base::UTF16ToUTF8(utf16_output); |
- } else { |
- *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
- } |
- } |
- |
- *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; |
- return true; |
- } |
- |
- // RFC 2047 : one of encoding methods supported by Firefox and relatively |
- // widely used by web servers. |
- // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. |
- // We don't care about the length restriction (72 bytes) because |
- // many web servers generate encoded words longer than the limit. |
- std::string decoded_word; |
- *is_rfc2047 = true; |
- int part_index = 0; |
- std::string charset; |
- base::StringTokenizer t(encoded_word, "?"); |
- RFC2047EncodingType enc_type = Q_ENCODING; |
- while (*is_rfc2047 && t.GetNext()) { |
- std::string part = t.token(); |
- switch (part_index) { |
- case 0: |
- if (part != "=") { |
- *is_rfc2047 = false; |
- break; |
- } |
- ++part_index; |
- break; |
- case 1: |
- // Do we need charset validity check here? |
- charset = part; |
- ++part_index; |
- break; |
- case 2: |
- if (part.size() > 1 || |
- part.find_first_of("bBqQ") == std::string::npos) { |
- *is_rfc2047 = false; |
- break; |
- } |
- if (part[0] == 'b' || part[0] == 'B') { |
- enc_type = B_ENCODING; |
- } |
- ++part_index; |
- break; |
- case 3: |
- *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word); |
- if (!*is_rfc2047) { |
- // Last minute failure. Invalid B/Q encoding. Rather than |
- // passing it through, return now. |
- return false; |
- } |
- ++part_index; |
- break; |
- case 4: |
- if (part != "=") { |
- // Another last minute failure ! |
- // Likely to be a case of two encoded-words in a row or |
- // an encoded word followed by a non-encoded word. We can be |
- // generous, but it does not help much in terms of compatibility, |
- // I believe. Return immediately. |
- *is_rfc2047 = false; |
- return false; |
- } |
- ++part_index; |
- break; |
- default: |
- *is_rfc2047 = false; |
- return false; |
- } |
- } |
- |
- if (*is_rfc2047) { |
- if (*(encoded_word.end() - 1) == '=') { |
- output->swap(decoded_word); |
- *parse_result_flags |= |
- net::HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS; |
- return true; |
- } |
- // encoded_word ending prematurelly with '?' or extra '?' |
- *is_rfc2047 = false; |
- return false; |
- } |
- |
- // We're not handling 'especial' characters quoted with '\', but |
- // it should be Ok because we're not an email client but a |
- // web browser. |
- |
- // What IE6/7 does: %-escaped UTF-8. |
- decoded_word = net::UnescapeURLComponent(encoded_word, |
- net::UnescapeRule::SPACES); |
- if (decoded_word != encoded_word) |
- *parse_result_flags |= |
- net::HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS; |
- if (base::IsStringUTF8(decoded_word)) { |
- output->swap(decoded_word); |
- return true; |
- // We can try either the OS default charset or 'origin charset' here, |
- // As far as I can tell, IE does not support it. However, I've seen |
- // web servers emit %-escaped string in a legacy encoding (usually |
- // origin charset). |
- // TODO(jungshik) : Test IE further and consider adding a fallback here. |
- } |
- return false; |
-} |
- |
-// Decodes the value of a 'filename' or 'name' parameter given as |input|. The |
-// value is supposed to be of the form: |
-// |
-// value = token | quoted-string |
-// |
-// However we currently also allow RFC 2047 encoding and non-ASCII |
-// strings. Non-ASCII strings are interpreted based on |referrer_charset|. |
-bool DecodeFilenameValue(const std::string& input, |
- const std::string& referrer_charset, |
- std::string* output, |
- int* parse_result_flags) { |
- int current_parse_result_flags = 0; |
- std::string decoded_value; |
- bool is_previous_token_rfc2047 = true; |
- |
- // Tokenize with whitespace characters. |
- base::StringTokenizer t(input, " \t\n\r"); |
- t.set_options(base::StringTokenizer::RETURN_DELIMS); |
- while (t.GetNext()) { |
- if (t.token_is_delim()) { |
- // If the previous non-delimeter token is not RFC2047-encoded, |
- // put in a space in its place. Otheriwse, skip over it. |
- if (!is_previous_token_rfc2047) |
- decoded_value.push_back(' '); |
- continue; |
- } |
- // We don't support a single multibyte character split into |
- // adjacent encoded words. Some broken mail clients emit headers |
- // with that problem, but most web servers usually encode a filename |
- // in a single encoded-word. Firefox/Thunderbird do not support |
- // it, either. |
- std::string decoded; |
- if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, |
- &decoded, &current_parse_result_flags)) |
- return false; |
- decoded_value.append(decoded); |
- } |
- output->swap(decoded_value); |
- if (parse_result_flags && !output->empty()) |
- *parse_result_flags |= current_parse_result_flags; |
- return true; |
-} |
- |
-// Parses the charset and value-chars out of an ext-value string. |
-// |
-// ext-value = charset "'" [ language ] "'" value-chars |
-bool ParseExtValueComponents(const std::string& input, |
- std::string* charset, |
- std::string* value_chars) { |
- base::StringTokenizer t(input, "'"); |
- t.set_options(base::StringTokenizer::RETURN_DELIMS); |
- std::string temp_charset; |
- std::string temp_value; |
- int numDelimsSeen = 0; |
- while (t.GetNext()) { |
- if (t.token_is_delim()) { |
- ++numDelimsSeen; |
- continue; |
- } else { |
- switch (numDelimsSeen) { |
- case 0: |
- temp_charset = t.token(); |
- break; |
- case 1: |
- // Language is ignored. |
- break; |
- case 2: |
- temp_value = t.token(); |
- break; |
- default: |
- return false; |
- } |
- } |
- } |
- if (numDelimsSeen != 2) |
- return false; |
- if (temp_charset.empty() || temp_value.empty()) |
- return false; |
- charset->swap(temp_charset); |
- value_chars->swap(temp_value); |
- return true; |
-} |
- |
-// http://tools.ietf.org/html/rfc5987#section-3.2 |
-// |
-// ext-value = charset "'" [ language ] "'" value-chars |
-// |
-// charset = "UTF-8" / "ISO-8859-1" / mime-charset |
-// |
-// mime-charset = 1*mime-charsetc |
-// mime-charsetc = ALPHA / DIGIT |
-// / "!" / "#" / "$" / "%" / "&" |
-// / "+" / "-" / "^" / "_" / "`" |
-// / "{" / "}" / "~" |
-// |
-// language = <Language-Tag, defined in [RFC5646], Section 2.1> |
-// |
-// value-chars = *( pct-encoded / attr-char ) |
-// |
-// pct-encoded = "%" HEXDIG HEXDIG |
-// |
-// attr-char = ALPHA / DIGIT |
-// / "!" / "#" / "$" / "&" / "+" / "-" / "." |
-// / "^" / "_" / "`" / "|" / "~" |
-bool DecodeExtValue(const std::string& param_value, std::string* decoded) { |
- if (param_value.find('"') != std::string::npos) |
- return false; |
- |
- std::string charset; |
- std::string value; |
- if (!ParseExtValueComponents(param_value, &charset, &value)) |
- return false; |
- |
- // RFC 5987 value should be ASCII-only. |
- if (!base::IsStringASCII(value)) { |
- decoded->clear(); |
- return true; |
- } |
- |
- std::string unescaped = net::UnescapeURLComponent( |
- value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); |
- |
- return net::ConvertToUtf8AndNormalize(unescaped, charset.c_str(), decoded); |
-} |
- |
-} // namespace |
- |
-namespace net { |
- |
-HttpContentDisposition::HttpContentDisposition( |
- const std::string& header, const std::string& referrer_charset) |
- : type_(INLINE), |
- parse_result_flags_(INVALID) { |
- Parse(header, referrer_charset); |
-} |
- |
-HttpContentDisposition::~HttpContentDisposition() { |
-} |
- |
-std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( |
- std::string::const_iterator begin, std::string::const_iterator end) { |
- DCHECK(type_ == INLINE); |
- std::string::const_iterator delimiter = std::find(begin, end, ';'); |
- |
- std::string::const_iterator type_begin = begin; |
- std::string::const_iterator type_end = delimiter; |
- HttpUtil::TrimLWS(&type_begin, &type_end); |
- |
- // If the disposition-type isn't a valid token the then the |
- // Content-Disposition header is malformed, and we treat the first bytes as |
- // a parameter rather than a disposition-type. |
- if (!HttpUtil::IsToken(type_begin, type_end)) |
- return begin; |
- |
- parse_result_flags_ |= HAS_DISPOSITION_TYPE; |
- |
- DCHECK(std::find(type_begin, type_end, '=') == type_end); |
- |
- if (LowerCaseEqualsASCII(type_begin, type_end, "inline")) { |
- type_ = INLINE; |
- } else if (LowerCaseEqualsASCII(type_begin, type_end, "attachment")) { |
- type_ = ATTACHMENT; |
- } else { |
- parse_result_flags_ |= HAS_UNKNOWN_DISPOSITION_TYPE; |
- type_ = ATTACHMENT; |
- } |
- return delimiter; |
-} |
- |
-// http://tools.ietf.org/html/rfc6266 |
-// |
-// content-disposition = "Content-Disposition" ":" |
-// disposition-type *( ";" disposition-parm ) |
-// |
-// disposition-type = "inline" | "attachment" | disp-ext-type |
-// ; case-insensitive |
-// disp-ext-type = token |
-// |
-// disposition-parm = filename-parm | disp-ext-parm |
-// |
-// filename-parm = "filename" "=" value |
-// | "filename*" "=" ext-value |
-// |
-// disp-ext-parm = token "=" value |
-// | ext-token "=" ext-value |
-// ext-token = <the characters in token, followed by "*"> |
-// |
-void HttpContentDisposition::Parse(const std::string& header, |
- const std::string& referrer_charset) { |
- DCHECK(type_ == INLINE); |
- DCHECK(filename_.empty()); |
- |
- std::string::const_iterator pos = header.begin(); |
- std::string::const_iterator end = header.end(); |
- pos = ConsumeDispositionType(pos, end); |
- |
- std::string name; |
- std::string filename; |
- std::string ext_filename; |
- |
- HttpUtil::NameValuePairsIterator iter(pos, end, ';'); |
- while (iter.GetNext()) { |
- if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
- iter.name_end(), |
- "filename")) { |
- DecodeFilenameValue(iter.value(), referrer_charset, &filename, |
- &parse_result_flags_); |
- if (!filename.empty()) |
- parse_result_flags_ |= HAS_FILENAME; |
- } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
- iter.name_end(), |
- "name")) { |
- DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL); |
- if (!name.empty()) |
- parse_result_flags_ |= HAS_NAME; |
- } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
- iter.name_end(), |
- "filename*")) { |
- DecodeExtValue(iter.raw_value(), &ext_filename); |
- if (!ext_filename.empty()) |
- parse_result_flags_ |= HAS_EXT_FILENAME; |
- } |
- } |
- |
- if (!ext_filename.empty()) |
- filename_ = ext_filename; |
- else if (!filename.empty()) |
- filename_ = filename; |
- else |
- filename_ = name; |
-} |
- |
-} // namespace net |