net/http/http_content_disposition.cc - Issue 11478034: Add UMA for measuring Content-Disposition header use and abuse.

Unified Diff: net/http/http_content_disposition.cc

Issue 11478034: Add UMA for measuring Content-Disposition header use and abuse. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Only measure valid C-D headers Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« net/http/http_content_disposition.h ('K') | « net/http/http_content_disposition.h ('k') | net/http/http_content_disposition_unittest.cc » ('j') | net/http/http_content_disposition_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: net/http/http_content_disposition.cc

diff --git a/net/http/http_content_disposition.cc b/net/http/http_content_disposition.cc

index 0726e93ee477b1a1ab3bf6a50b97fe49c773a664..8b76ba4cb82ce57510f8d0680e5819b1cfd4465a 100644

--- a/net/http/http_content_disposition.cc

+++ b/net/http/http_content_disposition.cc

@@ -95,7 +95,8 @@ bool DecodeBQEncoding(const std::string& part,

bool DecodeWord(const std::string& encoded_word,

const std::string& referrer_charset,

bool* is_rfc2047,

- std::string* output) {

+ std::string* output,

+ net::HttpContentDisposition::ParseResult* parse_result) {

*is_rfc2047 = false;

output->clear();

if (encoded_word.empty())

@@ -117,6 +118,7 @@ bool DecodeWord(const std::string& encoded_word,

}

+ parse_result->has_non_ascii_strings = true;

return true;

}

@@ -125,7 +127,7 @@ bool DecodeWord(const std::string& encoded_word,

// =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.

// We don't care about the length restriction (72 bytes) because

// many web servers generate encoded words longer than the limit.

- std::string tmp;

+ std::string decoded_word;

*is_rfc2047 = true;

int part_index = 0;

std::string charset;

@@ -158,7 +160,7 @@ bool DecodeWord(const std::string& encoded_word,

++part_index;

break;

case 3:

- *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp);

+ *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word);

if (!*is_rfc2047) {

// Last minute failure. Invalid B/Q encoding. Rather than

// passing it through, return now.

@@ -186,7 +188,8 @@ bool DecodeWord(const std::string& encoded_word,

if (*is_rfc2047) {

if (*(encoded_word.end() - 1) == '=') {

- output->swap(tmp);

+ output->swap(decoded_word);

+ parse_result->has_rfc2047_encoded_strings = true;

return true;

}

// encoded_word ending prematurely with '?' or extra '?'

@@ -199,9 +202,12 @@ bool DecodeWord(const std::string& encoded_word,

// web browser.

// What IE6/7 does: %-escaped UTF-8.

- tmp = net::UnescapeURLComponent(encoded_word, net::UnescapeRule::SPACES);

- if (IsStringUTF8(tmp)) {

- output->swap(tmp);

+ decoded_word = net::UnescapeURLComponent(encoded_word,

+ net::UnescapeRule::SPACES);

+ if (decoded_word != encoded_word)

+ parse_result->has_percent_encoded_strings = true;

+ if (IsStringUTF8(decoded_word)) {

+ output->swap(decoded_word);

return true;

// We can try either the OS default charset or 'origin charset' here,

// As far as I can tell, IE does not support it. However, I've seen

@@ -219,21 +225,24 @@ bool DecodeWord(const std::string& encoded_word,

// However we currently also allow RFC 2047 encoding and non-ASCII

// strings. Non-ASCII strings are interpreted based on |referrer_charset|.

-bool DecodeFilenameValue(const std::string& input,

- const std::string& referrer_charset,

- std::string* output) {

- std::string tmp;

+bool DecodeFilenameValue(

+ const std::string& input,

+ const std::string& referrer_charset,

+ std::string* output,

+ net::HttpContentDisposition::ParseResult* parse_result) {

+ net::HttpContentDisposition::ParseResult current_parse_result;

+ std::string decoded_value;

+ bool is_previous_token_rfc2047 = true;

// Tokenize with whitespace characters.

StringTokenizer t(input, " \t\n\r");

t.set_options(StringTokenizer::RETURN_DELIMS);

- bool is_previous_token_rfc2047 = true;

while (t.GetNext()) {

if (t.token_is_delim()) {

// If the previous non-delimiter token is not RFC2047-encoded,

// put in a space in its place. Otherwise, skip over it.

- if (!is_previous_token_rfc2047) {

- tmp.push_back(' ');

- }

+ if (!is_previous_token_rfc2047)

+ decoded_value.push_back(' ');

continue;

}

// We don't support a single multibyte character split into

@@ -243,11 +252,19 @@ bool DecodeFilenameValue(const std::string& input,

// it, either.

std::string decoded;

if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,

- &decoded))

+ &decoded, &current_parse_result))

return false;

- tmp.append(decoded);

+ decoded_value.append(decoded);

+ }

+ output->swap(decoded_value);

+ if (parse_result && !output->empty()) {

+ parse_result->has_non_ascii_strings =

+ current_parse_result.has_non_ascii_strings;

+ parse_result->has_percent_encoded_strings =

+ current_parse_result.has_percent_encoded_strings;

+ parse_result->has_rfc2047_encoded_strings =

+ current_parse_result.has_rfc2047_encoded_strings;

}

- output->swap(tmp);

return true;

}

@@ -337,6 +354,17 @@ bool DecodeExtValue(const std::string& param_value, std::string* decoded) {

namespace net {

+HttpContentDisposition::ParseResult::ParseResult()

+ : has_disposition_type(false),

+ has_unknown_disposition_type(false),

+ has_name(false),

+ has_filename(false),

+ has_ext_filename(false),

+ has_non_ascii_strings(false),

+ has_percent_encoded_strings(false),

+ has_rfc2047_encoded_strings(false) {

HttpContentDisposition::HttpContentDisposition(

const std::string& header, const std::string& referrer_charset)

: type_(INLINE) {

@@ -361,10 +389,18 @@ std::string::const_iterator HttpContentDisposition::ConsumeDispositionType(

if (!HttpUtil::IsToken(type_begin, type_end))

return begin;

+ parse_result_.has_disposition_type = true;

DCHECK(std::find(type_begin, type_end, '=') == type_end);

- if (!LowerCaseEqualsASCII(type_begin, type_end, "inline"))

+ if (LowerCaseEqualsASCII(type_begin, type_end, "inline")) {

+ type_ = INLINE;

+ } else if (LowerCaseEqualsASCII(type_begin, type_end, "attachment")) {

type_ = ATTACHMENT;

+ } else {

+ parse_result_.has_unknown_disposition_type = true;

+ type_ = ATTACHMENT;

+ }

return delimiter;

}

@@ -404,15 +440,19 @@ void HttpContentDisposition::Parse(const std::string& header,

if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(),

iter.name_end(),

"filename")) {

- DecodeFilenameValue(iter.value(), referrer_charset, &filename);

+ DecodeFilenameValue(iter.value(), referrer_charset, &filename,

+ &parse_result_);

+ parse_result_.has_filename = !filename.empty();

} else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(),

iter.name_end(),

"name")) {

- DecodeFilenameValue(iter.value(), referrer_charset, &name);

+ DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL);

+ parse_result_.has_name = !name.empty();

} else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(),

iter.name_end(),

"filename*")) {

DecodeExtValue(iter.raw_value(), &ext_filename);

+ parse_result_.has_ext_filename = !ext_filename.empty();

}