net/http/http_content_disposition.cc - Issue 11478034: Add UMA for measuring Content-Disposition header use and abuse.

Unified Diff: net/http/http_content_disposition.cc

Issue 11478034: Add UMA for measuring Content-Disposition header use and abuse. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: net/http/http_content_disposition.cc

diff --git a/net/http/http_content_disposition.cc b/net/http/http_content_disposition.cc

index cd691ad56cbee54404f86899f607430a76eae72b..631f11f19fe12054f96731d9c432ead5adba87e2 100644

--- a/net/http/http_content_disposition.cc

+++ b/net/http/http_content_disposition.cc

@@ -97,7 +97,8 @@ bool DecodeBQEncoding(const std::string& part,

bool DecodeWord(const std::string& encoded_word,

const std::string& referrer_charset,

bool* is_rfc2047,

- std::string* output) {

+ std::string* output,

+ HttpContentDisposition::ParseResult* parse_result) {

*is_rfc2047 = false;

output->clear();

if (encoded_word.empty())

@@ -119,6 +120,7 @@ bool DecodeWord(const std::string& encoded_word,

}

+ parse_result->has_non_ascii_strings = true;

return true;

}

@@ -127,7 +129,7 @@ bool DecodeWord(const std::string& encoded_word,

// =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.

// We don't care about the length restriction (72 bytes) because

// many web servers generate encoded words longer than the limit.

- std::string tmp;

+ std::string decoded_word;

*is_rfc2047 = true;

int part_index = 0;

std::string charset;

@@ -160,7 +162,7 @@ bool DecodeWord(const std::string& encoded_word,

++part_index;

break;

case 3:

- *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp);

+ *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word);

if (!*is_rfc2047) {

// Last minute failure. Invalid B/Q encoding. Rather than

// passing it through, return now.

@@ -188,7 +190,8 @@ bool DecodeWord(const std::string& encoded_word,

if (*is_rfc2047) {

if (*(encoded_word.end() - 1) == '=') {

- output->swap(tmp);

+ output->swap(decoded_word);

+ parse_result->has_rfc2047_encoded_strings = true;

return true;

}

// encoded_word ending prematurely with '?' or extra '?'

@@ -201,9 +204,11 @@ bool DecodeWord(const std::string& encoded_word,

// web browser.

// What IE6/7 does: %-escaped UTF-8.

- tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES);

- if (IsStringUTF8(tmp)) {

- output->swap(tmp);

+ decoded_word = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES);

+ if (decoded_word != encoded_word)

+ parse_result->has_percent_encoded_strings = true;

+ if (IsStringUTF8(decoded_word)) {

+ output->swap(decoded_word);

return true;

// We can try either the OS default charset or 'origin charset' here,

// As far as I can tell, IE does not support it. However, I've seen

@@ -223,19 +228,21 @@ bool DecodeWord(const std::string& encoded_word,

// strings. Non-ASCII strings are interpreted based on |referrer_charset|.

bool DecodeFilenameValue(const std::string& input,

const std::string& referrer_charset,

- std::string* output) {

- std::string tmp;

+ std::string* output,

+ HttpContentDisposition::ParseResult* parse_result) {

+ HttpContentDisposition::ParseResult current_parse_result;

+ std::string decoded_value;

+ bool is_previous_token_rfc2047 = true;

// Tokenize with whitespace characters.

StringTokenizer t(input, " \t\n\r");

t.set_options(StringTokenizer::RETURN_DELIMS);

- bool is_previous_token_rfc2047 = true;

while (t.GetNext()) {

if (t.token_is_delim()) {

// If the previous non-delimiter token is not RFC2047-encoded,

// put in a space in its place. Otherwise, skip over it.

- if (!is_previous_token_rfc2047) {

- tmp.push_back(' ');

- }

+ if (!is_previous_token_rfc2047)

+ decoded_value.push_back(' ');

continue;

}

// We don't support a single multibyte character split into

@@ -245,11 +252,19 @@ bool DecodeFilenameValue(const std::string& input,

// it, either.

std::string decoded;

if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,

- &decoded))

+ &decoded, &current_parse_result))

return false;

- tmp.append(decoded);

+ decoded_value.append(decoded);

+ }

+ output->swap(decoded_value);

+ if (parse_result) {

+ parse_result->has_non_ascii_strings =

+ current_parse_result.has_non_ascii_strings;

+ parse_result->has_percent_encoded_strings =

+ current_parse_result.has_percent_encoded_strings;

+ parse_result->has_rfc2047_encoded_strings =

+ current_parse_result.has_rfc2047_encoded_strings;

}

- output->swap(tmp);

return true;

}

@@ -337,6 +352,17 @@ bool DecodeExtValue(const std::string& param_value, std::string* decoded) {

} // namespace

+HttpContentDisposition::ParseResult::ParseResult()

+ : has_disposition_type(false),

+ has_unknown_disposition_type(false),

+ has_name(false),

+ has_filename(false),

+ has_ext_filename(false),

+ has_non_ascii_strings(false),

+ has_percent_encoded_strings(false),

+ has_rfc2047_encoded_strings(false) {

+}

HttpContentDisposition::HttpContentDisposition(

const std::string& header, const std::string& referrer_charset)

: type_(INLINE) {

@@ -361,10 +387,18 @@ std::string::const_iterator HttpContentDisposition::ConsumeDispositionType(

if (!HttpUtil::IsToken(type_begin, type_end))

return begin;

+ parse_result_.has_disposition_type = true;

DCHECK(std::find(type_begin, type_end, '=') == type_end);

- if (!LowerCaseEqualsASCII(type_begin, type_end, "inline"))

+ if (LowerCaseEqualsASCII(type_begin, type_end, "inline")) {

+ type_ = INLINE;

+ } else if (LowerCaseEqualsASCII(type_begin, type_end, "attachment")) {

type_ = ATTACHMENT;

+ } else {

+ parse_result_.has_unknown_disposition_type = true;

+ type_ = ATTACHMENT;

+ }

return delimiter;

}

@@ -404,15 +438,19 @@ void HttpContentDisposition::Parse(const std::string& header,

if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(),

iter.name_end(),

"filename")) {

- DecodeFilenameValue(iter.value(), referrer_charset, &filename);

+ parse_result_.has_filename =

+ DecodeFilenameValue(iter.value(), referrer_charset, &filename,

+ &parse_result_);

} else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(),

iter.name_end(),

"name")) {

- DecodeFilenameValue(iter.value(), referrer_charset, &name);

+ parse_result_.has_name =

+ DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL);

} else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(),

iter.name_end(),

"filename*")) {

- DecodeExtValue(iter.raw_value(), &ext_filename);

+ parse_result_.has_ext_filename =

+ DecodeExtValue(iter.raw_value(), &ext_filename);

}

« content/browser/download/download_stats.cc ('K') | « net/http/http_content_disposition.h ('k') | net/http/http_content_disposition_unittest.cc » ('j') | no next file with comments »