net/base/net_util.cc - Issue 4435001: Add support for the extended header parameter syntax in Content-Disposition h...

Side by Side Diff: net/base/net_util.cc

Issue 4435001: Add support for the extended header parameter syntax in Content-Disposition h... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 10 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/net_util.h"	5 #include "net/base/net_util.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <map>	8 #include <map>

9 #include <unicode/regex.h>	9 #include <unicode/regex.h>

10 #include <unicode/ucnv.h>	10 #include <unicode/ucnv.h>

(...skipping 351 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
362 }	362 }

363 // encoded_word ending prematurely with '?' or extra '?'	363 // encoded_word ending prematurely with '?' or extra '?'

364 *is_rfc2047 = false;	364 *is_rfc2047 = false;

365 return false;	365 return false;

366 }	366 }

367	367

368 // We're not handling 'especial' characters quoted with '\', but	368 // We're not handling 'especial' characters quoted with '\', but

369 // it should be Ok because we're not an email client but a	369 // it should be Ok because we're not an email client but a

370 // web browser.	370 // web browser.

371	371

372 // What IE6/7 does: %-escaped UTF-8. We could extend this to	372 // What IE6/7 does: %-escaped UTF-8.

373 // support a rudimentary form of RFC 2231 with charset label, but

374 // it'd gain us little in terms of compatibility.

375 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES);	373 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES);

376 if (IsStringUTF8(tmp)) {	374 if (IsStringUTF8(tmp)) {

377 output->swap(tmp);	375 output->swap(tmp);

378 return true;	376 return true;

379 // We can try either the OS default charset or 'origin charset' here,	377 // We can try either the OS default charset or 'origin charset' here,

380 // As far as I can tell, IE does not support it. However, I've seen	378 // As far as I can tell, IE does not support it. However, I've seen

381 // web servers emit %-escaped string in a legacy encoding (usually	379 // web servers emit %-escaped string in a legacy encoding (usually

382 // origin charset).	380 // origin charset).

383 // TODO(jungshik) : Test IE further and consider adding a fallback here.	381 // TODO(jungshik) : Test IE further and consider adding a fallback here.

384 }	382 }

(...skipping 28 matching lines...) Expand all Loading...
413 return false;	411 return false;

414 tmp.append(decoded);	412 tmp.append(decoded);

415 }	413 }

416 output->swap(tmp);	414 output->swap(tmp);

417 return true;	415 return true;

418 }	416 }

419	417

420 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm	418 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm

421 // sure this doesn't properly handle all (most?) cases.	419 // sure this doesn't properly handle all (most?) cases.

422 template<typename STR>	420 template<typename STR>

423 STR GetHeaderParamValueT(const STR& header, const STR& param_name) {	421 STR GetHeaderParamValueT(const STR& header, const STR& param_name,

	422 QuoteRule::Type quote_rule) {

424 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".	423 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".

425 typename STR::const_iterator param_begin =	424 typename STR::const_iterator param_begin =

426 search(header.begin(), header.end(), param_name.begin(), param_name.end(),	425 search(header.begin(), header.end(), param_name.begin(), param_name.end(),

427 CaseInsensitiveCompareASCII<typename STR::value_type>());	426 CaseInsensitiveCompareASCII<typename STR::value_type>());

428	427

429 if (param_begin == header.end())	428 if (param_begin == header.end())

430 return STR();	429 return STR();

431 param_begin += param_name.length();	430 param_begin += param_name.length();

432	431

433 STR whitespace;	432 STR whitespace;

434 whitespace.push_back(' ');	433 whitespace.push_back(' ');

435 whitespace.push_back('\t');	434 whitespace.push_back('\t');

436 const typename STR::size_type equals_offset =	435 const typename STR::size_type equals_offset =

437 header.find_first_not_of(whitespace, param_begin - header.begin());	436 header.find_first_not_of(whitespace, param_begin - header.begin());

438 if (equals_offset == STR::npos \|\| header.at(equals_offset) != '=')	437 if (equals_offset == STR::npos \|\| header.at(equals_offset) != '=')

439 return STR();	438 return STR();

440	439

441 param_begin = header.begin() + equals_offset + 1;	440 param_begin = header.begin() + equals_offset + 1;

442 if (param_begin == header.end())	441 if (param_begin == header.end())

443 return STR();	442 return STR();

444	443

445 typename STR::const_iterator param_end;	444 typename STR::const_iterator param_end;

446 if (*param_begin == '"') {	445 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) {

447 param_end = find(param_begin+1, header.end(), '"');	446 param_end = find(param_begin+1, header.end(), '"');

448 if (param_end == header.end())	447 if (param_end == header.end())

449 return STR(); // poorly formatted param?	448 return STR(); // poorly formatted param?

450	449

451 ++param_begin; // skip past the quote.	450 ++param_begin; // skip past the quote.

452 } else {	451 } else {

453 param_end = find(param_begin+1, header.end(), ';');	452 param_end = find(param_begin+1, header.end(), ';');

454 }	453 }

455	454

456 return STR(param_begin, param_end);	455 return STR(param_begin, param_end);

(...skipping 627 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1084 std::wstring GetSpecificHeader(const std::wstring& headers,	1083 std::wstring GetSpecificHeader(const std::wstring& headers,

1085 const std::wstring& name) {	1084 const std::wstring& name) {

1086 return GetSpecificHeaderT(headers, name);	1085 return GetSpecificHeaderT(headers, name);

1087 }	1086 }

1088	1087

1089 std::string GetSpecificHeader(const std::string& headers,	1088 std::string GetSpecificHeader(const std::string& headers,

1090 const std::string& name) {	1089 const std::string& name) {

1091 return GetSpecificHeaderT(headers, name);	1090 return GetSpecificHeaderT(headers, name);

1092 }	1091 }

1093	1092

	1093 bool DecodeCharset(const std::string& input,

	1094 std::string* decoded_charset,

	1095 std::string* value) {

	1096 StringTokenizer t(input, "'");

	1097 t.set_options(StringTokenizer::RETURN_DELIMS);

	1098 std::string temp_charset;

	1099 std::string temp_value;

	1100 int numDelimsSeen = 0;

	1101 while (t.GetNext()) {

	1102 if (t.token_is_delim()) {

	1103 ++numDelimsSeen;

	1104 continue;

	1105 } else {

	1106 switch (numDelimsSeen) {

	1107 case 0:

	1108 temp_charset = t.token();

	1109 break;

	1110 case 1:

	1111 // Language is ignored.

	1112 break;

	1113 case 2:

	1114 temp_value = t.token();

	1115 break;

	1116 default:

	1117 return false;

	1118 }

	1119 }

	1120 }

	1121 if (numDelimsSeen != 2)

	1122 return false;

	1123 if (temp_charset.empty() \|\| temp_value.empty())

	1124 return false;

	1125 decoded_charset->swap(temp_charset);

	1126 value->swap(temp_value);

	1127 return true;

	1128 }

	1129

1094 std::string GetFileNameFromCD(const std::string& header,	1130 std::string GetFileNameFromCD(const std::string& header,

1095 const std::string& referrer_charset) {	1131 const std::string& referrer_charset) {

1096 std::string param_value = GetHeaderParamValue(header, "filename");	1132 std::string decoded;

	1133 std::string param_value = GetHeaderParamValue(header, "filename*",

	1134 QuoteRule::KEEP_OUTER_QUOTES);

	1135 if (!param_value.empty()) {

	1136 if (param_value.find('"') == std::string::npos) {

	1137 std::string charset;

	1138 std::string value;

	1139 if (DecodeCharset(param_value, &charset, &value)) {

	1140 // RFC 5987 value should be ASCII-only.

	1141 if (!IsStringASCII(value))

	1142 return std::string();

	1143 std::string tmp = UnescapeURLComponent(value, UnescapeRule::SPACES);

	1144 if (base::ConvertToUtf8AndNormalize(tmp, charset, &decoded))

	1145 return decoded;

	1146 }

	1147 }

	1148 }

	1149 param_value = GetHeaderParamValue(header, "filename",

	1150 QuoteRule::REMOVE_OUTER_QUOTES);

1097 if (param_value.empty()) {	1151 if (param_value.empty()) {

1098 // Some servers use 'name' parameter.	1152 // Some servers use 'name' parameter.

1099 param_value = GetHeaderParamValue(header, "name");	1153 param_value = GetHeaderParamValue(header, "name",

	1154 QuoteRule::REMOVE_OUTER_QUOTES);

1100 }	1155 }

1101 if (param_value.empty())	1156 if (param_value.empty())

1102 return std::string();	1157 return std::string();

1103 std::string decoded;

1104 if (DecodeParamValue(param_value, referrer_charset, &decoded))	1158 if (DecodeParamValue(param_value, referrer_charset, &decoded))

1105 return decoded;	1159 return decoded;

1106 return std::string();	1160 return std::string();

1107 }	1161 }

1108	1162

1109 std::wstring GetHeaderParamValue(const std::wstring& field,	1163 std::wstring GetHeaderParamValue(const std::wstring& field,

1110 const std::wstring& param_name) {	1164 const std::wstring& param_name,

1111 return GetHeaderParamValueT(field, param_name);	1165 QuoteRule::Type quote_rule) {

	1166 return GetHeaderParamValueT(field, param_name, quote_rule);

1112 }	1167 }

1113	1168

1114 std::string GetHeaderParamValue(const std::string& field,	1169 std::string GetHeaderParamValue(const std::string& field,

1115 const std::string& param_name) {	1170 const std::string& param_name,

1116 return GetHeaderParamValueT(field, param_name);	1171 QuoteRule::Type quote_rule) {

	1172 return GetHeaderParamValueT(field, param_name, quote_rule);

1117 }	1173 }

1118	1174

1119 // TODO(brettw) bug 734373: check the scripts for each host component and	1175 // TODO(brettw) bug 734373: check the scripts for each host component and

1120 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for	1176 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for

1121 // scripts that the user has installed. For now, just put the entire	1177 // scripts that the user has installed. For now, just put the entire

1122 // path through IDN. Maybe this feature can be implemented in ICU itself?	1178 // path through IDN. Maybe this feature can be implemented in ICU itself?

1123 //	1179 //

1124 // We may want to skip this step in the case of file URLs to allow unicode	1180 // We may want to skip this step in the case of file URLs to allow unicode

1125 // UNC hostnames regardless of encodings.	1181 // UNC hostnames regardless of encodings.

1126 std::wstring IDNToUnicode(const char* host,	1182 std::wstring IDNToUnicode(const char* host,

(...skipping 863 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1990 }	2046 }

1991	2047

1992 int GetPortFromAddrinfo(const struct addrinfo* info) {	2048 int GetPortFromAddrinfo(const struct addrinfo* info) {

1993 uint16* port_field = GetPortFieldFromAddrinfo(info);	2049 uint16* port_field = GetPortFieldFromAddrinfo(info);

1994 if (!port_field)	2050 if (!port_field)

1995 return -1;	2051 return -1;

1996 return ntohs(*port_field);	2052 return ntohs(*port_field);

1997 }	2053 }

1998	2054

1999 } // namespace net	2055 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »