Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(299)

Side by Side Diff: net/base/net_util.cc

Issue 4435001: Add support for the extended header parameter syntax in Content-Disposition h... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/net_util.h" 5 #include "net/base/net_util.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <map> 8 #include <map>
9 #include <unicode/regex.h> 9 #include <unicode/regex.h>
10 #include <unicode/ucnv.h> 10 #include <unicode/ucnv.h>
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after
362 } 362 }
363 // encoded_word ending prematurely with '?' or extra '?' 363 // encoded_word ending prematurely with '?' or extra '?'
364 *is_rfc2047 = false; 364 *is_rfc2047 = false;
365 return false; 365 return false;
366 } 366 }
367 367
368 // We're not handling 'especial' characters quoted with '\', but 368 // We're not handling 'especial' characters quoted with '\', but
369 // it should be Ok because we're not an email client but a 369 // it should be Ok because we're not an email client but a
370 // web browser. 370 // web browser.
371 371
372 // What IE6/7 does: %-escaped UTF-8. We could extend this to 372 // What IE6/7 does: %-escaped UTF-8.
373 // support a rudimentary form of RFC 2231 with charset label, but
374 // it'd gain us little in terms of compatibility.
375 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES); 373 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES);
376 if (IsStringUTF8(tmp)) { 374 if (IsStringUTF8(tmp)) {
377 output->swap(tmp); 375 output->swap(tmp);
378 return true; 376 return true;
379 // We can try either the OS default charset or 'origin charset' here. 377 // We can try either the OS default charset or 'origin charset' here.
380 // As far as I can tell, IE does not support it. However, I've seen 378 // As far as I can tell, IE does not support it. However, I've seen
381 // web servers emit %-escaped string in a legacy encoding (usually 379 // web servers emit %-escaped string in a legacy encoding (usually
382 // origin charset). 380 // origin charset).
383 // TODO(jungshik) : Test IE further and consider adding a fallback here. 381 // TODO(jungshik) : Test IE further and consider adding a fallback here.
384 } 382 }
(...skipping 28 matching lines...) Expand all
413 return false; 411 return false;
414 tmp.append(decoded); 412 tmp.append(decoded);
415 } 413 }
416 output->swap(tmp); 414 output->swap(tmp);
417 return true; 415 return true;
418 } 416 }
419 417
420 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm 418 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm
421 // sure this doesn't properly handle all (most?) cases. 419 // sure this doesn't properly handle all (most?) cases.
422 template<typename STR> 420 template<typename STR>
423 STR GetHeaderParamValueT(const STR& header, const STR& param_name) { 421 STR GetHeaderParamValueT(const STR& header, const STR& param_name,
422 QuoteRule::Type quote_rule) {
424 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". 423 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".
425 typename STR::const_iterator param_begin = 424 typename STR::const_iterator param_begin =
426 search(header.begin(), header.end(), param_name.begin(), param_name.end(), 425 search(header.begin(), header.end(), param_name.begin(), param_name.end(),
427 CaseInsensitiveCompareASCII<typename STR::value_type>()); 426 CaseInsensitiveCompareASCII<typename STR::value_type>());
428 427
429 if (param_begin == header.end()) 428 if (param_begin == header.end())
430 return STR(); 429 return STR();
431 param_begin += param_name.length(); 430 param_begin += param_name.length();
432 431
433 STR whitespace; 432 STR whitespace;
434 whitespace.push_back(' '); 433 whitespace.push_back(' ');
435 whitespace.push_back('\t'); 434 whitespace.push_back('\t');
436 const typename STR::size_type equals_offset = 435 const typename STR::size_type equals_offset =
437 header.find_first_not_of(whitespace, param_begin - header.begin()); 436 header.find_first_not_of(whitespace, param_begin - header.begin());
438 if (equals_offset == STR::npos || header.at(equals_offset) != '=') 437 if (equals_offset == STR::npos || header.at(equals_offset) != '=')
439 return STR(); 438 return STR();
440 439
441 param_begin = header.begin() + equals_offset + 1; 440 param_begin = header.begin() + equals_offset + 1;
442 if (param_begin == header.end()) 441 if (param_begin == header.end())
443 return STR(); 442 return STR();
444 443
445 typename STR::const_iterator param_end; 444 typename STR::const_iterator param_end;
446 if (*param_begin == '"') { 445 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) {
447 param_end = find(param_begin+1, header.end(), '"'); 446 param_end = find(param_begin+1, header.end(), '"');
448 if (param_end == header.end()) 447 if (param_end == header.end())
449 return STR(); // poorly formatted param? 448 return STR(); // poorly formatted param?
450 449
451 ++param_begin; // skip past the quote. 450 ++param_begin; // skip past the quote.
452 } else { 451 } else {
453 param_end = find(param_begin+1, header.end(), ';'); 452 param_end = find(param_begin+1, header.end(), ';');
454 } 453 }
455 454
456 return STR(param_begin, param_end); 455 return STR(param_begin, param_end);
(...skipping 627 matching lines...) Expand 10 before | Expand all | Expand 10 after
1084 std::wstring GetSpecificHeader(const std::wstring& headers, 1083 std::wstring GetSpecificHeader(const std::wstring& headers,
1085 const std::wstring& name) { 1084 const std::wstring& name) {
1086 return GetSpecificHeaderT(headers, name); 1085 return GetSpecificHeaderT(headers, name);
1087 } 1086 }
1088 1087
1089 std::string GetSpecificHeader(const std::string& headers, 1088 std::string GetSpecificHeader(const std::string& headers,
1090 const std::string& name) { 1089 const std::string& name) {
1091 return GetSpecificHeaderT(headers, name); 1090 return GetSpecificHeaderT(headers, name);
1092 } 1091 }
1093 1092
1093 bool DecodeCharset(const std::string& input,
1094 std::string* decoded_charset,
1095 std::string* value) {
1096 StringTokenizer t(input, "'");
1097 t.set_options(StringTokenizer::RETURN_DELIMS);
1098 std::string temp_charset;
1099 std::string temp_value;
1100 int numDelimsSeen = 0;
1101 while (t.GetNext()) {
1102 if (t.token_is_delim()) {
1103 ++numDelimsSeen;
1104 continue;
1105 } else {
1106 switch (numDelimsSeen) {
1107 case 0:
1108 temp_charset = t.token();
1109 break;
1110 case 1:
1111 // Language is ignored.
1112 break;
1113 case 2:
1114 temp_value = t.token();
1115 break;
1116 default:
1117 return false;
1118 }
1119 }
1120 }
1121 if (numDelimsSeen != 2)
1122 return false;
1123 if (temp_charset.empty() || temp_value.empty())
1124 return false;
1125 decoded_charset->swap(temp_charset);
1126 value->swap(temp_value);
1127 return true;
1128 }
1129
1094 std::string GetFileNameFromCD(const std::string& header, 1130 std::string GetFileNameFromCD(const std::string& header,
1095 const std::string& referrer_charset) { 1131 const std::string& referrer_charset) {
1096 std::string param_value = GetHeaderParamValue(header, "filename"); 1132 std::string decoded;
1133 std::string param_value = GetHeaderParamValue(header, "filename*",
1134 QuoteRule::KEEP_OUTER_QUOTES);
1135 if (!param_value.empty()) {
1136 if (param_value.find('"') == std::string::npos) {
1137 std::string charset;
1138 std::string value;
1139 if (DecodeCharset(param_value, &charset, &value)) {
1140 // RFC 5987 value should be ASCII-only.
1141 if (!IsStringASCII(value))
1142 return std::string();
1143 std::string tmp = UnescapeURLComponent(value, UnescapeRule::SPACES);
1144 if (base::ConvertToUtf8AndNormalize(tmp, charset, &decoded))
1145 return decoded;
1146 }
1147 }
1148 }
1149 param_value = GetHeaderParamValue(header, "filename",
1150 QuoteRule::REMOVE_OUTER_QUOTES);
1097 if (param_value.empty()) { 1151 if (param_value.empty()) {
1098 // Some servers use 'name' parameter. 1152 // Some servers use 'name' parameter.
1099 param_value = GetHeaderParamValue(header, "name"); 1153 param_value = GetHeaderParamValue(header, "name",
1154 QuoteRule::REMOVE_OUTER_QUOTES);
1100 } 1155 }
1101 if (param_value.empty()) 1156 if (param_value.empty())
1102 return std::string(); 1157 return std::string();
1103 std::string decoded;
1104 if (DecodeParamValue(param_value, referrer_charset, &decoded)) 1158 if (DecodeParamValue(param_value, referrer_charset, &decoded))
1105 return decoded; 1159 return decoded;
1106 return std::string(); 1160 return std::string();
1107 } 1161 }
1108 1162
1109 std::wstring GetHeaderParamValue(const std::wstring& field, 1163 std::wstring GetHeaderParamValue(const std::wstring& field,
1110 const std::wstring& param_name) { 1164 const std::wstring& param_name,
1111 return GetHeaderParamValueT(field, param_name); 1165 QuoteRule::Type quote_rule) {
1166 return GetHeaderParamValueT(field, param_name, quote_rule);
1112 } 1167 }
1113 1168
1114 std::string GetHeaderParamValue(const std::string& field, 1169 std::string GetHeaderParamValue(const std::string& field,
1115 const std::string& param_name) { 1170 const std::string& param_name,
1116 return GetHeaderParamValueT(field, param_name); 1171 QuoteRule::Type quote_rule) {
1172 return GetHeaderParamValueT(field, param_name, quote_rule);
1117 } 1173 }
1118 1174
1119 // TODO(brettw) bug 734373: check the scripts for each host component and 1175 // TODO(brettw) bug 734373: check the scripts for each host component and
1120 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for 1176 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for
1121 // scripts that the user has installed. For now, just put the entire 1177 // scripts that the user has installed. For now, just put the entire
1122 // path through IDN. Maybe this feature can be implemented in ICU itself? 1178 // path through IDN. Maybe this feature can be implemented in ICU itself?
1123 // 1179 //
1124 // We may want to skip this step in the case of file URLs to allow unicode 1180 // We may want to skip this step in the case of file URLs to allow unicode
1125 // UNC hostnames regardless of encodings. 1181 // UNC hostnames regardless of encodings.
1126 std::wstring IDNToUnicode(const char* host, 1182 std::wstring IDNToUnicode(const char* host,
(...skipping 863 matching lines...) Expand 10 before | Expand all | Expand 10 after
1990 } 2046 }
1991 2047
1992 int GetPortFromAddrinfo(const struct addrinfo* info) { 2048 int GetPortFromAddrinfo(const struct addrinfo* info) {
1993 uint16* port_field = GetPortFieldFromAddrinfo(info); 2049 uint16* port_field = GetPortFieldFromAddrinfo(info);
1994 if (!port_field) 2050 if (!port_field)
1995 return -1; 2051 return -1;
1996 return ntohs(*port_field); 2052 return ntohs(*port_field);
1997 } 2053 }
1998 2054
1999 } // namespace net 2055 } // namespace net
OLDNEW
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698