OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <map> | 8 #include <map> |
9 #include <unicode/regex.h> | 9 #include <unicode/regex.h> |
10 #include <unicode/ucnv.h> | 10 #include <unicode/ucnv.h> |
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
362 } | 362 } |
363 // encoded_word ending prematurely with '?' or extra '?' | 363 // encoded_word ending prematurely with '?' or extra '?' |
364 *is_rfc2047 = false; | 364 *is_rfc2047 = false; |
365 return false; | 365 return false; |
366 } | 366 } |
367 | 367 |
368 // We're not handling 'especial' characters quoted with '\', but | 368 // We're not handling 'especial' characters quoted with '\', but |
369 // it should be Ok because we're not an email client but a | 369 // it should be Ok because we're not an email client but a |
370 // web browser. | 370 // web browser. |
371 | 371 |
372 // What IE6/7 does: %-escaped UTF-8. We could extend this to | 372 // What IE6/7 does: %-escaped UTF-8. |
373 // support a rudimentary form of RFC 2231 with charset label, but | |
374 // it'd gain us little in terms of compatibility. | |
375 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES); | 373 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES); |
376 if (IsStringUTF8(tmp)) { | 374 if (IsStringUTF8(tmp)) { |
377 output->swap(tmp); | 375 output->swap(tmp); |
378 return true; | 376 return true; |
379 // We can try either the OS default charset or 'origin charset' here. | 377 // We can try either the OS default charset or 'origin charset' here. |
380 // As far as I can tell, IE does not support it. However, I've seen | 378 // As far as I can tell, IE does not support it. However, I've seen |
381 // web servers emit %-escaped string in a legacy encoding (usually | 379 // web servers emit %-escaped string in a legacy encoding (usually |
382 // origin charset). | 380 // origin charset). |
383 // TODO(jungshik) : Test IE further and consider adding a fallback here. | 381 // TODO(jungshik) : Test IE further and consider adding a fallback here. |
384 } | 382 } |
(...skipping 28 matching lines...) Expand all Loading... |
413 return false; | 411 return false; |
414 tmp.append(decoded); | 412 tmp.append(decoded); |
415 } | 413 } |
416 output->swap(tmp); | 414 output->swap(tmp); |
417 return true; | 415 return true; |
418 } | 416 } |
419 | 417 |
420 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm | 418 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm |
421 // sure this doesn't properly handle all (most?) cases. | 419 // sure this doesn't properly handle all (most?) cases. |
422 template<typename STR> | 420 template<typename STR> |
423 STR GetHeaderParamValueT(const STR& header, const STR& param_name) { | 421 STR GetHeaderParamValueT(const STR& header, const STR& param_name, |
| 422 QuoteRule::Type quote_rule) { |
424 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". | 423 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". |
425 typename STR::const_iterator param_begin = | 424 typename STR::const_iterator param_begin = |
426 search(header.begin(), header.end(), param_name.begin(), param_name.end(), | 425 search(header.begin(), header.end(), param_name.begin(), param_name.end(), |
427 CaseInsensitiveCompareASCII<typename STR::value_type>()); | 426 CaseInsensitiveCompareASCII<typename STR::value_type>()); |
428 | 427 |
429 if (param_begin == header.end()) | 428 if (param_begin == header.end()) |
430 return STR(); | 429 return STR(); |
431 param_begin += param_name.length(); | 430 param_begin += param_name.length(); |
432 | 431 |
433 STR whitespace; | 432 STR whitespace; |
434 whitespace.push_back(' '); | 433 whitespace.push_back(' '); |
435 whitespace.push_back('\t'); | 434 whitespace.push_back('\t'); |
436 const typename STR::size_type equals_offset = | 435 const typename STR::size_type equals_offset = |
437 header.find_first_not_of(whitespace, param_begin - header.begin()); | 436 header.find_first_not_of(whitespace, param_begin - header.begin()); |
438 if (equals_offset == STR::npos || header.at(equals_offset) != '=') | 437 if (equals_offset == STR::npos || header.at(equals_offset) != '=') |
439 return STR(); | 438 return STR(); |
440 | 439 |
441 param_begin = header.begin() + equals_offset + 1; | 440 param_begin = header.begin() + equals_offset + 1; |
442 if (param_begin == header.end()) | 441 if (param_begin == header.end()) |
443 return STR(); | 442 return STR(); |
444 | 443 |
445 typename STR::const_iterator param_end; | 444 typename STR::const_iterator param_end; |
446 if (*param_begin == '"') { | 445 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { |
447 param_end = find(param_begin+1, header.end(), '"'); | 446 param_end = find(param_begin+1, header.end(), '"'); |
448 if (param_end == header.end()) | 447 if (param_end == header.end()) |
449 return STR(); // poorly formatted param? | 448 return STR(); // poorly formatted param? |
450 | 449 |
451 ++param_begin; // skip past the quote. | 450 ++param_begin; // skip past the quote. |
452 } else { | 451 } else { |
453 param_end = find(param_begin+1, header.end(), ';'); | 452 param_end = find(param_begin+1, header.end(), ';'); |
454 } | 453 } |
455 | 454 |
456 return STR(param_begin, param_end); | 455 return STR(param_begin, param_end); |
(...skipping 627 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// Wide-string overload of GetSpecificHeader. Forwards |headers| and |name|
// unchanged to the GetSpecificHeaderT template and returns its result.
// This block is identical in the OLD and NEW columns.
1084 std::wstring GetSpecificHeader(const std::wstring& headers, | 1083 std::wstring GetSpecificHeader(const std::wstring& headers, |
1085 const std::wstring& name) { | 1084 const std::wstring& name) { |
1086 return GetSpecificHeaderT(headers, name); | 1085 return GetSpecificHeaderT(headers, name); |
1087 } | 1086 } |
1088 | 1087 |
// Narrow-string overload of GetSpecificHeader. Forwards |headers| and |name|
// unchanged to the GetSpecificHeaderT template and returns its result.
// This block is identical in the OLD and NEW columns.
1089 std::string GetSpecificHeader(const std::string& headers, | 1088 std::string GetSpecificHeader(const std::string& headers, |
1090 const std::string& name) { | 1089 const std::string& name) { |
1091 return GetSpecificHeaderT(headers, name); | 1090 return GetSpecificHeaderT(headers, name); |
1092 } | 1091 } |
1093 | 1092 |
// Added in the NEW column only.
// Splits |input| on the "'" delimiter into three positional fields:
// charset ' language ' value. The caller below treats this as an RFC 5987
// value; presumably this is the ext-value layout -- confirm against the RFC.
//
// On success, swaps the charset into |decoded_charset| and the value into
// |value| and returns true. The language field is discarded.
// Returns false, leaving both outputs untouched, when:
//   - the number of "'" delimiters seen is not exactly two,
//   - a non-delimiter token appears after a third delimiter, or
//   - the charset or the value is empty.
| 1093 bool DecodeCharset(const std::string& input, |
| 1094 std::string* decoded_charset, |
| 1095 std::string* value) { |
| 1096 StringTokenizer t(input, "'"); |
// NOTE(review): RETURN_DELIMS presumably makes the tokenizer hand back the
// delimiters themselves as tokens, which the loop counts -- confirm against
// StringTokenizer's documentation.
| 1097 t.set_options(StringTokenizer::RETURN_DELIMS); |
// Results are built in temporaries so the outputs change only on success.
| 1098 std::string temp_charset; |
| 1099 std::string temp_value; |
| 1100 int numDelimsSeen = 0; |
| 1101 while (t.GetNext()) { |
| 1102 if (t.token_is_delim()) { |
| 1103 ++numDelimsSeen; |
| 1104 continue; |
| 1105 } else { |
// The count of delimiters seen so far identifies which field this token is.
| 1106 switch (numDelimsSeen) { |
| 1107 case 0: |
| 1108 temp_charset = t.token(); |
| 1109 break; |
| 1110 case 1: |
| 1111 // Language is ignored. |
| 1112 break; |
| 1113 case 2: |
| 1114 temp_value = t.token(); |
| 1115 break; |
| 1116 default: |
// Text after a third delimiter: reject the input.
| 1117 return false; |
| 1118 } |
| 1119 } |
| 1120 } |
// Also rejects inputs with fewer than two delimiters, or with a trailing
// third delimiter that is not followed by any text.
| 1121 if (numDelimsSeen != 2) |
| 1122 return false; |
| 1123 if (temp_charset.empty() || temp_value.empty()) |
| 1124 return false; |
| 1125 decoded_charset->swap(temp_charset); |
| 1126 value->swap(temp_value); |
| 1127 return true; |
| 1128 } |
| 1129 |
// Extracts a file name from the header value |header| (going by the function
// name, a Content-Disposition header) and returns it decoded, or an empty
// string if no usable name is found.
//
// OLD column: reads the "filename" parameter, falls back to "name", and
// decodes the result with DecodeParamValue using |referrer_charset|.
//
// NEW column: first tries the "filename*" parameter, fetched with
// KEEP_OUTER_QUOTES so that a quoted value can be detected and skipped.
// If that value contains no '"' and DecodeCharset splits it:
//   - a non-ASCII value makes the function return an empty string at once,
//     without falling back to "filename"/"name";
//   - otherwise the value is unescaped with UnescapeRule::SPACES and passed
//     to base::ConvertToUtf8AndNormalize with the parsed charset; on success
//     the converted string is returned.
// Any other outcome falls through to the original "filename"/"name" path,
// now called with REMOVE_OUTER_QUOTES.
1094 std::string GetFileNameFromCD(const std::string& header, | 1130 std::string GetFileNameFromCD(const std::string& header, |
1095 const std::string& referrer_charset) { | 1131 const std::string& referrer_charset) { |
1096 std::string param_value = GetHeaderParamValue(header, "filename"); | 1132 std::string decoded; |
| 1133 std::string param_value = GetHeaderParamValue(header, "filename*", |
| 1134 QuoteRule::KEEP_OUTER_QUOTES); |
| 1135 if (!param_value.empty()) { |
// Quotes were kept above, so a '"' here means the value was quoted; such
// values are not decoded on this path.
| 1136 if (param_value.find('"') == std::string::npos) { |
| 1137 std::string charset; |
| 1138 std::string value; |
| 1139 if (DecodeCharset(param_value, &charset, &value)) { |
| 1140 // RFC 5987 value should be ASCII-only. |
| 1141 if (!IsStringASCII(value)) |
| 1142 return std::string(); |
| 1143 std::string tmp = UnescapeURLComponent(value, UnescapeRule::SPACES); |
| 1144 if (base::ConvertToUtf8AndNormalize(tmp, charset, &decoded)) |
| 1145 return decoded; |
| 1146 } |
| 1147 } |
| 1148 } |
// Fallback: the plain "filename" parameter.
| 1149 param_value = GetHeaderParamValue(header, "filename", |
| 1150 QuoteRule::REMOVE_OUTER_QUOTES); |
1097 if (param_value.empty()) { | 1151 if (param_value.empty()) { |
1098 // Some servers use 'name' parameter. | 1152 // Some servers use 'name' parameter. |
1099 param_value = GetHeaderParamValue(header, "name"); | 1153 param_value = GetHeaderParamValue(header, "name", |
| 1154 QuoteRule::REMOVE_OUTER_QUOTES); |
1100 } | 1155 } |
1101 if (param_value.empty()) | 1156 if (param_value.empty()) |
1102 return std::string(); | 1157 return std::string(); |
// In the NEW column |decoded| is declared at the top of the function, so the
// OLD declaration below is removed.
1103 std::string decoded; | |
1104 if (DecodeParamValue(param_value, referrer_charset, &decoded)) | 1158 if (DecodeParamValue(param_value, referrer_charset, &decoded)) |
1105 return decoded; | 1159 return decoded; |
1106 return std::string(); | 1160 return std::string(); |
1107 } | 1161 } |
1108 | 1162 |
// Wide-string overload of GetHeaderParamValue. Forwards its arguments to the
// GetHeaderParamValueT template and returns its result.
// OLD column: takes (field, param_name).
// NEW column: adds a |quote_rule| argument and forwards it as well. In
// GetHeaderParamValueT, the quoted-value branch is taken only when the value
// starts with '"' and |quote_rule| is QuoteRule::REMOVE_OUTER_QUOTES.
1109 std::wstring GetHeaderParamValue(const std::wstring& field, | 1163 std::wstring GetHeaderParamValue(const std::wstring& field, |
1110 const std::wstring& param_name) { | 1164 const std::wstring& param_name, |
1111 return GetHeaderParamValueT(field, param_name); | 1165 QuoteRule::Type quote_rule) { |
| 1166 return GetHeaderParamValueT(field, param_name, quote_rule); |
1112 } | 1167 } |
1113 | 1168 |
// Narrow-string overload of GetHeaderParamValue. Forwards its arguments to
// the GetHeaderParamValueT template and returns its result.
// OLD column: takes (field, param_name).
// NEW column: adds a |quote_rule| argument and forwards it as well. In
// GetHeaderParamValueT, the quoted-value branch is taken only when the value
// starts with '"' and |quote_rule| is QuoteRule::REMOVE_OUTER_QUOTES.
1114 std::string GetHeaderParamValue(const std::string& field, | 1169 std::string GetHeaderParamValue(const std::string& field, |
1115 const std::string& param_name) { | 1170 const std::string& param_name, |
1116 return GetHeaderParamValueT(field, param_name); | 1171 QuoteRule::Type quote_rule) { |
| 1172 return GetHeaderParamValueT(field, param_name, quote_rule); |
1117 } | 1173 } |
1118 | 1174 |
1119 // TODO(brettw) bug 734373: check the scripts for each host component and | 1175 // TODO(brettw) bug 734373: check the scripts for each host component and |
1120 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for | 1176 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for |
1121 // scripts that the user has installed. For now, just put the entire | 1177 // scripts that the user has installed. For now, just put the entire |
1122 // path through IDN. Maybe this feature can be implemented in ICU itself? | 1178 // path through IDN. Maybe this feature can be implemented in ICU itself? |
1123 // | 1179 // |
1124 // We may want to skip this step in the case of file URLs to allow unicode | 1180 // We may want to skip this step in the case of file URLs to allow unicode |
1125 // UNC hostnames regardless of encodings. | 1181 // UNC hostnames regardless of encodings. |
1126 std::wstring IDNToUnicode(const char* host, | 1182 std::wstring IDNToUnicode(const char* host, |
(...skipping 863 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1990 } | 2046 } |
1991 | 2047 |
// Returns the port stored in |info|, or -1 when GetPortFieldFromAddrinfo
// yields no port field for it. The stored value is passed through ntohs()
// (network-to-host byte order) before being returned.
// This block is identical in the OLD and NEW columns.
1992 int GetPortFromAddrinfo(const struct addrinfo* info) { | 2048 int GetPortFromAddrinfo(const struct addrinfo* info) { |
1993 uint16* port_field = GetPortFieldFromAddrinfo(info); | 2049 uint16* port_field = GetPortFieldFromAddrinfo(info); |
1994 if (!port_field) | 2050 if (!port_field) |
1995 return -1; | 2051 return -1; |
1996 return ntohs(*port_field); | 2052 return ntohs(*port_field); |
1997 } | 2053 } |
1998 | 2054 |
1999 } // namespace net | 2055 } // namespace net |
OLD | NEW |