net/base/net_util.cc - Issue 2733005: Download filename encoding fix [try2]:

Side by Side Diff: net/base/net_util.cc

Issue 2733005: Download filename encoding fix [try2]: (Closed) Base URL: http://src.chromium.org/git/chromium.git

Patch Set: add test Created 10 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/net_util.h"	5 #include "net/base/net_util.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <map>	8 #include <map>

9 #include <unicode/ucnv.h>	9 #include <unicode/ucnv.h>

10 #include <unicode/uidna.h>	10 #include <unicode/uidna.h>

(...skipping 254 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
265 return false;	265 return false;

266 }	266 }

267 output->resize(length);	267 output->resize(length);

268 return true;	268 return true;

269 }	269 }

270	270

271 bool DecodeWord(const std::string& encoded_word,	271 bool DecodeWord(const std::string& encoded_word,

272 const std::string& referrer_charset,	272 const std::string& referrer_charset,

273 bool* is_rfc2047,	273 bool* is_rfc2047,

274 std::string* output) {	274 std::string* output) {

	275 *is_rfc2047 = false;

	276 output->clear();

	277 if (encoded_word.empty())

	278 return true;

	279

275 if (!IsStringASCII(encoded_word)) {	280 if (!IsStringASCII(encoded_word)) {

276 // Try UTF-8, referrer_charset and the native OS default charset in turn.	281 // Try UTF-8, referrer_charset and the native OS default charset in turn.

277 if (IsStringUTF8(encoded_word)) {	282 if (IsStringUTF8(encoded_word)) {

278 *output = encoded_word;	283 *output = encoded_word;

279 } else {	284 } else {

280 std::wstring wide_output;	285 std::wstring wide_output;

281 if (!referrer_charset.empty() &&	286 if (!referrer_charset.empty() &&

282 base::CodepageToWide(encoded_word, referrer_charset.c_str(),	287 base::CodepageToWide(encoded_word, referrer_charset.c_str(),

283 base::OnStringConversionError::FAIL,	288 base::OnStringConversionError::FAIL,

284 &wide_output)) {	289 &wide_output)) {

285 *output = WideToUTF8(wide_output);	290 *output = WideToUTF8(wide_output);

286 } else {	291 } else {

287 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));	292 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));

288 }	293 }

289 }	294 }

290 *is_rfc2047 = false;	295

291 return true;	296 return true;

292 }	297 }

293	298

294 // RFC 2047 : one of encoding methods supported by Firefox and relatively	299 // RFC 2047 : one of encoding methods supported by Firefox and relatively

295 // widely used by web servers.	300 // widely used by web servers.

296 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.	301 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.

297 // We don't care about the length restriction (72 bytes) because	302 // We don't care about the length restriction (72 bytes) because

298 // many web servers generate encoded words longer than the limit.	303 // many web servers generate encoded words longer than the limit.

299 std::string tmp;	304 std::string tmp;

300 *is_rfc2047 = true;	305 *is_rfc2047 = true;

(...skipping 773 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1074 FILE_PATH_LITERAL("download");	1079 FILE_PATH_LITERAL("download");

1075	1080

1076 // about: and data: URLs don't have file names, but esp. data: URLs may	1081 // about: and data: URLs don't have file names, but esp. data: URLs may

1077 // contain parts that look like ones (i.e., contain a slash).	1082 // contain parts that look like ones (i.e., contain a slash).

1078 // Therefore we don't attempt to divine a file name out of them.	1083 // Therefore we don't attempt to divine a file name out of them.

1079 if (url.SchemeIs("about") || url.SchemeIs("data")) {	1084 if (url.SchemeIs("about") || url.SchemeIs("data")) {

1080 return default_name.empty() ? FilePath(kFinalFallbackName) : default_name;	1085 return default_name.empty() ? FilePath(kFinalFallbackName) : default_name;

1081 }	1086 }

1082	1087

1083 const std::string filename_from_cd = GetFileNameFromCD(content_disposition,	1088 const std::string filename_from_cd = GetFileNameFromCD(content_disposition,

1084 referrer_charset);	1089 referrer_charset);

1085 #if defined(OS_WIN)	1090 #if defined(OS_WIN)

1086 FilePath::StringType filename = UTF8ToWide(filename_from_cd);	1091 FilePath::StringType filename = UTF8ToWide(filename_from_cd);

1087 #elif defined(OS_POSIX)	1092 #elif defined(OS_POSIX)

1088 FilePath::StringType filename = filename_from_cd;	1093 FilePath::StringType filename = filename_from_cd;

1089 #endif	1094 #endif

1090	1095

1091 if (!filename.empty()) {	1096 if (!filename.empty()) {

1092 // Remove any path information the server may have sent, take the name	1097 // Remove any path information the server may have sent, take the name

1093 // only.	1098 // only.

1094 filename = FilePath(filename).BaseName().value();	1099 filename = FilePath(filename).BaseName().value();

1095	1100

1096 // Next, remove "." from the beginning and end of the file name to avoid	1101 // Next, remove "." from the beginning and end of the file name to avoid

1097 // tricks with hidden files, "..", and "."	1102 // tricks with hidden files, "..", and "."

1098 TrimString(filename, FILE_PATH_LITERAL("."), &filename);	1103 TrimString(filename, FILE_PATH_LITERAL("."), &filename);

1099 }	1104 }

1100 if (filename.empty()) {	1105 if (filename.empty()) {

1101 if (url.is_valid()) {	1106 if (url.is_valid()) {

1102 const std::string unescaped_url_filename = UnescapeURLComponent(	1107 const std::string unescaped_url_filename = UnescapeURLComponent(

1103 url.ExtractFileName(),	1108 url.ExtractFileName(),

1104 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);	1109 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);

	1110

	1111 // The URL's path should be escaped UTF-8, but may not be.

	1112 std::string decoded_filename = unescaped_url_filename;

	1113 if (!IsStringASCII(decoded_filename)) {

	1114 bool ignore;

	1115 // TODO(jshin): this is probably not robust enough. To be sure, we

	1116 // need encoding detection.

	1117 DecodeWord(unescaped_url_filename, referrer_charset, &ignore,

	1118 &decoded_filename);

	1119 }

	1120

1105 #if defined(OS_WIN)	1121 #if defined(OS_WIN)

1106 filename = UTF8ToWide(unescaped_url_filename);	1122 filename = UTF8ToWide(decoded_filename);

1107 #elif defined(OS_POSIX)	1123 #elif defined(OS_POSIX)

1108 filename = unescaped_url_filename;	1124 filename = decoded_filename;

1109 #endif	1125 #endif

1110 }	1126 }

1111 }	1127 }

1112	1128

1113 #if defined(OS_WIN)	1129 #if defined(OS_WIN)

1114 { // Handle CreateFile() stripping trailing dots and spaces on filenames	1130 { // Handle CreateFile() stripping trailing dots and spaces on filenames

1115 // http://support.microsoft.com/kb/115827	1131 // http://support.microsoft.com/kb/115827

1116 std::string::size_type pos = filename.find_last_not_of(L" .");	1132 std::string::size_type pos = filename.find_last_not_of(L" .");

1117 if (pos == std::string::npos)	1133 if (pos == std::string::npos)

1118 filename.resize(0);	1134 filename.resize(0);

(...skipping 741 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1860 unsigned char mask = 0xFF << (8 - remaining_bits);	1876 unsigned char mask = 0xFF << (8 - remaining_bits);

1861 int i = num_entire_bytes_in_prefix;	1877 int i = num_entire_bytes_in_prefix;

1862 if ((ip_number[i] & mask) != (ip_prefix[i] & mask))	1878 if ((ip_number[i] & mask) != (ip_prefix[i] & mask))

1863 return false;	1879 return false;

1864 }	1880 }

1865	1881

1866 return true;	1882 return true;

1867 }	1883 }

1868	1884

1869 } // namespace net	1885 } // namespace net

OLD	NEW

« no previous file with comments | « chrome/browser/gtk/download_item_gtk.cc ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »