Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(71)

Side by Side Diff: net/base/net_util.cc

Issue 2733005: Download filename encoding fix [try2]: (Closed) Base URL: http://src.chromium.org/git/chromium.git
Patch Set: add test Created 10 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « chrome/browser/gtk/download_item_gtk.cc ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/net_util.h" 5 #include "net/base/net_util.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <map> 8 #include <map>
9 #include <unicode/ucnv.h> 9 #include <unicode/ucnv.h>
10 #include <unicode/uidna.h> 10 #include <unicode/uidna.h>
(...skipping 254 matching lines...) Expand 10 before | Expand all | Expand 10 after
265 return false; 265 return false;
266 } 266 }
267 output->resize(length); 267 output->resize(length);
268 return true; 268 return true;
269 } 269 }
270 270
271 bool DecodeWord(const std::string& encoded_word, 271 bool DecodeWord(const std::string& encoded_word,
272 const std::string& referrer_charset, 272 const std::string& referrer_charset,
273 bool* is_rfc2047, 273 bool* is_rfc2047,
274 std::string* output) { 274 std::string* output) {
275 *is_rfc2047 = false;
276 output->clear();
277 if (encoded_word.empty())
278 return true;
279
275 if (!IsStringASCII(encoded_word)) { 280 if (!IsStringASCII(encoded_word)) {
276 // Try UTF-8, referrer_charset and the native OS default charset in turn. 281 // Try UTF-8, referrer_charset and the native OS default charset in turn.
277 if (IsStringUTF8(encoded_word)) { 282 if (IsStringUTF8(encoded_word)) {
278 *output = encoded_word; 283 *output = encoded_word;
279 } else { 284 } else {
280 std::wstring wide_output; 285 std::wstring wide_output;
281 if (!referrer_charset.empty() && 286 if (!referrer_charset.empty() &&
282 base::CodepageToWide(encoded_word, referrer_charset.c_str(), 287 base::CodepageToWide(encoded_word, referrer_charset.c_str(),
283 base::OnStringConversionError::FAIL, 288 base::OnStringConversionError::FAIL,
284 &wide_output)) { 289 &wide_output)) {
285 *output = WideToUTF8(wide_output); 290 *output = WideToUTF8(wide_output);
286 } else { 291 } else {
287 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); 292 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));
288 } 293 }
289 } 294 }
290 *is_rfc2047 = false; 295
291 return true; 296 return true;
292 } 297 }
293 298
294 // RFC 2047 : one of encoding methods supported by Firefox and relatively 299 // RFC 2047 : one of encoding methods supported by Firefox and relatively
295 // widely used by web servers. 300 // widely used by web servers.
296 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. 301 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.
297 // We don't care about the length restriction (72 bytes) because 302 // We don't care about the length restriction (72 bytes) because
298 // many web servers generate encoded words longer than the limit. 303 // many web servers generate encoded words longer than the limit.
299 std::string tmp; 304 std::string tmp;
300 *is_rfc2047 = true; 305 *is_rfc2047 = true;
(...skipping 773 matching lines...) Expand 10 before | Expand all | Expand 10 after
1074 FILE_PATH_LITERAL("download"); 1079 FILE_PATH_LITERAL("download");
1075 1080
1076 // about: and data: URLs don't have file names, but esp. data: URLs may 1081 // about: and data: URLs don't have file names, but esp. data: URLs may
1077 // contain parts that look like ones (i.e., contain a slash). 1082 // contain parts that look like ones (i.e., contain a slash).
1078 // Therefore we don't attempt to divine a file name out of them. 1083 // Therefore we don't attempt to divine a file name out of them.
1079 if (url.SchemeIs("about") || url.SchemeIs("data")) { 1084 if (url.SchemeIs("about") || url.SchemeIs("data")) {
1080 return default_name.empty() ? FilePath(kFinalFallbackName) : default_name; 1085 return default_name.empty() ? FilePath(kFinalFallbackName) : default_name;
1081 } 1086 }
1082 1087
1083 const std::string filename_from_cd = GetFileNameFromCD(content_disposition, 1088 const std::string filename_from_cd = GetFileNameFromCD(content_disposition,
1084 referrer_charset); 1089 referrer_charset);
1085 #if defined(OS_WIN) 1090 #if defined(OS_WIN)
1086 FilePath::StringType filename = UTF8ToWide(filename_from_cd); 1091 FilePath::StringType filename = UTF8ToWide(filename_from_cd);
1087 #elif defined(OS_POSIX) 1092 #elif defined(OS_POSIX)
1088 FilePath::StringType filename = filename_from_cd; 1093 FilePath::StringType filename = filename_from_cd;
1089 #endif 1094 #endif
1090 1095
1091 if (!filename.empty()) { 1096 if (!filename.empty()) {
1092 // Remove any path information the server may have sent, take the name 1097 // Remove any path information the server may have sent, take the name
1093 // only. 1098 // only.
1094 filename = FilePath(filename).BaseName().value(); 1099 filename = FilePath(filename).BaseName().value();
1095 1100
1096 // Next, remove "." from the beginning and end of the file name to avoid 1101 // Next, remove "." from the beginning and end of the file name to avoid
1097 // tricks with hidden files, "..", and "." 1102 // tricks with hidden files, "..", and "."
1098 TrimString(filename, FILE_PATH_LITERAL("."), &filename); 1103 TrimString(filename, FILE_PATH_LITERAL("."), &filename);
1099 } 1104 }
1100 if (filename.empty()) { 1105 if (filename.empty()) {
1101 if (url.is_valid()) { 1106 if (url.is_valid()) {
1102 const std::string unescaped_url_filename = UnescapeURLComponent( 1107 const std::string unescaped_url_filename = UnescapeURLComponent(
1103 url.ExtractFileName(), 1108 url.ExtractFileName(),
1104 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); 1109 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
1110
1111 // The URL's path should be escaped UTF-8, but may not be.
1112 std::string decoded_filename = unescaped_url_filename;
1113 if (!IsStringASCII(decoded_filename)) {
1114 bool ignore;
1115 // TODO(jshin): this is probably not robust enough. To be sure, we
1116 // need encoding detection.
1117 DecodeWord(unescaped_url_filename, referrer_charset, &ignore,
1118 &decoded_filename);
1119 }
1120
1105 #if defined(OS_WIN) 1121 #if defined(OS_WIN)
1106 filename = UTF8ToWide(unescaped_url_filename); 1122 filename = UTF8ToWide(decoded_filename);
1107 #elif defined(OS_POSIX) 1123 #elif defined(OS_POSIX)
1108 filename = unescaped_url_filename; 1124 filename = decoded_filename;
1109 #endif 1125 #endif
1110 } 1126 }
1111 } 1127 }
1112 1128
1113 #if defined(OS_WIN) 1129 #if defined(OS_WIN)
1114 { // Handle CreateFile() stripping trailing dots and spaces on filenames 1130 { // Handle CreateFile() stripping trailing dots and spaces on filenames
1115 // http://support.microsoft.com/kb/115827 1131 // http://support.microsoft.com/kb/115827
1116 std::string::size_type pos = filename.find_last_not_of(L" ."); 1132 std::string::size_type pos = filename.find_last_not_of(L" .");
1117 if (pos == std::string::npos) 1133 if (pos == std::string::npos)
1118 filename.resize(0); 1134 filename.resize(0);
(...skipping 741 matching lines...) Expand 10 before | Expand all | Expand 10 after
1860 unsigned char mask = 0xFF << (8 - remaining_bits); 1876 unsigned char mask = 0xFF << (8 - remaining_bits);
1861 int i = num_entire_bytes_in_prefix; 1877 int i = num_entire_bytes_in_prefix;
1862 if ((ip_number[i] & mask) != (ip_prefix[i] & mask)) 1878 if ((ip_number[i] & mask) != (ip_prefix[i] & mask))
1863 return false; 1879 return false;
1864 } 1880 }
1865 1881
1866 return true; 1882 return true;
1867 } 1883 }
1868 1884
1869 } // namespace net 1885 } // namespace net
OLDNEW
« no previous file with comments | « chrome/browser/gtk/download_item_gtk.cc ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698