OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "net/base/filename_util.h" |
| 6 |
| 7 #include "base/file_util.h" |
| 8 #include "base/files/file_path.h" |
| 9 #include "base/i18n/file_util_icu.h" |
| 10 #include "base/i18n/icu_string_conversions.h" |
| 11 #include "base/path_service.h" |
| 12 #include "base/strings/string_util.h" |
| 13 #include "base/strings/sys_string_conversions.h" |
| 14 #include "base/strings/utf_string_conversions.h" |
| 15 #include "base/threading/thread_restrictions.h" |
| 16 #include "net/base/escape.h" |
| 17 #include "net/base/mime_util.h" |
| 18 #include "net/http/http_content_disposition.h" |
| 19 #include "url/gurl.h" |
| 20 |
| 21 namespace net { |
| 22 |
| 23 namespace { |
| 24 |
| 25 // what we prepend to get a file URL |
| 26 static const base::FilePath::CharType kFileURLPrefix[] = |
| 27 FILE_PATH_LITERAL("file:///"); |
| 28 |
| 29 void SanitizeGeneratedFileName(base::FilePath::StringType* filename, |
| 30 bool replace_trailing) { |
| 31 const base::FilePath::CharType kReplace[] = FILE_PATH_LITERAL("-"); |
| 32 if (filename->empty()) |
| 33 return; |
| 34 if (replace_trailing) { |
| 35 // Handle CreateFile() stripping trailing dots and spaces on filenames |
| 36 // http://support.microsoft.com/kb/115827 |
| 37 size_t length = filename->size(); |
| 38 size_t pos = filename->find_last_not_of(FILE_PATH_LITERAL(" .")); |
| 39 filename->resize((pos == std::string::npos) ? 0 : (pos + 1)); |
| 40 base::TrimWhitespace(*filename, base::TRIM_TRAILING, filename); |
| 41 if (filename->empty()) |
| 42 return; |
| 43 size_t trimmed = length - filename->size(); |
| 44 if (trimmed) |
| 45 filename->insert(filename->end(), trimmed, kReplace[0]); |
| 46 } |
| 47 base::TrimString(*filename, FILE_PATH_LITERAL("."), filename); |
| 48 if (filename->empty()) |
| 49 return; |
| 50 // Replace any path information by changing path separators. |
| 51 ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("/"), kReplace); |
| 52 ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("\\"), kReplace); |
| 53 } |
| 54 |
| 55 // Returns the filename determined from the last component of the path portion |
| 56 // of the URL. Returns an empty string if the URL doesn't have a path or is |
| 57 // invalid. If the generated filename is not reliable, |
| 58 // |should_overwrite_extension| will be set to true, in which case a better |
| 59 // extension should be determined based on the content type. |
| 60 std::string GetFileNameFromURL(const GURL& url, |
| 61 const std::string& referrer_charset, |
| 62 bool* should_overwrite_extension) { |
| 63 // about: and data: URLs don't have file names, but esp. data: URLs may |
| 64 // contain parts that look like ones (i.e., contain a slash). Therefore we |
| 65 // don't attempt to divine a file name out of them. |
| 66 if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data")) |
| 67 return std::string(); |
| 68 |
| 69 const std::string unescaped_url_filename = UnescapeURLComponent( |
| 70 url.ExtractFileName(), |
| 71 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); |
| 72 |
| 73 // The URL's path should be escaped UTF-8, but may not be. |
| 74 std::string decoded_filename = unescaped_url_filename; |
| 75 if (!IsStringUTF8(decoded_filename)) { |
| 76 // TODO(jshin): this is probably not robust enough. To be sure, we need |
| 77 // encoding detection. |
| 78 base::string16 utf16_output; |
| 79 if (!referrer_charset.empty() && |
| 80 base::CodepageToUTF16(unescaped_url_filename, |
| 81 referrer_charset.c_str(), |
| 82 base::OnStringConversionError::FAIL, |
| 83 &utf16_output)) { |
| 84 decoded_filename = base::UTF16ToUTF8(utf16_output); |
| 85 } else { |
| 86 decoded_filename = base::WideToUTF8( |
| 87 base::SysNativeMBToWide(unescaped_url_filename)); |
| 88 } |
| 89 } |
| 90 // If the URL contains a (possibly empty) query, assume it is a generator, and |
| 91 // allow the determined extension to be overwritten. |
| 92 *should_overwrite_extension = !decoded_filename.empty() && url.has_query(); |
| 93 |
| 94 return decoded_filename; |
| 95 } |
| 96 |
| 97 // Returns whether the specified extension is automatically integrated into the |
| 98 // windows shell. |
| 99 bool IsShellIntegratedExtension(const base::FilePath::StringType& extension) { |
| 100 base::FilePath::StringType extension_lower = StringToLowerASCII(extension); |
| 101 |
| 102 // http://msdn.microsoft.com/en-us/library/ms811694.aspx |
| 103 // Right-clicking on shortcuts can be magical. |
| 104 if ((extension_lower == FILE_PATH_LITERAL("local")) || |
| 105 (extension_lower == FILE_PATH_LITERAL("lnk"))) |
| 106 return true; |
| 107 |
| 108 // http://www.juniper.net/security/auto/vulnerabilities/vuln2612.html |
| 109 // Files become magical if they end in a CLSID, so block such extensions. |
| 110 if (!extension_lower.empty() && |
| 111 (extension_lower[0] == FILE_PATH_LITERAL('{')) && |
| 112 (extension_lower[extension_lower.length() - 1] == FILE_PATH_LITERAL('}'))) |
| 113 return true; |
| 114 return false; |
| 115 } |
| 116 |
| 117 // Returns whether the specified file name is a reserved name on windows. |
| 118 // This includes names like "com2.zip" (which correspond to devices) and |
| 119 // desktop.ini and thumbs.db which have special meaning to the windows shell. |
| 120 bool IsReservedName(const base::FilePath::StringType& filename) { |
| 121 // This list is taken from the MSDN article "Naming a file" |
| 122 // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx |
| 123 // I also added clock$ because GetSaveFileName seems to consider it as a |
| 124 // reserved name too. |
| 125 static const char* const known_devices[] = { |
| 126 "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5", |
| 127 "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4", |
| 128 "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "clock$" |
| 129 }; |
| 130 #if defined(OS_WIN) |
| 131 std::string filename_lower = StringToLowerASCII(base::WideToUTF8(filename)); |
| 132 #elif defined(OS_POSIX) |
| 133 std::string filename_lower = StringToLowerASCII(filename); |
| 134 #endif |
| 135 |
| 136 for (size_t i = 0; i < arraysize(known_devices); ++i) { |
| 137 // Exact match. |
| 138 if (filename_lower == known_devices[i]) |
| 139 return true; |
| 140 // Starts with "DEVICE.". |
| 141 if (filename_lower.find(std::string(known_devices[i]) + ".") == 0) |
| 142 return true; |
| 143 } |
| 144 |
| 145 static const char* const magic_names[] = { |
| 146 // These file names are used by the "Customize folder" feature of the shell. |
| 147 "desktop.ini", |
| 148 "thumbs.db", |
| 149 }; |
| 150 |
| 151 for (size_t i = 0; i < arraysize(magic_names); ++i) { |
| 152 if (filename_lower == magic_names[i]) |
| 153 return true; |
| 154 } |
| 155 |
| 156 return false; |
| 157 } |
| 158 |
| 159 |
| 160 // Examines the current extension in |file_name| and modifies it if necessary in |
| 161 // order to ensure the filename is safe. If |file_name| doesn't contain an |
| 162 // extension or if |ignore_extension| is true, then a new extension will be |
| 163 // constructed based on the |mime_type|. |
| 164 // |
| 165 // We're addressing two things here: |
| 166 // |
| 167 // 1) Usability. If there is no reliable file extension, we want to guess a |
| 168 // reasonable file extension based on the content type. |
| 169 // |
| 170 // 2) Shell integration. Some file extensions automatically integrate with the |
| 171 // shell. We block these extensions to prevent a malicious web site from |
| 172 // integrating with the user's shell. |
| 173 void EnsureSafeExtension(const std::string& mime_type, |
| 174 bool ignore_extension, |
| 175 base::FilePath* file_name) { |
| 176 // See if our file name already contains an extension. |
| 177 base::FilePath::StringType extension = file_name->Extension(); |
| 178 if (!extension.empty()) |
| 179 extension.erase(extension.begin()); // Erase preceding '.'. |
| 180 |
| 181 if ((ignore_extension || extension.empty()) && !mime_type.empty()) { |
| 182 base::FilePath::StringType preferred_mime_extension; |
| 183 std::vector<base::FilePath::StringType> all_mime_extensions; |
| 184 // The GetPreferredExtensionForMimeType call will end up going to disk. Do |
| 185 // this on another thread to avoid slowing the IO thread. |
| 186 // http://crbug.com/61827 |
| 187 // TODO(asanka): Remove this ScopedAllowIO once all callers have switched |
| 188 // over to IO safe threads. |
| 189 base::ThreadRestrictions::ScopedAllowIO allow_io; |
| 190 net::GetPreferredExtensionForMimeType(mime_type, &preferred_mime_extension); |
| 191 net::GetExtensionsForMimeType(mime_type, &all_mime_extensions); |
| 192 // If the existing extension is in the list of valid extensions for the |
| 193 // given type, use it. This avoids doing things like pointlessly renaming |
| 194 // "foo.jpg" to "foo.jpeg". |
| 195 if (std::find(all_mime_extensions.begin(), |
| 196 all_mime_extensions.end(), |
| 197 extension) != all_mime_extensions.end()) { |
| 198 // leave |extension| alone |
| 199 } else if (!preferred_mime_extension.empty()) { |
| 200 extension = preferred_mime_extension; |
| 201 } |
| 202 } |
| 203 |
| 204 #if defined(OS_WIN) |
| 205 static const base::FilePath::CharType default_extension[] = |
| 206 FILE_PATH_LITERAL("download"); |
| 207 |
| 208 // Rename shell-integrated extensions. |
| 209 // TODO(asanka): Consider stripping out the bad extension and replacing it |
| 210 // with the preferred extension for the MIME type if one is available. |
| 211 if (IsShellIntegratedExtension(extension)) |
| 212 extension.assign(default_extension); |
| 213 #endif |
| 214 |
| 215 *file_name = file_name->ReplaceExtension(extension); |
| 216 } |
| 217 |
| 218 bool FilePathToString16(const base::FilePath& path, base::string16* converted) { |
| 219 #if defined(OS_WIN) |
| 220 return base::WideToUTF16( |
| 221 path.value().c_str(), path.value().size(), converted); |
| 222 #elif defined(OS_POSIX) |
| 223 std::string component8 = path.AsUTF8Unsafe(); |
| 224 return !component8.empty() && |
| 225 base::UTF8ToUTF16(component8.c_str(), component8.size(), converted); |
| 226 #endif |
| 227 } |
| 228 |
| 229 } // namespace |
| 230 |
| 231 GURL FilePathToFileURL(const base::FilePath& path) { |
| 232 // Produce a URL like "file:///C:/foo" for a regular file, or |
| 233 // "file://///server/path" for UNC. The URL canonicalizer will fix up the |
| 234 // latter case to be the canonical UNC form: "file://server/path" |
| 235 base::FilePath::StringType url_string(kFileURLPrefix); |
| 236 if (!path.IsAbsolute()) { |
| 237 base::FilePath current_dir; |
| 238 PathService::Get(base::DIR_CURRENT, ¤t_dir); |
| 239 url_string.append(current_dir.value()); |
| 240 url_string.push_back(base::FilePath::kSeparators[0]); |
| 241 } |
| 242 url_string.append(path.value()); |
| 243 |
| 244 // Now do replacement of some characters. Since we assume the input is a |
| 245 // literal filename, anything the URL parser might consider special should |
| 246 // be escaped here. |
| 247 |
| 248 // must be the first substitution since others will introduce percents as the |
| 249 // escape character |
| 250 ReplaceSubstringsAfterOffset(&url_string, 0, |
| 251 FILE_PATH_LITERAL("%"), FILE_PATH_LITERAL("%25")); |
| 252 |
| 253 // semicolon is supposed to be some kind of separator according to RFC 2396 |
| 254 ReplaceSubstringsAfterOffset(&url_string, 0, |
| 255 FILE_PATH_LITERAL(";"), FILE_PATH_LITERAL("%3B")); |
| 256 |
| 257 ReplaceSubstringsAfterOffset(&url_string, 0, |
| 258 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); |
| 259 |
| 260 ReplaceSubstringsAfterOffset(&url_string, 0, |
| 261 FILE_PATH_LITERAL("?"), FILE_PATH_LITERAL("%3F")); |
| 262 |
| 263 #if defined(OS_POSIX) |
| 264 ReplaceSubstringsAfterOffset(&url_string, 0, |
| 265 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); |
| 266 #endif |
| 267 |
| 268 return GURL(url_string); |
| 269 } |
| 270 |
| 271 bool FileURLToFilePath(const GURL& url, base::FilePath* file_path) { |
| 272 *file_path = base::FilePath(); |
| 273 base::FilePath::StringType& file_path_str = |
| 274 const_cast<base::FilePath::StringType&>(file_path->value()); |
| 275 file_path_str.clear(); |
| 276 |
| 277 if (!url.is_valid()) |
| 278 return false; |
| 279 |
| 280 #if defined(OS_WIN) |
| 281 std::string path; |
| 282 std::string host = url.host(); |
| 283 if (host.empty()) { |
| 284 // URL contains no host, the path is the filename. In this case, the path |
| 285 // will probably be preceeded with a slash, as in "/C:/foo.txt", so we |
| 286 // trim out that here. |
| 287 path = url.path(); |
| 288 size_t first_non_slash = path.find_first_not_of("/\\"); |
| 289 if (first_non_slash != std::string::npos && first_non_slash > 0) |
| 290 path.erase(0, first_non_slash); |
| 291 } else { |
| 292 // URL contains a host: this means it's UNC. We keep the preceeding slash |
| 293 // on the path. |
| 294 path = "\\\\"; |
| 295 path.append(host); |
| 296 path.append(url.path()); |
| 297 } |
| 298 std::replace(path.begin(), path.end(), '/', '\\'); |
| 299 #else // defined(OS_WIN) |
| 300 // Firefox seems to ignore the "host" of a file url if there is one. That is, |
| 301 // file://foo/bar.txt maps to /bar.txt. |
| 302 // TODO(dhg): This should probably take into account UNCs which could |
| 303 // include a hostname other than localhost or blank |
| 304 std::string path = url.path(); |
| 305 #endif // !defined(OS_WIN) |
| 306 |
| 307 if (path.empty()) |
| 308 return false; |
| 309 |
| 310 // GURL stores strings as percent-encoded 8-bit, this will undo if possible. |
| 311 path = UnescapeURLComponent(path, |
| 312 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); |
| 313 |
| 314 #if defined(OS_WIN) |
| 315 if (IsStringUTF8(path)) { |
| 316 file_path_str.assign(base::UTF8ToWide(path)); |
| 317 // We used to try too hard and see if |path| made up entirely of |
| 318 // the 1st 256 characters in the Unicode was a zero-extended UTF-16. |
| 319 // If so, we converted it to 'Latin-1' and checked if the result was UTF-8. |
| 320 // If the check passed, we converted the result to UTF-8. |
| 321 // Otherwise, we treated the result as the native OS encoding. |
| 322 // However, that led to http://crbug.com/4619 and http://crbug.com/14153 |
| 323 } else { |
| 324 // Not UTF-8, assume encoding is native codepage and we're done. We know we |
| 325 // are giving the conversion function a nonempty string, and it may fail if |
| 326 // the given string is not in the current encoding and give us an empty |
| 327 // string back. We detect this and report failure. |
| 328 file_path_str = base::SysNativeMBToWide(path); |
| 329 } |
| 330 #else // defined(OS_WIN) |
| 331 // Collapse multiple path slashes into a single path slash. |
| 332 std::string new_path; |
| 333 do { |
| 334 new_path = path; |
| 335 ReplaceSubstringsAfterOffset(&new_path, 0, "//", "/"); |
| 336 path.swap(new_path); |
| 337 } while (new_path != path); |
| 338 |
| 339 file_path_str.assign(path); |
| 340 #endif // !defined(OS_WIN) |
| 341 |
| 342 return !file_path_str.empty(); |
| 343 } |
| 344 |
| 345 bool IsSafePortablePathComponent(const base::FilePath& component) { |
| 346 base::string16 component16; |
| 347 base::FilePath::StringType sanitized = component.value(); |
| 348 SanitizeGeneratedFileName(&sanitized, true); |
| 349 base::FilePath::StringType extension = component.Extension(); |
| 350 if (!extension.empty()) |
| 351 extension.erase(extension.begin()); // Erase preceding '.'. |
| 352 return !component.empty() && |
| 353 (component == component.BaseName()) && |
| 354 (component == component.StripTrailingSeparators()) && |
| 355 FilePathToString16(component, &component16) && |
| 356 file_util::IsFilenameLegal(component16) && |
| 357 !IsShellIntegratedExtension(extension) && |
| 358 (sanitized == component.value()) && |
| 359 !IsReservedName(component.value()); |
| 360 } |
| 361 |
| 362 bool IsSafePortableRelativePath(const base::FilePath& path) { |
| 363 if (path.empty() || path.IsAbsolute() || path.EndsWithSeparator()) |
| 364 return false; |
| 365 std::vector<base::FilePath::StringType> components; |
| 366 path.GetComponents(&components); |
| 367 if (components.empty()) |
| 368 return false; |
| 369 for (size_t i = 0; i < components.size() - 1; ++i) { |
| 370 if (!IsSafePortablePathComponent(base::FilePath(components[i]))) |
| 371 return false; |
| 372 } |
| 373 return IsSafePortablePathComponent(path.BaseName()); |
| 374 } |
| 375 |
| 376 void GenerateSafeFileName(const std::string& mime_type, |
| 377 bool ignore_extension, |
| 378 base::FilePath* file_path) { |
| 379 // Make sure we get the right file extension |
| 380 EnsureSafeExtension(mime_type, ignore_extension, file_path); |
| 381 |
| 382 #if defined(OS_WIN) |
| 383 // Prepend "_" to the file name if it's a reserved name |
| 384 base::FilePath::StringType leaf_name = file_path->BaseName().value(); |
| 385 DCHECK(!leaf_name.empty()); |
| 386 if (IsReservedName(leaf_name)) { |
| 387 leaf_name = base::FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name; |
| 388 *file_path = file_path->DirName(); |
| 389 if (file_path->value() == base::FilePath::kCurrentDirectory) { |
| 390 *file_path = base::FilePath(leaf_name); |
| 391 } else { |
| 392 *file_path = file_path->Append(leaf_name); |
| 393 } |
| 394 } |
| 395 #endif |
| 396 } |
| 397 |
| 398 base::string16 GetSuggestedFilename(const GURL& url, |
| 399 const std::string& content_disposition, |
| 400 const std::string& referrer_charset, |
| 401 const std::string& suggested_name, |
| 402 const std::string& mime_type, |
| 403 const std::string& default_name) { |
| 404 // TODO: this function to be updated to match the httpbis recommendations. |
| 405 // Talk to abarth for the latest news. |
| 406 |
| 407 // We don't translate this fallback string, "download". If localization is |
| 408 // needed, the caller should provide localized fallback in |default_name|. |
| 409 static const base::FilePath::CharType kFinalFallbackName[] = |
| 410 FILE_PATH_LITERAL("download"); |
| 411 std::string filename; // In UTF-8 |
| 412 bool overwrite_extension = false; |
| 413 |
| 414 // Try to extract a filename from content-disposition first. |
| 415 if (!content_disposition.empty()) { |
| 416 HttpContentDisposition header(content_disposition, referrer_charset); |
| 417 filename = header.filename(); |
| 418 } |
| 419 |
| 420 // Then try to use the suggested name. |
| 421 if (filename.empty() && !suggested_name.empty()) |
| 422 filename = suggested_name; |
| 423 |
| 424 // Now try extracting the filename from the URL. GetFileNameFromURL() only |
| 425 // looks at the last component of the URL and doesn't return the hostname as a |
| 426 // failover. |
| 427 if (filename.empty()) |
| 428 filename = GetFileNameFromURL(url, referrer_charset, &overwrite_extension); |
| 429 |
| 430 // Finally try the URL hostname, but only if there's no default specified in |
| 431 // |default_name|. Some schemes (e.g.: file:, about:, data:) do not have a |
| 432 // host name. |
| 433 if (filename.empty() && |
| 434 default_name.empty() && |
| 435 url.is_valid() && |
| 436 !url.host().empty()) { |
| 437 // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451) |
| 438 filename = url.host(); |
| 439 } |
| 440 |
| 441 bool replace_trailing = false; |
| 442 base::FilePath::StringType result_str, default_name_str; |
| 443 #if defined(OS_WIN) |
| 444 replace_trailing = true; |
| 445 result_str = base::UTF8ToUTF16(filename); |
| 446 default_name_str = base::UTF8ToUTF16(default_name); |
| 447 #else |
| 448 result_str = filename; |
| 449 default_name_str = default_name; |
| 450 #endif |
| 451 SanitizeGeneratedFileName(&result_str, replace_trailing); |
| 452 if (result_str.find_last_not_of(FILE_PATH_LITERAL("-_")) == |
| 453 base::FilePath::StringType::npos) { |
| 454 result_str = !default_name_str.empty() ? default_name_str : |
| 455 base::FilePath::StringType(kFinalFallbackName); |
| 456 overwrite_extension = false; |
| 457 } |
| 458 file_util::ReplaceIllegalCharactersInPath(&result_str, '-'); |
| 459 base::FilePath result(result_str); |
| 460 GenerateSafeFileName(mime_type, overwrite_extension, &result); |
| 461 |
| 462 base::string16 result16; |
| 463 if (!FilePathToString16(result, &result16)) { |
| 464 result = base::FilePath(default_name_str); |
| 465 if (!FilePathToString16(result, &result16)) { |
| 466 result = base::FilePath(kFinalFallbackName); |
| 467 FilePathToString16(result, &result16); |
| 468 } |
| 469 } |
| 470 return result16; |
| 471 } |
| 472 |
| 473 base::FilePath GenerateFileName(const GURL& url, |
| 474 const std::string& content_disposition, |
| 475 const std::string& referrer_charset, |
| 476 const std::string& suggested_name, |
| 477 const std::string& mime_type, |
| 478 const std::string& default_file_name) { |
| 479 base::string16 file_name = GetSuggestedFilename(url, |
| 480 content_disposition, |
| 481 referrer_charset, |
| 482 suggested_name, |
| 483 mime_type, |
| 484 default_file_name); |
| 485 |
| 486 #if defined(OS_WIN) |
| 487 base::FilePath generated_name(file_name); |
| 488 #else |
| 489 base::FilePath generated_name( |
| 490 base::SysWideToNativeMB(base::UTF16ToWide(file_name))); |
| 491 #endif |
| 492 |
| 493 #if defined(OS_CHROMEOS) |
| 494 // When doing file manager operations on ChromeOS, the file paths get |
| 495 // normalized in WebKit layer, so let's ensure downloaded files have |
| 496 // normalized names. Otherwise, we won't be able to handle files with NFD |
| 497 // utf8 encoded characters in name. |
| 498 file_util::NormalizeFileNameEncoding(&generated_name); |
| 499 #endif |
| 500 |
| 501 DCHECK(!generated_name.empty()); |
| 502 |
| 503 return generated_name; |
| 504 } |
| 505 |
| 506 } // namespace net |
OLD | NEW |