Chromium Code Reviews| Index: net/base/net_util.cc |
| diff --git a/net/base/net_util.cc b/net/base/net_util.cc |
| index 097da238ed2acf6d2ba5123f8ddab72a5fe57d93..a5ab31e71392c780f3b6e575b9cabf770fa276bd 100644 |
| --- a/net/base/net_util.cc |
| +++ b/net/base/net_util.cc |
| @@ -64,6 +64,7 @@ |
| #include "grit/net_resources.h" |
| #include "net/base/dns_util.h" |
| #include "net/base/escape.h" |
| +#include "net/base/mime_util.h" |
| #include "net/base/net_module.h" |
| #if defined(OS_WIN) |
| #include "net/base/winsock_init.h" |
| @@ -921,6 +922,154 @@ char* do_strdup(const char* src) { |
| #endif |
| } |
| +void TrimGeneratedFileName(std::string& filename) { /* Trims filename in place; the result may be empty. */ |
| + if (!filename.empty()) { |
| + // Strip "." from both ends of the file name (in place) to avoid tricks |
| + // with hidden files, "..", and ".". |
| + TrimString(filename, ".", &filename); |
| +#if defined(OS_WIN) |
| + // CreateFile() strips trailing dots and spaces from file names, so drop them |
| + // here first. See http://support.microsoft.com/kb/115827 |
| + std::string::size_type pos = filename.find_last_not_of(" ."); |
| + if (pos == std::string::npos) /* Only spaces and dots (or nothing) are left. */ |
| + filename.resize(0); |
| + else |
| + filename.resize(++pos); /* Cut right after the last other character. */ |
| +#endif |
| + } |
| +} |
| + |
| +std::string GetFileNameFromURL(const GURL& url, |
| + const std::string& referrer_charset) { |
| + // about: and data: URLs don't have file names, but data: URLs in particular |
| + // may contain parts that look like one (i.e., contain a slash). Therefore we |
| + // don't attempt to derive a file name from them; invalid URLs are skipped too. |
| + if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data")) |
| + return ""; /* Empty result: no file name is derived from this URL. */ |

rvargas (doing something else)
2011/07/26 00:10:57
nit: return std::string();
asanka
2011/07/28 20:04:38
Done.
|
| + |
| + const std::string unescaped_url_filename = UnescapeURLComponent( |
| + url.ExtractFileName(), |
| + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); |
| + |
| + // The URL's path should be escaped UTF-8, but may not be, so non-ASCII names go through DecodeWord. |
| + std::string decoded_filename = unescaped_url_filename; |
| + if (!IsStringASCII(decoded_filename)) { |
| + bool ignore; /* Written by DecodeWord; never read here. */ |
| + // TODO(jshin): this is probably not robust enough. To be sure, we need |
| + // encoding detection. |
| + DecodeWord(unescaped_url_filename, referrer_charset, &ignore, |
| + &decoded_filename); |
| + } |
| + |
| + return decoded_filename; |
| +} |
| + |
| +#if defined(OS_WIN) |
| +// Returns true if the specified extension (given without a leading dot) is |
| +// automatically integrated into the Windows shell. |
| +bool IsShellIntegratedExtension(const string16& extension) { |
| + string16 extension_lower = StringToLowerASCII(extension); /* Lowered copy used by every comparison below. */ |
| + |
| + static const wchar_t* const integrated_extensions[] = { |
| + // See <http://msdn.microsoft.com/en-us/library/ms811694.aspx>. |
| + L"local", |
| + // Right-clicking on shortcuts can be magical. |
| + L"lnk", |
| + }; |
| + |
| + for (int i = 0; i < arraysize(integrated_extensions); ++i) { /* NOTE(review): i is int while arraysize() is presumably unsigned; consider size_t. */ |
| + if (extension_lower == integrated_extensions[i]) |
| + return true; |
| + } |
| + |
| + // See <http://www.juniper.net/security/auto/vulnerabilities/vuln2612.html>. |
| + // That vulnerability report is not exactly on point, but files become magical |
| + // if they end in a CLSID. Here we block brace-wrapped extensions, which look like CLSIDs. |
| + if (!extension_lower.empty() && extension_lower[0] == L'{' && |
| + extension_lower[extension_lower.length() - 1] == L'}') |
| + return true; |
| + |
| + return false; |
| +} |
| + |
| +// Returns whether the specified file name is a reserved name on Windows. |
| +// This includes device names, with or without an extension (e.g. "com2.zip"), |
| +// and desktop.ini and thumbs.db, which have special meaning to the Windows shell. |
| +bool IsReservedName(const string16& filename) { |
| + // This list is taken from the MSDN article "Naming a file" |
| + // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx |
| + // clock$ is also included because GetSaveFileName seems to consider it a |
| + // reserved name too. |
| + static const wchar_t* const known_devices[] = { |
| + L"con", L"prn", L"aux", L"nul", L"com1", L"com2", L"com3", L"com4", L"com5", |
| + L"com6", L"com7", L"com8", L"com9", L"lpt1", L"lpt2", L"lpt3", L"lpt4", |
| + L"lpt5", L"lpt6", L"lpt7", L"lpt8", L"lpt9", L"clock$" |
| + }; |
| + string16 filename_lower = StringToLowerASCII(filename); /* Lowered copy used by every comparison below. */ |
| + |
| + for (int i = 0; i < arraysize(known_devices); ++i) { |
| + // Exact match (the bare device name). |
| + if (filename_lower == known_devices[i]) |
| + return true; |
| + // Starts with "DEVICE." (e.g. "com2.zip"). |
| + if (filename_lower.find(string16(known_devices[i]) + L".") == 0) |
| + return true; |
| + } |
| + |
| + static const wchar_t* const magic_names[] = { |
| + // These file names are used by the "Customize folder" feature of the shell. |
| + L"desktop.ini", |
| + L"thumbs.db", |
| + }; |
| + |
| + for (int i = 0; i < arraysize(magic_names); ++i) { /* Whole-name matches only. */ |
| + if (filename_lower == magic_names[i]) |
| + return true; |
| + } |
| + |
| + return false; |
| +} |
| +#endif // OS_WIN |
| + |
| +void GenerateSafeExtension(const std::string& mime_type, FilePath* file_name) { |
| + // We're worried about two things here: |
| + // |
| + // 1) Usability. If the site fails to provide a file extension, we want to |
| + // guess a reasonable file extension based on the content type. |
| + // |
| + // 2) Shell integration. Some file extensions automatically integrate with |
| + // the shell. We block these extensions to prevent a malicious web site |
| + // from integrating with the user's shell. |
| + |
| + // See if our file name already contains an extension; it is handled below without its leading '.'. |
| + FilePath::StringType extension = file_name->Extension(); |
| + if (!extension.empty()) |
| + extension.erase(extension.begin()); // Erase preceding '.'. |
| + |
| +#if defined(OS_WIN) |
| + static const FilePath::CharType default_extension[] = |
| + FILE_PATH_LITERAL("download"); |
| + |
| + // Replace shell-integrated extensions with the default extension above. |
| + // TODO(asanka): Consider stripping out the bad extension and replacing it |
| + // with the preferred extension for the MIME type if one is available. |
| + if (IsShellIntegratedExtension(extension)) |
| + extension.assign(default_extension); |
| +#endif |
| + |
| + if (extension.empty() && !mime_type.empty()) { /* Only guess from the MIME type when no extension is present. */ |
| + // The GetPreferredExtensionForMimeType call will end up going to disk, so |
| + // this should run on a thread other than the IO thread to avoid slowing it. |
| + // http://crbug.com/61827 |
| + // TODO(asanka): Remove this ScopedAllowIO once all callers have switched |
| + // over to IO safe threads. |
| + base::ThreadRestrictions::ScopedAllowIO allow_io; |
| + net::GetPreferredExtensionForMimeType(mime_type, &extension); |
| + } |
| + |
| + *file_name = file_name->ReplaceExtension(extension); /* Write the chosen extension back into the caller's path. */ |
| +} |
| + |
| } // namespace |
| const FormatUrlType kFormatUrlOmitNothing = 0; |
| @@ -968,7 +1117,7 @@ GURL FilePathToFileURL(const FilePath& path) { |
| } |
| std::string GetSpecificHeader(const std::string& headers, |
| - const std::string& name) { |
| + const std::string& name) { |
| // We want to grab the Value from the "Key: Value" pairs in the headers, |
| // which should look like this (no leading spaces, \n-separated) (we format |
| // them this way in url_request_inet.cc): |
| @@ -1248,104 +1397,112 @@ string16 StripWWW(const string16& text) { |
| return StartsWith(text, www, true) ? text.substr(www.length()) : text; |
| } |
| +void GenerateSafeFileName(const std::string& mime_type, FilePath* file_path) { |
| + // Fix up the file extension first; this may rewrite |file_path| in place. |
| + GenerateSafeExtension(mime_type, file_path); |
| + |
| +#if defined(OS_WIN) |
| + // Prepend "_" to the last path component if it is a reserved name. |
| + FilePath::StringType leaf_name = file_path->BaseName().value(); |
| + DCHECK(!leaf_name.empty()); |
| + if (IsReservedName(leaf_name)) { |
| + leaf_name = FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name; |
| + *file_path = file_path->DirName(); /* Rebuild the path from its directory plus the prefixed leaf. */ |
| + if (file_path->value() == FilePath::kCurrentDirectory) { /* Presumably the path had no directory part; TODO confirm against FilePath::DirName. */ |
| + *file_path = FilePath(leaf_name); |
| + } else { |
| + *file_path = file_path->Append(leaf_name); |
| + } |
| + } |
| +#endif |
| +} |
| + |
| string16 GetSuggestedFilename(const GURL& url, |
| const std::string& content_disposition, |
| const std::string& referrer_charset, |
| const std::string& suggested_name, |
| + const std::string& mime_type, |
| const string16& default_name) { |
| // TODO: this function to be updated to match the httpbis recommendations. |
| // Talk to abarth for the latest news. |
| // We don't translate this fallback string, "download". If localization is |
| - // needed, the caller should provide localized fallback default_name. |
| + // needed, the caller should provide localized fallback in |default_name|. |
| static const char* kFinalFallbackName = "download"; |
| + std::string filename; // In UTF-8 |
| - std::string filename; |
| - |
| - // Try to extract from content-disposition first. |
| - if (!content_disposition.empty()) |
| + // Try to extract a filename from content-disposition first. |
| + if (!content_disposition.empty()) { |
| filename = GetFileNameFromCD(content_disposition, referrer_charset); |
| + TrimGeneratedFileName(filename); |
| + } |
| - // Then try to use suggested name. |
| - if (filename.empty() && !suggested_name.empty()) |
| + // Then try to use the suggested name. |
| + if (filename.empty() && !suggested_name.empty()) { |
| filename = suggested_name; |
| - |
| - if (!filename.empty()) { |
| - // Replace any path information the server may have sent, by changing |
| - // path separators with underscores. |
| - ReplaceSubstringsAfterOffset(&filename, 0, "/", "_"); |
| - ReplaceSubstringsAfterOffset(&filename, 0, "\\", "_"); |
| - |
| - // Next, remove "." from the beginning and end of the file name to avoid |
| - // tricks with hidden files, "..", and "." |
| - TrimString(filename, ".", &filename); |
| + TrimGeneratedFileName(filename); |
| } |
| + // Now try extracting the filename from the URL. This only looks at the last |
| + // component of the URL and doesn't fall back to returning the hostname. |
|
rvargas (doing something else)
2011/07/26 00:10:57
nit: failover as a verb
asanka
2011/07/28 20:04:38
Done.
|
| if (filename.empty()) { |
| - // about: and data: URLs don't have file names, but esp. data: URLs may |
| - // contain parts that look like ones (i.e., contain a slash). |
| - // Therefore we don't attempt to divine a file name out of them. |
| - if (url.SchemeIs("about") || url.SchemeIs("data")) { |
| - return default_name.empty() ? ASCIIToUTF16(kFinalFallbackName) |
| - : default_name; |
| - } |
| - |
| - if (url.is_valid()) { |
| - const std::string unescaped_url_filename = UnescapeURLComponent( |
| - url.ExtractFileName(), |
| - UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); |
| - |
| - // The URL's path should be escaped UTF-8, but may not be. |
| - std::string decoded_filename = unescaped_url_filename; |
| - if (!IsStringASCII(decoded_filename)) { |
| - bool ignore; |
| - // TODO(jshin): this is probably not robust enough. To be sure, we |
| - // need encoding detection. |
| - DecodeWord(unescaped_url_filename, referrer_charset, &ignore, |
| - &decoded_filename); |
| - } |
| - |
| - filename = decoded_filename; |
| - } |
| + filename = GetFileNameFromURL(url, referrer_charset); |
| + TrimGeneratedFileName(filename); |
| } |
| -#if defined(OS_WIN) |
| - { // Handle CreateFile() stripping trailing dots and spaces on filenames |
| - // http://support.microsoft.com/kb/115827 |
| - std::string::size_type pos = filename.find_last_not_of(" ."); |
| - if (pos == std::string::npos) |
| - filename.resize(0); |
| - else |
| - filename.resize(++pos); |
| + // Finally try the URL hostname, but only if there's no default specified in |
| + // |default_name|. |
| + if (filename.empty() && default_name.empty() && |
|
rvargas (doing something else)
2011/07/26 00:10:57
The previous logic didn't reach this point for dat
asanka
2011/07/28 20:04:38
Shouldn't the !url.host().empty() exclude 'about'
rvargas (doing something else)
2011/07/29 01:34:53
Yes. Please add a comment about that (we are losin
|
| + url.is_valid() && !url.host().empty()) { |
| + // Some schemes (e.g. file) do not have a hostname; those skip this branch |
| + // and get the hardcoded last fallback name below. |
| + // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451) |
| + filename = url.host(); |
| + TrimGeneratedFileName(filename); |
|
rvargas (doing something else)
2011/07/26 00:10:57
Looks like it's better to call this after the next
asanka
2011/07/28 20:04:38
I did it this way so that if trimming the filename
rvargas (doing something else)
2011/07/29 01:34:53
I'm not so sure about that. For instance, if the s
|
| } |
| -#endif |
| - // Trim '.' once more. |
| - TrimString(filename, ".", &filename); |
| - // If there's no filename or it gets trimed to be empty, use |
| - // the URL hostname or default_name |
| - if (filename.empty()) { |
| - if (!default_name.empty()) { |
| - return default_name; |
| - } else if (url.is_valid()) { |
| - // Some schemes (e.g. file) do not have a hostname. Even though it's |
| - // not likely to reach here, let's hardcode the last fallback name. |
| - // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451) |
| - filename = url.host().empty() ? kFinalFallbackName : url.host(); |
| - } else { |
| - NOTREACHED(); |
| - } |
| + if (filename.empty() && default_name.empty()) { |
| + filename = kFinalFallbackName; |
| } |
| + // Replace any path information by changing path separators with |
| + // underscores. |
| + ReplaceSubstringsAfterOffset(&filename, 0, "/", "_"); |
| + ReplaceSubstringsAfterOffset(&filename, 0, "\\", "_"); |
| + |
| #if defined(OS_WIN) |
| - string16 path = UTF8ToUTF16(filename); |
| + string16 path = (filename.empty())? default_name : UTF8ToUTF16(filename); |
| file_util::ReplaceIllegalCharactersInPath(&path, '-'); |
| - return path; |
| + FilePath result(path); |
| + GenerateSafeFileName(mime_type, &result); |
| + return result.value(); |
| #else |
| - std::string path = filename; |
| + std::string path = (filename.empty())? UTF16ToUTF8(default_name) : filename; |
| file_util::ReplaceIllegalCharactersInPath(&path, '-'); |
| - return UTF8ToUTF16(path); |
| + FilePath result(path); |
| + GenerateSafeFileName(mime_type, &result); |
| + return UTF8ToUTF16(result.value()); |
| +#endif |
| +} |
| + |
| +FilePath GenerateFileName(const GURL& url, |
| + const std::string& content_disposition, |
| + const std::string& referrer_charset, |
| + const std::string& suggested_name, |
| + const std::string& mime_type, |
| + const string16& default_name) { /* Same inputs as GetSuggestedFilename; returns the result as a FilePath. */ |
| + string16 file_name = GetSuggestedFilename( |
| + url, content_disposition, referrer_charset, |
| + suggested_name, mime_type, default_name); |
| + |
| +#if defined(OS_WIN) |
| + FilePath generated_name(file_name); |
| +#else |
| + FilePath generated_name(base::SysWideToNativeMB(UTF16ToWide(file_name))); /* UTF-16 to wide to native multibyte. */ |
| #endif |
| + DCHECK(!generated_name.empty()); /* A non-empty name is expected from GetSuggestedFilename. */ |
| + |
| + return generated_name; |
| } |
| bool IsPortAllowedByDefault(int port) { |