Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(28)

Side by Side Diff: net/base/net_util.cc

Issue 7300005: Move filename determination to net_util (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Comments Created 9 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/net_util.h" 5 #include "net/base/net_util.h"
6 6
7 #include <unicode/regex.h> 7 #include <unicode/regex.h>
8 #include <unicode/ucnv.h> 8 #include <unicode/ucnv.h>
9 #include <unicode/uidna.h> 9 #include <unicode/uidna.h>
10 #include <unicode/ulocdata.h> 10 #include <unicode/ulocdata.h>
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
57 #include "base/time.h" 57 #include "base/time.h"
58 #include "base/utf_offset_string_conversions.h" 58 #include "base/utf_offset_string_conversions.h"
59 #include "base/utf_string_conversions.h" 59 #include "base/utf_string_conversions.h"
60 #include "googleurl/src/gurl.h" 60 #include "googleurl/src/gurl.h"
61 #include "googleurl/src/url_canon.h" 61 #include "googleurl/src/url_canon.h"
62 #include "googleurl/src/url_canon_ip.h" 62 #include "googleurl/src/url_canon_ip.h"
63 #include "googleurl/src/url_parse.h" 63 #include "googleurl/src/url_parse.h"
64 #include "grit/net_resources.h" 64 #include "grit/net_resources.h"
65 #include "net/base/dns_util.h" 65 #include "net/base/dns_util.h"
66 #include "net/base/escape.h" 66 #include "net/base/escape.h"
67 #include "net/base/mime_util.h"
67 #include "net/base/net_module.h" 68 #include "net/base/net_module.h"
68 #if defined(OS_WIN) 69 #if defined(OS_WIN)
69 #include "net/base/winsock_init.h" 70 #include "net/base/winsock_init.h"
70 #endif 71 #endif
71 #include "unicode/datefmt.h" 72 #include "unicode/datefmt.h"
72 73
73 using base::Time; 74 using base::Time;
74 75
75 namespace net { 76 namespace net {
76 77
(...skipping 837 matching lines...) Expand 10 before | Expand all | Expand 10 after
914 } 915 }
915 916
916 char* do_strdup(const char* src) { 917 char* do_strdup(const char* src) {
917 #if defined(OS_WIN) 918 #if defined(OS_WIN)
918 return _strdup(src); 919 return _strdup(src);
919 #else 920 #else
920 return strdup(src); 921 return strdup(src);
921 #endif 922 #endif
922 } 923 }
923 924
925 void TrimGeneratedFileName(std::string& filename) {
926 if (!filename.empty()) {
927 // Remove "." from the beginning and end of the file name to avoid tricks
928 // with hidden files, "..", and "."
929 TrimString(filename, ".", &filename);
930 #if defined(OS_WIN)
931 // Handle CreateFile() stripping trailing dots and spaces on filenames
932 // http://support.microsoft.com/kb/115827
933 std::string::size_type pos = filename.find_last_not_of(" .");
934 if (pos == std::string::npos)
935 filename.resize(0);
936 else
937 filename.resize(++pos);
938 #endif
939 }
940 }
941
942 std::string GetFileNameFromURL(const GURL& url,
943 const std::string& referrer_charset) {
944 // about: and data: URLs don't have file names, but esp. data: URLs may
945 // contain parts that look like ones (i.e., contain a slash). Therefore we
946 // don't attempt to divine a file name out of them.
947 if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data"))
948 return "";
rvargas (doing something else) 2011/07/26 00:10:57 nit: return std::string();
asanka 2011/07/28 20:04:38 Done.
949
950 const std::string unescaped_url_filename = UnescapeURLComponent(
951 url.ExtractFileName(),
952 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
953
954 // The URL's path should be escaped UTF-8, but may not be.
955 std::string decoded_filename = unescaped_url_filename;
956 if (!IsStringASCII(decoded_filename)) {
957 bool ignore;
958 // TODO(jshin): this is probably not robust enough. To be sure, we need
959 // encoding detection.
960 DecodeWord(unescaped_url_filename, referrer_charset, &ignore,
961 &decoded_filename);
962 }
963
964 return decoded_filename;
965 }
966
967 #if defined(OS_WIN)
968 // Returns whether the specified extension is automatically integrated into the
969 // windows shell.
970 bool IsShellIntegratedExtension(const string16& extension) {
971 string16 extension_lower = StringToLowerASCII(extension);
972
973 static const wchar_t* const integrated_extensions[] = {
974 // See <http://msdn.microsoft.com/en-us/library/ms811694.aspx>.
975 L"local",
976 // Right-clicking on shortcuts can be magical.
977 L"lnk",
978 };
979
980 for (int i = 0; i < arraysize(integrated_extensions); ++i) {
981 if (extension_lower == integrated_extensions[i])
982 return true;
983 }
984
985 // See <http://www.juniper.net/security/auto/vulnerabilities/vuln2612.html>.
986 // That vulnerability report is not exactly on point, but files become magical
987 // if they end in a CLSID. Here we block extensions that look like CLSIDs.
988 if (!extension_lower.empty() && extension_lower[0] == L'{' &&
989 extension_lower[extension_lower.length() - 1] == L'}')
990 return true;
991
992 return false;
993 }
994
995 // Returns whether the specified file name is a reserved name on windows.
996 // This includes names like "com2.zip" (which correspond to devices) and
997 // desktop.ini and thumbs.db which have special meaning to the windows shell.
998 bool IsReservedName(const string16& filename) {
999 // This list is taken from the MSDN article "Naming a file"
1000 // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx
1001 // I also added clock$ because GetSaveFileName seems to consider it as a
1002 // reserved name too.
1003 static const wchar_t* const known_devices[] = {
1004 L"con", L"prn", L"aux", L"nul", L"com1", L"com2", L"com3", L"com4", L"com5",
1005 L"com6", L"com7", L"com8", L"com9", L"lpt1", L"lpt2", L"lpt3", L"lpt4",
1006 L"lpt5", L"lpt6", L"lpt7", L"lpt8", L"lpt9", L"clock$"
1007 };
1008 string16 filename_lower = StringToLowerASCII(filename);
1009
1010 for (int i = 0; i < arraysize(known_devices); ++i) {
1011 // Exact match.
1012 if (filename_lower == known_devices[i])
1013 return true;
1014 // Starts with "DEVICE.".
1015 if (filename_lower.find(string16(known_devices[i]) + L".") == 0)
1016 return true;
1017 }
1018
1019 static const wchar_t* const magic_names[] = {
1020 // These file names are used by the "Customize folder" feature of the shell.
1021 L"desktop.ini",
1022 L"thumbs.db",
1023 };
1024
1025 for (int i = 0; i < arraysize(magic_names); ++i) {
1026 if (filename_lower == magic_names[i])
1027 return true;
1028 }
1029
1030 return false;
1031 }
1032 #endif // OS_WIN
1033
1034 void GenerateSafeExtension(const std::string& mime_type, FilePath* file_name) {
1035 // We're worried about two things here:
1036 //
1037 // 1) Usability. If the site fails to provide a file extension, we want to
1038 // guess a reasonable file extension based on the content type.
1039 //
1040 // 2) Shell integration. Some file extensions automatically integrate with
1041 // the shell. We block these extensions to prevent a malicious web site
1042 // from integrating with the user's shell.
1043
1044 // See if our file name already contains an extension.
1045 FilePath::StringType extension = file_name->Extension();
1046 if (!extension.empty())
1047 extension.erase(extension.begin()); // Erase preceding '.'.
1048
1049 #if defined(OS_WIN)
1050 static const FilePath::CharType default_extension[] =
1051 FILE_PATH_LITERAL("download");
1052
1053 // Rename shell-integrated extensions.
1054 // TODO(asanka): Consider stripping out the bad extension and replacing it
1055 // with the preferred extension for the MIME type if one is available.
1056 if (IsShellIntegratedExtension(extension))
1057 extension.assign(default_extension);
1058 #endif
1059
1060 if (extension.empty() && !mime_type.empty()) {
1061 // The GetPreferredExtensionForMimeType call will end up going to disk. Do
1062 // this on another thread to avoid slowing the IO thread.
1063 // http://crbug.com/61827
1064 // TODO(asanka): Remove this ScopedAllowIO once all callers have switched
1065 // over to IO safe threads.
1066 base::ThreadRestrictions::ScopedAllowIO allow_io;
1067 net::GetPreferredExtensionForMimeType(mime_type, &extension);
1068 }
1069
1070 *file_name = file_name->ReplaceExtension(extension);
1071 }
1072
924 } // namespace 1073 } // namespace
925 1074
926 const FormatUrlType kFormatUrlOmitNothing = 0; 1075 const FormatUrlType kFormatUrlOmitNothing = 0;
927 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; 1076 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;
928 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; 1077 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;
929 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; 1078 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;
930 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | 1079 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword |
931 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; 1080 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname;
932 1081
933 // TODO(viettrungluu): We don't want non-POD globals; change this. 1082 // TODO(viettrungluu): We don't want non-POD globals; change this.
(...skipping 27 matching lines...) Expand all
961 1110
962 #if defined(OS_POSIX) 1111 #if defined(OS_POSIX)
963 ReplaceSubstringsAfterOffset(&url_string, 0, 1112 ReplaceSubstringsAfterOffset(&url_string, 0,
964 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); 1113 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C"));
965 #endif 1114 #endif
966 1115
967 return GURL(url_string); 1116 return GURL(url_string);
968 } 1117 }
969 1118
970 std::string GetSpecificHeader(const std::string& headers, 1119 std::string GetSpecificHeader(const std::string& headers,
971 const std::string& name) { 1120 const std::string& name) {
972 // We want to grab the Value from the "Key: Value" pairs in the headers, 1121 // We want to grab the Value from the "Key: Value" pairs in the headers,
973 // which should look like this (no leading spaces, \n-separated) (we format 1122 // which should look like this (no leading spaces, \n-separated) (we format
974 // them this way in url_request_inet.cc): 1123 // them this way in url_request_inet.cc):
975 // HTTP/1.1 200 OK\n 1124 // HTTP/1.1 200 OK\n
976 // ETag: "6d0b8-947-24f35ec0"\n 1125 // ETag: "6d0b8-947-24f35ec0"\n
977 // Content-Length: 2375\n 1126 // Content-Length: 2375\n
978 // Content-Type: text/html; charset=UTF-8\n 1127 // Content-Type: text/html; charset=UTF-8\n
979 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n 1128 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n
980 if (headers.empty()) 1129 if (headers.empty())
981 return std::string(); 1130 return std::string();
(...skipping 259 matching lines...) Expand 10 before | Expand all | Expand 10 after
1241 result.append(");</script>\n"); 1390 result.append(");</script>\n");
1242 1391
1243 return result; 1392 return result;
1244 } 1393 }
1245 1394
1246 string16 StripWWW(const string16& text) { 1395 string16 StripWWW(const string16& text) {
1247 const string16 www(ASCIIToUTF16("www.")); 1396 const string16 www(ASCIIToUTF16("www."));
1248 return StartsWith(text, www, true) ? text.substr(www.length()) : text; 1397 return StartsWith(text, www, true) ? text.substr(www.length()) : text;
1249 } 1398 }
1250 1399
1400 void GenerateSafeFileName(const std::string& mime_type, FilePath* file_path) {
1401 // Make sure we get the right file extension
1402 GenerateSafeExtension(mime_type, file_path);
1403
1404 #if defined(OS_WIN)
1405 // Prepend "_" to the file name if it's a reserved name
1406 FilePath::StringType leaf_name = file_path->BaseName().value();
1407 DCHECK(!leaf_name.empty());
1408 if (IsReservedName(leaf_name)) {
1409 leaf_name = FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name;
1410 *file_path = file_path->DirName();
1411 if (file_path->value() == FilePath::kCurrentDirectory) {
1412 *file_path = FilePath(leaf_name);
1413 } else {
1414 *file_path = file_path->Append(leaf_name);
1415 }
1416 }
1417 #endif
1418 }
1419
1251 string16 GetSuggestedFilename(const GURL& url, 1420 string16 GetSuggestedFilename(const GURL& url,
1252 const std::string& content_disposition, 1421 const std::string& content_disposition,
1253 const std::string& referrer_charset, 1422 const std::string& referrer_charset,
1254 const std::string& suggested_name, 1423 const std::string& suggested_name,
1424 const std::string& mime_type,
1255 const string16& default_name) { 1425 const string16& default_name) {
1256 // TODO: this function to be updated to match the httpbis recommendations. 1426 // TODO: this function to be updated to match the httpbis recommendations.
1257 // Talk to abarth for the latest news. 1427 // Talk to abarth for the latest news.
1258 1428
1259 // We don't translate this fallback string, "download". If localization is 1429 // We don't translate this fallback string, "download". If localization is
1260 // needed, the caller should provide localized fallback default_name. 1430 // needed, the caller should provide localized fallback in |default_name|.
1261 static const char* kFinalFallbackName = "download"; 1431 static const char* kFinalFallbackName = "download";
1432 std::string filename; // In UTF-8
1262 1433
1263 std::string filename; 1434 // Try to extract a filename from content-disposition first.
1264 1435 if (!content_disposition.empty()) {
1265 // Try to extract from content-disposition first.
1266 if (!content_disposition.empty())
1267 filename = GetFileNameFromCD(content_disposition, referrer_charset); 1436 filename = GetFileNameFromCD(content_disposition, referrer_charset);
1268 1437 TrimGeneratedFileName(filename);
1269 // Then try to use suggested name.
1270 if (filename.empty() && !suggested_name.empty())
1271 filename = suggested_name;
1272
1273 if (!filename.empty()) {
1274 // Replace any path information the server may have sent, by changing
1275 // path separators with underscores.
1276 ReplaceSubstringsAfterOffset(&filename, 0, "/", "_");
1277 ReplaceSubstringsAfterOffset(&filename, 0, "\\", "_");
1278
1279 // Next, remove "." from the beginning and end of the file name to avoid
1280 // tricks with hidden files, "..", and "."
1281 TrimString(filename, ".", &filename);
1282 } 1438 }
1283 1439
1284 if (filename.empty()) { 1440 // Then try to use the suggested name.
1285 // about: and data: URLs don't have file names, but esp. data: URLs may 1441 if (filename.empty() && !suggested_name.empty()) {
1286 // contain parts that look like ones (i.e., contain a slash). 1442 filename = suggested_name;
1287 // Therefore we don't attempt to divine a file name out of them. 1443 TrimGeneratedFileName(filename);
1288 if (url.SchemeIs("about") || url.SchemeIs("data")) {
1289 return default_name.empty() ? ASCIIToUTF16(kFinalFallbackName)
1290 : default_name;
1291 }
1292
1293 if (url.is_valid()) {
1294 const std::string unescaped_url_filename = UnescapeURLComponent(
1295 url.ExtractFileName(),
1296 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
1297
1298 // The URL's path should be escaped UTF-8, but may not be.
1299 std::string decoded_filename = unescaped_url_filename;
1300 if (!IsStringASCII(decoded_filename)) {
1301 bool ignore;
1302 // TODO(jshin): this is probably not robust enough. To be sure, we
1303 // need encoding detection.
1304 DecodeWord(unescaped_url_filename, referrer_charset, &ignore,
1305 &decoded_filename);
1306 }
1307
1308 filename = decoded_filename;
1309 }
1310 } 1444 }
1311 1445
1312 #if defined(OS_WIN) 1446 // Now try extracting the filename from the URL. This only looks at the last
1313 { // Handle CreateFile() stripping trailing dots and spaces on filenames 1447 // component of the URL and doesn't fail over to returning the hostname.
rvargas (doing something else) 2011/07/26 00:10:57 nit: failover as a verb
asanka 2011/07/28 20:04:38 Done.
1314 // http://support.microsoft.com/kb/115827
1315 std::string::size_type pos = filename.find_last_not_of(" .");
1316 if (pos == std::string::npos)
1317 filename.resize(0);
1318 else
1319 filename.resize(++pos);
1320 }
1321 #endif
1322 // Trim '.' once more.
1323 TrimString(filename, ".", &filename);
1324
1325 // If there's no filename or it gets trimmed to be empty, use
1326 // the URL hostname or default_name
1327 if (filename.empty()) { 1448 if (filename.empty()) {
1328 if (!default_name.empty()) { 1449 filename = GetFileNameFromURL(url, referrer_charset);
1329 return default_name; 1450 TrimGeneratedFileName(filename);
1330 } else if (url.is_valid()) {
1331 // Some schemes (e.g. file) do not have a hostname. Even though it's
1332 // not likely to reach here, let's hardcode the last fallback name.
1333 // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451)
1334 filename = url.host().empty() ? kFinalFallbackName : url.host();
1335 } else {
1336 NOTREACHED();
1337 }
1338 } 1451 }
1339 1452
1453 // Finally try the URL hostname, but only if there's no default specified in
1454 // |default_name|.
1455 if (filename.empty() && default_name.empty() &&
rvargas (doing something else) 2011/07/26 00:10:57 The previous logic didn't reach this point for dat
asanka 2011/07/28 20:04:38 Shouldn't the !url.host().empty() exclude 'about'
rvargas (doing something else) 2011/07/29 01:34:53 Yes. Please add a comment about that (we are losin
1456 url.is_valid() && !url.host().empty()) {
1457 // Some schemes (e.g. file) do not have a hostname. Those fail the host check
1458 // above and fall through to the default or hardcoded fallback name below.
1459 // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451)
1460 filename = url.host();
1461 TrimGeneratedFileName(filename);
rvargas (doing something else) 2011/07/26 00:10:57 Looks like it's better to call this after the next
asanka 2011/07/28 20:04:38 I did it this way so that if trimming the filename
rvargas (doing something else) 2011/07/29 01:34:53 I'm not so sure about that. For instance, if the s
1462 }
1463
1464 if (filename.empty() && default_name.empty()) {
1465 filename = kFinalFallbackName;
1466 }
1467
1468 // Replace any path information by changing path separators with
1469 // underscores.
1470 ReplaceSubstringsAfterOffset(&filename, 0, "/", "_");
1471 ReplaceSubstringsAfterOffset(&filename, 0, "\\", "_");
1472
1340 #if defined(OS_WIN) 1473 #if defined(OS_WIN)
1341 string16 path = UTF8ToUTF16(filename); 1474 string16 path = (filename.empty())? default_name : UTF8ToUTF16(filename);
1342 file_util::ReplaceIllegalCharactersInPath(&path, '-'); 1475 file_util::ReplaceIllegalCharactersInPath(&path, '-');
1343 return path; 1476 FilePath result(path);
1477 GenerateSafeFileName(mime_type, &result);
1478 return result.value();
1344 #else 1479 #else
1345 std::string path = filename; 1480 std::string path = (filename.empty())? UTF16ToUTF8(default_name) : filename;
1346 file_util::ReplaceIllegalCharactersInPath(&path, '-'); 1481 file_util::ReplaceIllegalCharactersInPath(&path, '-');
1347 return UTF8ToUTF16(path); 1482 FilePath result(path);
1483 GenerateSafeFileName(mime_type, &result);
1484 return UTF8ToUTF16(result.value());
1348 #endif 1485 #endif
1349 } 1486 }
1350 1487
1488 FilePath GenerateFileName(const GURL& url,
1489 const std::string& content_disposition,
1490 const std::string& referrer_charset,
1491 const std::string& suggested_name,
1492 const std::string& mime_type,
1493 const string16& default_name) {
1494 string16 file_name = GetSuggestedFilename(
1495 url, content_disposition, referrer_charset,
1496 suggested_name, mime_type, default_name);
1497
1498 #if defined(OS_WIN)
1499 FilePath generated_name(file_name);
1500 #else
1501 FilePath generated_name(base::SysWideToNativeMB(UTF16ToWide(file_name)));
1502 #endif
1503 DCHECK(!generated_name.empty());
1504
1505 return generated_name;
1506 }
1507
1351 bool IsPortAllowedByDefault(int port) { 1508 bool IsPortAllowedByDefault(int port) {
1352 int array_size = arraysize(kRestrictedPorts); 1509 int array_size = arraysize(kRestrictedPorts);
1353 for (int i = 0; i < array_size; i++) { 1510 for (int i = 0; i < array_size; i++) {
1354 if (kRestrictedPorts[i] == port) { 1511 if (kRestrictedPorts[i] == port) {
1355 return false; 1512 return false;
1356 } 1513 }
1357 } 1514 }
1358 return true; 1515 return true;
1359 } 1516 }
1360 1517
(...skipping 886 matching lines...) Expand 10 before | Expand all | Expand 10 after
2247 2404
2248 NetworkInterface::NetworkInterface(const std::string& name, 2405 NetworkInterface::NetworkInterface(const std::string& name,
2249 const IPAddressNumber& address) 2406 const IPAddressNumber& address)
2250 : name(name), address(address) { 2407 : name(name), address(address) {
2251 } 2408 }
2252 2409
2253 NetworkInterface::~NetworkInterface() { 2410 NetworkInterface::~NetworkInterface() {
2254 } 2411 }
2255 2412
2256 } // namespace net 2413 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698