Index: net/base/net_util_unittest.cc |
=================================================================== |
--- net/base/net_util_unittest.cc (revision 15065) |
+++ net/base/net_util_unittest.cc (working copy) |
@@ -39,6 +39,7 @@ |
struct FileNameCDCase { |
const char* header_field; |
+ const char* referrer_charset; |
const wchar_t* expected; |
}; |
@@ -58,7 +59,8 @@ |
struct SuggestedFilenameCase { |
const char* url; |
- const wchar_t* content_disp_header; |
+ const char* content_disp_header; |
+ const char* referrer_charset; |
const wchar_t* default_filename; |
const wchar_t* expected_filename; |
}; |
@@ -299,75 +301,96 @@ |
TEST(NetUtilTest, GetFileNameFromCD) { |
const FileNameCDCase tests[] = { |
// Test various forms of C-D header fields emitted by web servers. |
- {"content-disposition: inline; filename=\"abcde.pdf\"", L"abcde.pdf"}, |
- {"content-disposition: inline; name=\"abcde.pdf\"", L"abcde.pdf"}, |
- {"content-disposition: attachment; filename=abcde.pdf", L"abcde.pdf"}, |
- {"content-disposition: attachment; name=abcde.pdf", L"abcde.pdf"}, |
- {"content-disposition: attachment; filename=abc,de.pdf", L"abc,de.pdf"}, |
- {"content-disposition: filename=abcde.pdf", L"abcde.pdf"}, |
- {"content-disposition: filename= abcde.pdf", L"abcde.pdf"}, |
- {"content-disposition: filename =abcde.pdf", L"abcde.pdf"}, |
- {"content-disposition: filename = abcde.pdf", L"abcde.pdf"}, |
- {"content-disposition: filename\t=abcde.pdf", L"abcde.pdf"}, |
- {"content-disposition: filename \t\t =abcde.pdf", L"abcde.pdf"}, |
- {"content-disposition: name=abcde.pdf", L"abcde.pdf"}, |
- {"content-disposition: inline; filename=\"abc%20de.pdf\"", L"abc de.pdf"}, |
+ {"content-disposition: inline; filename=\"abcde.pdf\"", "", L"abcde.pdf"}, |
+ {"content-disposition: inline; name=\"abcde.pdf\"", "", L"abcde.pdf"}, |
+ {"content-disposition: attachment; filename=abcde.pdf", "", L"abcde.pdf"}, |
+ {"content-disposition: attachment; name=abcde.pdf", "", L"abcde.pdf"}, |
+ {"content-disposition: attachment; filename=abc,de.pdf", "", L"abc,de.pdf"}, |
+ {"content-disposition: filename=abcde.pdf", "", L"abcde.pdf"}, |
+ {"content-disposition: filename= abcde.pdf", "", L"abcde.pdf"}, |
+ {"content-disposition: filename =abcde.pdf", "", L"abcde.pdf"}, |
+ {"content-disposition: filename = abcde.pdf", "", L"abcde.pdf"}, |
+ {"content-disposition: filename\t=abcde.pdf", "", L"abcde.pdf"}, |
+ {"content-disposition: filename \t\t =abcde.pdf", "", L"abcde.pdf"}, |
+ {"content-disposition: name=abcde.pdf", "", L"abcde.pdf"}, |
+ {"content-disposition: inline; filename=\"abc%20de.pdf\"", "", |
+ L"abc de.pdf"}, |
// Whitespaces are converted to a space. |
- {"content-disposition: inline; filename=\"abc \t\nde.pdf\"", |
+ {"content-disposition: inline; filename=\"abc \t\nde.pdf\"", "", |
L"abc de.pdf"}, |
// %-escaped UTF-8 |
{"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20" |
- "%EC%98%88%EC%88%A0.jpg\"", L"\xc608\xc220 \xc608\xc220.jpg"}, |
+ "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"}, |
{"Content-Disposition: attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1" |
- "abc.jpg\"", L"\U00010330\U00010331abc.jpg"}, |
+ "abc.jpg\"", "", L"\U00010330\U00010331abc.jpg"}, |
{"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0 \n" |
- "%EC%98%88%EC%88%A0.jpg\"", L"\xc608\xc220 \xc608\xc220.jpg"}, |
+ "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"}, |
// RFC 2047 with various charsets and Q/B encodings |
{"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" |
- "D13=2Epng?=\"", L"\x82b8\x8853" L"3.png"}, |
+ "D13=2Epng?=\"", "", L"\x82b8\x8853" L"3.png"}, |
{"Content-Disposition: attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=", |
- L"\xc608\xc220 3.png"}, |
+ "", L"\xc608\xc220 3.png"}, |
{"Content-Disposition: attachment; filename==?utf-8?Q?=E8=8A=B8=E8" |
- "=A1=93_3=2Epng?=", L"\x82b8\x8853 3.png"}, |
+ "=A1=93_3=2Epng?=", "", L"\x82b8\x8853 3.png"}, |
{"Content-Disposition: attachment; filename==?utf-8?Q?=F0=90=8C=B0" |
- "_3=2Epng?=", L"\U00010330 3.png"}, |
- {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e3_=2epng?=\"", |
- L"caf\x00e3 .png"}, |
+ "_3=2Epng?=", "", L"\U00010330 3.png"}, |
+ {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e9_=2epng?=\"", |
+ "", L"caf\x00e9 .png"}, |
// Space after an encode word should be removed. |
- {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E3_?= .png\"", |
- L"caf\x00e3 .png"}, |
+ {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E9_?= .png\"", |
+ "", L"caf\x00e9 .png"}, |
// Two encoded words with different charsets (not very likely to be emitted |
// by web servers in the wild). Spaces between them are removed. |
{"Content-Disposition: inline; filename=\"=?euc-kr?b?v7m8+iAz?=" |
- " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", L"\xc608\xc220 3\xc608\xc220.png"}, |
- {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E3?=" |
- " =?iso-8859-7?b?4eI=?= .png\"", L"caf\x00e3\x03b1\x03b2.png"}, |
- // Non-ASCII string is passed through (and treated as UTF-8). |
- {"Content-Disposition: attachment; filename=caf\xc3\xa3.png", |
- L"caf\x00e3.png"}, |
+ " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", "", |
+ L"\xc608\xc220 3\xc608\xc220.png"}, |
+ {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E9?=" |
+ " =?iso-8859-7?b?4eI=?= .png\"", "", L"caf\x00e9\x03b1\x03b2.png"}, |
+ // Non-ASCII string is passed through and treated as UTF-8 as long as |
+ // it's valid as UTF-8 and regardless of |referrer_charset|. |
+ {"Content-Disposition: attachment; filename=caf\xc3\xa9.png", |
+ "iso-8859-1", L"caf\x00e9.png"}, |
+ {"Content-Disposition: attachment; filename=caf\xc3\xa9.png", |
+ "", L"caf\x00e9.png"}, |
+ // Non-ASCII/Non-UTF-8 string. Fall back to the referrer charset. |
+ {"Content-Disposition: attachment; filename=caf\xe5.png", |
+ "windows-1253", L"caf\x03b5.png"}, |
+#if 0 |
+ // Non-ASCII/Non-UTF-8 string. Fall back to the native codepage. |
+ // TODO(jungshik): We need to set the OS default codepage |
+ // to a specific value before testing. On Windows, we can use |
+ // SetThreadLocale(). |
+ {"Content-Disposition: attachment; filename=\xb0\xa1\xb0\xa2.png", |
+ "", L"\xac00\xac01.png"}, |
+#endif |
// Failure cases |
// Invalid hex-digit "G" |
- {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", L""}, |
+ {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", "", |
+ L""}, |
// Incomplete RFC 2047 encoded-word (missing '='' at the end) |
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", L""}, |
+ {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", "", L""}, |
// Extra character at the end of an encoded word |
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==", L""}, |
+ {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==", |
+ "", L""}, |
// Extra token at the end of an encoded word |
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?", L""}, |
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=", L""}, |
+ {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?", |
+ "", L""}, |
+ {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=", |
+ "", L""}, |
// Incomplete hex-escaped chars |
{"Content-Disposition: attachment; filename==?windows-1252?Q?=63=61=E?=", |
- L""}, |
- {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", L""}, |
+ "", L""}, |
+ {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", "", L""}, |
// %-escaped non-UTF-8 encoding is an "error" |
- {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", L""}, |
+ {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", "", L""}, |
// Two RFC 2047 encoded words in a row without a space is an error. |
{"Content-Disposition: attachment; filename==?windows-1252?Q?caf=E3?=" |
- "=?iso-8859-7?b?4eIucG5nCg==?=", L""}, |
+ "=?iso-8859-7?b?4eIucG5nCg==?=", "", L""}, |
}; |
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) { |
EXPECT_EQ(tests[i].expected, |
- net::GetFileNameFromCD(tests[i].header_field)); |
+ net::GetFileNameFromCD(tests[i].header_field, |
+ tests[i].referrer_charset)); |
} |
} |
@@ -669,97 +692,132 @@ |
TEST(NetUtilTest, GetSuggestedFilename) { |
const SuggestedFilenameCase test_cases[] = { |
{"http://www.google.com/", |
- L"Content-disposition: attachment; filename=test.html", |
+ "Content-disposition: attachment; filename=test.html", |
+ "", |
L"", |
L"test.html"}, |
{"http://www.google.com/", |
- L"Content-disposition: attachment; filename=\"test.html\"", |
+ "Content-disposition: attachment; filename=\"test.html\"", |
+ "", |
L"", |
L"test.html"}, |
{"http://www.google.com/path/test.html", |
- L"Content-disposition: attachment", |
+ "Content-disposition: attachment", |
+ "", |
L"", |
L"test.html"}, |
{"http://www.google.com/path/test.html", |
- L"Content-disposition: attachment;", |
+ "Content-disposition: attachment;", |
+ "", |
L"", |
L"test.html"}, |
{"http://www.google.com/", |
+ "", |
+ "", |
L"", |
- L"", |
L"www.google.com"}, |
{"http://www.google.com/test.html", |
+ "", |
+ "", |
L"", |
- L"", |
L"test.html"}, |
// Now that we use googleurl's ExtractFileName, this case falls back |
// to the hostname. If this behavior is not desirable, we'd better |
// change ExtractFileName (in url_parse). |
{"http://www.google.com/path/", |
+ "", |
+ "", |
L"", |
- L"", |
L"www.google.com"}, |
{"http://www.google.com/path", |
+ "", |
+ "", |
L"", |
- L"", |
L"path"}, |
{"file:///", |
+ "", |
+ "", |
L"", |
- L"", |
L"download"}, |
{"view-cache:", |
+ "", |
+ "", |
L"", |
- L"", |
L"download"}, |
{"http://www.google.com/", |
- L"Content-disposition: attachment; filename =\"test.html\"", |
+ "Content-disposition: attachment; filename =\"test.html\"", |
+ "", |
L"download", |
L"test.html"}, |
{"http://www.google.com/", |
- L"", |
+ "", |
+ "", |
L"download", |
L"download"}, |
{"http://www.google.com/", |
- L"Content-disposition: attachment; filename=\"../test.html\"", |
+ "Content-disposition: attachment; filename=\"../test.html\"", |
+ "", |
L"", |
L"test.html"}, |
{"http://www.google.com/", |
- L"Content-disposition: attachment; filename=\"..\"", |
+ "Content-disposition: attachment; filename=\"..\"", |
+ "", |
L"download", |
L"download"}, |
{"http://www.google.com/test.html", |
- L"Content-disposition: attachment; filename=\"..\"", |
+ "Content-disposition: attachment; filename=\"..\"", |
+ "", |
L"download", |
L"test.html"}, |
// Below is a small subset of cases taken from GetFileNameFromCD test above. |
{"http://www.google.com/", |
- L"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20" |
- L"%EC%98%88%EC%88%A0.jpg\"", |
+ "Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20" |
+ "%EC%98%88%EC%88%A0.jpg\"", |
+ "", |
L"", |
L"\uc608\uc220 \uc608\uc220.jpg"}, |
{"http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg", |
- L"", |
+ "", |
+ "", |
L"download", |
L"\uc608\uc220 \uc608\uc220.jpg"}, |
{"http://www.google.com/", |
- L"Content-disposition: attachment;", |
+ "Content-disposition: attachment;", |
+ "", |
L"\uB2E4\uC6B4\uB85C\uB4DC", |
L"\uB2E4\uC6B4\uB85C\uB4DC"}, |
{"http://www.google.com/", |
- L"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" |
- L"D13=2Epng?=\"", |
+ "Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" |
+ "D13=2Epng?=\"", |
+ "", |
L"download", |
L"\u82b8\u88533.png"}, |
+ {"http://www.example.com/images?id=3", |
+ "Content-Disposition: attachment; filename=caf\xc3\xa9.png", |
+ "iso-8859-1", |
+ L"", |
+ L"caf\u00e9.png"}, |
+ {"http://www.example.com/images?id=3", |
+ "Content-Disposition: attachment; filename=caf\xe5.png", |
+ "windows-1253", |
+ L"", |
+ L"caf\u03b5.png"}, |
+ {"http://www.example.com/file?id=3", |
+ "Content-Disposition: attachment; name=\xcf\xc2\xd4\xd8.zip", |
+ "GBK", |
+ L"", |
+ L"\u4e0b\u8f7d.zip"}, |
// Invalid C-D header. Extracts filename from url. |
{"http://www.google.com/test.html", |
- L"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", |
+ "Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", |
+ "", |
L"", |
L"test.html"}, |
}; |
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { |
std::wstring filename = net::GetSuggestedFilename( |
GURL(test_cases[i].url), test_cases[i].content_disp_header, |
- test_cases[i].default_filename); |
+ test_cases[i].referrer_charset, test_cases[i].default_filename); |
EXPECT_EQ(std::wstring(test_cases[i].expected_filename), filename); |
} |
} |