Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(100)

Unified Diff: net/base/net_util_icu_unittest.cc

Issue 1258813002: Implement a new IDN display policy (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: add back languages to one more, update comments Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« net/base/net_util_icu.cc ('K') | « net/base/net_util_icu.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: net/base/net_util_icu_unittest.cc
diff --git a/net/base/net_util_icu_unittest.cc b/net/base/net_util_icu_unittest.cc
index f643426aa678ed48f36e96877adde254a26624b5..e58b0649d16454fab018f28dbb2cc6abe5149467 100644
--- a/net/base/net_util_icu_unittest.cc
+++ b/net/base/net_util_icu_unittest.cc
@@ -24,328 +24,120 @@ namespace net {
namespace {
const size_t kNpos = base::string16::npos;
-
-const char* const kLanguages[] = {
- "", "en", "zh-CN", "ja", "ko",
- "he", "ar", "ru", "el", "fr",
- "de", "pt", "sv", "th", "hi",
- "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en",
- "zh,ru,en"
-};
-
struct IDNTestCase {
const char* const input;
const wchar_t* unicode_output;
- const bool unicode_allowed[arraysize(kLanguages)];
+ const bool unicode_allowed;
};
// TODO(jungshik) This is just a random sample of languages and is far
// from exhaustive. We may have to generate all the combinations
// of languages (powerset of a set of all the languages).
const IDNTestCase idn_cases[] = {
- // No IDN
- {"www.google.com", L"www.google.com",
- {true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true}},
- {"www.google.com.", L"www.google.com.",
- {true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true}},
- {".", L".",
- {true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true}},
- {"", L"",
- {true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true}},
- // IDN
- // Hanzi (Traditional Chinese)
- {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
- {true, false, true, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, true, true, false,
- true}},
- // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
- {"xn--cy2a840a.com", L"\x89c6\x9891.com",
- {true, false, true, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- true}},
- // Hanzi + '123'
- {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
- {true, false, true, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, true, true, false,
- true}},
- // Hanzi + Latin : U+56FD is simplified and is regarded
- // as not supported in zh-TW.
- {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
- {false, false, true, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- true}},
- // Kanji + Kana (Japanese)
- {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
- {true, false, false, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- false}},
- // Katakana including U+30FC
- {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
- {true, false, false, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- }},
- {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
- {true, false, false, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- }},
- // Katakana + Latin (Japanese)
- // TODO(jungshik): Change 'false' in the first element to 'true'
- // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
- // of our IsIDNComponentInSingleScript().
- {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
- {false, false, false, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- }},
- {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
- {false, false, false, true, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- }},
- // Hangul (Korean)
- {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
- {true, false, false, false, true,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- false}},
- // b<u-umlaut>cher (German)
- {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
- {true, false, false, false, false,
- false, false, false, false, true,
- true, false, false, false, false,
- true, false, false, false, false,
- false}},
- // a with diaeresis
- {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
- {true, false, false, false, false,
- false, false, false, false, false,
- true, false, true, false, false,
- true, false, false, false, false,
- false}},
- // c-cedilla (French)
- {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
- {true, false, false, false, false,
- false, false, false, false, true,
- false, true, false, false, false,
- false, false, false, false, false,
- false}},
- // caf'e with acute accent' (French)
- {"xn--caf-dma.fr", L"caf\x00e9.fr",
- {true, false, false, false, false,
- false, false, false, false, true,
- false, true, true, false, false,
- false, false, false, false, false,
- false}},
- // c-cedillla and a with tilde (Portuguese)
- {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
- {true, false, false, false, false,
- false, false, false, false, false,
- false, true, false, false, false,
- false, false, false, false, false,
- false}},
- // s with caron
- {"xn--achy-f6a.com", L"\x0161" L"achy.com",
- {true, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // TODO(jungshik) : Add examples with Cyrillic letters
- // only used in some languages written in Cyrillic.
- // Eutopia (Greek)
- {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
- {true, false, false, false, false,
- false, false, false, true, false,
- false, false, false, false, false,
- false, true, false, false, false,
- false}},
- // Eutopia + 123 (Greek)
- {"xn---123-pldm0haj2bk.gr",
- L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
- {true, false, false, false, false,
- false, false, false, true, false,
- false, false, false, false, false,
- false, true, false, false, false,
- false}},
- // Cyrillic (Russian)
- {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
- {true, false, false, false, false,
- false, false, true, false, false,
- false, false, false, false, false,
- false, false, false, false, true,
- true}},
- // Cyrillic + 123 (Russian)
- {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
- {true, false, false, false, false,
- false, false, true, false, false,
- false, false, false, false, false,
- false, false, false, false, true,
- true}},
- // Arabic
- {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
- {true, false, false, false, false,
- false, true, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // Hebrew
- {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
- {true, false, false, false, false,
- true, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, true,
- false}},
- // Thai
- {"xn--12c2cc4ag3b4ccu.th",
- L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
- {true, false, false, false, false,
- false, false, false, false, false,
- false, false, false, true, false,
- false, false, false, false, false,
- false}},
- // Devangari (Hindi)
- {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
- {true, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, true,
- false, false, false, false, false,
- false}},
- // Invalid IDN
- {"xn--hello?world.com", NULL,
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // Unsafe IDNs
- // "payp<alpha>l.com"
- {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // google.gr with Greek omicron and epsilon
- {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // google.ru with Cyrillic o
- {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // h<e with acute>llo<China in Han>.cn
- {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // <Greek rho><Cyrillic a><Cyrillic u>.ru
- {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- // One that's really long that will force a buffer realloc
- {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
- "aaaaaaa",
- L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
- L"aaaaaaaa",
- {true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true, true, true, true, true,
- true}},
- // Test cases for characters we blacklisted although allowed in IDN.
- // Embedded spaces will be turned to %20 in the display.
- // TODO(jungshik): We need to have more cases. This is a typical
- // data-driven trap. The following test cases need to be separated
- // and tested only for a couple of languages.
- {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false}},
- {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
- {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
- {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
- {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
- // Padlock icon spoof.
- {"xn--google-hj64e", L"\U0001f512google.com",
- {false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
- // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist
- // all strings with the surrogate '\xdd12'.
- {"xn--fk9c.com", L"\U00010912.com",
- {true, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- false, false, false, false, false,
- }},
+ // No IDN
+ {"www.google.com", L"www.google.com", true},
+ {"www.google.com.", L"www.google.com.", true},
+ {".", L".", true},
+ {"", L"", true},
+ // IDN
+ // Hanzi (Traditional Chinese)
+ {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true},
+ // Hanzi ('video' in Simplified Chinese)
+ {"xn--cy2a840a.com", L"\x89c6\x9891.com", true},
+ // Hanzi + '123'
+ {"www.xn--123-p18d.com",
+ L"www.\x4e00"
+ L"123.com",
+ true},
+ // Hanzi + Latin : U+56FD is simplified
+ {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", true},
+ // Kanji + Kana (Japanese)
+ {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", true},
+ // Katakana including U+30FC
+ {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", true},
+ {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", true},
+ // Katakana + Latin (Japanese)
+ {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", true},
+ {"xn--3bkxe.jp", L"\x30c8\x309a.jp", true},
+ // Hangul (Korean)
+ {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", true},
+ // b<u-umlaut>cher (German)
+ {"xn--bcher-kva.de",
+ L"b\x00fc"
+ L"cher.de",
+ true},
+ // a with diaeresis
+ {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", true},
+ // c-cedilla (French)
+ {"www.xn--alliancefranaise-npb.fr",
+ L"www.alliancefran\x00e7"
+ L"aise.fr",
+ true},
+ // caf'e with acute accent' (French)
+ {"xn--caf-dma.fr", L"caf\x00e9.fr", true},
+ // c-cedilla and a with tilde (Portuguese)
+ {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", true},
+ // s with caron
+ {"xn--achy-f6a.com",
+ L"\x0161"
+ L"achy.com",
+ true},
+ {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
+ true},
+ // Eutopia + 123 (Greek)
+ {"xn---123-pldm0haj2bk.gr",
+ L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", true},
+ // Cyrillic (Russian)
+ {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", true},
+ // Cyrillic + 123 (Russian)
+ {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", true},
+ // Arabic
+ {"xn--mgba1fmg.eg", L"\x0627\x0641\x0644\x0627\x0645.eg", true},
+ // Hebrew
+ {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", true},
+ // Thai
+ {"xn--12c2cc4ag3b4ccu.th",
+ L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true},
+ // Devanagari (Hindi)
+ {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true},
+ // Invalid IDN
+ {"xn--hello?world.com", NULL, false},
+ // Unsafe IDNs
+ // "payp<alpha>l.com"
+ {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false},
+ // google.gr with Greek omicron and epsilon
+ {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", false},
+ // google.ru with Cyrillic o
+ {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", false},
+ // h<e with acute>llo<China in Han>.cn
+ {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", false},
+ // <Greek rho><Cyrillic a><Cyrillic u>.ru
+ {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", false},
+ // One that's really long that will force a buffer realloc
+ {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ "aaaaaaa",
+ L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ L"aaaaaaaa",
+ true},
+ {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false},
+ {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false},
+ // Test cases for characters we blacklisted although allowed in IDN.
+ {"google.xn--comabc-k8d",
+ L"google.com\x0338"
+ L"abc",
+ false},
+ {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", false},
+ {"google.xn--comevil-v04f.jp",
+ L"google.com\x30ce"
+ L"evil.jp",
+ false},
+ // Padlock icon spoof.
+ {"xn--google-hj64e", L"\U0001f512google.com", false},
+ // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist
+ // all strings with the surrogate '\xdd12'.
+ {"xn--fk9c.com", L"\U00010912.com", false},
+ {"xn--g6h.com", L"\x2665.com", true},
+ {"xn--2ci.com", L"\x272a.com", true},
#if 0
// These two cases are special. We need a separate test.
// U+3000 and U+3002 are normalized to ASCII space and dot.
@@ -372,24 +164,12 @@ struct AdjustOffsetCase {
struct UrlTestData {
const char* const description;
const char* const input;
- const char* const languages;
FormatUrlTypes format_types;
UnescapeRule::Type escape_rules;
const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily.
size_t prefix_len;
};
-// A helper for IDN*{Fast,Slow}.
-// Append "::<language list>" to |expected| and |actual| to make it
-// easy to tell which sub-case fails without debugging.
-void AppendLanguagesToOutputs(const char* languages,
- base::string16* expected,
- base::string16* actual) {
- base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
- expected->append(to_append);
- actual->append(to_append);
-}
-
// A pair of helpers for the FormatUrlWithOffsets() test.
void VerboseExpect(size_t expected,
size_t actual,
@@ -412,8 +192,8 @@ void CheckAdjustedOffsets(const std::string& url_string,
offsets.push_back(i);
offsets.push_back(500000); // Something larger than any input length.
offsets.push_back(std::string::npos);
- base::string16 formatted_url = FormatUrlWithOffsets(url, languages,
- format_types, unescape_rules, NULL, NULL, &offsets);
+ base::string16 formatted_url = FormatUrlWithOffsets(
+ url, std::string(), format_types, unescape_rules, NULL, NULL, &offsets);
for (size_t i = 0; i < url_length; ++i)
VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
VerboseExpect(formatted_url.length(), offsets[url_length], url_string,
@@ -426,51 +206,14 @@ void CheckAdjustedOffsets(const std::string& url_string,
} // anonymous namespace
-TEST(NetUtilTest, IDNToUnicodeFast) {
+TEST(NetUtilTest, IDNToUnicode) {
for (size_t i = 0; i < arraysize(idn_cases); i++) {
- for (size_t j = 0; j < arraysize(kLanguages); j++) {
- // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
- if (j == 3 || j == 17 || j == 18)
- continue;
- base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
- base::string16 expected(idn_cases[i].unicode_allowed[j] ?
- WideToUTF16(idn_cases[i].unicode_output) :
- ASCIIToUTF16(idn_cases[i].input));
- AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
- EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
- << "\", languages: \"" << kLanguages[j]
- << "\"";
- }
- }
-}
-
-TEST(NetUtilTest, IDNToUnicodeSlow) {
- for (size_t i = 0; i < arraysize(idn_cases); i++) {
- for (size_t j = 0; j < arraysize(kLanguages); j++) {
- // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
- if (!(j == 3 || j == 17 || j == 18))
- continue;
- base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
- base::string16 expected(idn_cases[i].unicode_allowed[j] ?
- WideToUTF16(idn_cases[i].unicode_output) :
- ASCIIToUTF16(idn_cases[i].input));
- AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
- EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
- << "\", languages: \"" << kLanguages[j]
- << "\"";
- }
- }
-}
-
-// ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and
-// te), which was causing a crash (See http://crbug.com/510551). This may be an
-// icu bug, but regardless, that should not cause a crash.
-TEST(NetUtilTest, IDNToUnicodeNeverCrashes) {
- for (char c1 = 'a'; c1 <= 'z'; c1++) {
- for (char c2 = 'a'; c2 <= 'z'; c2++) {
- std::string lang = base::StringPrintf("%c%c", c1, c2);
- base::string16 output(IDNToUnicode("xn--74h", lang));
- }
+ base::string16 output(IDNToUnicode(idn_cases[i].input, std::string()));
+ base::string16 expected(idn_cases[i].unicode_allowed
+ ? WideToUTF16(idn_cases[i].unicode_output)
+ : ASCIIToUTF16(idn_cases[i].input));
+ EXPECT_EQ(expected, output) << "input # " << i << ": \""
+ << idn_cases[i].input << "\"";
}
}
@@ -554,181 +297,149 @@ TEST(NetUtilTest, GetDirectoryListingEntry) {
TEST(NetUtilTest, FormatUrl) {
FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
const UrlTestData tests[] = {
- {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0},
-
- {"Simple URL",
- "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL,
- L"http://www.google.com/", 7},
-
- {"With a port number and a reference",
- "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
- UnescapeRule::NORMAL,
- L"http://www.google.com:8080/#\x30B0", 7},
-
- // -------- IDN tests --------
- {"Japanese IDN with ja",
- "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
- UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
-
- {"Japanese IDN with en",
- "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type,
- UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7},
-
- {"Japanese IDN without any languages",
- "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type,
- UnescapeRule::NORMAL,
- // Single script is safe for empty languages.
- L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
-
- {"mailto: with Japanese IDN",
- "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
- UnescapeRule::NORMAL,
- // GURL doesn't assume an email address's domain part as a host name.
- L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
-
- {"file: with Japanese IDN",
- "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
- UnescapeRule::NORMAL,
- L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
-
- {"ftp: with Japanese IDN",
- "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
- UnescapeRule::NORMAL,
- L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
-
- // -------- omit_username_password flag tests --------
- {"With username and password, omit_username_password=false",
- "http://user:passwd@example.com/foo", "",
- kFormatUrlOmitNothing, UnescapeRule::NORMAL,
- L"http://user:passwd@example.com/foo", 19},
-
- {"With username and password, omit_username_password=true",
- "http://user:passwd@example.com/foo", "", default_format_type,
- UnescapeRule::NORMAL, L"http://example.com/foo", 7},
-
- {"With username and no password",
- "http://user@example.com/foo", "", default_format_type,
- UnescapeRule::NORMAL, L"http://example.com/foo", 7},
-
- {"Just '@' without username and password",
- "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL,
- L"http://example.com/foo", 7},
-
- // GURL doesn't think local-part of an email address is username for URL.
- {"mailto:, omit_username_password=true",
- "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL,
- L"mailto:foo@example.com", 7},
-
- // -------- unescape flag tests --------
- {"Do not unescape",
- "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
- "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
- "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
- UnescapeRule::NONE,
- // GURL parses %-encoded hostnames into Punycode.
- L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
- L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
-
- {"Unescape normally",
- "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
- "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
- "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
- UnescapeRule::NORMAL,
- L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
- L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
-
- {"Unescape normally with BiDi control character",
- "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type,
- UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
-
- {"Unescape normally including unescape spaces",
- "http://www.google.com/search?q=Hello%20World", "en", default_format_type,
- UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7},
-
- /*
- {"unescape=true with some special characters",
- "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
- kFormatUrlOmitNothing, UnescapeRule::NORMAL,
- L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
- */
- // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
-
- // -------- omit http: --------
- {"omit http with user name",
- "http://user@example.com/foo", "", kFormatUrlOmitAll,
- UnescapeRule::NORMAL, L"example.com/foo", 0},
-
- {"omit http",
- "http://www.google.com/", "en", kFormatUrlOmitHTTP,
- UnescapeRule::NORMAL, L"www.google.com/",
- 0},
-
- {"omit http with https",
- "https://www.google.com/", "en", kFormatUrlOmitHTTP,
- UnescapeRule::NORMAL, L"https://www.google.com/",
- 8},
-
- {"omit http starts with ftp.",
- "http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
- UnescapeRule::NORMAL, L"http://ftp.google.com/",
- 7},
-
- // -------- omit trailing slash on bare hostname --------
- {"omit slash when it's the entire path",
- "http://www.google.com/", "en",
- kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
- L"http://www.google.com", 7},
- {"omit slash when there's a ref",
- "http://www.google.com/#ref", "en",
- kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
- L"http://www.google.com/#ref", 7},
- {"omit slash when there's a query",
- "http://www.google.com/?", "en",
- kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
- L"http://www.google.com/?", 7},
- {"omit slash when it's not the entire path",
- "http://www.google.com/foo", "en",
- kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
- L"http://www.google.com/foo", 7},
- {"omit slash for nonstandard URLs",
- "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
- UnescapeRule::NORMAL, L"data:/", 5},
- {"omit slash for file URLs",
- "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
- UnescapeRule::NORMAL, L"file:///", 7},
-
- // -------- view-source: --------
- {"view-source",
- "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type,
- UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/",
- 19},
-
- {"view-source of view-source",
- "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
- default_format_type, UnescapeRule::NORMAL,
- L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
-
- // view-source should omit http and trailing slash where non-view-source
- // would.
- {"view-source omit http",
- "view-source:http://a.b/c", "en", kFormatUrlOmitAll,
- UnescapeRule::NORMAL, L"view-source:a.b/c",
- 12},
- {"view-source omit http starts with ftp.",
- "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll,
- UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
- 19},
- {"view-source omit slash when it's the entire path",
- "view-source:http://a.b/", "en", kFormatUrlOmitAll,
- UnescapeRule::NORMAL, L"view-source:a.b",
- 12},
+ {"Empty URL", "", default_format_type, UnescapeRule::NORMAL, L"", 0},
+
+ {"Simple URL", "http://www.google.com/", default_format_type,
+ UnescapeRule::NORMAL, L"http://www.google.com/", 7},
+
+ {"With a port number and a reference",
+ "http://www.google.com:8080/#\xE3\x82\xB0", default_format_type,
+ UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7},
+
+ // -------- IDN tests --------
+ {"Japanese IDN", "http://xn--l8jvb1ey91xtjb.jp", default_format_type,
+ UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
+
+ {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp",
+ default_format_type, UnescapeRule::NORMAL,
+ // GURL doesn't assume an email address's domain part as a host name.
+ L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
+
+ {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys",
+ default_format_type, UnescapeRule::NORMAL,
+ L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
+
+ {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys",
+ default_format_type, UnescapeRule::NORMAL,
+ L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
+
+ // -------- omit_username_password flag tests --------
+ {"With username and password, omit_username_password=false",
+ "http://user:passwd@example.com/foo", kFormatUrlOmitNothing,
+ UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19},
+
+ {"With username and password, omit_username_password=true",
+ "http://user:passwd@example.com/foo", default_format_type,
+ UnescapeRule::NORMAL, L"http://example.com/foo", 7},
+
+ {"With username and no password", "http://user@example.com/foo",
+ default_format_type, UnescapeRule::NORMAL, L"http://example.com/foo", 7},
+
+ {"Just '@' without username and password", "http://@example.com/foo",
+ default_format_type, UnescapeRule::NORMAL, L"http://example.com/foo", 7},
+
+ // GURL doesn't think local-part of an email address is username for URL.
+ {"mailto:, omit_username_password=true", "mailto:foo@example.com",
+ default_format_type, UnescapeRule::NORMAL, L"mailto:foo@example.com", 7},
+
+ // -------- unescape flag tests --------
+ {"Do not unescape",
+ "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
+ "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
+ "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
+ default_format_type, UnescapeRule::NONE,
+ // GURL parses %-encoded hostnames into Punycode.
+ L"http://\x30B0\x30FC\x30B0\x30EB.jp/"
+ L"%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
+ L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
+ 7},
+
+ {"Unescape normally",
+ "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
+ "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
+ "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
+ default_format_type, UnescapeRule::NORMAL,
+ L"http://\x30B0\x30FC\x30B0\x30EB.jp/\x30B0\x30FC\x30B0\x30EB"
+ L"?q=\x30B0\x30FC\x30B0\x30EB",
+ 7},
+
+ {"Unescape normally with BiDi control character",
+ "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", default_format_type,
+ UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy",
+ 7},
+
+ {"Unescape normally including unescape spaces",
+ "http://www.google.com/search?q=Hello%20World", default_format_type,
+ UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7},
+
+ /*
+ {"unescape=true with some special characters",
+ "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
+ kFormatUrlOmitNothing, UnescapeRule::NORMAL,
+ L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
+ */
+ // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
+
+ // -------- omit http: --------
+ {"omit http with user name", "http://user@example.com/foo",
+ kFormatUrlOmitAll, UnescapeRule::NORMAL, L"example.com/foo", 0},
+
+ {"omit http", "http://www.google.com/", kFormatUrlOmitHTTP,
+ UnescapeRule::NORMAL, L"www.google.com/", 0},
+
+ {"omit http with https", "https://www.google.com/", kFormatUrlOmitHTTP,
+ UnescapeRule::NORMAL, L"https://www.google.com/", 8},
+
+ {"omit http starts with ftp.", "http://ftp.google.com/",
+ kFormatUrlOmitHTTP, UnescapeRule::NORMAL, L"http://ftp.google.com/", 7},
+
+ // -------- omit trailing slash on bare hostname --------
+ {"omit slash when it's the entire path", "http://www.google.com/",
+ kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
+ L"http://www.google.com", 7},
+ {"omit slash when there's a ref", "http://www.google.com/#ref",
+ kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
+ L"http://www.google.com/#ref", 7},
+ {"omit slash when there's a query", "http://www.google.com/?",
+ kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
+ L"http://www.google.com/?", 7},
+ {"omit slash when it's not the entire path", "http://www.google.com/foo",
+ kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
+ L"http://www.google.com/foo", 7},
+ {"omit slash for nonstandard URLs", "data:/",
+ kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
+ L"data:/", 5},
+ {"omit slash for file URLs", "file:///",
+ kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
+ L"file:///", 7},
+
+ // -------- view-source: --------
+ {"view-source", "view-source:http://xn--qcka1pmc.jp/",
+ default_format_type, UnescapeRule::NORMAL,
+ L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19},
+
+ {"view-source of view-source",
+ "view-source:view-source:http://xn--qcka1pmc.jp/", default_format_type,
+ UnescapeRule::NORMAL, L"view-source:view-source:http://xn--qcka1pmc.jp/",
+ 12},
+
+ // view-source should omit http and trailing slash where non-view-source
+ // would.
+ {"view-source omit http", "view-source:http://a.b/c", kFormatUrlOmitAll,
+ UnescapeRule::NORMAL, L"view-source:a.b/c", 12},
+ {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c",
+ kFormatUrlOmitAll, UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
+ 19},
+ {"view-source omit slash when it's the entire path",
+ "view-source:http://a.b/", kFormatUrlOmitAll, UnescapeRule::NORMAL,
+ L"view-source:a.b", 12},
};
for (size_t i = 0; i < arraysize(tests); ++i) {
size_t prefix_len;
- base::string16 formatted = FormatUrl(
- GURL(tests[i].input), tests[i].languages, tests[i].format_types,
- tests[i].escape_rules, NULL, &prefix_len, NULL);
+ base::string16 formatted =
+ FormatUrl(GURL(tests[i].input), std::string(), tests[i].format_types,
+ tests[i].escape_rules, NULL, &prefix_len, NULL);
EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
}
@@ -737,11 +448,11 @@ TEST(NetUtilTest, FormatUrl) {
TEST(NetUtilTest, FormatUrlParsed) {
// No unescape case.
url::Parsed parsed;
- base::string16 formatted = FormatUrl(
- GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
- "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL,
- NULL);
+ base::string16 formatted =
+ FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
+ "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
+ std::string(), kFormatUrlOmitNothing, UnescapeRule::NONE,
+ &parsed, NULL, NULL);
EXPECT_EQ(WideToUTF16(
L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
@@ -761,11 +472,11 @@ TEST(NetUtilTest, FormatUrlParsed) {
formatted.substr(parsed.ref.begin, parsed.ref.len));
// Unescape case.
- formatted = FormatUrl(
- GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
- "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL,
- NULL);
+ formatted =
+ FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
+ "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
+ std::string(), kFormatUrlOmitNothing, UnescapeRule::NORMAL,
+ &parsed, NULL, NULL);
EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
EXPECT_EQ(WideToUTF16(L"\x30B0"),
@@ -784,11 +495,11 @@ TEST(NetUtilTest, FormatUrlParsed) {
formatted.substr(parsed.ref.begin, parsed.ref.len));
// Omit_username_password + unescape case.
- formatted = FormatUrl(
- GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
- "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
- NULL, NULL);
+ formatted =
+ FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
+ "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
+ std::string(), kFormatUrlOmitUsernamePassword,
+ UnescapeRule::NORMAL, &parsed, NULL, NULL);
EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
EXPECT_FALSE(parsed.username.is_valid());
« net/base/net_util_icu.cc ('K') | « net/base/net_util_icu.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698