| Index: components/url_formatter/url_formatter_unittest.cc
|
| diff --git a/components/url_formatter/url_formatter_unittest.cc b/components/url_formatter/url_formatter_unittest.cc
|
| index 7f6285aaf1652a797a631dc6f29f39c4cd73bd3e..d65cd6828d11be170782f9420a6be68c9287b655 100644
|
| --- a/components/url_formatter/url_formatter_unittest.cc
|
| +++ b/components/url_formatter/url_formatter_unittest.cc
|
| @@ -26,343 +26,322 @@ using base::ASCIIToUTF16;
|
|
|
| const size_t kNpos = base::string16::npos;
|
|
|
| -const char* const kLanguages[] = {
|
| - "", "en", "zh-CN", "ja", "ko",
|
| - "he", "ar", "ru", "el", "fr",
|
| - "de", "pt", "sv", "th", "hi",
|
| - "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en",
|
| - "zh,ru,en"
|
| -};
|
| -
|
| struct IDNTestCase {
|
| const char* const input;
|
| const wchar_t* unicode_output;
|
| - const bool unicode_allowed[arraysize(kLanguages)];
|
| + const bool unicode_allowed;
|
| };
|
|
|
| -// TODO(jungshik) This is just a random sample of languages and is far
|
| -// from exhaustive. We may have to generate all the combinations
|
| -// of languages (powerset of a set of all the languages).
|
| const IDNTestCase idn_cases[] = {
|
| // No IDN
|
| - {"www.google.com", L"www.google.com",
|
| - {true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true}},
|
| - {"www.google.com.", L"www.google.com.",
|
| - {true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true}},
|
| - {".", L".",
|
| - {true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true}},
|
| - {"", L"",
|
| - {true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true}},
|
| + {"www.google.com", L"www.google.com", true},
|
| + {"www.google.com.", L"www.google.com.", true},
|
| + {".", L".", true},
|
| + {"", L"", true},
|
| // IDN
|
| // Hanzi (Traditional Chinese)
|
| - {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
|
| - {true, false, true, true, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, true, true, false,
|
| - true}},
|
| - // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
|
| - {"xn--cy2a840a.com", L"\x89c6\x9891.com",
|
| - {true, false, true, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - true}},
|
| + {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true},
|
| + // Hanzi ('video' in Simplified Chinese)
|
| + {"xn--cy2a840a.com", L"\x89c6\x9891.com", true},
|
| // Hanzi + '123'
|
| - {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
|
| - {true, false, true, true, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, true, true, false,
|
| - true}},
|
| - // Hanzi + Latin : U+56FD is simplified and is regarded
|
| - // as not supported in zh-TW.
|
| - {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
|
| - {false, false, true, true, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, true, false,
|
| - true}},
|
| + {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", true},
|
| + // Hanzi + Latin : U+56FD is simplified
|
| + {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", true},
|
| // Kanji + Kana (Japanese)
|
| - {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
|
| - {true, false, false, true, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, true, false,
|
| - false}},
|
| + {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", true},
|
| // Katakana including U+30FC
|
| - {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
|
| - {true, false, false, true, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, true, false,
|
| - }},
|
| - {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
|
| - {true, false, false, true, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, true, false,
|
| - }},
|
| + {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", true},
|
| + {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", true},
|
| // Katakana + Latin (Japanese)
|
| - // TODO(jungshik): Change 'false' in the first element to 'true'
|
| - // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
|
| - // of our IsIDNComponentInSingleScript().
|
| - {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
|
| - {false, false, false, true, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, true, false,
|
| - }},
|
| - {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
|
| - {false, false, false, true, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, true, false,
|
| - }},
|
| + {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", true},
|
| + {"xn--3bkxe.jp", L"\x30c8\x309a.jp", true},
|
| // Hangul (Korean)
|
| - {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
|
| - {true, false, false, false, true,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, true, false,
|
| - false}},
|
| + {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", true},
|
| // b<u-umlaut>cher (German)
|
| - {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
|
| - {true, false, false, false, false,
|
| - false, false, false, false, true,
|
| - true, false, false, false, false,
|
| - true, false, false, false, false,
|
| - false}},
|
| + {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", true},
|
| // a with diaeresis
|
| - {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
|
| - {true, false, false, false, false,
|
| - false, false, false, false, false,
|
| - true, false, true, false, false,
|
| - true, false, false, false, false,
|
| - false}},
|
| + {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", true},
|
| // c-cedilla (French)
|
| - {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
|
| - {true, false, false, false, false,
|
| - false, false, false, false, true,
|
| - false, true, false, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| + {"www.xn--alliancefranaise-npb.fr",
|
| + L"www.alliancefran\x00e7" L"aise.fr", true},
|
| // caf'e with acute accent' (French)
|
| - {"xn--caf-dma.fr", L"caf\x00e9.fr",
|
| - {true, false, false, false, false,
|
| - false, false, false, false, true,
|
| - false, true, true, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| + {"xn--caf-dma.fr", L"caf\x00e9.fr", true},
|
| // c-cedillla and a with tilde (Portuguese)
|
| - {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
|
| - {true, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, true, false, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| + {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", true},
|
| // s with caron
|
| - {"xn--achy-f6a.com", L"\x0161" L"achy.com",
|
| - {true, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| - // TODO(jungshik) : Add examples with Cyrillic letters
|
| - // only used in some languages written in Cyrillic.
|
| - // Eutopia (Greek)
|
| + {"xn--achy-f6a.com", L"\x0161" L"achy.com", true},
|
| {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
|
| - {true, false, false, false, false,
|
| - false, false, false, true, false,
|
| - false, false, false, false, false,
|
| - false, true, false, false, false,
|
| - false}},
|
| + true},
|
| // Eutopia + 123 (Greek)
|
| {"xn---123-pldm0haj2bk.gr",
|
| - L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
|
| - {true, false, false, false, false,
|
| - false, false, false, true, false,
|
| - false, false, false, false, false,
|
| - false, true, false, false, false,
|
| - false}},
|
| + L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", true},
|
| // Cyrillic (Russian)
|
| - {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
|
| - {true, false, false, false, false,
|
| - false, false, true, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, true,
|
| - true}},
|
| + {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", true},
|
| // Cyrillic + 123 (Russian)
|
| - {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
|
| - {true, false, false, false, false,
|
| - false, false, true, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, true,
|
| - true}},
|
| + {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", true},
|
| + // 'president' in Russian. Is a wholescript confusable, but allowed.
|
| + {"xn--d1abbgf6aiiy.xn--p1ai",
|
| + L"\x043f\x0440\x0435\x0437\x0438\x0434\x0435\x043d\x0442.\x0440\x0444",
|
| + true},
|
| // Arabic
|
| - {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
|
| - {true, false, false, false, false,
|
| - false, true, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| + {"xn--mgba1fmg.eg", L"\x0627\x0641\x0644\x0627\x0645.eg", true},
|
| // Hebrew
|
| - {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
|
| - {true, false, false, false, false,
|
| - true, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, true,
|
| - false}},
|
| + {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", true},
|
| + // Hebrew + Common
|
| + {"xn---123-ptf2c5c6bt.il", L"\x05e2\x05d1\x05e8\x05d9\x05ea-123.il", true},
|
| // Thai
|
| {"xn--12c2cc4ag3b4ccu.th",
|
| - L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
|
| - {true, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, true, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| + L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true},
|
| + // Thai + Common
|
| + {"xn---123-9goxcp8c9db2r.th",
|
| + L"\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22-123.th", true},
|
| // Devangari (Hindi)
|
| - {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
|
| - {true, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, true,
|
| - false, false, false, false, false,
|
| - false}},
|
| - // Invalid IDN
|
| - {"xn--hello?world.com", NULL,
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| - // Unsafe IDNs
|
| + {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true},
|
| + // Devanagari + Common
|
| + {"xn---123-kbjl2j0bl2k.in",
|
| + L"\x0939\x093f\x0928\x094d\x0926\x0940-123.in", true},
|
| +
|
| + // 5 Aspirational scripts
|
| + // Unified Canadian Aboriginal Syllabics
|
| + {"xn--dfe0tte.ca", L"\x1456\x14c2\x14ef.ca", true},
|
| + // Tifinagh
|
| + {"xn--4ljxa2bb4a6bxb.ma",
|
| + L"\x2d5c\x2d49\x2d3c\x2d49\x2d4f\x2d30\x2d56.ma", true},
|
| + // Tifinagh with a disallowed character(U+2D6F)
|
| + {"xn--hmjzaby5d5f.ma", L"\x2d5c\x2d49\x2d3c\x2d6f\x2d49\x2d4f.ma", false},
|
| + // Yi
|
| + {"xn--4o7a6e1x64c.cn", L"\xa188\xa320\xa071\xa0b7.cn", true},
|
| + // Mongolian - 'ordu' (place, camp)
|
| + {"xn--56ec8bp.cn", L"\x1823\x1837\x1833\x1824.cn", true},
|
| + // Mongolian with a disallowed character
|
| + {"xn--95e5de3ds.cn", L"\x1823\x1837\x1804\x1833\x1824.cn", false},
|
| + // Miao/Pollard
|
| + {"xn--2u0fpf0a.cn", L"\U00016f04\U00016f62\U00016f59.cn", true},
|
| +
|
| + // Script mixing tests
|
| + // The following script combinations are allowed.
|
| + // MODERATELY_RESTRICTIVE with Latin limited to ASCII-Latin.
|
| + // ASCII-Latin + Japn (Kana + Han)
|
| + // ASCII-Latin + Kore (Hangul + Han)
|
| + // ASCII-Latin + Han + Bopomofo
|
| + // ASCII-Latin + any allowed script other than Cyrillic, Greek and Cherokee
|
| // "payp<alpha>l.com"
|
| - {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| + {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false},
|
| // google.gr with Greek omicron and epsilon
|
| - {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| + {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", false},
|
| // google.ru with Cyrillic o
|
| - {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| + {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", false},
|
| // h<e with acute>llo<China in Han>.cn
|
| - {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| + {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", false},
|
| // <Greek rho><Cyrillic a><Cyrillic u>.ru
|
| - {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| + {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", false},
|
| + // Hangul + Latin
|
| + {"xn--han-eb9ll88m.kr", L"\xd55c\xae00han.kr", true},
|
| + // Hangul + Latin + Han with IDN ccTLD
|
| + {"xn--han-or0kq92gkm3c.xn--3e0b707e",
|
| + L"\xd55c\xae00han\x97d3.\xd55c\xad6d", true},
|
| + // non-ASCII Latin + Hangul
|
| + {"xn--caf-dma9024xvpg.kr", L"caf\x00e9\xce74\xd398.kr", false},
|
| + // Hangul + Hiragana
|
| + {"xn--y9j3b9855e.kr", L"\xd55c\x3072\x3089.kr", false},
|
| + // <Hiragana>.<Hangul> is allowed because script mixing check is per label.
|
| + {"xn--y9j3b.xn--3e0b707e", L"\x3072\x3089.\xd55c\xad6d", true},
|
| + // Traditional Han + Latin
|
| + {"xn--hanzi-u57ii69i.tw", L"\x6f22\x5b57hanzi.tw", true},
|
| + // Simplified Han + Latin
|
| + {"xn--hanzi-u57i952h.cn", L"\x6c49\x5b57hanzi.cn", true},
|
| + // Simplified Han + Traditional Han
|
| + {"xn--hanzi-if9kt8n.cn", L"\x6c49\x6f22hanzi.cn", true},
|
| + // Han + Hiragana + Katakana + Latin
|
| + {"xn--kanji-ii4dpizfq59yuykqr4b.jp",
|
| + L"\x632f\x308a\x4eee\x540d\x30ab\x30bfkanji.jp", true},
|
| + // Han + Bopomofo
|
| + {"xn--5ekcde0577e87tc.tw", L"\x6ce8\x97f3\x3105\x3106\x3107\x3108.tw", true},
|
| + // Han + Latin + Bopomofo
|
| + {"xn--bopo-ty4cghi8509kk7xd.tw",
|
| + L"\x6ce8\x97f3" L"bopo\x3105\x3106\x3107\x3108.tw", true},
|
| + // Latin + Bopomofo
|
| + {"xn--bopomofo-hj5gkalm.tw", L"bopomofo\x3105\x3106\x3107\x3108.tw", true},
|
| + // Bopomofo + Katakana
|
| + {"xn--lcka3d1bztghi.tw",
|
| + L"\x3105\x3106\x3107\x3108\x30ab\x30bf\x30ab\x30ca.tw", false},
|
| + // Bopomofo + Hangul
|
| + {"xn--5ekcde4543qbec.tw", L"\x3105\x3106\x3107\x3108\xc8fc\xc74c.tw", false},
|
| + // Devanagari + Latin
|
| + {"xn--ab-3ofh8fqbj6h.in", L"ab\x0939\x093f\x0928\x094d\x0926\x0940.in", true},
|
| + // Thai + Latin
|
| + {"xn--ab-jsi9al4bxdb6n.th",
|
| + L"ab\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22.th", true},
|
| + // <vitamin in Katakana>b1.com
|
| + {"xn--b1-xi4a7cvc9f.com", L"\x30d3\x30bf\x30df\x30f3" L"b1.com", true},
|
| + // Devanagari + Han
|
| + {"xn--t2bes3ds6749n.com", L"\x0930\x094b\x0932\x0947\x76e7\x0938.com", false},
|
| + // Devanagari + Bengali
|
| + {"xn--11b0x.in", L"\x0915\x0995.in", false},
|
| +
|
| + // Invisibility check
|
| + // Thai tone mark malek(U+0E48) repeated
|
| + {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false},
|
| + // Acute accent repeated
|
| + {"xn--a-xbba.com", L"a\x0301\x0301.com", false},
|
| + // 'a' with acute accent + another acute accent
|
| + {"xn--1ca20i.com", L"\x00e1\x0301.com", false},
|
| +
|
| + // Mixed script confusable
|
| + // google with Armenian Small Letter Oh(U+0585)
|
| + {"xn--gogle-lkg.com", L"g\x0585ogle.com", false},
|
| + // Hiragana HE(U+3078) mixed with Katakana
|
| + {"xn--49jxi3as0d0fpc.com",
|
| + L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false},
|
| + // U+30FC + Han
|
| + {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false},
|
| + // Han + U+30FC + Han
|
| + {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false},
|
| + // Latin + U+30FC + Latin
|
| + {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false},
|
| + // Latin + U+30FB + Latin
|
| + {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false},
|
| + // U+30FB + Latin
|
| + {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false},
|
| +
|
| + // Mixed digits: the first two will also fail mixed script test
|
| + // Latin + ASCII digit + Deva digit
|
| + {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false},
|
| + // Latin + Deva digit + Beng digit
|
| + {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false},
|
| + // ASCII digit + Deva digit
|
| + {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false},
|
| + // Deva digit + Beng digit
|
| + {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false},
|
| + // U+4E00 (CJK Ideograph One) is not a digit
|
| + {"xn--d12-s18d.cn", L"d12\x4e00.cn", true},
|
| // One that's really long that will force a buffer realloc
|
| {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
|
| - "aaaaaaa",
|
| + "aaaaaaa",
|
| L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
|
| - L"aaaaaaaa",
|
| - {true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true, true, true, true, true,
|
| - true}},
|
| - // Test cases for characters we blacklisted although allowed in IDN.
|
| - // Embedded spaces will be turned to %20 in the display.
|
| - // TODO(jungshik): We need to have more cases. This is a typical
|
| - // data-driven trap. The following test cases need to be separated
|
| - // and tested only for a couple of languages.
|
| - {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false}},
|
| - {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - }},
|
| - {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - }},
|
| - {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - }},
|
| - {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - }},
|
| + L"aaaaaaaa",
|
| + true},
|
| +
|
| + // Not allowed; characters outside [:Identifier_Status=Allowed:]
|
| + // Limited Use Scripts: UTS 31 Table 7.
|
| + // Vai
|
| + {"xn--sn8a.com", L"\xa50b.com", false},
|
| + // 'CARD' look-alike in Cherokee
|
| + {"xn--58db0a9q.com", L"\x13df\x13aa\x13a1\x13a0.com", false},
|
| + // Scripts excluded from Identifiers: UTS 31 Table 4
|
| + // Coptic
|
| + {"xn--5ya.com", L"\x03e7.com", false},
|
| + // Old Italic
|
| + {"xn--097cc.com", L"\U00010300\U00010301.com", false},
|
| +
|
| + // U+115F (Hangul Filler)
|
| + {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false},
|
| + {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false},
|
| + // Latin small capital w
|
| + {"xn--hardare-l41c.com", L"hard\x1d21" L"are.com", false},
|
| + // Minus Sign(U+2212)
|
| + {"xn--t9g238xc2a.jp", L"\x65e5\x2212\x672c.jp", false},
|
| + // Latin Small Letter Script G
|
| + {"xn--0naa.com", L"\x0261\x0261.com", false},
|
| + // Hangul Jamo(U+11xx)
|
| + {"xn--0pdc3b.com", L"\x1102\x1103\x1110.com", false},
|
| + // degree sign
|
| + {"xn--36c-tfa.com", L"36\x00b0" L"c.com", false},
|
| + // Diaeresis (U+00A8)
|
| + {"xn--5free-9ga.com", L"5free\x00a8.com", false},
|
| + // Hebrew points (U+05B0, U+05B6)
|
| + {"xn--7cbl2kc2a.com", L"\x05e1\x05b6\x05e7\x05b0\x05e1.com", false},
|
| + // Danda(U+0964)
|
| + {"xn--81bp1b6ch8s.com", L"\x0924\x093f\x091c\x0964\x0930\x0940.com", false},
|
| + // Small letter script G(U+0261)
|
| + {"xn--oogle-qmc.com", L"\x0261oogle.com", false},
|
| + // Small Katakana Extension(U+31F1)
|
| + {"xn--wlk.com", L"\x31f1.com", false},
|
| + // Heart symbol
|
| + {"xn--ab-u0x.com", L"ab\x2665.com", false},
|
| + // Emoji
|
| + {"xn--vi8hiv.xyz", L"\U0001f355\U0001f4a9.xyz", false},
|
| + // Registered trade mark
|
| + {"xn--egistered-fna.com", L"\x00ae" L"egistered.com", false},
|
| + // Latin Letter Retroflex Click
|
| + {"xn--registered-25c.com", L"registered\x01c3.com", false},
|
| + // ASCII '!' not allowed in IDN
|
| + {"xn--!-257eu42c.kr", L"\xc548\xb155!.kr", false},
|
| + // 'GOOGLE' in IPA extension
|
| + {"xn--1naa7pn51hcbaa.com",
|
| + L"\x0262\x1d0f\x1d0f\x0262\x029f\x1d07.com", false},
|
| // Padlock icon spoof.
|
| - {"xn--google-hj64e", L"\U0001f512google.com",
|
| - {false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - }},
|
| - // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist
|
| - // all strings with the surrogate '\xdd12'.
|
| - {"xn--fk9c.com", L"\U00010912.com",
|
| - {true, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - }},
|
| -#if 0
|
| - // These two cases are special. We need a separate test.
|
| - // U+3000 and U+3002 are normalized to ASCII space and dot.
|
| - {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
|
| - {false, false, true, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, true, false, false,
|
| - true}},
|
| - {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
|
| - {false, false, true, false, false,
|
| - false, false, false, false, false,
|
| - false, false, false, false, false,
|
| - false, false, true, false, false,
|
| - true}},
|
| -#endif
|
| + {"xn--google-hj64e", L"\U0001f512google.com", false},
|
| +
|
| + // Custom black list
|
| + // Combining Long Solidus Overlay
|
| + {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", false},
|
| + // Hyphenation Point instead of Katakana Middle dot
|
| + {"xn--svgy16dha.jp", L"\x30a1\x2027\x30a3.jp", false},
|
| + // Gershayim with other Hebrew characters is allowed.
|
| + {"xn--5db6bh9b.il", L"\x05e9\x05d1\x05f4\x05e6.il", true},
|
| + // Hebrew Gershayim with Latin is disallowed.
|
| + {"xn--ab-yod.com", L"a\x05f4" L"b.com", false},
|
| + // Hebrew Gershayim with Arabic is disallowed.
|
| + {"xn--5eb7h.eg", L"\x0628\x05f4.eg", false},
|
| +
|
| + // Custom dangerous patterns
|
| + // Two Katakana-Hiragana combining mark in a row
|
| + {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", false},
|
| + // Katakana Letter No not enclosed by {Han,Hiragana,Katakana}.
|
| + {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", false},
|
| + // TODO(jshin): Review the danger of allowing the following two.
|
| + // Katakana 'No' (U+30CE) by itself is allowed.
|
| + {"xn--ldk.jp", L"\x30ce.jp", true},
|
| + // Hebrew Gershayim used by itself is allowed.
|
| + {"xn--5eb.il", L"\x05f4.il", true},
|
| +
|
| + // 4 Deviation characters between IDNA 2003 and IDNA 2008
|
| + // When entered in Unicode, the first two are mapped to 'ss' and Greek sigma
|
| + // and the latter two are mapped away. However, the punycode form should
|
| + // remain in punycode.
|
| + // U+00DF(sharp-s)
|
| + {"xn--fu-hia.de", L"fu\x00df.de", false},
|
| + // U+03C2(final-sigma)
|
| + {"xn--mxac2c.gr", L"\x03b1\x03b2\x03c2.gr", false},
|
| + // U+200C(ZWNJ)
|
| + {"xn--h2by8byc123p.in", L"\x0924\x094d\x200c\x0930\x093f.in", false},
|
| + // U+200D(ZWJ)
|
| + {"xn--11b6iy14e.in", L"\x0915\x094d\x200d.in", false},
|
| +
|
| + // Math Monospace Small A. When entered in Unicode, it's canonicalized to
|
| + // 'a'. The punycode form should remain in punycode.
|
| + {"xn--bc-9x80a.xyz", L"\U0001d68a" L"bc.xyz", false},
|
| + // Math Sans Bold Capital Alpha
|
| + {"xn--bc-rg90a.xyz", L"\U0001d756" L"bc.xyz", false},
|
| + // U+3000 is canonicalized to a space(U+0020), but the punycode form
|
| + // should remain in punycode.
|
| + {"xn--p6j412gn7f.cn", L"\x4e2d\x56fd\x3000", false},
|
| + // U+3002 is canonicalized to ASCII fullstop(U+002E), but the punycode form
|
| + // should remain in punycode.
|
| + {"xn--r6j012gn7f.cn", L"\x4e2d\x56fd\x3002", false},
|
| + // Invalid punycode
|
| + // Has a codepoint beyond U+10FFFF.
|
| + {"xn--krank-kg706554a", nullptr, false},
|
| + // '?' in punycode.
|
| + {"xn--hello?world.com", nullptr, false},
|
| +
|
| + // Not allowed in UTS46/IDNA 2008
|
| + // Georgian Capital Letter(U+10BD)
|
| + {"xn--1nd.com", L"\x10bd.com", false},
|
| + // 3rd and 4th characters are '-'.
|
| + {"xn-----8kci4dhsd", L"\x0440\x0443--\x0430\x0432\x0442\x043e", false},
|
| + // Leading combining mark
|
| + {"xn--72b.com", L"\x093e.com", false},
|
| + // BiDi check per IDNA 2008/UTS 46
|
| + // Cannot start with AN (Arabic-Indic Number)
|
| + {"xn--8hbae.eg", L"\x0662\x0660\x0660.eg", false},
|
| + // Cannot start with an RTL character and end with an LTR character
|
| + {"xn--x-ymcov.eg", L"\x062c\x0627\x0631x.eg", false},
|
| + // Can start with an RTL character and end with EN (European Number)
|
| + {"xn--2-ymcov.eg", L"\x062c\x0627\x0631" L"2.eg", true},
|
| + // Can start with a RTL and end with AN
|
| + {"xn--mgbjq0r.eg", L"\x062c\x0627\x0631\x0662.eg", true},
|
| };
|
|
|
| struct AdjustOffsetCase {
|
| @@ -373,24 +352,12 @@ struct AdjustOffsetCase {
|
| struct UrlTestData {
|
| const char* const description;
|
| const char* const input;
|
| - const char* const languages;
|
| FormatUrlTypes format_types;
|
| net::UnescapeRule::Type escape_rules;
|
| const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily.
|
| size_t prefix_len;
|
| };
|
|
|
| -// A helper for IDN*{Fast,Slow}.
|
| -// Append "::<language list>" to |expected| and |actual| to make it
|
| -// easy to tell which sub-case fails without debugging.
|
| -void AppendLanguagesToOutputs(const char* languages,
|
| - base::string16* expected,
|
| - base::string16* actual) {
|
| - base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
|
| - expected->append(to_append);
|
| - actual->append(to_append);
|
| -}
|
| -
|
| // A pair of helpers for the FormatUrlWithOffsets() test.
|
| void VerboseExpect(size_t expected,
|
| size_t actual,
|
| @@ -402,7 +369,6 @@ void VerboseExpect(size_t expected,
|
| }
|
|
|
| void CheckAdjustedOffsets(const std::string& url_string,
|
| - const std::string& languages,
|
| FormatUrlTypes format_types,
|
| net::UnescapeRule::Type unescape_rules,
|
| const size_t* output_offsets) {
|
| @@ -413,7 +379,7 @@ void CheckAdjustedOffsets(const std::string& url_string,
|
| offsets.push_back(i);
|
| offsets.push_back(500000); // Something larger than any input length.
|
| offsets.push_back(std::string::npos);
|
| - base::string16 formatted_url = FormatUrlWithOffsets(url, languages,
|
| + base::string16 formatted_url = FormatUrlWithOffsets(url, std::string(),
|
| format_types, unescape_rules, NULL, NULL, &offsets);
|
| for (size_t i = 0; i < url_length; ++i)
|
| VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
|
| @@ -425,113 +391,67 @@ void CheckAdjustedOffsets(const std::string& url_string,
|
| std::string::npos, formatted_url);
|
| }
|
|
|
| -TEST(UrlFormatterTest, IDNToUnicodeFast) {
|
| - for (size_t i = 0; i < arraysize(idn_cases); i++) {
|
| - for (size_t j = 0; j < arraysize(kLanguages); j++) {
|
| - // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
|
| - if (j == 3 || j == 17 || j == 18)
|
| - continue;
|
| - base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
|
| - base::string16 expected(idn_cases[i].unicode_allowed[j] ?
|
| - WideToUTF16(idn_cases[i].unicode_output) :
|
| - ASCIIToUTF16(idn_cases[i].input));
|
| - AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
|
| - EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
|
| - << "\", languages: \"" << kLanguages[j]
|
| - << "\"";
|
| - }
|
| - }
|
| -}
|
| -
|
| -TEST(UrlFormatterTest, IDNToUnicodeSlow) {
|
| +TEST(UrlFormatterTest, IDNToUnicode) {
|
| for (size_t i = 0; i < arraysize(idn_cases); i++) {
|
| - for (size_t j = 0; j < arraysize(kLanguages); j++) {
|
| - // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
|
| - if (!(j == 3 || j == 17 || j == 18))
|
| - continue;
|
| - base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
|
| - base::string16 expected(idn_cases[i].unicode_allowed[j] ?
|
| - WideToUTF16(idn_cases[i].unicode_output) :
|
| - ASCIIToUTF16(idn_cases[i].input));
|
| - AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
|
| - EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
|
| - << "\", languages: \"" << kLanguages[j]
|
| - << "\"";
|
| - }
|
| - }
|
| -}
|
| -
|
| -// ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and
|
| -// te), which was causing a crash (See http://crbug.com/510551). This may be an
|
| -// icu bug, but regardless, that should not cause a crash.
|
| -TEST(UrlFormatterTest, IDNToUnicodeNeverCrashes) {
|
| - for (char c1 = 'a'; c1 <= 'z'; c1++) {
|
| - for (char c2 = 'a'; c2 <= 'z'; c2++) {
|
| - std::string lang = base::StringPrintf("%c%c", c1, c2);
|
| - base::string16 output(IDNToUnicode("xn--74h", lang));
|
| - }
|
| + base::string16 output(IDNToUnicode(idn_cases[i].input, std::string()));
|
| + base::string16 expected(idn_cases[i].unicode_allowed
|
| + ? WideToUTF16(idn_cases[i].unicode_output)
|
| + : ASCIIToUTF16(idn_cases[i].input));
|
| + EXPECT_EQ(expected, output) << "input # " << i << ": \""
|
| + << idn_cases[i].input << "\"";
|
| }
|
| }
|
|
|
| TEST(UrlFormatterTest, FormatUrl) {
|
| FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
|
| const UrlTestData tests[] = {
|
| - {"Empty URL", "", "", default_format_type, net::UnescapeRule::NORMAL, L"",
|
| + {"Empty URL", "", default_format_type, net::UnescapeRule::NORMAL, L"",
|
| 0},
|
|
|
| - {"Simple URL", "http://www.google.com/", "", default_format_type,
|
| + {"Simple URL", "http://www.google.com/", default_format_type,
|
| net::UnescapeRule::NORMAL, L"http://www.google.com/", 7},
|
|
|
| {"With a port number and a reference",
|
| - "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
|
| + "http://www.google.com:8080/#\xE3\x82\xB0", default_format_type,
|
| net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7},
|
|
|
| // -------- IDN tests --------
|
| - {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", "ja",
|
| + {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp",
|
| default_format_type, net::UnescapeRule::NORMAL,
|
| L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
|
|
|
| - {"Japanese IDN with en", "http://xn--l8jvb1ey91xtjb.jp", "en",
|
| - default_format_type, net::UnescapeRule::NORMAL,
|
| - L"http://xn--l8jvb1ey91xtjb.jp/", 7},
|
| -
|
| - {"Japanese IDN without any languages", "http://xn--l8jvb1ey91xtjb.jp", "",
|
| - default_format_type, net::UnescapeRule::NORMAL,
|
| - // Single script is safe for empty languages.
|
| - L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
|
| -
|
| - {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja",
|
| + {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp",
|
| default_format_type, net::UnescapeRule::NORMAL,
|
| // GURL doesn't assume an email address's domain part as a host name.
|
| L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
|
|
|
| {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys",
|
| - "ja", default_format_type, net::UnescapeRule::NORMAL,
|
| + default_format_type, net::UnescapeRule::NORMAL,
|
| L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
|
|
|
| - {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja",
|
| + {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys",
|
| default_format_type, net::UnescapeRule::NORMAL,
|
| L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
|
|
|
| // -------- omit_username_password flag tests --------
|
| {"With username and password, omit_username_password=false",
|
| - "http://user:passwd@example.com/foo", "", kFormatUrlOmitNothing,
|
| + "http://user:passwd@example.com/foo", kFormatUrlOmitNothing,
|
| net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19},
|
|
|
| {"With username and password, omit_username_password=true",
|
| - "http://user:passwd@example.com/foo", "", default_format_type,
|
| + "http://user:passwd@example.com/foo", default_format_type,
|
| net::UnescapeRule::NORMAL, L"http://example.com/foo", 7},
|
|
|
| - {"With username and no password", "http://user@example.com/foo", "",
|
| + {"With username and no password", "http://user@example.com/foo",
|
| default_format_type, net::UnescapeRule::NORMAL,
|
| L"http://example.com/foo", 7},
|
|
|
| - {"Just '@' without username and password", "http://@example.com/foo", "",
|
| + {"Just '@' without username and password", "http://@example.com/foo",
|
| default_format_type, net::UnescapeRule::NORMAL,
|
| L"http://example.com/foo", 7},
|
|
|
| // GURL doesn't think local-part of an email address is username for URL.
|
| - {"mailto:, omit_username_password=true", "mailto:foo@example.com", "",
|
| + {"mailto:, omit_username_password=true", "mailto:foo@example.com",
|
| default_format_type, net::UnescapeRule::NORMAL,
|
| L"mailto:foo@example.com", 7},
|
|
|
| @@ -540,9 +460,9 @@ TEST(UrlFormatterTest, FormatUrl) {
|
| "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
|
| "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
|
| "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
|
| - "en", default_format_type, net::UnescapeRule::NONE,
|
| + default_format_type, net::UnescapeRule::NONE,
|
| // GURL parses %-encoded hostnames into Punycode.
|
| - L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
|
| + L"http://\x30B0\x30FC\x30B0\x30EB.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
|
| L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
|
| 7},
|
|
|
| @@ -550,91 +470,91 @@ TEST(UrlFormatterTest, FormatUrl) {
|
| "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
|
| "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
|
| "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
|
| - "en", default_format_type, net::UnescapeRule::NORMAL,
|
| - L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
|
| + default_format_type, net::UnescapeRule::NORMAL,
|
| + L"http://\x30B0\x30FC\x30B0\x30EB.jp/\x30B0\x30FC\x30B0\x30EB"
|
| L"?q=\x30B0\x30FC\x30B0\x30EB",
|
| 7},
|
|
|
| {"Unescape normally with BiDi control character",
|
| - "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en",
|
| + "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy",
|
| default_format_type, net::UnescapeRule::NORMAL,
|
| L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
|
|
|
| {"Unescape normally including unescape spaces",
|
| - "http://www.google.com/search?q=Hello%20World", "en",
|
| + "http://www.google.com/search?q=Hello%20World",
|
| default_format_type, net::UnescapeRule::SPACES,
|
| L"http://www.google.com/search?q=Hello World", 7},
|
|
|
| /*
|
| {"unescape=true with some special characters",
|
| - "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
|
| + "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z",
|
| kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
|
| L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
|
| */
|
| // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
|
|
|
| // -------- omit http: --------
|
| - {"omit http with user name", "http://user@example.com/foo", "",
|
| + {"omit http with user name", "http://user@example.com/foo",
|
| kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0},
|
|
|
| - {"omit http", "http://www.google.com/", "en", kFormatUrlOmitHTTP,
|
| + {"omit http", "http://www.google.com/", kFormatUrlOmitHTTP,
|
| net::UnescapeRule::NORMAL, L"www.google.com/", 0},
|
|
|
| - {"omit http with https", "https://www.google.com/", "en",
|
| + {"omit http with https", "https://www.google.com/",
|
| kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL,
|
| L"https://www.google.com/", 8},
|
|
|
| - {"omit http starts with ftp.", "http://ftp.google.com/", "en",
|
| + {"omit http starts with ftp.", "http://ftp.google.com/",
|
| kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/",
|
| 7},
|
|
|
| // -------- omit trailing slash on bare hostname --------
|
| - {"omit slash when it's the entire path", "http://www.google.com/", "en",
|
| + {"omit slash when it's the entire path", "http://www.google.com/",
|
| kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
|
| L"http://www.google.com", 7},
|
| - {"omit slash when there's a ref", "http://www.google.com/#ref", "en",
|
| + {"omit slash when there's a ref", "http://www.google.com/#ref",
|
| kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
|
| L"http://www.google.com/#ref", 7},
|
| - {"omit slash when there's a query", "http://www.google.com/?", "en",
|
| + {"omit slash when there's a query", "http://www.google.com/?",
|
| kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
|
| L"http://www.google.com/?", 7},
|
| {"omit slash when it's not the entire path", "http://www.google.com/foo",
|
| - "en", kFormatUrlOmitTrailingSlashOnBareHostname,
|
| + kFormatUrlOmitTrailingSlashOnBareHostname,
|
| net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7},
|
| - {"omit slash for nonstandard URLs", "data:/", "en",
|
| + {"omit slash for nonstandard URLs", "data:/",
|
| kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
|
| L"data:/", 5},
|
| - {"omit slash for file URLs", "file:///", "en",
|
| + {"omit slash for file URLs", "file:///",
|
| kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
|
| L"file:///", 7},
|
|
|
| // -------- view-source: --------
|
| - {"view-source", "view-source:http://xn--qcka1pmc.jp/", "ja",
|
| + {"view-source", "view-source:http://xn--qcka1pmc.jp/",
|
| default_format_type, net::UnescapeRule::NORMAL,
|
| L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19},
|
|
|
| {"view-source of view-source",
|
| - "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
|
| + "view-source:view-source:http://xn--qcka1pmc.jp/",
|
| default_format_type, net::UnescapeRule::NORMAL,
|
| L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
|
|
|
| // view-source should omit http and trailing slash where non-view-source
|
| // would.
|
| - {"view-source omit http", "view-source:http://a.b/c", "en",
|
| + {"view-source omit http", "view-source:http://a.b/c",
|
| kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12},
|
| {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c",
|
| - "en", kFormatUrlOmitAll, net::UnescapeRule::NORMAL,
|
| + kFormatUrlOmitAll, net::UnescapeRule::NORMAL,
|
| L"view-source:http://ftp.b/c", 19},
|
| {"view-source omit slash when it's the entire path",
|
| - "view-source:http://a.b/", "en", kFormatUrlOmitAll,
|
| + "view-source:http://a.b/", kFormatUrlOmitAll,
|
| net::UnescapeRule::NORMAL, L"view-source:a.b", 12},
|
| };
|
|
|
| for (size_t i = 0; i < arraysize(tests); ++i) {
|
| size_t prefix_len;
|
| base::string16 formatted = FormatUrl(
|
| - GURL(tests[i].input), tests[i].languages, tests[i].format_types,
|
| - tests[i].escape_rules, NULL, &prefix_len, NULL);
|
| + GURL(tests[i].input), std::string(), tests[i].format_types,
|
| + tests[i].escape_rules, NULL, &prefix_len, NULL);
|
| EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
|
| EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
|
| }
|
| @@ -646,8 +566,8 @@ TEST(UrlFormatterTest, FormatUrlParsed) {
|
| base::string16 formatted =
|
| FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
|
| "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
|
| - "ja", kFormatUrlOmitNothing, net::UnescapeRule::NONE, &parsed,
|
| - NULL, NULL);
|
| + std::string(), kFormatUrlOmitNothing, net::UnescapeRule::NONE,
|
| + &parsed, NULL, NULL);
|
| EXPECT_EQ(WideToUTF16(
|
| L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
|
| L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
|
| @@ -862,14 +782,14 @@ TEST(UrlFormatterTest, FormatUrlRoundTripQueryEscaped) {
|
| }
|
|
|
| TEST(UrlFormatterTest, FormatUrlWithOffsets) {
|
| - CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing,
|
| + CheckAdjustedOffsets(std::string(), kFormatUrlOmitNothing,
|
| net::UnescapeRule::NORMAL, NULL);
|
|
|
| const size_t basic_offsets[] = {
|
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
|
| 21, 22, 23, 24, 25
|
| };
|
| - CheckAdjustedOffsets("http://www.google.com/foo/", "en",
|
| + CheckAdjustedOffsets("http://www.google.com/foo/",
|
| kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
|
| basic_offsets);
|
|
|
| @@ -877,7 +797,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) {
|
| 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7,
|
| 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
| };
|
| - CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en",
|
| + CheckAdjustedOffsets("http://foo:bar@www.google.com/",
|
| kFormatUrlOmitUsernamePassword,
|
| net::UnescapeRule::NORMAL, omit_auth_offsets_1);
|
|
|
| @@ -885,7 +805,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) {
|
| 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14,
|
| 15, 16, 17, 18, 19, 20, 21
|
| };
|
| - CheckAdjustedOffsets("http://foo@www.google.com/", "en",
|
| + CheckAdjustedOffsets("http://foo@www.google.com/",
|
| kFormatUrlOmitUsernamePassword,
|
| net::UnescapeRule::NORMAL, omit_auth_offsets_2);
|
|
|
| @@ -896,7 +816,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) {
|
| 30, 31
|
| };
|
| // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com".
|
| - CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en",
|
| + CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/",
|
| kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
|
| dont_omit_auth_offsets);
|
|
|
| @@ -904,7 +824,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) {
|
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos,
|
| kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
|
| };
|
| - CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en",
|
| + CheckAdjustedOffsets("view-source:http://foo@www.google.com/",
|
| kFormatUrlOmitUsernamePassword,
|
| net::UnescapeRule::NORMAL, view_source_offsets);
|
|
|
| @@ -914,7 +834,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) {
|
| 13, 14, 15, 16, 17, 18, 19
|
| };
|
| // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/".
|
| - CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja",
|
| + CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/",
|
| kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
|
| idn_hostname_offsets_1);
|
|
|
| @@ -927,7 +847,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) {
|
| // Convert punycode to
|
| // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/".
|
| CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/",
|
| - "zh-CN", kFormatUrlOmitNothing,
|
| + kFormatUrlOmitNothing,
|
| net::UnescapeRule::NORMAL, idn_hostname_offsets_2);
|
|
|
| const size_t unescape_offsets[] = {
|
| @@ -940,7 +860,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) {
|
| // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB".
|
| CheckAdjustedOffsets(
|
| "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
|
| - "en", kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets);
|
| + kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets);
|
|
|
| const size_t ref_offsets[] = {
|
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
|
| @@ -949,20 +869,20 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) {
|
| };
|
| // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z".
|
| CheckAdjustedOffsets(
|
| - "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en",
|
| + "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z",
|
| kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets);
|
|
|
| const size_t omit_http_offsets[] = {
|
| 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
|
| 10, 11, 12, 13, 14
|
| };
|
| - CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP,
|
| + CheckAdjustedOffsets("http://www.google.com/", kFormatUrlOmitHTTP,
|
| net::UnescapeRule::NORMAL, omit_http_offsets);
|
|
|
| const size_t omit_http_start_with_ftp_offsets[] = {
|
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
| };
|
| - CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
|
| + CheckAdjustedOffsets("http://ftp.google.com/", kFormatUrlOmitHTTP,
|
| net::UnescapeRule::NORMAL,
|
| omit_http_start_with_ftp_offsets);
|
|
|
| @@ -970,7 +890,7 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) {
|
| 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
|
| 0, 1, 2, 3, 4, 5, 6, 7
|
| };
|
| - CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll,
|
| + CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll,
|
| net::UnescapeRule::NORMAL, omit_all_offsets);
|
| }
|
|
|
|
|